[v2,16/37] drm/i915/lmem: support pread

Message ID 20190627205633.1143-17-matthew.auld@intel.com (mailing list archive)
State New, archived
Series Introduce memory region concept (including device local memory)

Commit Message

Matthew Auld June 27, 2019, 8:56 p.m. UTC
We need to add support for pread'ing an LMEM object.

Signed-off-by: Matthew Auld <matthew.auld@intel.com>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Cc: Abdiel Janulgue <abdiel.janulgue@linux.intel.com>
---
 .../gpu/drm/i915/gem/i915_gem_object_types.h  |  2 +
 drivers/gpu/drm/i915/i915_gem.c               |  6 ++
 drivers/gpu/drm/i915/intel_region_lmem.c      | 76 +++++++++++++++++++
 3 files changed, 84 insertions(+)
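
For context, the uapi being wired up here is the long-standing
DRM_IOCTL_I915_GEM_PREAD. From userspace the ioctl is driven roughly as
follows (a minimal sketch with error handling elided; the struct layout
comes from the uapi header i915_drm.h):

	#include <stdint.h>
	#include <string.h>
	#include <sys/ioctl.h>
	#include <drm/i915_drm.h>

	static int gem_pread(int fd, uint32_t handle, uint64_t offset,
			     void *dst, uint64_t size)
	{
		struct drm_i915_gem_pread pread;

		memset(&pread, 0, sizeof(pread));
		pread.handle = handle;           /* GEM handle of the LMEM object */
		pread.offset = offset;           /* byte offset into the object */
		pread.size = size;               /* number of bytes to read */
		pread.data_ptr = (uintptr_t)dst; /* destination buffer in userspace */

		return ioctl(fd, DRM_IOCTL_I915_GEM_PREAD, &pread);
	}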

Comments

Chris Wilson June 27, 2019, 11:50 p.m. UTC | #1
Quoting Matthew Auld (2019-06-27 21:56:12)
> We need to add support for pread'ing an LMEM object.
> 
> Signed-off-by: Matthew Auld <matthew.auld@intel.com>
> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
> Cc: Abdiel Janulgue <abdiel.janulgue@linux.intel.com>
> ---
>  .../gpu/drm/i915/gem/i915_gem_object_types.h  |  2 +
>  drivers/gpu/drm/i915/i915_gem.c               |  6 ++
>  drivers/gpu/drm/i915/intel_region_lmem.c      | 76 +++++++++++++++++++
>  3 files changed, 84 insertions(+)
> 
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
> index 80ff5ad9bc07..8cdee185251a 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
> @@ -52,6 +52,8 @@ struct drm_i915_gem_object_ops {
>         void (*truncate)(struct drm_i915_gem_object *obj);
>         void (*writeback)(struct drm_i915_gem_object *obj);
>  
> +       int (*pread)(struct drm_i915_gem_object *,
> +                    const struct drm_i915_gem_pread *arg);
>         int (*pwrite)(struct drm_i915_gem_object *obj,
>                       const struct drm_i915_gem_pwrite *arg);
>  
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 85677ae89849..4ba386ab35e7 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -463,6 +463,12 @@ i915_gem_pread_ioctl(struct drm_device *dev, void *data,
>  
>         trace_i915_gem_object_pread(obj, args->offset, args->size);
>  
> +       ret = -ENODEV;
> +       if (obj->ops->pread)
> +               ret = obj->ops->pread(obj, args);
> +       if (ret != -ENODEV)
> +               goto out;
> +
>         ret = i915_gem_object_wait(obj,
>                                    I915_WAIT_INTERRUPTIBLE,
>                                    MAX_SCHEDULE_TIMEOUT);
> diff --git a/drivers/gpu/drm/i915/intel_region_lmem.c b/drivers/gpu/drm/i915/intel_region_lmem.c
> index 701bcac3479e..54b2c7bf177d 100644
> --- a/drivers/gpu/drm/i915/intel_region_lmem.c
> +++ b/drivers/gpu/drm/i915/intel_region_lmem.c
> @@ -7,10 +7,86 @@
>  #include "intel_memory_region.h"
>  #include "intel_region_lmem.h"
>  
> +static int lmem_pread(struct drm_i915_gem_object *obj,
> +                     const struct drm_i915_gem_pread *arg)
> +{
> +       struct drm_i915_private *i915 = to_i915(obj->base.dev);
> +       struct intel_runtime_pm *rpm = &i915->runtime_pm;
> +       intel_wakeref_t wakeref;
> +       struct dma_fence *fence;
> +       char __user *user_data;
> +       unsigned int offset;
> +       unsigned long idx;
> +       u64 remain;
> +       int ret;
> +
> +       ret = i915_gem_object_pin_pages(obj);
> +       if (ret)
> +               return ret;
> +
> +       i915_gem_object_lock(obj);
> +       ret = i915_gem_object_set_to_wc_domain(obj, false);

You chose to opt out of the unlocked wait before the locked wait?
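
For reference, the convention being pointed at here: the existing pread
paths do an interruptible wait *without* the object lock held first (see
the i915_gem_object_wait() call left in place in the i915_gem.c hunk
above), so that a long GPU wait does not stall other threads contending
for the object lock; only then is the lock taken for the domain change,
which by that point should be close to non-blocking. A sketch of that
ordering, using only calls already present in this patch:

	/* Unlocked wait first: potentially long, but blocks nobody else. */
	ret = i915_gem_object_wait(obj,
				   I915_WAIT_INTERRUPTIBLE,
				   MAX_SCHEDULE_TIMEOUT);
	if (ret)
		goto out_unpin;

	/* The locked domain change should now be (almost) non-blocking. */
	i915_gem_object_lock(obj);
	ret = i915_gem_object_set_to_wc_domain(obj, false);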


> +       if (ret) {
> +               i915_gem_object_unlock(obj);
> +               goto out_unpin;
> +       }
> +
> +       fence = i915_gem_object_lock_fence(obj);
> +       i915_gem_object_unlock(obj);
> +       if (!fence) {
> +               ret = -ENOMEM;
> +               goto out_unpin;
> +       }
> +
> +       wakeref = intel_runtime_pm_get(rpm);

Something not mentioned so far is the story for mm->rpm.

> +       remain = arg->size;
> +       user_data = u64_to_user_ptr(arg->data_ptr);
> +       offset = offset_in_page(arg->offset);
> +       for (idx = arg->offset >> PAGE_SHIFT; remain; idx++) {
> +               unsigned long unwritten;
> +               void __iomem *vaddr;
> +               int length;
> +
> +               length = remain;
> +               if (offset + length > PAGE_SIZE)
> +                       length = PAGE_SIZE - offset;
> +
> +               vaddr = i915_gem_object_lmem_io_map_page(obj, idx);
> +               if (!vaddr) {
> +                       ret = -ENOMEM;
> +                       goto out_put;
> +               }
> +
> +               unwritten = copy_to_user(user_data,

Except this is a secret atomic section!!!

> +                                        (void __force *)vaddr + offset,
> +                                        length);
> +               io_mapping_unmap_atomic(vaddr);
> +               if (unwritten) {
> +                       ret = -EFAULT;
> +                       goto out_put;
> +               }
> +
> +               remain -= length;
> +               user_data += length;
> +               offset = 0;
> +       }
> +
> +out_put:
> +       intel_runtime_pm_put(rpm, wakeref);
> +       i915_gem_object_unlock_fence(obj, fence);
> +out_unpin:
> +       i915_gem_object_unpin_pages(obj);
> +
> +       return ret;
> +}
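
Expanding on the "secret atomic section" remark above: the page mapping
is torn down with io_mapping_unmap_atomic(), so the copy runs in atomic
context, where copy_to_user() is not allowed since it may fault and
sleep. The established i915 idiom (compare gtt_user_read() in
i915_gem.c) is a no-fault copy under the atomic mapping with a
sleepable fallback; a sketch, where mapping/base stand in for the lmem
region's io_mapping and the page's offset within it:

	vaddr = io_mapping_map_atomic_wc(mapping, base);
	/* No-fault fast path: returns non-zero instead of sleeping. */
	unwritten = __copy_to_user_inatomic(user_data,
					    (void __force *)vaddr + offset,
					    length);
	io_mapping_unmap_atomic(vaddr);
	if (unwritten) {
		/* Slow path: a regular mapping, under which faults may sleep. */
		vaddr = io_mapping_map_wc(mapping, base, PAGE_SIZE);
		unwritten = copy_to_user(user_data,
					 (void __force *)vaddr + offset,
					 length);
		io_mapping_unmap(vaddr);
	}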
> +
>  static const struct drm_i915_gem_object_ops region_lmem_obj_ops = {
>         .get_pages = i915_memory_region_get_pages_buddy,
>         .put_pages = i915_memory_region_put_pages_buddy,
>         .release = i915_gem_object_release_memory_region,
> +
> +       .pread = lmem_pread,
>  };
>  
>  static struct drm_i915_gem_object *
> -- 
> 2.20.1
Daniel Vetter July 30, 2019, 8:58 a.m. UTC | #2
On Thu, Jun 27, 2019 at 09:56:12PM +0100, Matthew Auld wrote:
> We need to add support for pread'ing an LMEM object.

Why? Usage outside of igt seems pretty dead, at least looking at iris
and anv. This was kind of a neat thing from before we realized that
doing clflush in userspace is both possible and more efficient.

Same for pwrite: iris just dropped it, and anv doesn't seem to use it.
And I thought the mesa plan is to drop the old classic driver by the
time we need lmem. It's not much, but it would allow us to drop a few
things.
-Daniel

Matthew Auld July 30, 2019, 9:25 a.m. UTC | #3
On 30/07/2019 09:58, Daniel Vetter wrote:
> On Thu, Jun 27, 2019 at 09:56:12PM +0100, Matthew Auld wrote:
>> We need to add support for pread'ing an LMEM object.
> 
> Why? Usage outside of igt seems pretty dead, at least looking at iris
> and anv. This was kind of a neat thing from before we realized that
> doing clflush in userspace is both possible and more efficient.
> 
> Same for pwrite: iris just dropped it, and anv doesn't seem to use it.
> And I thought the mesa plan is to drop the old classic driver by the
> time we need lmem. It's not much, but it would allow us to drop a few
> things.

Hmm, it was at least useful in the super early days for debugging. If
we were to drop this, what do we do with the igts? Just use mmap?
Daniel Vetter July 30, 2019, 9:50 a.m. UTC | #4
On Tue, Jul 30, 2019 at 10:25:10AM +0100, Matthew Auld wrote:
> On 30/07/2019 09:58, Daniel Vetter wrote:
> > On Thu, Jun 27, 2019 at 09:56:12PM +0100, Matthew Auld wrote:
> > > We need to add support for pread'ing an LMEM object.
> > 
> > Why? Usage outside of igt seems pretty dead, at least looking at iris
> > and anv. This was kind of a neat thing from before we realized that
> > doing clflush in userspace is both possible and more efficient.
> > 
> > Same for pwrite: iris just dropped it, and anv doesn't seem to use it.
> > And I thought the mesa plan is to drop the old classic driver by the
> > time we need lmem. It's not much, but it would allow us to drop a few
> > things.
> 
> Hmm, it was at least useful in the super early days for debugging. If
> we were to drop this, what do we do with the igts? Just use mmap?

wc mmap is probably simplest. I think we could do a compat function in
igt that does that when pwrite isn't available. We could also have it
in libdrm_intel, in case some of the UMDs have a hard time getting off
it.
-Daniel
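
A hedged sketch of such a compat helper, using raw ioctls rather than
the igt wrappers (error handling trimmed; the fallback assumes the
legacy mmap ioctl with I915_MMAP_WC still works for the object): try
the pread ioctl first, and fall back to a WC mmap plus memcpy when the
kernel rejects it.

	#include <errno.h>
	#include <stdint.h>
	#include <string.h>
	#include <sys/ioctl.h>
	#include <sys/mman.h>
	#include <drm/i915_drm.h>

	static int compat_gem_read(int fd, uint32_t handle, uint64_t obj_size,
				   uint64_t offset, void *dst, uint64_t size)
	{
		struct drm_i915_gem_pread pread = {
			.handle = handle,
			.offset = offset,
			.size = size,
			.data_ptr = (uintptr_t)dst,
		};
		struct drm_i915_gem_mmap mmap_arg = {
			.handle = handle,
			.size = obj_size,
			.flags = I915_MMAP_WC,
		};

		/* Fast path: the pread ioctl, where the kernel still supports it. */
		if (ioctl(fd, DRM_IOCTL_I915_GEM_PREAD, &pread) == 0)
			return 0;
		if (errno != ENODEV)
			return -errno;

		/* Fallback: WC mmap the whole object and copy out of it. */
		if (ioctl(fd, DRM_IOCTL_I915_GEM_MMAP, &mmap_arg))
			return -errno;

		memcpy(dst, (void *)(uintptr_t)(mmap_arg.addr_ptr + offset), size);
		munmap((void *)(uintptr_t)mmap_arg.addr_ptr, obj_size);
		return 0;
	}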
Chris Wilson July 30, 2019, 12:05 p.m. UTC | #5
Quoting Daniel Vetter (2019-07-30 09:58:22)
> On Thu, Jun 27, 2019 at 09:56:12PM +0100, Matthew Auld wrote:
> > We need to add support for pread'ing an LMEM object.
> 
> Why? Usage outside of igt seems pretty dead, at least looking at iris
> and anv. This was kind of a neat thing from before we realized that
> doing clflush in userspace is both possible and more efficient.
> 
> Same for pwrite: iris just dropped it, and anv doesn't seem to use it.
> And I thought the mesa plan is to drop the old classic driver by the
> time we need lmem. It's not much, but it would allow us to drop a few
> things.

From the opposite perspective, it should only be a wrapper around code
that is being used internally for similar transfers. (One side-effect is
that it can be used to poke more directly at those internals.) It is also
not clear what the preferred strategy will be in future, especially as
people start discussing migration-on-pagefault.

It comes down to whether the maintenance burden of maintaining a
consistent API is worth the maintenance burden of not!
-Chris
Daniel Vetter July 30, 2019, 12:42 p.m. UTC | #6
On Tue, Jul 30, 2019 at 2:05 PM Chris Wilson <chris@chris-wilson.co.uk> wrote:
> Quoting Daniel Vetter (2019-07-30 09:58:22)
> > On Thu, Jun 27, 2019 at 09:56:12PM +0100, Matthew Auld wrote:
> > > We need to add support for pread'ing an LMEM object.
> >
> > Why? Usage outside of igt seems pretty dead, at least looking at iris
> > and anv. This was kind of a neat thing from before we realized that
> > doing clflush in userspace is both possible and more efficient.
> > 
> > Same for pwrite: iris just dropped it, and anv doesn't seem to use it.
> > And I thought the mesa plan is to drop the old classic driver by the
> > time we need lmem. It's not much, but it would allow us to drop a few
> > things.
>
> From the opposite perspective, it should only be a wrapper around code
> that is being used internally for similar transfers. (One side-effect is
> that it can be used to poke more directly at those internals.) It is also
> not clear what the preferred strategy will be in future, especially as
> people start discussing migration-on-pagefault.

Hm, where are we looking at migrate-on-pagefault?

I mean aside from the entire resurrection of the mappable daemon
because apparently we can't design apertures for pci bars which are
big enough (unlike amd, which fixed this now). But that's just an
lmem->lmem migration to squeeze it into the right range (and hey, we
know how to do that, we even have the old code still).

> It comes down to whether the maintenance burden of maintaining a
> consistent API is worth the maintenance burden of not!

Yeah, it's minor, but then pwrite has some irksome corner cases (I
stumbled over the vlc wtf that originally motivated the introduction
of the pwrite hook, and the reintroduction of the page-by-page pwrite
for shmem that's not pinned). So it's not the cleanest uapi, with
almost a decade of gunk now sitting on top. And when I went looking at
iris/anv it seems like we've sunset it for good going forward. Note
I'm not going for complete removal, just not allowing it if you set
lmem as one of the placements of your bo. So pwrite into an upload
buffer in smem, mapped through the TT to the gpu, would still be fine.
Which I guess should cover all the igt pwrites for batchbuffers.
-Daniel
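
A hedged sketch of that placement-based cut-off (the helper and enum
names here are assumptions for illustration, not something this series
provides): pread would keep working for smem-only objects and be
rejected once lmem is a possible placement.

	/* In i915_gem_pread_ioctl(), before touching the backing store:
	 * hypothetical check rejecting objects that may live in lmem.
	 */
	if (i915_gem_object_placement_possible(obj, INTEL_MEMORY_LOCAL))
		return -EOPNOTSUPP;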

Patch

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
index 80ff5ad9bc07..8cdee185251a 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
@@ -52,6 +52,8 @@ struct drm_i915_gem_object_ops {
 	void (*truncate)(struct drm_i915_gem_object *obj);
 	void (*writeback)(struct drm_i915_gem_object *obj);
 
+	int (*pread)(struct drm_i915_gem_object *,
+		     const struct drm_i915_gem_pread *arg);
 	int (*pwrite)(struct drm_i915_gem_object *obj,
 		      const struct drm_i915_gem_pwrite *arg);
 
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 85677ae89849..4ba386ab35e7 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -463,6 +463,12 @@ i915_gem_pread_ioctl(struct drm_device *dev, void *data,
 
 	trace_i915_gem_object_pread(obj, args->offset, args->size);
 
+	ret = -ENODEV;
+	if (obj->ops->pread)
+		ret = obj->ops->pread(obj, args);
+	if (ret != -ENODEV)
+		goto out;
+
 	ret = i915_gem_object_wait(obj,
 				   I915_WAIT_INTERRUPTIBLE,
 				   MAX_SCHEDULE_TIMEOUT);
diff --git a/drivers/gpu/drm/i915/intel_region_lmem.c b/drivers/gpu/drm/i915/intel_region_lmem.c
index 701bcac3479e..54b2c7bf177d 100644
--- a/drivers/gpu/drm/i915/intel_region_lmem.c
+++ b/drivers/gpu/drm/i915/intel_region_lmem.c
@@ -7,10 +7,86 @@ 
 #include "intel_memory_region.h"
 #include "intel_region_lmem.h"
 
+static int lmem_pread(struct drm_i915_gem_object *obj,
+		      const struct drm_i915_gem_pread *arg)
+{
+	struct drm_i915_private *i915 = to_i915(obj->base.dev);
+	struct intel_runtime_pm *rpm = &i915->runtime_pm;
+	intel_wakeref_t wakeref;
+	struct dma_fence *fence;
+	char __user *user_data;
+	unsigned int offset;
+	unsigned long idx;
+	u64 remain;
+	int ret;
+
+	ret = i915_gem_object_pin_pages(obj);
+	if (ret)
+		return ret;
+
+	i915_gem_object_lock(obj);
+	ret = i915_gem_object_set_to_wc_domain(obj, false);
+	if (ret) {
+		i915_gem_object_unlock(obj);
+		goto out_unpin;
+	}
+
+	fence = i915_gem_object_lock_fence(obj);
+	i915_gem_object_unlock(obj);
+	if (!fence) {
+		ret = -ENOMEM;
+		goto out_unpin;
+	}
+
+	wakeref = intel_runtime_pm_get(rpm);
+
+	remain = arg->size;
+	user_data = u64_to_user_ptr(arg->data_ptr);
+	offset = offset_in_page(arg->offset);
+	for (idx = arg->offset >> PAGE_SHIFT; remain; idx++) {
+		unsigned long unwritten;
+		void __iomem *vaddr;
+		int length;
+
+		length = remain;
+		if (offset + length > PAGE_SIZE)
+			length = PAGE_SIZE - offset;
+
+		vaddr = i915_gem_object_lmem_io_map_page(obj, idx);
+		if (!vaddr) {
+			ret = -ENOMEM;
+			goto out_put;
+		}
+
+		unwritten = copy_to_user(user_data,
+					 (void __force *)vaddr + offset,
+					 length);
+		io_mapping_unmap_atomic(vaddr);
+		if (unwritten) {
+			ret = -EFAULT;
+			goto out_put;
+		}
+
+		remain -= length;
+		user_data += length;
+		offset = 0;
+	}
+
+out_put:
+	intel_runtime_pm_put(rpm, wakeref);
+	i915_gem_object_unlock_fence(obj, fence);
+out_unpin:
+	i915_gem_object_unpin_pages(obj);
+
+	return ret;
+}
+
 static const struct drm_i915_gem_object_ops region_lmem_obj_ops = {
 	.get_pages = i915_memory_region_get_pages_buddy,
 	.put_pages = i915_memory_region_put_pages_buddy,
 	.release = i915_gem_object_release_memory_region,
+
+	.pread = lmem_pread,
 };
 
 static struct drm_i915_gem_object *