Message ID | 20190627205633.1143-17-matthew.auld@intel.com
State | New, archived
Series | Introduce memory region concept (including device local memory)
Quoting Matthew Auld (2019-06-27 21:56:12)
> We need to add support for pread'ing an LMEM object.
>
> Signed-off-by: Matthew Auld <matthew.auld@intel.com>
> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
> Cc: Abdiel Janulgue <abdiel.janulgue@linux.intel.com>
[...]
> +	i915_gem_object_lock(obj);
> +	ret = i915_gem_object_set_to_wc_domain(obj, false);

You chose to opt out of the unlocked wait before the locked wait?

> +	if (ret) {
> +		i915_gem_object_unlock(obj);
> +		goto out_unpin;
> +	}
> +
> +	fence = i915_gem_object_lock_fence(obj);
> +	i915_gem_object_unlock(obj);
> +	if (!fence) {
> +		ret = -ENOMEM;
> +		goto out_unpin;
> +	}
> +
> +	wakeref = intel_runtime_pm_get(rpm);

Something not mentioned so far is the story for mm->rpm.

> +	remain = arg->size;
> +	user_data = u64_to_user_ptr(arg->data_ptr);
> +	offset = offset_in_page(arg->offset);
> +	for (idx = arg->offset >> PAGE_SHIFT; remain; idx++) {
[...]
> +		vaddr = i915_gem_object_lmem_io_map_page(obj, idx);
> +		if (!vaddr) {
> +			ret = -ENOMEM;
> +			goto out_put;
> +		}
> +
> +		unwritten = copy_to_user(user_data,

Except this is a secret atomic section!!!

> +					 (void __force *)vaddr + offset,
> +					 length);
> +		io_mapping_unmap_atomic(vaddr);
[...]
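The last remark points at a real problem in the hunk above: the per-page helper evidently returns an atomic mapping (it is paired with io_mapping_unmap_atomic()), yet copy_to_user() may fault on the user pointer and sleep inside that atomic section. A minimal sketch of one way out, assuming a non-atomic per-page helper; i915_gem_object_lmem_io_map_page_wc() is an invented name here, standing in for something built on io_mapping_map_wc()/io_mapping_unmap():

	/*
	 * Sketch only: copy one page to userspace through a non-atomic WC
	 * mapping, so that copy_to_user() is free to fault and sleep.
	 * i915_gem_object_lmem_io_map_page_wc() is an assumed helper.
	 */
	static int lmem_pread_page(struct drm_i915_gem_object *obj,
				   unsigned long idx, unsigned int offset,
				   unsigned int length, char __user *user_data)
	{
		unsigned long unwritten;
		void __iomem *vaddr;

		vaddr = i915_gem_object_lmem_io_map_page_wc(obj, idx);
		if (!vaddr)
			return -ENOMEM;

		unwritten = copy_to_user(user_data,
					 (void __force *)vaddr + offset, length);
		io_mapping_unmap(vaddr);

		return unwritten ? -EFAULT : 0;
	}

Alternatively the atomic mapping can be kept as a fast path, dropping to a non-atomic copy only when copy_to_user() under the atomic map fails, which is the pattern the shmem pread code historically used.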
On Thu, Jun 27, 2019 at 09:56:12PM +0100, Matthew Auld wrote:
> We need to add support for pread'ing an LMEM object.

Why? Usage outside from igts seems pretty dead, at least looking at iris
and anv. This was kinda a neat thing for when we didn't yet realized that
doing clflush in userspace is both possible and more efficient.

Same for pwrite, iris just dropped it, anv doesn't seem to use it. And I
thought mesa plan is to drop the old classic driver for when we'll need
lmem. It's not much, but would allow us to drop a few things.
-Daniel
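Daniel's "clflush in userspace" point refers to the Mesa drivers flushing dirty CPU cachelines themselves after writing through a cached mmap, rather than round-tripping through pwrite/set_domain. A minimal sketch of the technique on x86, purely illustrative and not Mesa's actual code:

	/*
	 * Flush a CPU-written range from the caches so a subsequent
	 * uncached/GPU reader sees the data. Illustrative only.
	 */
	#include <stdint.h>
	#include <stddef.h>
	#include <emmintrin.h>

	#define CACHELINE 64

	static void clflush_range(void *addr, size_t len)
	{
		char *p = (char *)((uintptr_t)addr & ~(uintptr_t)(CACHELINE - 1));
		char *end = (char *)addr + len;

		_mm_mfence();			/* order prior stores before the flush */
		for (; p < end; p += CACHELINE)
			_mm_clflush(p);
		_mm_mfence();			/* make the flushes globally visible */
	}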
On 30/07/2019 09:58, Daniel Vetter wrote:
> On Thu, Jun 27, 2019 at 09:56:12PM +0100, Matthew Auld wrote:
>> We need to add support for pread'ing an LMEM object.
>
> Why? Usage outside from igts seems pretty dead, at least looking at iris
> and anv. This was kinda a neat thing for when we didn't yet realized that
> doing clflush in userspace is both possible and more efficient.
>
> Same for pwrite, iris just dropped it, anv doesn't seem to use it. And I
> thought mesa plan is to drop the old classic driver for when we'll need
> lmem. It's not much, but would allow us to drop a few things.

Hmm, it was at least useful in the super early days for debugging. If we
were to drop this what do we do with the igts? Just use mmap?
On Tue, Jul 30, 2019 at 10:25:10AM +0100, Matthew Auld wrote:
> On 30/07/2019 09:58, Daniel Vetter wrote:
> > On Thu, Jun 27, 2019 at 09:56:12PM +0100, Matthew Auld wrote:
> > > We need to add support for pread'ing an LMEM object.
> >
> > Why? Usage outside from igts seems pretty dead, at least looking at iris
> > and anv. This was kinda a neat thing for when we didn't yet realized that
> > doing clflush in userspace is both possible and more efficient.
> >
> > Same for pwrite, iris just dropped it, anv doesn't seem to use it. And I
> > thought mesa plan is to drop the old classic driver for when we'll need
> > lmem. It's not much, but would allow us to drop a few things.
>
> Hmm, it was at least useful in the super early days for debugging. If we
> were to drop this what do we do with the igts? Just use mmap?

wc mmap is probably simplest. I think we could do a compat function in igt
that does that when pwrite isn't available. Could also have that in
libdrm_intel, in case some of the UMDs have a hard time getting off it.
-Daniel
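A compat helper along those lines could try the pread ioctl first and fall back to a WC mmap plus memcpy. The sketch below is illustrative only: the function name is invented, EINTR restarting is omitted, and the exact error a kernel without pread support would return (ENODEV or EOPNOTSUPP here) is an assumption. On hardware where the legacy mmap ioctl is itself unavailable, the same fallback would have to go through an mmap-offset style interface instead.

	#include <stdint.h>
	#include <string.h>
	#include <errno.h>
	#include <sys/ioctl.h>
	#include <sys/mman.h>
	#include <drm/i915_drm.h>

	/* Invented helper name; not the actual igt/libdrm implementation. */
	static int compat_gem_pread(int fd, uint32_t handle, uint64_t offset,
				    void *dst, uint64_t size)
	{
		struct drm_i915_gem_pread pread_arg = {
			.handle = handle,
			.offset = offset,
			.size = size,
			.data_ptr = (uintptr_t)dst,
		};
		struct drm_i915_gem_mmap mmap_arg = {
			.handle = handle,
			.offset = offset,
			.size = size,
			.flags = I915_MMAP_WC,
		};
		void *ptr;

		if (ioctl(fd, DRM_IOCTL_I915_GEM_PREAD, &pread_arg) == 0)
			return 0;
		/* Assumed "pread not supported" errors; anything else is fatal. */
		if (errno != ENODEV && errno != EOPNOTSUPP)
			return -errno;

		/* Fallback: WC CPU mapping of the object, then a plain memcpy. */
		if (ioctl(fd, DRM_IOCTL_I915_GEM_MMAP, &mmap_arg))
			return -errno;

		ptr = (void *)(uintptr_t)mmap_arg.addr_ptr;
		memcpy(dst, ptr, size);
		munmap(ptr, size);

		return 0;
	}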
Quoting Daniel Vetter (2019-07-30 09:58:22)
> On Thu, Jun 27, 2019 at 09:56:12PM +0100, Matthew Auld wrote:
> > We need to add support for pread'ing an LMEM object.
>
> Why? Usage outside from igts seems pretty dead, at least looking at iris
> and anv. This was kinda a neat thing for when we didn't yet realized that
> doing clflush in userspace is both possible and more efficient.
>
> Same for pwrite, iris just dropped it, anv doesn't seem to use it. And I
> thought mesa plan is to drop the old classic driver for when we'll need
> lmem. It's not much, but would allow us to drop a few things.

From the opposite perspective, it should only be a wrapper around code
that is being used internally for similar transfers. (One side-effect is
that it can be used to poke more directly at those internals.) It is also
not clear what the preferred strategy will be in future, especially as
people start discussing migration-on-pagefault.

It comes down to whether the maintenance burden of maintaining a
consistent API is worth the maintenance burden of not!
-Chris
On Tue, Jul 30, 2019 at 2:05 PM Chris Wilson <chris@chris-wilson.co.uk> wrote:
> Quoting Daniel Vetter (2019-07-30 09:58:22)
> > On Thu, Jun 27, 2019 at 09:56:12PM +0100, Matthew Auld wrote:
> > > We need to add support for pread'ing an LMEM object.
> >
> > Why? Usage outside from igts seems pretty dead, at least looking at iris
> > and anv. This was kinda a neat thing for when we didn't yet realized that
> > doing clflush in userspace is both possible and more efficient.
> >
> > Same for pwrite, iris just dropped it, anv doesn't seem to use it. And I
> > thought mesa plan is to drop the old classic driver for when we'll need
> > lmem. It's not much, but would allow us to drop a few things.
>
> From the opposite perspective, it should only be a wrapper around code
> that is being used internally for similar transfers. (One side-effect is
> that it can be used to poke more directly at those internals.) It is also
> not clear what the preferred strategy will be in future, especially as
> people start discussing migration-on-pagefault.

Hm, where do we look at migrate-on-pagefault? I mean aside from the entire
resurrection of the mappable deamon because apparently we can't design
apertures for pci bars which are big enough (unlike amd, which fixed this
now). But that's just an lmem->lmem migration to squeeze it into the right
range (and hey we know how to do that, we even have the old code still).

> It comes down to whether the maintenance burden of maintaining a
> consistent API is worth the maintenance burden of not!

Yeah it's minor, but then pwrite has some irky corner-cases (I stumbled
over the vlc wtf that originally motivated the introduction of the pwrite
hook, and the reintroduction of the page-by-page pwrite for shmem that's
not pinned). So it's not the cleanest uapi we have since almost a decade
of gunk now sitting on top. And when I went looking at iris/anv it seems
like we've sunset it for good going forward.

Note I'm not going for complete removal, just not allowing it if you set
lmem as one of the placements of your bo. So pwrite into an upload buffer
in smem, mapped through the TT to the gpu, would still be fine. Which I
guess should cover all the igt pwrites for batchbuffers.
-Daniel
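What Daniel describes at the end would amount to an early check in the pread/pwrite ioctls, roughly along these lines. Both the helper and the error code below are placeholders, since the driver has no per-object placement list at this point in the series:

	/*
	 * Sketch only: reject pread/pwrite up front when local memory is one
	 * of the object's allowed placements, while keeping the ioctls for
	 * smem-only objects. i915_gem_object_can_be_placed_in() and
	 * INTEL_MEMORY_LOCAL are placeholder names here.
	 */
	static int pread_check_placement(struct drm_i915_gem_object *obj)
	{
		if (i915_gem_object_can_be_placed_in(obj, INTEL_MEMORY_LOCAL))
			return -EOPNOTSUPP; /* use a wc mmap instead */

		return 0;
	}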
We need to add support for pread'ing an LMEM object.

Signed-off-by: Matthew Auld <matthew.auld@intel.com>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Cc: Abdiel Janulgue <abdiel.janulgue@linux.intel.com>
---
 .../gpu/drm/i915/gem/i915_gem_object_types.h |  2 +
 drivers/gpu/drm/i915/i915_gem.c              |  6 ++
 drivers/gpu/drm/i915/intel_region_lmem.c     | 76 +++++++++++++++++++
 3 files changed, 84 insertions(+)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
index 80ff5ad9bc07..8cdee185251a 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
@@ -52,6 +52,8 @@ struct drm_i915_gem_object_ops {
 	void (*truncate)(struct drm_i915_gem_object *obj);
 	void (*writeback)(struct drm_i915_gem_object *obj);
 
+	int (*pread)(struct drm_i915_gem_object *,
+		     const struct drm_i915_gem_pread *arg);
 	int (*pwrite)(struct drm_i915_gem_object *obj,
 		      const struct drm_i915_gem_pwrite *arg);
 
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 85677ae89849..4ba386ab35e7 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -463,6 +463,12 @@ i915_gem_pread_ioctl(struct drm_device *dev, void *data,
 
 	trace_i915_gem_object_pread(obj, args->offset, args->size);
 
+	ret = -ENODEV;
+	if (obj->ops->pread)
+		ret = obj->ops->pread(obj, args);
+	if (ret != -ENODEV)
+		goto out;
+
 	ret = i915_gem_object_wait(obj,
 				   I915_WAIT_INTERRUPTIBLE,
 				   MAX_SCHEDULE_TIMEOUT);
diff --git a/drivers/gpu/drm/i915/intel_region_lmem.c b/drivers/gpu/drm/i915/intel_region_lmem.c
index 701bcac3479e..54b2c7bf177d 100644
--- a/drivers/gpu/drm/i915/intel_region_lmem.c
+++ b/drivers/gpu/drm/i915/intel_region_lmem.c
@@ -7,10 +7,86 @@
 #include "intel_memory_region.h"
 #include "intel_region_lmem.h"
 
+static int lmem_pread(struct drm_i915_gem_object *obj,
+		      const struct drm_i915_gem_pread *arg)
+{
+	struct drm_i915_private *i915 = to_i915(obj->base.dev);
+	struct intel_runtime_pm *rpm = &i915->runtime_pm;
+	intel_wakeref_t wakeref;
+	struct dma_fence *fence;
+	char __user *user_data;
+	unsigned int offset;
+	unsigned long idx;
+	u64 remain;
+	int ret;
+
+	ret = i915_gem_object_pin_pages(obj);
+	if (ret)
+		return ret;
+
+	i915_gem_object_lock(obj);
+	ret = i915_gem_object_set_to_wc_domain(obj, false);
+	if (ret) {
+		i915_gem_object_unlock(obj);
+		goto out_unpin;
+	}
+
+	fence = i915_gem_object_lock_fence(obj);
+	i915_gem_object_unlock(obj);
+	if (!fence) {
+		ret = -ENOMEM;
+		goto out_unpin;
+	}
+
+	wakeref = intel_runtime_pm_get(rpm);
+
+	remain = arg->size;
+	user_data = u64_to_user_ptr(arg->data_ptr);
+	offset = offset_in_page(arg->offset);
+	for (idx = arg->offset >> PAGE_SHIFT; remain; idx++) {
+		unsigned long unwritten;
+		void __iomem *vaddr;
+		int length;
+
+		length = remain;
+		if (offset + length > PAGE_SIZE)
+			length = PAGE_SIZE - offset;
+
+		vaddr = i915_gem_object_lmem_io_map_page(obj, idx);
+		if (!vaddr) {
+			ret = -ENOMEM;
+			goto out_put;
+		}
+
+		unwritten = copy_to_user(user_data,
+					 (void __force *)vaddr + offset,
+					 length);
+		io_mapping_unmap_atomic(vaddr);
+		if (unwritten) {
+			ret = -EFAULT;
+			goto out_put;
+		}
+
+		remain -= length;
+		user_data += length;
+		offset = 0;
+	}
+
+out_put:
+	intel_runtime_pm_put(rpm, wakeref);
+	i915_gem_object_unlock_fence(obj, fence);
+out_unpin:
+	i915_gem_object_unpin_pages(obj);
+
+	return ret;
+}
+
 static const struct drm_i915_gem_object_ops region_lmem_obj_ops = {
 	.get_pages = i915_memory_region_get_pages_buddy,
 	.put_pages = i915_memory_region_put_pages_buddy,
 	.release = i915_gem_object_release_memory_region,
+
+	.pread = lmem_pread,
 };
 
 static struct drm_i915_gem_object *