Message ID | 1444285469-8187-6-git-send-email-ankitprasad.r.sharma@intel.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Hi, On 08/10/15 07:24, ankitprasad.r.sharma@intel.com wrote: > From: Ankitprasad Sharma <ankitprasad.r.sharma@intel.com> > > This patch adds support for extending the pread/pwrite functionality > for objects not backed by shmem. The access will be made through > gtt interface. > This will cover prime objects as well as stolen memory backed objects > but for userptr objects it is still forbidden. Where is the part which forbids it for userptr objects? > v2: Drop locks around slow_user_access, prefault the pages before > access (Chris) > > v3: Rebased to the latest drm-intel-nightly (Ankit) > > v4: Moved page base & offset calculations outside the copy loop, > corrected data types for size and offset variables, corrected if-else > braces format (Tvrtko/kerneldocs) > > v5: Enabled pread/pwrite for all non-shmem backed objects including > without tiling restrictions (Ankit) > > v6: Using pwrite_fast for non-shmem backed objects as well (Chris) > > Testcase: igt/gem_stolen > > Signed-off-by: Ankitprasad Sharma <ankitprasad.r.sharma@intel.com> > --- > drivers/gpu/drm/i915/i915_gem.c | 125 +++++++++++++++++++++++++++++++++------- > 1 file changed, 104 insertions(+), 21 deletions(-) > > diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c > index 91a2e97..2c94e22 100644 > --- a/drivers/gpu/drm/i915/i915_gem.c > +++ b/drivers/gpu/drm/i915/i915_gem.c > @@ -614,6 +614,99 @@ shmem_pread_slow(struct page *page, int shmem_page_offset, int page_length, > return ret ? - EFAULT : 0; > } > > +static inline uint64_t > +slow_user_access(struct io_mapping *mapping, > + uint64_t page_base, int page_offset, > + char __user *user_data, > + int length, bool pwrite) > +{ > + void __iomem *vaddr_inatomic; > + void *vaddr; > + uint64_t unwritten; > + > + vaddr_inatomic = io_mapping_map_wc(mapping, page_base); > + /* We can use the cpu mem copy function because this is X86. 
*/ > + vaddr = (void __force *)vaddr_inatomic + page_offset; > + if (pwrite) > + unwritten = __copy_from_user(vaddr, user_data, length); > + else > + unwritten = __copy_to_user(user_data, vaddr, length); > + > + io_mapping_unmap(vaddr_inatomic); > + return unwritten; > +} > + > +static int > +i915_gem_gtt_pread(struct drm_device *dev, > + struct drm_i915_gem_object *obj, uint64_t size, > + uint64_t data_offset, uint64_t data_ptr) > +{ > + struct drm_i915_private *dev_priv = dev->dev_private; > + char __user *user_data; > + uint64_t remain; > + uint64_t offset, page_base; > + int page_offset, page_length, ret = 0; > + > + ret = i915_gem_obj_ggtt_pin(obj, 0, PIN_MAPPABLE); > + if (ret) > + goto out; > + > + ret = i915_gem_object_set_to_gtt_domain(obj, false); > + if (ret) > + goto out_unpin; > + > + ret = i915_gem_object_put_fence(obj); > + if (ret) > + goto out_unpin; > + > + user_data = to_user_ptr(data_ptr); > + remain = size; > + offset = i915_gem_obj_ggtt_offset(obj) + data_offset; > + > + mutex_unlock(&dev->struct_mutex); > + if (likely(!i915.prefault_disable)) > + ret = fault_in_multipages_writeable(user_data, remain); > + > + /* > + * page_offset = offset within page > + * page_base = page offset within aperture > + */ > + page_offset = offset_in_page(offset); > + page_base = offset & PAGE_MASK; > + > + while (remain > 0) { > + /* page_length = bytes to copy for this page */ > + page_length = remain; > + if ((page_offset + remain) > PAGE_SIZE) > + page_length = PAGE_SIZE - page_offset; > + > + /* This is a slow read/write as it tries to read from > + * and write to user memory which may result into page > + * faults > + */ > + ret = slow_user_access(dev_priv->gtt.mappable, page_base, > + page_offset, user_data, > + page_length, false); > + > + if (ret) { > + ret = -EFAULT; > + break; > + } > + > + remain -= page_length; > + user_data += page_length; > + page_base += page_length; > + page_offset = 0; > + } > + > + mutex_lock(&dev->struct_mutex); > + > 
+out_unpin: > + i915_gem_object_ggtt_unpin(obj); > +out: > + return ret; > +} > + > static int > i915_gem_shmem_pread(struct drm_device *dev, > struct drm_i915_gem_object *obj, > @@ -737,17 +830,14 @@ i915_gem_pread_ioctl(struct drm_device *dev, void *data, > goto out; > } > > - /* prime objects have no backing filp to GEM pread/pwrite > - * pages from. > - */ > - if (!obj->base.filp) { > - ret = -EINVAL; > - goto out; > - } > - > trace_i915_gem_object_pread(obj, args->offset, args->size); > > - ret = i915_gem_shmem_pread(dev, obj, args, file); > + /* pread for non shmem backed objects */ > + if (!obj->base.filp && obj->tiling_mode == I915_TILING_NONE) > + ret = i915_gem_gtt_pread(dev, obj, args->size, > + args->offset, args->data_ptr); > + else > + ret = i915_gem_shmem_pread(dev, obj, args, file); > > out: > drm_gem_object_unreference(&obj->base); > @@ -795,7 +885,7 @@ i915_gem_gtt_pwrite_fast(struct drm_device *dev, > char __user *user_data; > int page_offset, page_length, ret; > > - ret = i915_gem_obj_ggtt_pin(obj, 0, PIN_MAPPABLE | PIN_NONBLOCK); > + ret = i915_gem_obj_ggtt_pin(obj, 0, PIN_MAPPABLE); Why is this needed? > if (ret) > goto out; > > @@ -1090,14 +1180,6 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data, > goto out; > } > > - /* prime objects have no backing filp to GEM pread/pwrite > - * pages from. > - */ > - if (!obj->base.filp) { > - ret = -EINVAL; > - goto out; > - } > - > trace_i915_gem_object_pwrite(obj, args->offset, args->size); > > ret = -EFAULT; > @@ -1108,8 +1190,9 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data, > * perspective, requiring manual detiling by the client. 
> */ > if (obj->tiling_mode == I915_TILING_NONE && > - obj->base.write_domain != I915_GEM_DOMAIN_CPU && > - cpu_write_needs_clflush(obj)) { > + (!obj->base.filp || > + (obj->base.write_domain != I915_GEM_DOMAIN_CPU && > + cpu_write_needs_clflush(obj)))) { > ret = i915_gem_gtt_pwrite_fast(dev, obj, args, file); So the pwrite path will fail if a page fault happens, as opposed to the pread path which makes an effort to handle it. What is the reason for this asymmetry in the API? Or am I missing something? Regards, Tvrtko
On Thu, 2015-10-08 at 14:56 +0100, Tvrtko Ursulin wrote: > Hi, > > On 08/10/15 07:24, ankitprasad.r.sharma@intel.com wrote: > > From: Ankitprasad Sharma <ankitprasad.r.sharma@intel.com> > > > > This patch adds support for extending the pread/pwrite functionality > > for objects not backed by shmem. The access will be made through > > gtt interface. > > This will cover prime objects as well as stolen memory backed objects > > but for userptr objects it is still forbidden. > > Where is the part which forbids it for userptr objects? In version 5, updated the patch handle pwrite/pread for all non-shmem backed objects, including userptr objects Will update the Commit message > > > v2: Drop locks around slow_user_access, prefault the pages before > > access (Chris) > > > > v3: Rebased to the latest drm-intel-nightly (Ankit) > > > > v4: Moved page base & offset calculations outside the copy loop, > > corrected data types for size and offset variables, corrected if-else > > braces format (Tvrtko/kerneldocs) > > > > v5: Enabled pread/pwrite for all non-shmem backed objects including > > without tiling restrictions (Ankit) > > > > v6: Using pwrite_fast for non-shmem backed objects as well (Chris) > > > > Testcase: igt/gem_stolen > > > > Signed-off-by: Ankitprasad Sharma <ankitprasad.r.sharma@intel.com> > > --- > > drivers/gpu/drm/i915/i915_gem.c | 125 +++++++++++++++++++++++++++++++++------- > > 1 file changed, 104 insertions(+), 21 deletions(-) > > > > diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c > > index 91a2e97..2c94e22 100644 > > --- a/drivers/gpu/drm/i915/i915_gem.c > > +++ b/drivers/gpu/drm/i915/i915_gem.c > > @@ -614,6 +614,99 @@ shmem_pread_slow(struct page *page, int shmem_page_offset, int page_length, > > return ret ? 
- EFAULT : 0; > > } > > > > +static inline uint64_t > > +slow_user_access(struct io_mapping *mapping, > > + uint64_t page_base, int page_offset, > > + char __user *user_data, > > + int length, bool pwrite) > > +{ > > + void __iomem *vaddr_inatomic; > > + void *vaddr; > > + uint64_t unwritten; > > + > > + vaddr_inatomic = io_mapping_map_wc(mapping, page_base); > > + /* We can use the cpu mem copy function because this is X86. */ > > + vaddr = (void __force *)vaddr_inatomic + page_offset; > > + if (pwrite) > > + unwritten = __copy_from_user(vaddr, user_data, length); > > + else > > + unwritten = __copy_to_user(user_data, vaddr, length); > > + > > + io_mapping_unmap(vaddr_inatomic); > > + return unwritten; > > +} > > + > > +static int > > +i915_gem_gtt_pread(struct drm_device *dev, > > + struct drm_i915_gem_object *obj, uint64_t size, > > + uint64_t data_offset, uint64_t data_ptr) > > +{ > > + struct drm_i915_private *dev_priv = dev->dev_private; > > + char __user *user_data; > > + uint64_t remain; > > + uint64_t offset, page_base; > > + int page_offset, page_length, ret = 0; > > + > > + ret = i915_gem_obj_ggtt_pin(obj, 0, PIN_MAPPABLE); > > + if (ret) > > + goto out; > > + > > + ret = i915_gem_object_set_to_gtt_domain(obj, false); > > + if (ret) > > + goto out_unpin; > > + > > + ret = i915_gem_object_put_fence(obj); > > + if (ret) > > + goto out_unpin; > > + > > + user_data = to_user_ptr(data_ptr); > > + remain = size; > > + offset = i915_gem_obj_ggtt_offset(obj) + data_offset; > > + > > + mutex_unlock(&dev->struct_mutex); > > + if (likely(!i915.prefault_disable)) > > + ret = fault_in_multipages_writeable(user_data, remain); > > + > > + /* > > + * page_offset = offset within page > > + * page_base = page offset within aperture > > + */ > > + page_offset = offset_in_page(offset); > > + page_base = offset & PAGE_MASK; > > + > > + while (remain > 0) { > > + /* page_length = bytes to copy for this page */ > > + page_length = remain; > > + if ((page_offset + remain) > 
PAGE_SIZE) > > + page_length = PAGE_SIZE - page_offset; > > + > > + /* This is a slow read/write as it tries to read from > > + * and write to user memory which may result into page > > + * faults > > + */ > > + ret = slow_user_access(dev_priv->gtt.mappable, page_base, > > + page_offset, user_data, > > + page_length, false); > > + > > + if (ret) { > > + ret = -EFAULT; > > + break; > > + } > > + > > + remain -= page_length; > > + user_data += page_length; > > + page_base += page_length; > > + page_offset = 0; > > + } > > + > > + mutex_lock(&dev->struct_mutex); > > + > > +out_unpin: > > + i915_gem_object_ggtt_unpin(obj); > > +out: > > + return ret; > > +} > > + > > static int > > i915_gem_shmem_pread(struct drm_device *dev, > > struct drm_i915_gem_object *obj, > > @@ -737,17 +830,14 @@ i915_gem_pread_ioctl(struct drm_device *dev, void *data, > > goto out; > > } > > > > - /* prime objects have no backing filp to GEM pread/pwrite > > - * pages from. > > - */ > > - if (!obj->base.filp) { > > - ret = -EINVAL; > > - goto out; > > - } > > - > > trace_i915_gem_object_pread(obj, args->offset, args->size); > > > > - ret = i915_gem_shmem_pread(dev, obj, args, file); > > + /* pread for non shmem backed objects */ > > + if (!obj->base.filp && obj->tiling_mode == I915_TILING_NONE) > > + ret = i915_gem_gtt_pread(dev, obj, args->size, > > + args->offset, args->data_ptr); > > + else > > + ret = i915_gem_shmem_pread(dev, obj, args, file); > > > > out: > > drm_gem_object_unreference(&obj->base); > > @@ -795,7 +885,7 @@ i915_gem_gtt_pwrite_fast(struct drm_device *dev, > > char __user *user_data; > > int page_offset, page_length, ret; > > > > - ret = i915_gem_obj_ggtt_pin(obj, 0, PIN_MAPPABLE | PIN_NONBLOCK); > > + ret = i915_gem_obj_ggtt_pin(obj, 0, PIN_MAPPABLE); > > Why is this needed? This was Chris' suggestion. This change can go as a separate patch, if needed. I do not think pwrite/pread has any dependency on this. Need Chris to respond on this. 
> > > if (ret) > > goto out; > > > > @@ -1090,14 +1180,6 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data, > > goto out; > > } > > > > - /* prime objects have no backing filp to GEM pread/pwrite > > - * pages from. > > - */ > > - if (!obj->base.filp) { > > - ret = -EINVAL; > > - goto out; > > - } > > - > > trace_i915_gem_object_pwrite(obj, args->offset, args->size); > > > > ret = -EFAULT; > > @@ -1108,8 +1190,9 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data, > > * perspective, requiring manual detiling by the client. > > */ > > if (obj->tiling_mode == I915_TILING_NONE && > > - obj->base.write_domain != I915_GEM_DOMAIN_CPU && > > - cpu_write_needs_clflush(obj)) { > > + (!obj->base.filp || > > + (obj->base.write_domain != I915_GEM_DOMAIN_CPU && > > + cpu_write_needs_clflush(obj)))) { > > ret = i915_gem_gtt_pwrite_fast(dev, obj, args, file); > > So the pwrite path will fail if a page fault happens, as opposed to the > pread path which makes an effort to handle it. What is the reason for > this asymmetry in the API? Or I am missing something? I had earlier implemented the pwrite and pread maintaining the symmetry in the API. After a couple of revisions we landed on this implementation. Need Chris to respond on this. Thanks, Ankit
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 91a2e97..2c94e22 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -614,6 +614,99 @@ shmem_pread_slow(struct page *page, int shmem_page_offset, int page_length, return ret ? - EFAULT : 0; } +static inline uint64_t +slow_user_access(struct io_mapping *mapping, + uint64_t page_base, int page_offset, + char __user *user_data, + int length, bool pwrite) +{ + void __iomem *vaddr_inatomic; + void *vaddr; + uint64_t unwritten; + + vaddr_inatomic = io_mapping_map_wc(mapping, page_base); + /* We can use the cpu mem copy function because this is X86. */ + vaddr = (void __force *)vaddr_inatomic + page_offset; + if (pwrite) + unwritten = __copy_from_user(vaddr, user_data, length); + else + unwritten = __copy_to_user(user_data, vaddr, length); + + io_mapping_unmap(vaddr_inatomic); + return unwritten; +} + +static int +i915_gem_gtt_pread(struct drm_device *dev, + struct drm_i915_gem_object *obj, uint64_t size, + uint64_t data_offset, uint64_t data_ptr) +{ + struct drm_i915_private *dev_priv = dev->dev_private; + char __user *user_data; + uint64_t remain; + uint64_t offset, page_base; + int page_offset, page_length, ret = 0; + + ret = i915_gem_obj_ggtt_pin(obj, 0, PIN_MAPPABLE); + if (ret) + goto out; + + ret = i915_gem_object_set_to_gtt_domain(obj, false); + if (ret) + goto out_unpin; + + ret = i915_gem_object_put_fence(obj); + if (ret) + goto out_unpin; + + user_data = to_user_ptr(data_ptr); + remain = size; + offset = i915_gem_obj_ggtt_offset(obj) + data_offset; + + mutex_unlock(&dev->struct_mutex); + if (likely(!i915.prefault_disable)) + ret = fault_in_multipages_writeable(user_data, remain); + + /* + * page_offset = offset within page + * page_base = page offset within aperture + */ + page_offset = offset_in_page(offset); + page_base = offset & PAGE_MASK; + + while (remain > 0) { + /* page_length = bytes to copy for this page */ + page_length = remain; + 
if ((page_offset + remain) > PAGE_SIZE) + page_length = PAGE_SIZE - page_offset; + + /* This is a slow read/write as it tries to read from + * and write to user memory which may result into page + * faults + */ + ret = slow_user_access(dev_priv->gtt.mappable, page_base, + page_offset, user_data, + page_length, false); + + if (ret) { + ret = -EFAULT; + break; + } + + remain -= page_length; + user_data += page_length; + page_base += page_length; + page_offset = 0; + } + + mutex_lock(&dev->struct_mutex); + +out_unpin: + i915_gem_object_ggtt_unpin(obj); +out: + return ret; +} + static int i915_gem_shmem_pread(struct drm_device *dev, struct drm_i915_gem_object *obj, @@ -737,17 +830,14 @@ i915_gem_pread_ioctl(struct drm_device *dev, void *data, goto out; } - /* prime objects have no backing filp to GEM pread/pwrite - * pages from. - */ - if (!obj->base.filp) { - ret = -EINVAL; - goto out; - } - trace_i915_gem_object_pread(obj, args->offset, args->size); - ret = i915_gem_shmem_pread(dev, obj, args, file); + /* pread for non shmem backed objects */ + if (!obj->base.filp && obj->tiling_mode == I915_TILING_NONE) + ret = i915_gem_gtt_pread(dev, obj, args->size, + args->offset, args->data_ptr); + else + ret = i915_gem_shmem_pread(dev, obj, args, file); out: drm_gem_object_unreference(&obj->base); @@ -795,7 +885,7 @@ i915_gem_gtt_pwrite_fast(struct drm_device *dev, char __user *user_data; int page_offset, page_length, ret; - ret = i915_gem_obj_ggtt_pin(obj, 0, PIN_MAPPABLE | PIN_NONBLOCK); + ret = i915_gem_obj_ggtt_pin(obj, 0, PIN_MAPPABLE); if (ret) goto out; @@ -1090,14 +1180,6 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data, goto out; } - /* prime objects have no backing filp to GEM pread/pwrite - * pages from. 
- */ - if (!obj->base.filp) { - ret = -EINVAL; - goto out; - } - trace_i915_gem_object_pwrite(obj, args->offset, args->size); ret = -EFAULT; @@ -1108,8 +1190,9 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data, * perspective, requiring manual detiling by the client. */ if (obj->tiling_mode == I915_TILING_NONE && - obj->base.write_domain != I915_GEM_DOMAIN_CPU && - cpu_write_needs_clflush(obj)) { + (!obj->base.filp || + (obj->base.write_domain != I915_GEM_DOMAIN_CPU && + cpu_write_needs_clflush(obj)))) { ret = i915_gem_gtt_pwrite_fast(dev, obj, args, file); /* Note that the gtt paths might fail with non-page-backed user * pointers (e.g. gtt mappings when moving data between @@ -1119,7 +1202,7 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data, if (ret == -EFAULT || ret == -ENOSPC) { if (obj->phys_handle) ret = i915_gem_phys_pwrite(obj, args, file); - else + else if (obj->base.filp) ret = i915_gem_shmem_pwrite(dev, obj, args, file); }