Message ID | 1450765253-32104-3-git-send-email-ankitprasad.r.sharma@intel.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Hi Ankitprasad, [auto build test ERROR on drm-intel/for-linux-next] [also build test ERROR on v4.4-rc6 next-20151221] url: https://github.com/0day-ci/linux/commits/ankitprasad-r-sharma-intel-com/Support-for-creating-using-Stolen-memory-backed-objects/20151222-144648 base: git://anongit.freedesktop.org/drm-intel for-linux-next config: x86_64-randconfig-x010-201551 (attached as .config) reproduce: # save the attached .config to linux build tree make ARCH=x86_64 All errors (new ones prefixed by >>): drivers/gpu/drm/i915/i915_gem.c: In function 'i915_gem_gtt_pwrite_fast': >> drivers/gpu/drm/i915/i915_gem.c:836:18: error: 'struct i915_address_space' has no member named 'insert_page' i915->gtt.base.insert_page(&i915->gtt.base, ^ >> drivers/gpu/drm/i915/i915_gem.c:837:10: error: implicit declaration of function 'i915_gem_object_get_dma_address' [-Werror=implicit-function-declaration] i915_gem_object_get_dma_address(obj, offset >> PAGE_SHIFT), ^ cc1: some warnings being treated as errors vim +836 drivers/gpu/drm/i915/i915_gem.c 830 u32 page_base = node.start; 831 unsigned page_offset = offset_in_page(offset); 832 unsigned page_length = PAGE_SIZE - page_offset; 833 page_length = remain < page_length ? remain : page_length; 834 if (node.allocated) { 835 wmb(); > 836 i915->gtt.base.insert_page(&i915->gtt.base, > 837 i915_gem_object_get_dma_address(obj, offset >> PAGE_SHIFT), 838 node.start, 839 I915_CACHE_NONE, 840 0); --- 0-DAY kernel test infrastructure Open Source Technology Center https://lists.01.org/pipermail/kbuild-all Intel Corporation
Hi, On 22/12/15 06:20, ankitprasad.r.sharma@intel.com wrote: > From: Ankitprasad Sharma <ankitprasad.r.sharma@intel.com> > > In pwrite_fast, map an object page by page if obj_ggtt_pin fails. First, > we try a nonblocking pin for the whole object (since that is fastest if > reused), then failing that we try to grab one page in the mappable > aperture. It also allows us to handle objects larger than the mappable > aperture (e.g. if we need to pwrite with vGPU restricting the aperture > to a measely 8MiB or something like that). > > v2: Pin pages before starting pwrite, Combined duplicate loops (Chris) > > v3: Combined loops based on local patch by Chris (Chris) > > v4: Added i915 wrapper function for drm_mm_insert_node_in_range (Chris) > > v5: Renamed wrapper function for drm_mm_insert_node_in_range (Chris) > > v5: Added wrapper for drm_mm_remove_node() (Chris) > > v6: Added get_pages call before pinning the pages (Tvrtko) > Added remove_mappable_node() wrapper for drm_mm_remove_node() (Chris) > > Signed-off-by: Ankitprasad Sharma <ankitprasad.r.sharma@intel.com> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> > --- > drivers/gpu/drm/i915/i915_gem.c | 94 +++++++++++++++++++++++++++++++---------- > 1 file changed, 72 insertions(+), 22 deletions(-) > > diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c > index bf7f203..f11ec89 100644 > --- a/drivers/gpu/drm/i915/i915_gem.c > +++ b/drivers/gpu/drm/i915/i915_gem.c > @@ -61,6 +61,23 @@ static bool cpu_write_needs_clflush(struct drm_i915_gem_object *obj) > return obj->pin_display; > } > > +static int > +insert_mappable_node(struct drm_i915_private *i915, > + struct drm_mm_node *node) > +{ > + return drm_mm_insert_node_in_range_generic(&i915->gtt.base.mm, node, > + PAGE_SIZE, 0, 0, It does not look ideal that the function name is saying it will insert a node and then it only inserts one page. In that respect previous version looked better to me but since it is static and single use - whatever - this has been dragging for too long. > + 0, i915->gtt.mappable_end, > + DRM_MM_SEARCH_DEFAULT, > + DRM_MM_CREATE_DEFAULT); > +} > + > +static void > +remove_mappable_node(struct drm_mm_node *node) > +{ > + drm_mm_remove_node(node); > +} > + > /* some bookkeeping */ > static void i915_gem_info_add_obj(struct drm_i915_private *dev_priv, > size_t size) > @@ -760,20 +777,34 @@ fast_user_write(struct io_mapping *mapping, > * user into the GTT, uncached. > */ > static int > -i915_gem_gtt_pwrite_fast(struct drm_device *dev, > +i915_gem_gtt_pwrite_fast(struct drm_i915_private *i915, > struct drm_i915_gem_object *obj, > struct drm_i915_gem_pwrite *args, > struct drm_file *file) > { > - struct drm_i915_private *dev_priv = dev->dev_private; > - ssize_t remain; > - loff_t offset, page_base; > + struct drm_mm_node node; > + uint64_t remain, offset; > char __user *user_data; > - int page_offset, page_length, ret; > + int ret; > > ret = i915_gem_obj_ggtt_pin(obj, 0, PIN_MAPPABLE | PIN_NONBLOCK); > - if (ret) > - goto out; > + if (ret) { > + memset(&node, 0, sizeof(node)); > + ret = insert_mappable_node(i915, &node); > + if (ret) > + goto out; > + > + ret = i915_gem_object_get_pages(obj); > + if (ret) { > + remove_mappable_node(&node); > + goto out; > + } > + > + i915_gem_object_pin_pages(obj); > + } else { > + node.start = i915_gem_obj_ggtt_offset(obj); > + node.allocated = false; > + } > > ret = i915_gem_object_set_to_gtt_domain(obj, true); > if (ret) > @@ -783,31 +814,39 @@ i915_gem_gtt_pwrite_fast(struct drm_device *dev, > if (ret) > goto out_unpin; > > - user_data = to_user_ptr(args->data_ptr); > - remain = args->size; > - > - offset = i915_gem_obj_ggtt_offset(obj) + args->offset; > - > intel_fb_obj_invalidate(obj, ORIGIN_GTT); > + obj->dirty = true; > > - while (remain > 0) { > + user_data = to_user_ptr(args->data_ptr); > + offset = args->offset; > + remain = args->size; > + while (remain) { > /* Operation in this page > * > * page_base = page offset within aperture > * page_offset = offset within page > * page_length = bytes to copy for this page > */ > - page_base = offset & PAGE_MASK; > - page_offset = offset_in_page(offset); > - page_length = remain; > - if ((page_offset + remain) > PAGE_SIZE) > - page_length = PAGE_SIZE - page_offset; > - > + u32 page_base = node.start; Compiler does not complain about possible truncation here? I would change it to a type of equivalent width just in case. Even before it was loff_t. > + unsigned page_offset = offset_in_page(offset); > + unsigned page_length = PAGE_SIZE - page_offset; > + page_length = remain < page_length ? remain : page_length; > + if (node.allocated) { > + wmb(); > + i915->gtt.base.insert_page(&i915->gtt.base, > + i915_gem_object_get_dma_address(obj, offset >> PAGE_SHIFT), > + node.start, > + I915_CACHE_NONE, > + 0); > + wmb(); > + } else { > + page_base += offset & PAGE_MASK; > + } > /* If we get a fault while copying data, then (presumably) our > * source page isn't available. Return the error and we'll > * retry in the slow path. > */ > - if (fast_user_write(dev_priv->gtt.mappable, page_base, > + if (fast_user_write(i915->gtt.mappable, page_base, > page_offset, user_data, page_length)) { > ret = -EFAULT; > goto out_flush; > @@ -821,7 +860,18 @@ i915_gem_gtt_pwrite_fast(struct drm_device *dev, > out_flush: > intel_fb_obj_flush(obj, false, ORIGIN_GTT); > out_unpin: > - i915_gem_object_ggtt_unpin(obj); > + if (node.allocated) { > + wmb(); > + i915->gtt.base.clear_range(&i915->gtt.base, > + node.start, node.size, > + true); > + remove_mappable_node(&node); > + i915_gem_object_unpin_pages(obj); > + i915_gem_object_put_pages(obj); > + } > + else { > + i915_gem_object_ggtt_unpin(obj); > + } > out: > return ret; > } > @@ -1086,7 +1136,7 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data, > if (obj->tiling_mode == I915_TILING_NONE && > obj->base.write_domain != I915_GEM_DOMAIN_CPU && > cpu_write_needs_clflush(obj)) { > - ret = i915_gem_gtt_pwrite_fast(dev, obj, args, file); > + ret = i915_gem_gtt_pwrite_fast(dev_priv, obj, args, file); > /* Note that the gtt paths might fail with non-page-backed user > * pointers (e.g. gtt mappings when moving data between > * textures). Fallback to the shmem path in that case. */ > Regards, Tvrtko
On Tue, 2015-12-22 at 10:44 +0000, Tvrtko Ursulin wrote: > Hi, > > On 22/12/15 06:20, ankitprasad.r.sharma@intel.com wrote: > > From: Ankitprasad Sharma <ankitprasad.r.sharma@intel.com> > > > > In pwrite_fast, map an object page by page if obj_ggtt_pin fails. First, > > we try a nonblocking pin for the whole object (since that is fastest if > > reused), then failing that we try to grab one page in the mappable > > aperture. It also allows us to handle objects larger than the mappable > > aperture (e.g. if we need to pwrite with vGPU restricting the aperture > > to a measely 8MiB or something like that). > > > > v2: Pin pages before starting pwrite, Combined duplicate loops (Chris) > > > > v3: Combined loops based on local patch by Chris (Chris) > > > > v4: Added i915 wrapper function for drm_mm_insert_node_in_range (Chris) > > > > v5: Renamed wrapper function for drm_mm_insert_node_in_range (Chris) > > > > v5: Added wrapper for drm_mm_remove_node() (Chris) > > > > v6: Added get_pages call before pinning the pages (Tvrtko) > > Added remove_mappable_node() wrapper for drm_mm_remove_node() (Chris) > > > > Signed-off-by: Ankitprasad Sharma <ankitprasad.r.sharma@intel.com> > > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> > > --- > > drivers/gpu/drm/i915/i915_gem.c | 94 +++++++++++++++++++++++++++++++---------- > > 1 file changed, 72 insertions(+), 22 deletions(-) > > > > diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c > > index bf7f203..f11ec89 100644 > > --- a/drivers/gpu/drm/i915/i915_gem.c > > +++ b/drivers/gpu/drm/i915/i915_gem.c > > @@ -61,6 +61,23 @@ static bool cpu_write_needs_clflush(struct drm_i915_gem_object *obj) > > return obj->pin_display; > > } > > > > +static int > > +insert_mappable_node(struct drm_i915_private *i915, > > + struct drm_mm_node *node) > > +{ > > + return drm_mm_insert_node_in_range_generic(&i915->gtt.base.mm, node, > > + PAGE_SIZE, 0, 0, > > It does not look ideal that the function name is saying it will insert a > node and then it only inserts one page. > > In that respect previous version looked better to me but since it is > static and single use - whatever - this has been dragging for too long. We can take size as well, as a parameter passed to the wrapper. > > > + 0, i915->gtt.mappable_end, > > + DRM_MM_SEARCH_DEFAULT, > > + DRM_MM_CREATE_DEFAULT); > > +} > > + > > +static void > > +remove_mappable_node(struct drm_mm_node *node) > > +{ > > + drm_mm_remove_node(node); > > +} > > + > > /* some bookkeeping */ > > static void i915_gem_info_add_obj(struct drm_i915_private *dev_priv, > > size_t size) > > @@ -760,20 +777,34 @@ fast_user_write(struct io_mapping *mapping, > > * user into the GTT, uncached. > > */ > > static int > > -i915_gem_gtt_pwrite_fast(struct drm_device *dev, > > +i915_gem_gtt_pwrite_fast(struct drm_i915_private *i915, > > struct drm_i915_gem_object *obj, > > struct drm_i915_gem_pwrite *args, > > struct drm_file *file) > > { > > - struct drm_i915_private *dev_priv = dev->dev_private; > > - ssize_t remain; > > - loff_t offset, page_base; > > + struct drm_mm_node node; > > + uint64_t remain, offset; > > char __user *user_data; > > - int page_offset, page_length, ret; > > + int ret; > > > > ret = i915_gem_obj_ggtt_pin(obj, 0, PIN_MAPPABLE | PIN_NONBLOCK); > > - if (ret) > > - goto out; > > + if (ret) { > > + memset(&node, 0, sizeof(node)); > > + ret = insert_mappable_node(i915, &node); > > + if (ret) > > + goto out; > > + > > + ret = i915_gem_object_get_pages(obj); > > + if (ret) { > > + remove_mappable_node(&node); > > + goto out; > > + } > > + > > + i915_gem_object_pin_pages(obj); > > + } else { > > + node.start = i915_gem_obj_ggtt_offset(obj); > > + node.allocated = false; > > + } > > > > ret = i915_gem_object_set_to_gtt_domain(obj, true); > > if (ret) > > @@ -783,31 +814,39 @@ i915_gem_gtt_pwrite_fast(struct drm_device *dev, > > if (ret) > > goto out_unpin; > > > > - user_data = to_user_ptr(args->data_ptr); > > - remain = args->size; > > - > > - offset = i915_gem_obj_ggtt_offset(obj) + args->offset; > > - > > intel_fb_obj_invalidate(obj, ORIGIN_GTT); > > + obj->dirty = true; > > > > - while (remain > 0) { > > + user_data = to_user_ptr(args->data_ptr); > > + offset = args->offset; > > + remain = args->size; > > + while (remain) { > > /* Operation in this page > > * > > * page_base = page offset within aperture > > * page_offset = offset within page > > * page_length = bytes to copy for this page > > */ > > - page_base = offset & PAGE_MASK; > > - page_offset = offset_in_page(offset); > > - page_length = remain; > > - if ((page_offset + remain) > PAGE_SIZE) > > - page_length = PAGE_SIZE - page_offset; > > - > > + u32 page_base = node.start; > > Compiler does not complain about possible truncation here? I would > change it to a type of equivalent width just in case. Even before it was > loff_t. Never saw a compiler warning related to this truncation. Though this is not going to affect the functionality, I will update it to u64. Thanks, Ankit
On Tue, Dec 22, 2015 at 04:45:51PM +0530, Ankitprasad Sharma wrote: > > Compiler does not complain about possible truncation here? I would > > change it to a type of equivalent width just in case. Even before it was > > loff_t. > Never saw a compiler warning related to this truncation. Though this is > not going to affect the functionality, I will update it to u64. Which is ridiculous. -Chris
On 22/12/15 11:52, Chris Wilson wrote: > On Tue, Dec 22, 2015 at 04:45:51PM +0530, Ankitprasad Sharma wrote: >>> Compiler does not complain about possible truncation here? I would >>> change it to a type of equivalent width just in case. Even before it was >>> loff_t. >> Never saw a compiler warning related to this truncation. Though this is >> not going to affect the functionality, I will update it to u64. > > Which is ridiculous. Well you are campaigning for -Werror. :) Regards, Tvrtko
On Tue, Dec 22, 2015 at 12:03:32PM +0000, Tvrtko Ursulin wrote: > > On 22/12/15 11:52, Chris Wilson wrote: > >On Tue, Dec 22, 2015 at 04:45:51PM +0530, Ankitprasad Sharma wrote: > >>>Compiler does not complain about possible truncation here? I would > >>>change it to a type of equivalent width just in case. Even before it was > >>>loff_t. > >>Never saw a compiler warning related to this truncation. Though this is > >>not going to affect the functionality, I will update it to u64. > > > >Which is ridiculous. > > Well you are campaigning for -Werror. :) The standard compiler flags don't include warns for loss of precision, but I mean just using u64 here is silly and would imply something is very wrong with our understanding of the hardware and code. By all means have an assert that the mappable aperture cannot exceed 32bits (which will be quite a feat to fit inside the low 4GiB of addressable space). -Chris
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index bf7f203..f11ec89 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -61,6 +61,23 @@ static bool cpu_write_needs_clflush(struct drm_i915_gem_object *obj) return obj->pin_display; } +static int +insert_mappable_node(struct drm_i915_private *i915, + struct drm_mm_node *node) +{ + return drm_mm_insert_node_in_range_generic(&i915->gtt.base.mm, node, + PAGE_SIZE, 0, 0, + 0, i915->gtt.mappable_end, + DRM_MM_SEARCH_DEFAULT, + DRM_MM_CREATE_DEFAULT); +} + +static void +remove_mappable_node(struct drm_mm_node *node) +{ + drm_mm_remove_node(node); +} + /* some bookkeeping */ static void i915_gem_info_add_obj(struct drm_i915_private *dev_priv, size_t size) @@ -760,20 +777,34 @@ fast_user_write(struct io_mapping *mapping, * user into the GTT, uncached. */ static int -i915_gem_gtt_pwrite_fast(struct drm_device *dev, +i915_gem_gtt_pwrite_fast(struct drm_i915_private *i915, struct drm_i915_gem_object *obj, struct drm_i915_gem_pwrite *args, struct drm_file *file) { - struct drm_i915_private *dev_priv = dev->dev_private; - ssize_t remain; - loff_t offset, page_base; + struct drm_mm_node node; + uint64_t remain, offset; char __user *user_data; - int page_offset, page_length, ret; + int ret; ret = i915_gem_obj_ggtt_pin(obj, 0, PIN_MAPPABLE | PIN_NONBLOCK); - if (ret) - goto out; + if (ret) { + memset(&node, 0, sizeof(node)); + ret = insert_mappable_node(i915, &node); + if (ret) + goto out; + + ret = i915_gem_object_get_pages(obj); + if (ret) { + remove_mappable_node(&node); + goto out; + } + + i915_gem_object_pin_pages(obj); + } else { + node.start = i915_gem_obj_ggtt_offset(obj); + node.allocated = false; + } ret = i915_gem_object_set_to_gtt_domain(obj, true); if (ret) @@ -783,31 +814,39 @@ i915_gem_gtt_pwrite_fast(struct drm_device *dev, if (ret) goto out_unpin; - user_data = to_user_ptr(args->data_ptr); - remain = args->size; - - offset = i915_gem_obj_ggtt_offset(obj) + args->offset; - intel_fb_obj_invalidate(obj, ORIGIN_GTT); + obj->dirty = true; - while (remain > 0) { + user_data = to_user_ptr(args->data_ptr); + offset = args->offset; + remain = args->size; + while (remain) { /* Operation in this page * * page_base = page offset within aperture * page_offset = offset within page * page_length = bytes to copy for this page */ - page_base = offset & PAGE_MASK; - page_offset = offset_in_page(offset); - page_length = remain; - if ((page_offset + remain) > PAGE_SIZE) - page_length = PAGE_SIZE - page_offset; - + u32 page_base = node.start; + unsigned page_offset = offset_in_page(offset); + unsigned page_length = PAGE_SIZE - page_offset; + page_length = remain < page_length ? remain : page_length; + if (node.allocated) { + wmb(); + i915->gtt.base.insert_page(&i915->gtt.base, + i915_gem_object_get_dma_address(obj, offset >> PAGE_SHIFT), + node.start, + I915_CACHE_NONE, + 0); + wmb(); + } else { + page_base += offset & PAGE_MASK; + } /* If we get a fault while copying data, then (presumably) our * source page isn't available. Return the error and we'll * retry in the slow path. */ - if (fast_user_write(dev_priv->gtt.mappable, page_base, + if (fast_user_write(i915->gtt.mappable, page_base, page_offset, user_data, page_length)) { ret = -EFAULT; goto out_flush; @@ -821,7 +860,18 @@ i915_gem_gtt_pwrite_fast(struct drm_device *dev, out_flush: intel_fb_obj_flush(obj, false, ORIGIN_GTT); out_unpin: - i915_gem_object_ggtt_unpin(obj); + if (node.allocated) { + wmb(); + i915->gtt.base.clear_range(&i915->gtt.base, + node.start, node.size, + true); + remove_mappable_node(&node); + i915_gem_object_unpin_pages(obj); + i915_gem_object_put_pages(obj); + } + else { + i915_gem_object_ggtt_unpin(obj); + } out: return ret; } @@ -1086,7 +1136,7 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data, if (obj->tiling_mode == I915_TILING_NONE && obj->base.write_domain != I915_GEM_DOMAIN_CPU && cpu_write_needs_clflush(obj)) { - ret = i915_gem_gtt_pwrite_fast(dev, obj, args, file); + ret = i915_gem_gtt_pwrite_fast(dev_priv, obj, args, file); /* Note that the gtt paths might fail with non-page-backed user * pointers (e.g. gtt mappings when moving data between * textures). Fallback to the shmem path in that case. */