diff mbox series

[v2] drm/i915: Fix DMA mapped scatterlist lookup

Message ID 20200910145018.408983-1-tvrtko.ursulin@linux.intel.com (mailing list archive)
State New, archived
Headers show
Series [v2] drm/i915: Fix DMA mapped scatterlist lookup | expand

Commit Message

Tvrtko Ursulin Sept. 10, 2020, 2:50 p.m. UTC
From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

As the previous patch fixed the places where we walk the whole scatterlist
for DMA addresses, this patch fixes the random lookup functionality.

To achieve this we have to add a second lookup iterator and add a
i915_gem_object_get_sg_dma helper, to be used analogous to the existing
i915_gem_object_get_sg. Therefore two lookup caches are maintained per
object and they are flushed at the same point for simplicity. (Strictly
speaking the DMA cache should be flushed from i915_gem_gtt_finish_pages,
but today this coincides with unsetting of the pages in general.)

Partial VMA view is then fixed to use the new DMA lookup and properly
query sg length.

v2:
 * Checkpatch.

Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Cc: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Matthew Auld <matthew.auld@intel.com>
Cc: Lu Baolu <baolu.lu@linux.intel.com>
Cc: Tom Murphy <murphyt7@tcd.ie>
Cc: Logan Gunthorpe <logang@deltatee.com>
---
 drivers/gpu/drm/i915/gem/i915_gem_object.c    |  2 ++
 drivers/gpu/drm/i915/gem/i915_gem_object.h    | 20 +++++++++++++++++-
 .../gpu/drm/i915/gem/i915_gem_object_types.h  | 17 ++++++++-------
 drivers/gpu/drm/i915/gem/i915_gem_pages.c     | 21 ++++++++++++-------
 drivers/gpu/drm/i915/gt/intel_ggtt.c          |  4 ++--
 drivers/gpu/drm/i915/i915_scatterlist.h       |  5 +++++
 6 files changed, 51 insertions(+), 18 deletions(-)

Comments

Chris Wilson Sept. 15, 2020, 8:49 a.m. UTC | #1
Quoting Tvrtko Ursulin (2020-09-10 15:50:18)
> From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> 
> As the previous patch fixed the places where we walk the whole scatterlist
> for DMA addresses, this patch fixes the random lookup functionality.
> 
> To achieve this we have to add a second lookup iterator and add a
> i915_gem_object_get_sg_dma helper, to be used analogous to the existing
> i915_gem_object_get_sg. Therefore two lookup caches are maintained per
> object and they are flushed at the same point for simplicity. (Strictly
> speaking the DMA cache should be flushed from i915_gem_gtt_finish_pages,
> but today this coincides with unsetting of the pages in general.)
> 
> Partial VMA view is then fixed to use the new DMA lookup and properly
> query sg length.
> 
> v2:
>  * Checkpatch.
> 
> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
> Cc: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Matthew Auld <matthew.auld@intel.com>
> Cc: Lu Baolu <baolu.lu@linux.intel.com>
> Cc: Tom Murphy <murphyt7@tcd.ie>
> Cc: Logan Gunthorpe <logang@deltatee.com>
> ---
>  drivers/gpu/drm/i915/gem/i915_gem_object.c    |  2 ++
>  drivers/gpu/drm/i915/gem/i915_gem_object.h    | 20 +++++++++++++++++-
>  .../gpu/drm/i915/gem/i915_gem_object_types.h  | 17 ++++++++-------
>  drivers/gpu/drm/i915/gem/i915_gem_pages.c     | 21 ++++++++++++-------
>  drivers/gpu/drm/i915/gt/intel_ggtt.c          |  4 ++--
>  drivers/gpu/drm/i915/i915_scatterlist.h       |  5 +++++
>  6 files changed, 51 insertions(+), 18 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c b/drivers/gpu/drm/i915/gem/i915_gem_object.c
> index c8421fd9d2dc..ffeaf1b9b1bb 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_object.c
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c
> @@ -73,6 +73,8 @@ void i915_gem_object_init(struct drm_i915_gem_object *obj,
>         obj->mm.madv = I915_MADV_WILLNEED;
>         INIT_RADIX_TREE(&obj->mm.get_page.radix, GFP_KERNEL | __GFP_NOWARN);
>         mutex_init(&obj->mm.get_page.lock);
> +       INIT_RADIX_TREE(&obj->mm.get_dma_page.radix, GFP_KERNEL | __GFP_NOWARN);
> +       mutex_init(&obj->mm.get_dma_page.lock);
>  
>         if (IS_ENABLED(CONFIG_LOCKDEP) && i915_gem_object_is_shrinkable(obj))
>                 i915_gem_shrinker_taints_mutex(to_i915(obj->base.dev),
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h b/drivers/gpu/drm/i915/gem/i915_gem_object.h
> index d46db8d8f38e..44c6910e2669 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_object.h
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h
> @@ -275,8 +275,26 @@ int i915_gem_object_set_tiling(struct drm_i915_gem_object *obj,
>                                unsigned int tiling, unsigned int stride);
>  
>  struct scatterlist *
> +__i915_gem_object_get_sg(struct drm_i915_gem_object *obj,
> +                        struct i915_gem_object_page_iter *iter,
> +                        unsigned int n,
> +                        unsigned int *offset);
> +
> +static inline struct scatterlist *
>  i915_gem_object_get_sg(struct drm_i915_gem_object *obj,
> -                      unsigned int n, unsigned int *offset);
> +                      unsigned int n,
> +                      unsigned int *offset)
> +{
> +       return __i915_gem_object_get_sg(obj, &obj->mm.get_page, n, offset);
> +}

I wonder if get_sg_phys() is worth it to make it completely clear the
difference between it and get_sg_dma() (and .get_phys_page?) ?

> +
> +static inline struct scatterlist *
> +i915_gem_object_get_sg_dma(struct drm_i915_gem_object *obj,
> +                          unsigned int n,
> +                          unsigned int *offset)
> +{
> +       return __i915_gem_object_get_sg(obj, &obj->mm.get_dma_page, n, offset);
> +}
>  
>  struct page *
>  i915_gem_object_get_page(struct drm_i915_gem_object *obj,
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
> index b5c15557cc87..fedfebf13344 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
> @@ -80,6 +80,14 @@ struct i915_mmap_offset {
>         struct rb_node offset;
>  };
>  
> +struct i915_gem_object_page_iter {
> +       struct scatterlist *sg_pos;
> +       unsigned int sg_idx; /* in pages, but 32bit eek! */
> +
> +       struct radix_tree_root radix;
> +       struct mutex lock; /* protects this cache */
> +};

All alternatives to trying to avoid a second random lookup were
squashed, it really is two lists within one scatterlist and we do use
both page/dma lookups in non-trivial ways.

> +
>  struct drm_i915_gem_object {
>         struct drm_gem_object base;
>  
> @@ -246,13 +254,8 @@ struct drm_i915_gem_object {
>  
>                 I915_SELFTEST_DECLARE(unsigned int page_mask);
>  
> -               struct i915_gem_object_page_iter {
> -                       struct scatterlist *sg_pos;
> -                       unsigned int sg_idx; /* in pages, but 32bit eek! */
> -
> -                       struct radix_tree_root radix;
> -                       struct mutex lock; /* protects this cache */
> -               } get_page;
> +               struct i915_gem_object_page_iter get_page;
> +               struct i915_gem_object_page_iter get_dma_page;
>  
>                 /**
>                  * Element within i915->mm.unbound_list or i915->mm.bound_list,
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pages.c b/drivers/gpu/drm/i915/gem/i915_gem_pages.c
> index e8a083743e09..04a3c1233f80 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_pages.c
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_pages.c
> @@ -33,6 +33,8 @@ void __i915_gem_object_set_pages(struct drm_i915_gem_object *obj,
>  
>         obj->mm.get_page.sg_pos = pages->sgl;
>         obj->mm.get_page.sg_idx = 0;
> +       obj->mm.get_dma_page.sg_pos = pages->sgl;
> +       obj->mm.get_dma_page.sg_idx = 0;
>  
>         obj->mm.pages = pages;
>  
> @@ -155,6 +157,8 @@ static void __i915_gem_object_reset_page_iter(struct drm_i915_gem_object *obj)
>         rcu_read_lock();
>         radix_tree_for_each_slot(slot, &obj->mm.get_page.radix, &iter, 0)
>                 radix_tree_delete(&obj->mm.get_page.radix, iter.index);
> +       radix_tree_for_each_slot(slot, &obj->mm.get_dma_page.radix, &iter, 0)
> +               radix_tree_delete(&obj->mm.get_dma_page.radix, iter.index);
>         rcu_read_unlock();
>  }
>  
> @@ -424,11 +428,12 @@ void __i915_gem_object_release_map(struct drm_i915_gem_object *obj)
>  }
>  
>  struct scatterlist *
> -i915_gem_object_get_sg(struct drm_i915_gem_object *obj,
> -                      unsigned int n,
> -                      unsigned int *offset)
> +__i915_gem_object_get_sg(struct drm_i915_gem_object *obj,
> +                        struct i915_gem_object_page_iter *iter,
> +                        unsigned int n,
> +                        unsigned int *offset)
>  {
> -       struct i915_gem_object_page_iter *iter = &obj->mm.get_page;
> +       const bool dma = iter == &obj->mm.get_dma_page;
>         struct scatterlist *sg;
>         unsigned int idx, count;
>  
> @@ -457,7 +462,7 @@ i915_gem_object_get_sg(struct drm_i915_gem_object *obj,
>  
>         sg = iter->sg_pos;
>         idx = iter->sg_idx;
> -       count = __sg_page_count(sg);
> +       count = dma ? __sg_dma_page_count(sg) : __sg_page_count(sg);
>  
>         while (idx + count <= n) {
>                 void *entry;
> @@ -485,7 +490,7 @@ i915_gem_object_get_sg(struct drm_i915_gem_object *obj,
>  
>                 idx += count;
>                 sg = ____sg_next(sg);
> -               count = __sg_page_count(sg);
> +               count = dma ? __sg_dma_page_count(sg) : __sg_page_count(sg);
>         }
>  
>  scan:
> @@ -503,7 +508,7 @@ i915_gem_object_get_sg(struct drm_i915_gem_object *obj,
>         while (idx + count <= n) {
>                 idx += count;
>                 sg = ____sg_next(sg);
> -               count = __sg_page_count(sg);
> +               count = dma ? __sg_dma_page_count(sg) : __sg_page_count(sg);

Hmm. So for a coalesced dma entry, we must therefore end up with some
entries where the sg_dma_length is 0.

We then insert multiple sg for the same idx into the radix tree, causing
it to return an error, -EEXIST. We eat such errors and so overwrite the
empty entry with the final sg that actually has a valid length.

Ok. Looks like get_sg already handles zero length elements and you
caught all 3 __sg_page_count().

>         }
>  
>         *offset = n - idx;
> @@ -570,7 +575,7 @@ i915_gem_object_get_dma_address_len(struct drm_i915_gem_object *obj,
>         struct scatterlist *sg;
>         unsigned int offset;
>  
> -       sg = i915_gem_object_get_sg(obj, n, &offset);
> +       sg = i915_gem_object_get_sg_dma(obj, n, &offset);
>  
>         if (len)
>                 *len = sg_dma_len(sg) - (offset << PAGE_SHIFT);
> diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c b/drivers/gpu/drm/i915/gt/intel_ggtt.c
> index 81c05f551b9c..95e77d56c1ce 100644
> --- a/drivers/gpu/drm/i915/gt/intel_ggtt.c
> +++ b/drivers/gpu/drm/i915/gt/intel_ggtt.c
> @@ -1383,7 +1383,7 @@ intel_partial_pages(const struct i915_ggtt_view *view,
>         if (ret)
>                 goto err_sg_alloc;
>  
> -       iter = i915_gem_object_get_sg(obj, view->partial.offset, &offset);
> +       iter = i915_gem_object_get_sg_dma(obj, view->partial.offset, &offset);
>         GEM_BUG_ON(!iter);
>  
>         sg = st->sgl;
> @@ -1391,7 +1391,7 @@ intel_partial_pages(const struct i915_ggtt_view *view,
>         do {
>                 unsigned int len;
>  
> -               len = min(iter->length - (offset << PAGE_SHIFT),
> +               len = min(sg_dma_len(iter) - (offset << PAGE_SHIFT),
>                           count << PAGE_SHIFT);
>                 sg_set_page(sg, NULL, len, 0);
>                 sg_dma_address(sg) =

I didn't find any other users for get_sg() and this looks to catch all
the fixes required for using sg_dma.

> diff --git a/drivers/gpu/drm/i915/i915_scatterlist.h b/drivers/gpu/drm/i915/i915_scatterlist.h
> index 510856887628..102d8d7007b6 100644
> --- a/drivers/gpu/drm/i915/i915_scatterlist.h
> +++ b/drivers/gpu/drm/i915/i915_scatterlist.h
> @@ -48,6 +48,11 @@ static inline int __sg_page_count(const struct scatterlist *sg)
>         return sg->length >> PAGE_SHIFT;
>  }
>  
> +static inline int __sg_dma_page_count(const struct scatterlist *sg)
> +{
> +       return sg_dma_len(sg) >> PAGE_SHIFT;
> +}

Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>

Do we need cc:stable?
-Chris
Tvrtko Ursulin Oct. 6, 2020, 9:27 a.m. UTC | #2
On 15/09/2020 09:49, Chris Wilson wrote:
> Quoting Tvrtko Ursulin (2020-09-10 15:50:18)
>> From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>>
>> As the previous patch fixed the places where we walk the whole scatterlist
>> for DMA addresses, this patch fixes the random lookup functionality.
>>
>> To achieve this we have to add a second lookup iterator and add a
>> i915_gem_object_get_sg_dma helper, to be used analogous to the existing
>> i915_gem_object_get_sg. Therefore two lookup caches are maintained per
>> object and they are flushed at the same point for simplicity. (Strictly
>> speaking the DMA cache should be flushed from i915_gem_gtt_finish_pages,
>> but today this coincides with unsetting of the pages in general.)
>>
>> Partial VMA view is then fixed to use the new DMA lookup and properly
>> query sg length.
>>
>> v2:
>>   * Checkpatch.
>>
>> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
>> Cc: Chris Wilson <chris@chris-wilson.co.uk>
>> Cc: Matthew Auld <matthew.auld@intel.com>
>> Cc: Lu Baolu <baolu.lu@linux.intel.com>
>> Cc: Tom Murphy <murphyt7@tcd.ie>
>> Cc: Logan Gunthorpe <logang@deltatee.com>
>> ---
>>   drivers/gpu/drm/i915/gem/i915_gem_object.c    |  2 ++
>>   drivers/gpu/drm/i915/gem/i915_gem_object.h    | 20 +++++++++++++++++-
>>   .../gpu/drm/i915/gem/i915_gem_object_types.h  | 17 ++++++++-------
>>   drivers/gpu/drm/i915/gem/i915_gem_pages.c     | 21 ++++++++++++-------
>>   drivers/gpu/drm/i915/gt/intel_ggtt.c          |  4 ++--
>>   drivers/gpu/drm/i915/i915_scatterlist.h       |  5 +++++
>>   6 files changed, 51 insertions(+), 18 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c b/drivers/gpu/drm/i915/gem/i915_gem_object.c
>> index c8421fd9d2dc..ffeaf1b9b1bb 100644
>> --- a/drivers/gpu/drm/i915/gem/i915_gem_object.c
>> +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c
>> @@ -73,6 +73,8 @@ void i915_gem_object_init(struct drm_i915_gem_object *obj,
>>          obj->mm.madv = I915_MADV_WILLNEED;
>>          INIT_RADIX_TREE(&obj->mm.get_page.radix, GFP_KERNEL | __GFP_NOWARN);
>>          mutex_init(&obj->mm.get_page.lock);
>> +       INIT_RADIX_TREE(&obj->mm.get_dma_page.radix, GFP_KERNEL | __GFP_NOWARN);
>> +       mutex_init(&obj->mm.get_dma_page.lock);
>>   
>>          if (IS_ENABLED(CONFIG_LOCKDEP) && i915_gem_object_is_shrinkable(obj))
>>                  i915_gem_shrinker_taints_mutex(to_i915(obj->base.dev),
>> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h b/drivers/gpu/drm/i915/gem/i915_gem_object.h
>> index d46db8d8f38e..44c6910e2669 100644
>> --- a/drivers/gpu/drm/i915/gem/i915_gem_object.h
>> +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h
>> @@ -275,8 +275,26 @@ int i915_gem_object_set_tiling(struct drm_i915_gem_object *obj,
>>                                 unsigned int tiling, unsigned int stride);
>>   
>>   struct scatterlist *
>> +__i915_gem_object_get_sg(struct drm_i915_gem_object *obj,
>> +                        struct i915_gem_object_page_iter *iter,
>> +                        unsigned int n,
>> +                        unsigned int *offset);
>> +
>> +static inline struct scatterlist *
>>   i915_gem_object_get_sg(struct drm_i915_gem_object *obj,
>> -                      unsigned int n, unsigned int *offset);
>> +                      unsigned int n,
>> +                      unsigned int *offset)
>> +{
>> +       return __i915_gem_object_get_sg(obj, &obj->mm.get_page, n, offset);
>> +}
> 
> I wonder if get_sg_phys() is worth it to make it completely clear the
> difference between it and get_sg_dma() (and .get_phys_page?) ?
> 
>> +
>> +static inline struct scatterlist *
>> +i915_gem_object_get_sg_dma(struct drm_i915_gem_object *obj,
>> +                          unsigned int n,
>> +                          unsigned int *offset)
>> +{
>> +       return __i915_gem_object_get_sg(obj, &obj->mm.get_dma_page, n, offset);
>> +}
>>   
>>   struct page *
>>   i915_gem_object_get_page(struct drm_i915_gem_object *obj,
>> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
>> index b5c15557cc87..fedfebf13344 100644
>> --- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
>> +++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
>> @@ -80,6 +80,14 @@ struct i915_mmap_offset {
>>          struct rb_node offset;
>>   };
>>   
>> +struct i915_gem_object_page_iter {
>> +       struct scatterlist *sg_pos;
>> +       unsigned int sg_idx; /* in pages, but 32bit eek! */
>> +
>> +       struct radix_tree_root radix;
>> +       struct mutex lock; /* protects this cache */
>> +};
> 
> All alternatives to trying to avoid a second random lookup were
> squashed, it really is two lists within one scatterlist and we do use
> both page/dma lookups in non-trivial ways.
> 
>> +
>>   struct drm_i915_gem_object {
>>          struct drm_gem_object base;
>>   
>> @@ -246,13 +254,8 @@ struct drm_i915_gem_object {
>>   
>>                  I915_SELFTEST_DECLARE(unsigned int page_mask);
>>   
>> -               struct i915_gem_object_page_iter {
>> -                       struct scatterlist *sg_pos;
>> -                       unsigned int sg_idx; /* in pages, but 32bit eek! */
>> -
>> -                       struct radix_tree_root radix;
>> -                       struct mutex lock; /* protects this cache */
>> -               } get_page;
>> +               struct i915_gem_object_page_iter get_page;
>> +               struct i915_gem_object_page_iter get_dma_page;
>>   
>>                  /**
>>                   * Element within i915->mm.unbound_list or i915->mm.bound_list,
>> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pages.c b/drivers/gpu/drm/i915/gem/i915_gem_pages.c
>> index e8a083743e09..04a3c1233f80 100644
>> --- a/drivers/gpu/drm/i915/gem/i915_gem_pages.c
>> +++ b/drivers/gpu/drm/i915/gem/i915_gem_pages.c
>> @@ -33,6 +33,8 @@ void __i915_gem_object_set_pages(struct drm_i915_gem_object *obj,
>>   
>>          obj->mm.get_page.sg_pos = pages->sgl;
>>          obj->mm.get_page.sg_idx = 0;
>> +       obj->mm.get_dma_page.sg_pos = pages->sgl;
>> +       obj->mm.get_dma_page.sg_idx = 0;
>>   
>>          obj->mm.pages = pages;
>>   
>> @@ -155,6 +157,8 @@ static void __i915_gem_object_reset_page_iter(struct drm_i915_gem_object *obj)
>>          rcu_read_lock();
>>          radix_tree_for_each_slot(slot, &obj->mm.get_page.radix, &iter, 0)
>>                  radix_tree_delete(&obj->mm.get_page.radix, iter.index);
>> +       radix_tree_for_each_slot(slot, &obj->mm.get_dma_page.radix, &iter, 0)
>> +               radix_tree_delete(&obj->mm.get_dma_page.radix, iter.index);
>>          rcu_read_unlock();
>>   }
>>   
>> @@ -424,11 +428,12 @@ void __i915_gem_object_release_map(struct drm_i915_gem_object *obj)
>>   }
>>   
>>   struct scatterlist *
>> -i915_gem_object_get_sg(struct drm_i915_gem_object *obj,
>> -                      unsigned int n,
>> -                      unsigned int *offset)
>> +__i915_gem_object_get_sg(struct drm_i915_gem_object *obj,
>> +                        struct i915_gem_object_page_iter *iter,
>> +                        unsigned int n,
>> +                        unsigned int *offset)
>>   {
>> -       struct i915_gem_object_page_iter *iter = &obj->mm.get_page;
>> +       const bool dma = iter == &obj->mm.get_dma_page;
>>          struct scatterlist *sg;
>>          unsigned int idx, count;
>>   
>> @@ -457,7 +462,7 @@ i915_gem_object_get_sg(struct drm_i915_gem_object *obj,
>>   
>>          sg = iter->sg_pos;
>>          idx = iter->sg_idx;
>> -       count = __sg_page_count(sg);
>> +       count = dma ? __sg_dma_page_count(sg) : __sg_page_count(sg);
>>   
>>          while (idx + count <= n) {
>>                  void *entry;
>> @@ -485,7 +490,7 @@ i915_gem_object_get_sg(struct drm_i915_gem_object *obj,
>>   
>>                  idx += count;
>>                  sg = ____sg_next(sg);
>> -               count = __sg_page_count(sg);
>> +               count = dma ? __sg_dma_page_count(sg) : __sg_page_count(sg);
>>          }
>>   
>>   scan:
>> @@ -503,7 +508,7 @@ i915_gem_object_get_sg(struct drm_i915_gem_object *obj,
>>          while (idx + count <= n) {
>>                  idx += count;
>>                  sg = ____sg_next(sg);
>> -               count = __sg_page_count(sg);
>> +               count = dma ? __sg_dma_page_count(sg) : __sg_page_count(sg);
> 
> Hmm. So for a coalesced dma entry, we must therefore end up with some
> entries where the sg_dma_length is 0.
> 
> We then insert multiple sg for the same idx into the radix tree, causing
> it to return an error, -EEXIST. We eat such errors and so overwrite the
> empty entry with the final sg that actually has a valid length.
> 
> Ok. Looks like get_sg already handles zero length elements and you
> caught all 3 __sg_page_count().
> 
>>          }
>>   
>>          *offset = n - idx;
>> @@ -570,7 +575,7 @@ i915_gem_object_get_dma_address_len(struct drm_i915_gem_object *obj,
>>          struct scatterlist *sg;
>>          unsigned int offset;
>>   
>> -       sg = i915_gem_object_get_sg(obj, n, &offset);
>> +       sg = i915_gem_object_get_sg_dma(obj, n, &offset);
>>   
>>          if (len)
>>                  *len = sg_dma_len(sg) - (offset << PAGE_SHIFT);
>> diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c b/drivers/gpu/drm/i915/gt/intel_ggtt.c
>> index 81c05f551b9c..95e77d56c1ce 100644
>> --- a/drivers/gpu/drm/i915/gt/intel_ggtt.c
>> +++ b/drivers/gpu/drm/i915/gt/intel_ggtt.c
>> @@ -1383,7 +1383,7 @@ intel_partial_pages(const struct i915_ggtt_view *view,
>>          if (ret)
>>                  goto err_sg_alloc;
>>   
>> -       iter = i915_gem_object_get_sg(obj, view->partial.offset, &offset);
>> +       iter = i915_gem_object_get_sg_dma(obj, view->partial.offset, &offset);
>>          GEM_BUG_ON(!iter);
>>   
>>          sg = st->sgl;
>> @@ -1391,7 +1391,7 @@ intel_partial_pages(const struct i915_ggtt_view *view,
>>          do {
>>                  unsigned int len;
>>   
>> -               len = min(iter->length - (offset << PAGE_SHIFT),
>> +               len = min(sg_dma_len(iter) - (offset << PAGE_SHIFT),
>>                            count << PAGE_SHIFT);
>>                  sg_set_page(sg, NULL, len, 0);
>>                  sg_dma_address(sg) =
> 
> I didn't find any other users for get_sg() and this looks to catch all
> the fixes required for using sg_dma.
> 
>> diff --git a/drivers/gpu/drm/i915/i915_scatterlist.h b/drivers/gpu/drm/i915/i915_scatterlist.h
>> index 510856887628..102d8d7007b6 100644
>> --- a/drivers/gpu/drm/i915/i915_scatterlist.h
>> +++ b/drivers/gpu/drm/i915/i915_scatterlist.h
>> @@ -48,6 +48,11 @@ static inline int __sg_page_count(const struct scatterlist *sg)
>>          return sg->length >> PAGE_SHIFT;
>>   }
>>   
>> +static inline int __sg_dma_page_count(const struct scatterlist *sg)
>> +{
>> +       return sg_dma_len(sg) >> PAGE_SHIFT;
>> +}
> 
> Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>

Thanks!

> Do we need cc:stable?

Probably not given how this oversight only gets exposed once the Intel 
IOMMU dma-api refactoring work lands.

Regards,

Tvrtko
diff mbox series

Patch

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c b/drivers/gpu/drm/i915/gem/i915_gem_object.c
index c8421fd9d2dc..ffeaf1b9b1bb 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c
@@ -73,6 +73,8 @@  void i915_gem_object_init(struct drm_i915_gem_object *obj,
 	obj->mm.madv = I915_MADV_WILLNEED;
 	INIT_RADIX_TREE(&obj->mm.get_page.radix, GFP_KERNEL | __GFP_NOWARN);
 	mutex_init(&obj->mm.get_page.lock);
+	INIT_RADIX_TREE(&obj->mm.get_dma_page.radix, GFP_KERNEL | __GFP_NOWARN);
+	mutex_init(&obj->mm.get_dma_page.lock);
 
 	if (IS_ENABLED(CONFIG_LOCKDEP) && i915_gem_object_is_shrinkable(obj))
 		i915_gem_shrinker_taints_mutex(to_i915(obj->base.dev),
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h b/drivers/gpu/drm/i915/gem/i915_gem_object.h
index d46db8d8f38e..44c6910e2669 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h
@@ -275,8 +275,26 @@  int i915_gem_object_set_tiling(struct drm_i915_gem_object *obj,
 			       unsigned int tiling, unsigned int stride);
 
 struct scatterlist *
+__i915_gem_object_get_sg(struct drm_i915_gem_object *obj,
+			 struct i915_gem_object_page_iter *iter,
+			 unsigned int n,
+			 unsigned int *offset);
+
+static inline struct scatterlist *
 i915_gem_object_get_sg(struct drm_i915_gem_object *obj,
-		       unsigned int n, unsigned int *offset);
+		       unsigned int n,
+		       unsigned int *offset)
+{
+	return __i915_gem_object_get_sg(obj, &obj->mm.get_page, n, offset);
+}
+
+static inline struct scatterlist *
+i915_gem_object_get_sg_dma(struct drm_i915_gem_object *obj,
+			   unsigned int n,
+			   unsigned int *offset)
+{
+	return __i915_gem_object_get_sg(obj, &obj->mm.get_dma_page, n, offset);
+}
 
 struct page *
 i915_gem_object_get_page(struct drm_i915_gem_object *obj,
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
index b5c15557cc87..fedfebf13344 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
@@ -80,6 +80,14 @@  struct i915_mmap_offset {
 	struct rb_node offset;
 };
 
+struct i915_gem_object_page_iter {
+	struct scatterlist *sg_pos;
+	unsigned int sg_idx; /* in pages, but 32bit eek! */
+
+	struct radix_tree_root radix;
+	struct mutex lock; /* protects this cache */
+};
+
 struct drm_i915_gem_object {
 	struct drm_gem_object base;
 
@@ -246,13 +254,8 @@  struct drm_i915_gem_object {
 
 		I915_SELFTEST_DECLARE(unsigned int page_mask);
 
-		struct i915_gem_object_page_iter {
-			struct scatterlist *sg_pos;
-			unsigned int sg_idx; /* in pages, but 32bit eek! */
-
-			struct radix_tree_root radix;
-			struct mutex lock; /* protects this cache */
-		} get_page;
+		struct i915_gem_object_page_iter get_page;
+		struct i915_gem_object_page_iter get_dma_page;
 
 		/**
 		 * Element within i915->mm.unbound_list or i915->mm.bound_list,
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pages.c b/drivers/gpu/drm/i915/gem/i915_gem_pages.c
index e8a083743e09..04a3c1233f80 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_pages.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_pages.c
@@ -33,6 +33,8 @@  void __i915_gem_object_set_pages(struct drm_i915_gem_object *obj,
 
 	obj->mm.get_page.sg_pos = pages->sgl;
 	obj->mm.get_page.sg_idx = 0;
+	obj->mm.get_dma_page.sg_pos = pages->sgl;
+	obj->mm.get_dma_page.sg_idx = 0;
 
 	obj->mm.pages = pages;
 
@@ -155,6 +157,8 @@  static void __i915_gem_object_reset_page_iter(struct drm_i915_gem_object *obj)
 	rcu_read_lock();
 	radix_tree_for_each_slot(slot, &obj->mm.get_page.radix, &iter, 0)
 		radix_tree_delete(&obj->mm.get_page.radix, iter.index);
+	radix_tree_for_each_slot(slot, &obj->mm.get_dma_page.radix, &iter, 0)
+		radix_tree_delete(&obj->mm.get_dma_page.radix, iter.index);
 	rcu_read_unlock();
 }
 
@@ -424,11 +428,12 @@  void __i915_gem_object_release_map(struct drm_i915_gem_object *obj)
 }
 
 struct scatterlist *
-i915_gem_object_get_sg(struct drm_i915_gem_object *obj,
-		       unsigned int n,
-		       unsigned int *offset)
+__i915_gem_object_get_sg(struct drm_i915_gem_object *obj,
+			 struct i915_gem_object_page_iter *iter,
+			 unsigned int n,
+			 unsigned int *offset)
 {
-	struct i915_gem_object_page_iter *iter = &obj->mm.get_page;
+	const bool dma = iter == &obj->mm.get_dma_page;
 	struct scatterlist *sg;
 	unsigned int idx, count;
 
@@ -457,7 +462,7 @@  i915_gem_object_get_sg(struct drm_i915_gem_object *obj,
 
 	sg = iter->sg_pos;
 	idx = iter->sg_idx;
-	count = __sg_page_count(sg);
+	count = dma ? __sg_dma_page_count(sg) : __sg_page_count(sg);
 
 	while (idx + count <= n) {
 		void *entry;
@@ -485,7 +490,7 @@  i915_gem_object_get_sg(struct drm_i915_gem_object *obj,
 
 		idx += count;
 		sg = ____sg_next(sg);
-		count = __sg_page_count(sg);
+		count = dma ? __sg_dma_page_count(sg) : __sg_page_count(sg);
 	}
 
 scan:
@@ -503,7 +508,7 @@  i915_gem_object_get_sg(struct drm_i915_gem_object *obj,
 	while (idx + count <= n) {
 		idx += count;
 		sg = ____sg_next(sg);
-		count = __sg_page_count(sg);
+		count = dma ? __sg_dma_page_count(sg) : __sg_page_count(sg);
 	}
 
 	*offset = n - idx;
@@ -570,7 +575,7 @@  i915_gem_object_get_dma_address_len(struct drm_i915_gem_object *obj,
 	struct scatterlist *sg;
 	unsigned int offset;
 
-	sg = i915_gem_object_get_sg(obj, n, &offset);
+	sg = i915_gem_object_get_sg_dma(obj, n, &offset);
 
 	if (len)
 		*len = sg_dma_len(sg) - (offset << PAGE_SHIFT);
diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c b/drivers/gpu/drm/i915/gt/intel_ggtt.c
index 81c05f551b9c..95e77d56c1ce 100644
--- a/drivers/gpu/drm/i915/gt/intel_ggtt.c
+++ b/drivers/gpu/drm/i915/gt/intel_ggtt.c
@@ -1383,7 +1383,7 @@  intel_partial_pages(const struct i915_ggtt_view *view,
 	if (ret)
 		goto err_sg_alloc;
 
-	iter = i915_gem_object_get_sg(obj, view->partial.offset, &offset);
+	iter = i915_gem_object_get_sg_dma(obj, view->partial.offset, &offset);
 	GEM_BUG_ON(!iter);
 
 	sg = st->sgl;
@@ -1391,7 +1391,7 @@  intel_partial_pages(const struct i915_ggtt_view *view,
 	do {
 		unsigned int len;
 
-		len = min(iter->length - (offset << PAGE_SHIFT),
+		len = min(sg_dma_len(iter) - (offset << PAGE_SHIFT),
 			  count << PAGE_SHIFT);
 		sg_set_page(sg, NULL, len, 0);
 		sg_dma_address(sg) =
diff --git a/drivers/gpu/drm/i915/i915_scatterlist.h b/drivers/gpu/drm/i915/i915_scatterlist.h
index 510856887628..102d8d7007b6 100644
--- a/drivers/gpu/drm/i915/i915_scatterlist.h
+++ b/drivers/gpu/drm/i915/i915_scatterlist.h
@@ -48,6 +48,11 @@  static inline int __sg_page_count(const struct scatterlist *sg)
 	return sg->length >> PAGE_SHIFT;
 }
 
+static inline int __sg_dma_page_count(const struct scatterlist *sg)
+{
+	return sg_dma_len(sg) >> PAGE_SHIFT;
+}
+
 static inline struct scatterlist *____sg_next(struct scatterlist *sg)
 {
 	++sg;