Message ID | 20200910145018.408983-1-tvrtko.ursulin@linux.intel.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | [v2] drm/i915: Fix DMA mapped scatterlist lookup | expand |
Quoting Tvrtko Ursulin (2020-09-10 15:50:18) > From: Tvrtko Ursulin <tvrtko.ursulin@intel.com> > > As the previous patch fixed the places where we walk the whole scatterlist > for DMA addresses, this patch fixes the random lookup functionality. > > To achieve this we have to add a second lookup iterator and add a > i915_gem_object_get_sg_dma helper, to be used analogous to existing > i915_gem_object_get_sg. Therefore two lookup caches are maintained per > object and they are flushed at the same point for simplicity. (Strictly > speaking the DMA cache should be flushed from i915_gem_gtt_finish_pages, > but today this coincides with unsetting of the pages in general.) > > Partial VMA view is then fixed to use the new DMA lookup and properly > query sg length. > > v2: > * Checkpatch. > > Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com> > Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com> > Cc: Chris Wilson <chris@chris-wilson.co.uk> > Cc: Matthew Auld <matthew.auld@intel.com> > Cc: Lu Baolu <baolu.lu@linux.intel.com> > Cc: Tom Murphy <murphyt7@tcd.ie> > Cc: Logan Gunthorpe <logang@deltatee.com> > --- > drivers/gpu/drm/i915/gem/i915_gem_object.c | 2 ++ > drivers/gpu/drm/i915/gem/i915_gem_object.h | 20 +++++++++++++++++- > .../gpu/drm/i915/gem/i915_gem_object_types.h | 17 ++++++++------- > drivers/gpu/drm/i915/gem/i915_gem_pages.c | 21 ++++++++++++------- > drivers/gpu/drm/i915/gt/intel_ggtt.c | 4 ++-- > drivers/gpu/drm/i915/i915_scatterlist.h | 5 +++++ > 6 files changed, 51 insertions(+), 18 deletions(-) > > diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c b/drivers/gpu/drm/i915/gem/i915_gem_object.c > index c8421fd9d2dc..ffeaf1b9b1bb 100644 > --- a/drivers/gpu/drm/i915/gem/i915_gem_object.c > +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c > @@ -73,6 +73,8 @@ void i915_gem_object_init(struct drm_i915_gem_object *obj, > obj->mm.madv = I915_MADV_WILLNEED; > INIT_RADIX_TREE(&obj->mm.get_page.radix, GFP_KERNEL | __GFP_NOWARN); > 
mutex_init(&obj->mm.get_page.lock); > + INIT_RADIX_TREE(&obj->mm.get_dma_page.radix, GFP_KERNEL | __GFP_NOWARN); > + mutex_init(&obj->mm.get_dma_page.lock); > > if (IS_ENABLED(CONFIG_LOCKDEP) && i915_gem_object_is_shrinkable(obj)) > i915_gem_shrinker_taints_mutex(to_i915(obj->base.dev), > diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h b/drivers/gpu/drm/i915/gem/i915_gem_object.h > index d46db8d8f38e..44c6910e2669 100644 > --- a/drivers/gpu/drm/i915/gem/i915_gem_object.h > +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h > @@ -275,8 +275,26 @@ int i915_gem_object_set_tiling(struct drm_i915_gem_object *obj, > unsigned int tiling, unsigned int stride); > > struct scatterlist * > +__i915_gem_object_get_sg(struct drm_i915_gem_object *obj, > + struct i915_gem_object_page_iter *iter, > + unsigned int n, > + unsigned int *offset); > + > +static inline struct scatterlist * > i915_gem_object_get_sg(struct drm_i915_gem_object *obj, > - unsigned int n, unsigned int *offset); > + unsigned int n, > + unsigned int *offset) > +{ > + return __i915_gem_object_get_sg(obj, &obj->mm.get_page, n, offset); > +} I wonder if get_sg_phys() is worth it to make it completely clear the difference between it and get_sg_dma() (and .get_phys_page?) ? 
> + > +static inline struct scatterlist * > +i915_gem_object_get_sg_dma(struct drm_i915_gem_object *obj, > + unsigned int n, > + unsigned int *offset) > +{ > + return __i915_gem_object_get_sg(obj, &obj->mm.get_dma_page, n, offset); > +} > > struct page * > i915_gem_object_get_page(struct drm_i915_gem_object *obj, > diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h > index b5c15557cc87..fedfebf13344 100644 > --- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h > +++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h > @@ -80,6 +80,14 @@ struct i915_mmap_offset { > struct rb_node offset; > }; > > +struct i915_gem_object_page_iter { > + struct scatterlist *sg_pos; > + unsigned int sg_idx; /* in pages, but 32bit eek! */ > + > + struct radix_tree_root radix; > + struct mutex lock; /* protects this cache */ > +}; All alternatives to trying to avoid a second random lookup were squashed, it really is two lists within one scatterlist and we do use both page/dma lookups in non-trivial ways. > + > struct drm_i915_gem_object { > struct drm_gem_object base; > > @@ -246,13 +254,8 @@ struct drm_i915_gem_object { > > I915_SELFTEST_DECLARE(unsigned int page_mask); > > - struct i915_gem_object_page_iter { > - struct scatterlist *sg_pos; > - unsigned int sg_idx; /* in pages, but 32bit eek! 
*/ > - > - struct radix_tree_root radix; > - struct mutex lock; /* protects this cache */ > - } get_page; > + struct i915_gem_object_page_iter get_page; > + struct i915_gem_object_page_iter get_dma_page; > > /** > * Element within i915->mm.unbound_list or i915->mm.bound_list, > diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pages.c b/drivers/gpu/drm/i915/gem/i915_gem_pages.c > index e8a083743e09..04a3c1233f80 100644 > --- a/drivers/gpu/drm/i915/gem/i915_gem_pages.c > +++ b/drivers/gpu/drm/i915/gem/i915_gem_pages.c > @@ -33,6 +33,8 @@ void __i915_gem_object_set_pages(struct drm_i915_gem_object *obj, > > obj->mm.get_page.sg_pos = pages->sgl; > obj->mm.get_page.sg_idx = 0; > + obj->mm.get_dma_page.sg_pos = pages->sgl; > + obj->mm.get_dma_page.sg_idx = 0; > > obj->mm.pages = pages; > > @@ -155,6 +157,8 @@ static void __i915_gem_object_reset_page_iter(struct drm_i915_gem_object *obj) > rcu_read_lock(); > radix_tree_for_each_slot(slot, &obj->mm.get_page.radix, &iter, 0) > radix_tree_delete(&obj->mm.get_page.radix, iter.index); > + radix_tree_for_each_slot(slot, &obj->mm.get_dma_page.radix, &iter, 0) > + radix_tree_delete(&obj->mm.get_dma_page.radix, iter.index); > rcu_read_unlock(); > } > > @@ -424,11 +428,12 @@ void __i915_gem_object_release_map(struct drm_i915_gem_object *obj) > } > > struct scatterlist * > -i915_gem_object_get_sg(struct drm_i915_gem_object *obj, > - unsigned int n, > - unsigned int *offset) > +__i915_gem_object_get_sg(struct drm_i915_gem_object *obj, > + struct i915_gem_object_page_iter *iter, > + unsigned int n, > + unsigned int *offset) > { > - struct i915_gem_object_page_iter *iter = &obj->mm.get_page; > + const bool dma = iter == &obj->mm.get_dma_page; > struct scatterlist *sg; > unsigned int idx, count; > > @@ -457,7 +462,7 @@ i915_gem_object_get_sg(struct drm_i915_gem_object *obj, > > sg = iter->sg_pos; > idx = iter->sg_idx; > - count = __sg_page_count(sg); > + count = dma ? 
__sg_dma_page_count(sg) : __sg_page_count(sg); > > while (idx + count <= n) { > void *entry; > @@ -485,7 +490,7 @@ i915_gem_object_get_sg(struct drm_i915_gem_object *obj, > > idx += count; > sg = ____sg_next(sg); > - count = __sg_page_count(sg); > + count = dma ? __sg_dma_page_count(sg) : __sg_page_count(sg); > } > > scan: > @@ -503,7 +508,7 @@ i915_gem_object_get_sg(struct drm_i915_gem_object *obj, > while (idx + count <= n) { > idx += count; > sg = ____sg_next(sg); > - count = __sg_page_count(sg); > + count = dma ? __sg_dma_page_count(sg) : __sg_page_count(sg); Hmm. So for a coalesced dma entry, we must therefore end up with some entries where the sg_dma_length is 0. We then insert multiple sg for the same idx into the radix tree, causing it to return an error, -EEXIST. We eat such errors and so overwrite the empty entry with the final sg that actually has a valid length. Ok. Looks like get_sg already handles zero length elements and you caught all 3 __sg_page_count(). > } > > *offset = n - idx; > @@ -570,7 +575,7 @@ i915_gem_object_get_dma_address_len(struct drm_i915_gem_object *obj, > struct scatterlist *sg; > unsigned int offset; > > - sg = i915_gem_object_get_sg(obj, n, &offset); > + sg = i915_gem_object_get_sg_dma(obj, n, &offset); > > if (len) > *len = sg_dma_len(sg) - (offset << PAGE_SHIFT); > diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c b/drivers/gpu/drm/i915/gt/intel_ggtt.c > index 81c05f551b9c..95e77d56c1ce 100644 > --- a/drivers/gpu/drm/i915/gt/intel_ggtt.c > +++ b/drivers/gpu/drm/i915/gt/intel_ggtt.c > @@ -1383,7 +1383,7 @@ intel_partial_pages(const struct i915_ggtt_view *view, > if (ret) > goto err_sg_alloc; > > - iter = i915_gem_object_get_sg(obj, view->partial.offset, &offset); > + iter = i915_gem_object_get_sg_dma(obj, view->partial.offset, &offset); > GEM_BUG_ON(!iter); > > sg = st->sgl; > @@ -1391,7 +1391,7 @@ intel_partial_pages(const struct i915_ggtt_view *view, > do { > unsigned int len; > > - len = min(iter->length - (offset << 
PAGE_SHIFT), > + len = min(sg_dma_len(iter) - (offset << PAGE_SHIFT), > count << PAGE_SHIFT); > sg_set_page(sg, NULL, len, 0); > sg_dma_address(sg) = I didn't find any other users for get_sg() and this looks to catch all the fixes required for using sg_dma. > diff --git a/drivers/gpu/drm/i915/i915_scatterlist.h b/drivers/gpu/drm/i915/i915_scatterlist.h > index 510856887628..102d8d7007b6 100644 > --- a/drivers/gpu/drm/i915/i915_scatterlist.h > +++ b/drivers/gpu/drm/i915/i915_scatterlist.h > @@ -48,6 +48,11 @@ static inline int __sg_page_count(const struct scatterlist *sg) > return sg->length >> PAGE_SHIFT; > } > > +static inline int __sg_dma_page_count(const struct scatterlist *sg) > +{ > + return sg_dma_len(sg) >> PAGE_SHIFT; > +} Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk> Do we need cc:stable? -Chris
On 15/09/2020 09:49, Chris Wilson wrote: > Quoting Tvrtko Ursulin (2020-09-10 15:50:18) >> From: Tvrtko Ursulin <tvrtko.ursulin@intel.com> >> >> As the previous patch fixed the places where we walk the whole scatterlist >> for DMA addresses, this patch fixes the random lookup functionality. >> >> To achieve this we have to add a second lookup iterator and add a >> i915_gem_object_get_sg_dma helper, to be used analogous to existing >> i915_gem_object_get_sg. Therefore two lookup caches are maintained per >> object and they are flushed at the same point for simplicity. (Strictly >> speaking the DMA cache should be flushed from i915_gem_gtt_finish_pages, >> but today this coincides with unsetting of the pages in general.) >> >> Partial VMA view is then fixed to use the new DMA lookup and properly >> query sg length. >> >> v2: >> * Checkpatch. >> >> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com> >> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com> >> Cc: Chris Wilson <chris@chris-wilson.co.uk> >> Cc: Matthew Auld <matthew.auld@intel.com> >> Cc: Lu Baolu <baolu.lu@linux.intel.com> >> Cc: Tom Murphy <murphyt7@tcd.ie> >> Cc: Logan Gunthorpe <logang@deltatee.com> >> --- >> drivers/gpu/drm/i915/gem/i915_gem_object.c | 2 ++ >> drivers/gpu/drm/i915/gem/i915_gem_object.h | 20 +++++++++++++++++- >> .../gpu/drm/i915/gem/i915_gem_object_types.h | 17 ++++++++------- >> drivers/gpu/drm/i915/gem/i915_gem_pages.c | 21 ++++++++++++------- >> drivers/gpu/drm/i915/gt/intel_ggtt.c | 4 ++-- >> drivers/gpu/drm/i915/i915_scatterlist.h | 5 +++++ >> 6 files changed, 51 insertions(+), 18 deletions(-) >> >> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c b/drivers/gpu/drm/i915/gem/i915_gem_object.c >> index c8421fd9d2dc..ffeaf1b9b1bb 100644 >> --- a/drivers/gpu/drm/i915/gem/i915_gem_object.c >> +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c >> @@ -73,6 +73,8 @@ void i915_gem_object_init(struct drm_i915_gem_object *obj, >> obj->mm.madv = I915_MADV_WILLNEED; >> 
INIT_RADIX_TREE(&obj->mm.get_page.radix, GFP_KERNEL | __GFP_NOWARN); >> mutex_init(&obj->mm.get_page.lock); >> + INIT_RADIX_TREE(&obj->mm.get_dma_page.radix, GFP_KERNEL | __GFP_NOWARN); >> + mutex_init(&obj->mm.get_dma_page.lock); >> >> if (IS_ENABLED(CONFIG_LOCKDEP) && i915_gem_object_is_shrinkable(obj)) >> i915_gem_shrinker_taints_mutex(to_i915(obj->base.dev), >> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h b/drivers/gpu/drm/i915/gem/i915_gem_object.h >> index d46db8d8f38e..44c6910e2669 100644 >> --- a/drivers/gpu/drm/i915/gem/i915_gem_object.h >> +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h >> @@ -275,8 +275,26 @@ int i915_gem_object_set_tiling(struct drm_i915_gem_object *obj, >> unsigned int tiling, unsigned int stride); >> >> struct scatterlist * >> +__i915_gem_object_get_sg(struct drm_i915_gem_object *obj, >> + struct i915_gem_object_page_iter *iter, >> + unsigned int n, >> + unsigned int *offset); >> + >> +static inline struct scatterlist * >> i915_gem_object_get_sg(struct drm_i915_gem_object *obj, >> - unsigned int n, unsigned int *offset); >> + unsigned int n, >> + unsigned int *offset) >> +{ >> + return __i915_gem_object_get_sg(obj, &obj->mm.get_page, n, offset); >> +} > > I wonder if get_sg_phys() is worth it to make it completely clear the > difference between it and get_sg_dma() (and .get_phys_page?) ? 
> >> + >> +static inline struct scatterlist * >> +i915_gem_object_get_sg_dma(struct drm_i915_gem_object *obj, >> + unsigned int n, >> + unsigned int *offset) >> +{ >> + return __i915_gem_object_get_sg(obj, &obj->mm.get_dma_page, n, offset); >> +} >> >> struct page * >> i915_gem_object_get_page(struct drm_i915_gem_object *obj, >> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h >> index b5c15557cc87..fedfebf13344 100644 >> --- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h >> +++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h >> @@ -80,6 +80,14 @@ struct i915_mmap_offset { >> struct rb_node offset; >> }; >> >> +struct i915_gem_object_page_iter { >> + struct scatterlist *sg_pos; >> + unsigned int sg_idx; /* in pages, but 32bit eek! */ >> + >> + struct radix_tree_root radix; >> + struct mutex lock; /* protects this cache */ >> +}; > > All alternatives to trying to avoid a second random lookup were > squashed, it really is two lists within one scatterlist and we do use > both page/dma lookups in non-trivial ways. > >> + >> struct drm_i915_gem_object { >> struct drm_gem_object base; >> >> @@ -246,13 +254,8 @@ struct drm_i915_gem_object { >> >> I915_SELFTEST_DECLARE(unsigned int page_mask); >> >> - struct i915_gem_object_page_iter { >> - struct scatterlist *sg_pos; >> - unsigned int sg_idx; /* in pages, but 32bit eek! 
*/ >> - >> - struct radix_tree_root radix; >> - struct mutex lock; /* protects this cache */ >> - } get_page; >> + struct i915_gem_object_page_iter get_page; >> + struct i915_gem_object_page_iter get_dma_page; >> >> /** >> * Element within i915->mm.unbound_list or i915->mm.bound_list, >> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pages.c b/drivers/gpu/drm/i915/gem/i915_gem_pages.c >> index e8a083743e09..04a3c1233f80 100644 >> --- a/drivers/gpu/drm/i915/gem/i915_gem_pages.c >> +++ b/drivers/gpu/drm/i915/gem/i915_gem_pages.c >> @@ -33,6 +33,8 @@ void __i915_gem_object_set_pages(struct drm_i915_gem_object *obj, >> >> obj->mm.get_page.sg_pos = pages->sgl; >> obj->mm.get_page.sg_idx = 0; >> + obj->mm.get_dma_page.sg_pos = pages->sgl; >> + obj->mm.get_dma_page.sg_idx = 0; >> >> obj->mm.pages = pages; >> >> @@ -155,6 +157,8 @@ static void __i915_gem_object_reset_page_iter(struct drm_i915_gem_object *obj) >> rcu_read_lock(); >> radix_tree_for_each_slot(slot, &obj->mm.get_page.radix, &iter, 0) >> radix_tree_delete(&obj->mm.get_page.radix, iter.index); >> + radix_tree_for_each_slot(slot, &obj->mm.get_dma_page.radix, &iter, 0) >> + radix_tree_delete(&obj->mm.get_dma_page.radix, iter.index); >> rcu_read_unlock(); >> } >> >> @@ -424,11 +428,12 @@ void __i915_gem_object_release_map(struct drm_i915_gem_object *obj) >> } >> >> struct scatterlist * >> -i915_gem_object_get_sg(struct drm_i915_gem_object *obj, >> - unsigned int n, >> - unsigned int *offset) >> +__i915_gem_object_get_sg(struct drm_i915_gem_object *obj, >> + struct i915_gem_object_page_iter *iter, >> + unsigned int n, >> + unsigned int *offset) >> { >> - struct i915_gem_object_page_iter *iter = &obj->mm.get_page; >> + const bool dma = iter == &obj->mm.get_dma_page; >> struct scatterlist *sg; >> unsigned int idx, count; >> >> @@ -457,7 +462,7 @@ i915_gem_object_get_sg(struct drm_i915_gem_object *obj, >> >> sg = iter->sg_pos; >> idx = iter->sg_idx; >> - count = __sg_page_count(sg); >> + count = dma ? 
__sg_dma_page_count(sg) : __sg_page_count(sg); >> >> while (idx + count <= n) { >> void *entry; >> @@ -485,7 +490,7 @@ i915_gem_object_get_sg(struct drm_i915_gem_object *obj, >> >> idx += count; >> sg = ____sg_next(sg); >> - count = __sg_page_count(sg); >> + count = dma ? __sg_dma_page_count(sg) : __sg_page_count(sg); >> } >> >> scan: >> @@ -503,7 +508,7 @@ i915_gem_object_get_sg(struct drm_i915_gem_object *obj, >> while (idx + count <= n) { >> idx += count; >> sg = ____sg_next(sg); >> - count = __sg_page_count(sg); >> + count = dma ? __sg_dma_page_count(sg) : __sg_page_count(sg); > > Hmm. So for a coalesced dma entry, we must therefore end up with some > entries where the sg_dma_length is 0. > > We then insert multiple sg for the same idx into the radix tree, causing > it to return an error, -EEXIST. We eat such errors and so overwrite the > empty entry with the final sg that actually has a valid length. > > Ok. Looks like get_sg already handles zero length elements and you > caught all 3 __sg_page_count(). 
> >> } >> >> *offset = n - idx; >> @@ -570,7 +575,7 @@ i915_gem_object_get_dma_address_len(struct drm_i915_gem_object *obj, >> struct scatterlist *sg; >> unsigned int offset; >> >> - sg = i915_gem_object_get_sg(obj, n, &offset); >> + sg = i915_gem_object_get_sg_dma(obj, n, &offset); >> >> if (len) >> *len = sg_dma_len(sg) - (offset << PAGE_SHIFT); >> diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c b/drivers/gpu/drm/i915/gt/intel_ggtt.c >> index 81c05f551b9c..95e77d56c1ce 100644 >> --- a/drivers/gpu/drm/i915/gt/intel_ggtt.c >> +++ b/drivers/gpu/drm/i915/gt/intel_ggtt.c >> @@ -1383,7 +1383,7 @@ intel_partial_pages(const struct i915_ggtt_view *view, >> if (ret) >> goto err_sg_alloc; >> >> - iter = i915_gem_object_get_sg(obj, view->partial.offset, &offset); >> + iter = i915_gem_object_get_sg_dma(obj, view->partial.offset, &offset); >> GEM_BUG_ON(!iter); >> >> sg = st->sgl; >> @@ -1391,7 +1391,7 @@ intel_partial_pages(const struct i915_ggtt_view *view, >> do { >> unsigned int len; >> >> - len = min(iter->length - (offset << PAGE_SHIFT), >> + len = min(sg_dma_len(iter) - (offset << PAGE_SHIFT), >> count << PAGE_SHIFT); >> sg_set_page(sg, NULL, len, 0); >> sg_dma_address(sg) = > > I didn't find any other users for get_sg() and this looks to catch all > the fixes required for using sg_dma. > >> diff --git a/drivers/gpu/drm/i915/i915_scatterlist.h b/drivers/gpu/drm/i915/i915_scatterlist.h >> index 510856887628..102d8d7007b6 100644 >> --- a/drivers/gpu/drm/i915/i915_scatterlist.h >> +++ b/drivers/gpu/drm/i915/i915_scatterlist.h >> @@ -48,6 +48,11 @@ static inline int __sg_page_count(const struct scatterlist *sg) >> return sg->length >> PAGE_SHIFT; >> } >> >> +static inline int __sg_dma_page_count(const struct scatterlist *sg) >> +{ >> + return sg_dma_len(sg) >> PAGE_SHIFT; >> +} > > Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk> Thanks! > Do we need cc:stable? Probably not given how this oversight only gets exposed once the Intel IOMMU dma-api refactoring work lands. 
Regards, Tvrtko
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c b/drivers/gpu/drm/i915/gem/i915_gem_object.c index c8421fd9d2dc..ffeaf1b9b1bb 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c @@ -73,6 +73,8 @@ void i915_gem_object_init(struct drm_i915_gem_object *obj, obj->mm.madv = I915_MADV_WILLNEED; INIT_RADIX_TREE(&obj->mm.get_page.radix, GFP_KERNEL | __GFP_NOWARN); mutex_init(&obj->mm.get_page.lock); + INIT_RADIX_TREE(&obj->mm.get_dma_page.radix, GFP_KERNEL | __GFP_NOWARN); + mutex_init(&obj->mm.get_dma_page.lock); if (IS_ENABLED(CONFIG_LOCKDEP) && i915_gem_object_is_shrinkable(obj)) i915_gem_shrinker_taints_mutex(to_i915(obj->base.dev), diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h b/drivers/gpu/drm/i915/gem/i915_gem_object.h index d46db8d8f38e..44c6910e2669 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h @@ -275,8 +275,26 @@ int i915_gem_object_set_tiling(struct drm_i915_gem_object *obj, unsigned int tiling, unsigned int stride); struct scatterlist * +__i915_gem_object_get_sg(struct drm_i915_gem_object *obj, + struct i915_gem_object_page_iter *iter, + unsigned int n, + unsigned int *offset); + +static inline struct scatterlist * i915_gem_object_get_sg(struct drm_i915_gem_object *obj, - unsigned int n, unsigned int *offset); + unsigned int n, + unsigned int *offset) +{ + return __i915_gem_object_get_sg(obj, &obj->mm.get_page, n, offset); +} + +static inline struct scatterlist * +i915_gem_object_get_sg_dma(struct drm_i915_gem_object *obj, + unsigned int n, + unsigned int *offset) +{ + return __i915_gem_object_get_sg(obj, &obj->mm.get_dma_page, n, offset); +} struct page * i915_gem_object_get_page(struct drm_i915_gem_object *obj, diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h index b5c15557cc87..fedfebf13344 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h +++ 
b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h @@ -80,6 +80,14 @@ struct i915_mmap_offset { struct rb_node offset; }; +struct i915_gem_object_page_iter { + struct scatterlist *sg_pos; + unsigned int sg_idx; /* in pages, but 32bit eek! */ + + struct radix_tree_root radix; + struct mutex lock; /* protects this cache */ +}; + struct drm_i915_gem_object { struct drm_gem_object base; @@ -246,13 +254,8 @@ struct drm_i915_gem_object { I915_SELFTEST_DECLARE(unsigned int page_mask); - struct i915_gem_object_page_iter { - struct scatterlist *sg_pos; - unsigned int sg_idx; /* in pages, but 32bit eek! */ - - struct radix_tree_root radix; - struct mutex lock; /* protects this cache */ - } get_page; + struct i915_gem_object_page_iter get_page; + struct i915_gem_object_page_iter get_dma_page; /** * Element within i915->mm.unbound_list or i915->mm.bound_list, diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pages.c b/drivers/gpu/drm/i915/gem/i915_gem_pages.c index e8a083743e09..04a3c1233f80 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_pages.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_pages.c @@ -33,6 +33,8 @@ void __i915_gem_object_set_pages(struct drm_i915_gem_object *obj, obj->mm.get_page.sg_pos = pages->sgl; obj->mm.get_page.sg_idx = 0; + obj->mm.get_dma_page.sg_pos = pages->sgl; + obj->mm.get_dma_page.sg_idx = 0; obj->mm.pages = pages; @@ -155,6 +157,8 @@ static void __i915_gem_object_reset_page_iter(struct drm_i915_gem_object *obj) rcu_read_lock(); radix_tree_for_each_slot(slot, &obj->mm.get_page.radix, &iter, 0) radix_tree_delete(&obj->mm.get_page.radix, iter.index); + radix_tree_for_each_slot(slot, &obj->mm.get_dma_page.radix, &iter, 0) + radix_tree_delete(&obj->mm.get_dma_page.radix, iter.index); rcu_read_unlock(); } @@ -424,11 +428,12 @@ void __i915_gem_object_release_map(struct drm_i915_gem_object *obj) } struct scatterlist * -i915_gem_object_get_sg(struct drm_i915_gem_object *obj, - unsigned int n, - unsigned int *offset) +__i915_gem_object_get_sg(struct 
drm_i915_gem_object *obj, + struct i915_gem_object_page_iter *iter, + unsigned int n, + unsigned int *offset) { - struct i915_gem_object_page_iter *iter = &obj->mm.get_page; + const bool dma = iter == &obj->mm.get_dma_page; struct scatterlist *sg; unsigned int idx, count; @@ -457,7 +462,7 @@ i915_gem_object_get_sg(struct drm_i915_gem_object *obj, sg = iter->sg_pos; idx = iter->sg_idx; - count = __sg_page_count(sg); + count = dma ? __sg_dma_page_count(sg) : __sg_page_count(sg); while (idx + count <= n) { void *entry; @@ -485,7 +490,7 @@ i915_gem_object_get_sg(struct drm_i915_gem_object *obj, idx += count; sg = ____sg_next(sg); - count = __sg_page_count(sg); + count = dma ? __sg_dma_page_count(sg) : __sg_page_count(sg); } scan: @@ -503,7 +508,7 @@ i915_gem_object_get_sg(struct drm_i915_gem_object *obj, while (idx + count <= n) { idx += count; sg = ____sg_next(sg); - count = __sg_page_count(sg); + count = dma ? __sg_dma_page_count(sg) : __sg_page_count(sg); } *offset = n - idx; @@ -570,7 +575,7 @@ i915_gem_object_get_dma_address_len(struct drm_i915_gem_object *obj, struct scatterlist *sg; unsigned int offset; - sg = i915_gem_object_get_sg(obj, n, &offset); + sg = i915_gem_object_get_sg_dma(obj, n, &offset); if (len) *len = sg_dma_len(sg) - (offset << PAGE_SHIFT); diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c b/drivers/gpu/drm/i915/gt/intel_ggtt.c index 81c05f551b9c..95e77d56c1ce 100644 --- a/drivers/gpu/drm/i915/gt/intel_ggtt.c +++ b/drivers/gpu/drm/i915/gt/intel_ggtt.c @@ -1383,7 +1383,7 @@ intel_partial_pages(const struct i915_ggtt_view *view, if (ret) goto err_sg_alloc; - iter = i915_gem_object_get_sg(obj, view->partial.offset, &offset); + iter = i915_gem_object_get_sg_dma(obj, view->partial.offset, &offset); GEM_BUG_ON(!iter); sg = st->sgl; @@ -1391,7 +1391,7 @@ intel_partial_pages(const struct i915_ggtt_view *view, do { unsigned int len; - len = min(iter->length - (offset << PAGE_SHIFT), + len = min(sg_dma_len(iter) - (offset << PAGE_SHIFT), count << 
PAGE_SHIFT); sg_set_page(sg, NULL, len, 0); sg_dma_address(sg) = diff --git a/drivers/gpu/drm/i915/i915_scatterlist.h b/drivers/gpu/drm/i915/i915_scatterlist.h index 510856887628..102d8d7007b6 100644 --- a/drivers/gpu/drm/i915/i915_scatterlist.h +++ b/drivers/gpu/drm/i915/i915_scatterlist.h @@ -48,6 +48,11 @@ static inline int __sg_page_count(const struct scatterlist *sg) return sg->length >> PAGE_SHIFT; } +static inline int __sg_dma_page_count(const struct scatterlist *sg) +{ + return sg_dma_len(sg) >> PAGE_SHIFT; +} + static inline struct scatterlist *____sg_next(struct scatterlist *sg) { ++sg;