Message ID | 20231010184423.2118908-13-jonathan.cavitt@intel.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | drm/i915: Implement range-based TLB | expand |
On 10/10/2023 19:44, Jonathan Cavitt wrote: > For platforms supporting selective tlb invalidations, we don't need to > do a full tlb invalidation. Rather, do a range-based tlb invalidation for > every unbind of a purged vma that belongs to an active vm. > > Signed-off-by: Prathap Kumar Valsan <prathap.kumar.valsan@intel.com> > Cc: Niranjana Vishwanathapura <niranjana.vishwanathapura@intel.com> > Cc: Fei Yang <fei.yang@intel.com> > Signed-off-by: Mauro Carvalho Chehab <mchehab@kernel.org> > Signed-off-by: Jonathan Cavitt <jonathan.cavitt@intel.com> > --- > drivers/gpu/drm/i915/gt/intel_ppgtt.c | 2 +- > drivers/gpu/drm/i915/i915_vma.c | 14 +++++++++----- > drivers/gpu/drm/i915/i915_vma.h | 3 ++- > 3 files changed, 12 insertions(+), 7 deletions(-) > > diff --git a/drivers/gpu/drm/i915/gt/intel_ppgtt.c b/drivers/gpu/drm/i915/gt/intel_ppgtt.c > index d07a4f97b9434..b43dae3cbd59f 100644 > --- a/drivers/gpu/drm/i915/gt/intel_ppgtt.c > +++ b/drivers/gpu/drm/i915/gt/intel_ppgtt.c > @@ -211,7 +211,7 @@ void ppgtt_unbind_vma(struct i915_address_space *vm, > return; > > vm->clear_range(vm, vma_res->start, vma_res->vma_size); > - vma_invalidate_tlb(vm, vma_res->tlb); > + vma_invalidate_tlb(vm, vma_res->tlb, vma_res->start, vma_res->vma_size); > } > > static unsigned long pd_count(u64 size, int shift) > diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c > index d09aad34ba37f..cb05d794f0d0f 100644 > --- a/drivers/gpu/drm/i915/i915_vma.c > +++ b/drivers/gpu/drm/i915/i915_vma.c > @@ -1339,7 +1339,8 @@ I915_SELFTEST_EXPORT int i915_vma_get_pages(struct i915_vma *vma) > return err; > } > > -void vma_invalidate_tlb(struct i915_address_space *vm, u32 *tlb) > +void vma_invalidate_tlb(struct i915_address_space *vm, u32 *tlb, > + u64 start, u64 size) > { > struct intel_gt *gt; > int id; > @@ -1355,9 +1356,11 @@ void vma_invalidate_tlb(struct i915_address_space *vm, u32 *tlb) > * the most recent TLB invalidation seqno, and if we have not yet > * flushed the TLBs upon 
release, perform a full invalidation. > */ > - for_each_gt(gt, vm->i915, id) > - WRITE_ONCE(tlb[id], > - intel_gt_next_invalidate_tlb_full(gt)); > + for_each_gt(gt, vm->i915, id) { > + if (!intel_gt_invalidate_tlb_range(gt, start, size)) > + WRITE_ONCE(tlb[id], > + intel_gt_next_invalidate_tlb_full(gt)); > + } > } > > static void __vma_put_pages(struct i915_vma *vma, unsigned int count) > @@ -2041,7 +2044,8 @@ struct dma_fence *__i915_vma_evict(struct i915_vma *vma, bool async) > dma_fence_put(unbind_fence); > unbind_fence = NULL; > } > - vma_invalidate_tlb(vma->vm, vma->obj->mm.tlb); > + vma_invalidate_tlb(vma->vm, vma->obj->mm.tlb, > + vma->node.start, vma->size); The RFC looks like just what we needed so I'll drop an ack on the other patch series. Thanks for sending it out so quickly. Regards, Tvrtko > } > > /* > diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h > index e356dfb883d34..5a604aad55dfe 100644 > --- a/drivers/gpu/drm/i915/i915_vma.h > +++ b/drivers/gpu/drm/i915/i915_vma.h > @@ -260,7 +260,8 @@ bool i915_vma_misplaced(const struct i915_vma *vma, > u64 size, u64 alignment, u64 flags); > void __i915_vma_set_map_and_fenceable(struct i915_vma *vma); > void i915_vma_revoke_mmap(struct i915_vma *vma); > -void vma_invalidate_tlb(struct i915_address_space *vm, u32 *tlb); > +void vma_invalidate_tlb(struct i915_address_space *vm, u32 *tlb, > + u64 start, u64 size); > struct dma_fence *__i915_vma_evict(struct i915_vma *vma, bool async); > int __i915_vma_unbind(struct i915_vma *vma); > int __must_check i915_vma_unbind(struct i915_vma *vma);
diff --git a/drivers/gpu/drm/i915/gt/intel_ppgtt.c b/drivers/gpu/drm/i915/gt/intel_ppgtt.c index d07a4f97b9434..b43dae3cbd59f 100644 --- a/drivers/gpu/drm/i915/gt/intel_ppgtt.c +++ b/drivers/gpu/drm/i915/gt/intel_ppgtt.c @@ -211,7 +211,7 @@ void ppgtt_unbind_vma(struct i915_address_space *vm, return; vm->clear_range(vm, vma_res->start, vma_res->vma_size); - vma_invalidate_tlb(vm, vma_res->tlb); + vma_invalidate_tlb(vm, vma_res->tlb, vma_res->start, vma_res->vma_size); } static unsigned long pd_count(u64 size, int shift) diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c index d09aad34ba37f..cb05d794f0d0f 100644 --- a/drivers/gpu/drm/i915/i915_vma.c +++ b/drivers/gpu/drm/i915/i915_vma.c @@ -1339,7 +1339,8 @@ I915_SELFTEST_EXPORT int i915_vma_get_pages(struct i915_vma *vma) return err; } -void vma_invalidate_tlb(struct i915_address_space *vm, u32 *tlb) +void vma_invalidate_tlb(struct i915_address_space *vm, u32 *tlb, + u64 start, u64 size) { struct intel_gt *gt; int id; @@ -1355,9 +1356,11 @@ void vma_invalidate_tlb(struct i915_address_space *vm, u32 *tlb) * the most recent TLB invalidation seqno, and if we have not yet * flushed the TLBs upon release, perform a full invalidation. 
*/ - for_each_gt(gt, vm->i915, id) - WRITE_ONCE(tlb[id], - intel_gt_next_invalidate_tlb_full(gt)); + for_each_gt(gt, vm->i915, id) { + if (!intel_gt_invalidate_tlb_range(gt, start, size)) + WRITE_ONCE(tlb[id], + intel_gt_next_invalidate_tlb_full(gt)); + } } static void __vma_put_pages(struct i915_vma *vma, unsigned int count) @@ -2041,7 +2044,8 @@ struct dma_fence *__i915_vma_evict(struct i915_vma *vma, bool async) dma_fence_put(unbind_fence); unbind_fence = NULL; } - vma_invalidate_tlb(vma->vm, vma->obj->mm.tlb); + vma_invalidate_tlb(vma->vm, vma->obj->mm.tlb, + vma->node.start, vma->size); } /* diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h index e356dfb883d34..5a604aad55dfe 100644 --- a/drivers/gpu/drm/i915/i915_vma.h +++ b/drivers/gpu/drm/i915/i915_vma.h @@ -260,7 +260,8 @@ bool i915_vma_misplaced(const struct i915_vma *vma, u64 size, u64 alignment, u64 flags); void __i915_vma_set_map_and_fenceable(struct i915_vma *vma); void i915_vma_revoke_mmap(struct i915_vma *vma); -void vma_invalidate_tlb(struct i915_address_space *vm, u32 *tlb); +void vma_invalidate_tlb(struct i915_address_space *vm, u32 *tlb, + u64 start, u64 size); struct dma_fence *__i915_vma_evict(struct i915_vma *vma, bool async); int __i915_vma_unbind(struct i915_vma *vma); int __must_check i915_vma_unbind(struct i915_vma *vma);