diff mbox series

[v3,3/3] drm/i915/gt: document TLB cache invalidation functions

Message ID cc68d62a1979ea859b447b94413e100472331f57.1659598090.git.mchehab@kernel.org (mailing list archive)
State New, archived
Headers show
Series Move TLB invalidation code for its own file and document it | expand

Commit Message

Mauro Carvalho Chehab Aug. 4, 2022, 7:37 a.m. UTC
Add a description for the TLB cache invalidation algorithm and for
the related kAPI functions.

Signed-off-by: Mauro Carvalho Chehab <mchehab@kernel.org>
---

To avoid mailbombing on a large number of people, only mailing lists were C/C on the cover.
See [PATCH v3 0/3] at: https://lore.kernel.org/all/cover.1659598090.git.mchehab@kernel.org/

 Documentation/gpu/i915.rst          |  7 ++
 drivers/gpu/drm/i915/gt/intel_tlb.c | 25 ++++++++
 drivers/gpu/drm/i915/gt/intel_tlb.h | 99 +++++++++++++++++++++++++++++
 3 files changed, 131 insertions(+)

Comments

Randy Dunlap Aug. 4, 2022, 7 p.m. UTC | #1
Hi Mauro,

On 8/4/22 00:37, Mauro Carvalho Chehab wrote:
> Add a description for the TLB cache invalidation algorithm and for
> the related kAPI functions.
> 
> Signed-off-by: Mauro Carvalho Chehab <mchehab@kernel.org>
> ---
> 
> To avoid mailbombing on a large number of people, only mailing lists were C/C on the cover.
> See [PATCH v3 0/3] at: https://lore.kernel.org/all/cover.1659598090.git.mchehab@kernel.org/
> 
>  Documentation/gpu/i915.rst          |  7 ++
>  drivers/gpu/drm/i915/gt/intel_tlb.c | 25 ++++++++
>  drivers/gpu/drm/i915/gt/intel_tlb.h | 99 +++++++++++++++++++++++++++++
>  3 files changed, 131 insertions(+)
> 

> diff --git a/drivers/gpu/drm/i915/gt/intel_tlb.c b/drivers/gpu/drm/i915/gt/intel_tlb.c
> index af8cae979489..16b918ffe824 100644
> --- a/drivers/gpu/drm/i915/gt/intel_tlb.c
> +++ b/drivers/gpu/drm/i915/gt/intel_tlb.c
> @@ -145,6 +145,18 @@ static void mmio_invalidate_full(struct intel_gt *gt)
>  	intel_uncore_forcewake_put_delayed(uncore, FORCEWAKE_ALL);
>  }
>  
> +/**
> + * intel_gt_invalidate_tlb_full - do full TLB cache invalidation
> + * @gt: GT structure

In multiple places (here and below) it would be nice to know what a
GT structure is. I looked thru multiple C and header files yesterday
and didn't find any comments about it.

Just saying that @gt is a GT structure isn't very helpful, other
than making kernel-doc shut up.

> + * @seqno: sequence number
> + *
> + * Do a full TLB cache invalidation if the @seqno is bigger than the last
> + * full TLB cache invalidation.
> + *
> + * Note:
> + * The TLB cache invalidation logic depends on GEN-specific registers.
> + * It currently supports MMIO-based TLB flush for GEN8 to GEN12.
> + */
>  void intel_gt_invalidate_tlb_full(struct intel_gt *gt, u32 seqno)
>  {
>  	intel_wakeref_t wakeref;
> @@ -171,12 +183,25 @@ void intel_gt_invalidate_tlb_full(struct intel_gt *gt, u32 seqno)
>  	}
>  }
>  
> +/**
> + * intel_gt_init_tlb - initialize TLB-specific vars
> + * @gt: GT structure
> + *
> + * TLB cache invalidation logic internally uses some resources that require
> + * initialization. Should be called before doing any TLB cache invalidation.
> + */
>  void intel_gt_init_tlb(struct intel_gt *gt)
>  {
>  	mutex_init(&gt->tlb.invalidate_lock);
>  	seqcount_mutex_init(&gt->tlb.seqno, &gt->tlb.invalidate_lock);
>  }
>  
> +/**
> + * intel_gt_fini_tlb - free TLB-specific vars
> + * @gt: GT structure
> + *
> + * Frees any resources needed by TLB cache invalidation logic.
> + */
>  void intel_gt_fini_tlb(struct intel_gt *gt)
>  {
>  	mutex_destroy(&gt->tlb.invalidate_lock);
> diff --git a/drivers/gpu/drm/i915/gt/intel_tlb.h b/drivers/gpu/drm/i915/gt/intel_tlb.h
> index 46ce25bf5afe..2838c051f872 100644
> --- a/drivers/gpu/drm/i915/gt/intel_tlb.h
> +++ b/drivers/gpu/drm/i915/gt/intel_tlb.h
> @@ -11,16 +11,115 @@
>  
>  #include "intel_gt_types.h"
>  
> +/**
> + * DOC: TLB cache invalidation logic
> + *
...
> +
>  void intel_gt_invalidate_tlb_full(struct intel_gt *gt, u32 seqno);
>  
>  void intel_gt_init_tlb(struct intel_gt *gt);
>  void intel_gt_fini_tlb(struct intel_gt *gt);
>  
> +/**
> + * intel_gt_tlb_seqno - Returns the current TLB invlidation sequence number
> + * @gt: GT structure
> + *
> + * There's no need to lock while calling it, as seqprop_sequence is thread-safe
> + */
>  static inline u32 intel_gt_tlb_seqno(const struct intel_gt *gt)
>  {
>  	return seqprop_sequence(&gt->tlb.seqno);
>  }
>  
> +/**
> + * intel_gt_next_invalidate_tlb_full - Returns the next TLB full invalidation
> + *	sequence number
> + * @gt: GT structure
> + *
> + * There's no need to lock while calling it, as seqprop_sequence is thread-safe
> + */
>  static inline u32 intel_gt_next_invalidate_tlb_full(const struct intel_gt *gt)
>  {
>  	return intel_gt_tlb_seqno(gt) | 1;

thanks.
Andi Shyti Aug. 5, 2022, 9:08 a.m. UTC | #2
Hi Randy,

> > +/**
> > + * intel_gt_invalidate_tlb_full - do full TLB cache invalidation
> > + * @gt: GT structure
> 
> In multiple places (here and below) it would be nice to know what a
> GT structure is. I looked thru multiple C and header files yesterday
> and didn't find any comments about it.
> 
> Just saying that @gt is a GT structure isn't very helpful, other
> than making kernel-doc shut up.

the 'gt' belongs to the drivers/gpu/drm/i915/gt/ subsystem and
it's widely used a throughout i915.

I think it's inappropriate to describe it just here. On the other
hand I agree that a better documentation is required for the GT
itself where other parts can point to.

Andi
Tvrtko Ursulin Aug. 5, 2022, 9:24 a.m. UTC | #3
On 05/08/2022 10:08, Andi Shyti wrote:
> Hi Randy,
> 
>>> +/**
>>> + * intel_gt_invalidate_tlb_full - do full TLB cache invalidation
>>> + * @gt: GT structure
>>
>> In multiple places (here and below) it would be nice to know what a
>> GT structure is. I looked thru multiple C and header files yesterday
>> and didn't find any comments about it.
>>
>> Just saying that @gt is a GT structure isn't very helpful, other
>> than making kernel-doc shut up.
> 
> the 'gt' belongs to the drivers/gpu/drm/i915/gt/ subsystem and
> it's widely used a throughout i915.
> 
> I think it's inappropriate to describe it just here. On the other
> hand I agree that a better documentation is required for the GT
> itself where other parts can point to.

Yeah agreed there is no point of copy pasting some explanation all over 
the place. Could we just do s/GT structure/struct intel_gt/, or "pointer 
to struct intel_gt to operate on", would that be good enough?

Regards,

Tvrtko
Mauro Carvalho Chehab Aug. 5, 2022, 10:30 a.m. UTC | #4
On Fri, 5 Aug 2022 10:24:25 +0100
Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com> wrote:

> On 05/08/2022 10:08, Andi Shyti wrote:
> > Hi Randy,
> >   
> >>> +/**
> >>> + * intel_gt_invalidate_tlb_full - do full TLB cache invalidation
> >>> + * @gt: GT structure  
> >>
> >> In multiple places (here and below) it would be nice to know what a
> >> GT structure is. I looked thru multiple C and header files yesterday
> >> and didn't find any comments about it.
> >>
> >> Just saying that @gt is a GT structure isn't very helpful, other
> >> than making kernel-doc shut up.  
> > 
> > the 'gt' belongs to the drivers/gpu/drm/i915/gt/ subsystem and
> > it's widely used a throughout i915.
> > 
> > I think it's inappropriate to describe it just here. On the other
> > hand I agree that a better documentation is required for the GT
> > itself where other parts can point to.  

GT is actually a well-understood term for GPU developers. It is an alias
for:

	https://en.wikipedia.org/wiki/Intel_Graphics_Technology

It is basically the "core" of the GPU, where the engine units sit.

I agree with Andi: terms like this should likely be defined on a glossary
at i915.rst file.

> Yeah agreed there is no point of copy pasting some explanation all over 
> the place. Could we just do s/GT structure/struct intel_gt/, or "pointer 
> to struct intel_gt to operate on", would that be good enough?

IMO, it won't make any difference. kerneldoc already says that the
parameter has struct intel_gt type on its output:

	.. c:function:: void intel_gt_fini_tlb (struct intel_gt *gt)

	   free TLB-specific vars

	**Parameters**

	``struct intel_gt *gt``
	  GT structure
	
	**Description**

	Frees any resources needed by TLB cache invalidation logic.

This struct somewhat is similar to struct device. This is a container
struct that has the common data needed to control the GT hardware.

Almost all functions that work with GT needs it. There's not much sense
to describe it everywhere. What makes sense is to have struct intel_gt
documented at intel_gt_types.h, letting the build system to generate 
hiperlinks to it.

This is easier said than done...

Regards,
Mauro
Andi Shyti Aug. 8, 2022, 4:58 p.m. UTC | #5
Hi Mauro,

On Thu, Aug 04, 2022 at 09:37:24AM +0200, Mauro Carvalho Chehab wrote:
> Add a description for the TLB cache invalidation algorithm and for
> the related kAPI functions.
> 
> Signed-off-by: Mauro Carvalho Chehab <mchehab@kernel.org>

Reviewed-by: Andi Shyti <andi.shyti@linux.intel.com>

Andi
diff mbox series

Patch

diff --git a/Documentation/gpu/i915.rst b/Documentation/gpu/i915.rst
index 4e59db1cfb00..46911fdd79e8 100644
--- a/Documentation/gpu/i915.rst
+++ b/Documentation/gpu/i915.rst
@@ -58,6 +58,13 @@  Intel GVT-g Host Support(vGPU device model)
 .. kernel-doc:: drivers/gpu/drm/i915/intel_gvt.c
    :internal:
 
+TLB cache invalidation
+----------------------
+
+.. kernel-doc:: drivers/gpu/drm/i915/gt/intel_tlb.h
+
+.. kernel-doc:: drivers/gpu/drm/i915/gt/intel_tlb.c
+
 Workarounds
 -----------
 
diff --git a/drivers/gpu/drm/i915/gt/intel_tlb.c b/drivers/gpu/drm/i915/gt/intel_tlb.c
index af8cae979489..16b918ffe824 100644
--- a/drivers/gpu/drm/i915/gt/intel_tlb.c
+++ b/drivers/gpu/drm/i915/gt/intel_tlb.c
@@ -145,6 +145,18 @@  static void mmio_invalidate_full(struct intel_gt *gt)
 	intel_uncore_forcewake_put_delayed(uncore, FORCEWAKE_ALL);
 }
 
+/**
+ * intel_gt_invalidate_tlb_full - do full TLB cache invalidation
+ * @gt: GT structure
+ * @seqno: sequence number
+ *
+ * Do a full TLB cache invalidation if the @seqno is bigger than the last
+ * full TLB cache invalidation.
+ *
+ * Note:
+ * The TLB cache invalidation logic depends on GEN-specific registers.
+ * It currently supports MMIO-based TLB flush for GEN8 to GEN12.
+ */
 void intel_gt_invalidate_tlb_full(struct intel_gt *gt, u32 seqno)
 {
 	intel_wakeref_t wakeref;
@@ -171,12 +183,25 @@  void intel_gt_invalidate_tlb_full(struct intel_gt *gt, u32 seqno)
 	}
 }
 
+/**
+ * intel_gt_init_tlb - initialize TLB-specific vars
+ * @gt: GT structure
+ *
+ * TLB cache invalidation logic internally uses some resources that require
+ * initialization. Should be called before doing any TLB cache invalidation.
+ */
 void intel_gt_init_tlb(struct intel_gt *gt)
 {
 	mutex_init(&gt->tlb.invalidate_lock);
 	seqcount_mutex_init(&gt->tlb.seqno, &gt->tlb.invalidate_lock);
 }
 
+/**
+ * intel_gt_fini_tlb - free TLB-specific vars
+ * @gt: GT structure
+ *
+ * Frees any resources needed by TLB cache invalidation logic.
+ */
 void intel_gt_fini_tlb(struct intel_gt *gt)
 {
 	mutex_destroy(&gt->tlb.invalidate_lock);
diff --git a/drivers/gpu/drm/i915/gt/intel_tlb.h b/drivers/gpu/drm/i915/gt/intel_tlb.h
index 46ce25bf5afe..2838c051f872 100644
--- a/drivers/gpu/drm/i915/gt/intel_tlb.h
+++ b/drivers/gpu/drm/i915/gt/intel_tlb.h
@@ -11,16 +11,115 @@ 
 
 #include "intel_gt_types.h"
 
+/**
+ * DOC: TLB cache invalidation logic
+ *
+ * The way the current algorithm works is that a struct drm_i915_gem_object can
+ * be created on any order. At unbind/evict time, the object is warranted that
+ * it won't be used anymore. So, a sequence number provided by
+ * intel_gt_next_invalidate_tlb_full() is stored on it. This can happen either
+ * at __vma_put_pages() - for VMA sync unbind, or at ppgtt_unbind_vma() - for
+ * VMA async VMA bind.
+ *
+ * At __i915_gem_object_unset_pages(), intel_gt_invalidate_tlb_full() is called,
+ * where it checks if the sequence number of the object was already invalidated
+ * or not. If not, it flushes the TLB and increments the sequence number::
+ *
+ *   void intel_gt_invalidate_tlb_full(struct intel_gt *gt, u32 seqno)
+ *   {
+ *   ...
+ * 	with_intel_gt_pm_if_awake(gt, wakeref) {
+ * 		mutex_lock(&gt->tlb.invalidate_lock);
+ * 		if (tlb_seqno_passed(gt, seqno))
+ * 				goto unlock;
+ *
+ * 		// Some code to do TLB invalidation
+ *   ...
+ *
+ * 		write_seqcount_invalidate(&gt->tlb.seqno); // increment seqno
+ * 		mutex_lock(&gt->tlb.invalidate_lock);
+ *      }
+ *
+ * So, let's say the current seqno is 2 and 3 new objects were created,
+ * on this order::
+ *
+ * 	obj1
+ * 	obj2
+ * 	obj3
+ *
+ * They can be unbind/evict on a different order. At unbind/evict time,
+ * the mm.tlb will be stamped with the sequence number, using the number
+ * from the last TLB flush, plus 1.
+ *
+ * Different threads may be used on unbind/evict and/or unset pages.
+ * As the logic at intel_gt_invalidate_tlb_full() is protected by a mutex,
+ * for simplicity, let's consider just two threads:
+ *
+ * +-------------------+-------------------------+---------------------------------+
+ * | sequence number   | Thread 0                | Thread 1                        +
+ * +===================+=========================+=================================+
+ * | seqno=2           |                         |                                 |
+ * |                   +-------------------------+---------------------------------+
+ * |                   | unbind/evict obj3.      |                                 |
+ * |                   |                         |                                 |
+ * |                   | obj3.mm.tlb = seqno | 1 |                                 |
+ * |                   | // obj3.mm.tlb = 3      |                                 |
+ * |                   +-------------------------+---------------------------------+
+ * |                   | unbind/evict obj1.      |                                 |
+ * |                   |                         |                                 |
+ * |                   | obj1.mm.tlb = seqno | 1 |                                 |
+ * |                   | // obj1.mm.tlb = 3      |                                 |
+ * |                   +-------------------------+---------------------------------+
+ * |                   |                         | __i915_gem_object_unset_pages() |
+ * |                   |                         | called for obj3 => TLB flush    |
+ * |                   |                         | invalidating both obj1 and obj2.|
+ * |                   |                         |                                 |
+ * |                   |                         | seqno += 2                      |
+ * +-------------------+-------------------------+---------------------------------+
+ * | seqno=4           |                         |                                 |
+ * |                   +-------------------------+---------------------------------+
+ * |                   | unbind/evict obj2.      |                                 |
+ * |                   |                         |                                 |
+ * |                   | obj2.mm.tlb = seqno | 1 |                                 |
+ * |                   | // obj2.mm.tlb = 5      |                                 |
+ * |                   +-------------------------+---------------------------------+
+ * |                   |                         | __i915_gem_object_unset_pages() |
+ * |                   |                         | called for obj1, don't flush    |
+ * |                   |                         | as past flush invalidated obj1. |
+ * |                   +-------------------------+---------------------------------+
+ * |                   |                         | __i915_gem_object_unset_pages() |
+ * |                   |                         | called for obj2 => TLB flush.   |
+ * |                   |                         | invalidating obj2.              |
+ * |                   |                         |                                 |
+ * |                   |                         | seqno += 2                      |
+ * +-------------------+-------------------------+---------------------------------+
+ * | seqno=6           |                         |                                 |
+ * +-------------------+-------------------------+---------------------------------+
+ */
+
 void intel_gt_invalidate_tlb_full(struct intel_gt *gt, u32 seqno);
 
 void intel_gt_init_tlb(struct intel_gt *gt);
 void intel_gt_fini_tlb(struct intel_gt *gt);
 
+/**
+ * intel_gt_tlb_seqno - Returns the current TLB invlidation sequence number
+ * @gt: GT structure
+ *
+ * There's no need to lock while calling it, as seqprop_sequence is thread-safe
+ */
 static inline u32 intel_gt_tlb_seqno(const struct intel_gt *gt)
 {
 	return seqprop_sequence(&gt->tlb.seqno);
 }
 
+/**
+ * intel_gt_next_invalidate_tlb_full - Returns the next TLB full invalidation
+ *	sequence number
+ * @gt: GT structure
+ *
+ * There's no need to lock while calling it, as seqprop_sequence is thread-safe
+ */
 static inline u32 intel_gt_next_invalidate_tlb_full(const struct intel_gt *gt)
 {
 	return intel_gt_tlb_seqno(gt) | 1;