diff mbox

[v2,2/2] drm/i915: Invalidate the guc ggtt TLB upon insertion

Message ID 20161201094647.4913-2-chris@chris-wilson.co.uk (mailing list archive)
State New, archived
Headers show

Commit Message

Chris Wilson Dec. 1, 2016, 9:46 a.m. UTC
Move the GuC invalidation of its ggtt TLB to where we perform the ggtt
modification rather than proliferate it into all the callers of the
insert (which may or may not in fact have to do the insertion).

v2: Just do the guc invalidate unconditionally, (afaict) it has no impact
without the guc loaded on gen8+

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
Cc: Daniel Vetter <daniel.vetter@ffwll.ch> # v1
Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch> # v1
---
 drivers/gpu/drm/i915/i915_gem_gtt.c        | 12 ++++++++----
 drivers/gpu/drm/i915/i915_guc_submission.c |  3 ---
 drivers/gpu/drm/i915/intel_guc_loader.c    |  3 ---
 drivers/gpu/drm/i915/intel_lrc.c           |  6 ------
 4 files changed, 8 insertions(+), 16 deletions(-)

Comments

Tvrtko Ursulin Dec. 1, 2016, 10:06 a.m. UTC | #1
On 01/12/2016 09:46, Chris Wilson wrote:
> Move the GuC invalidation of its ggtt TLB to where we perform the ggtt
> modification rather than proliferate it into all the callers of the
> insert (which may or may not in fact have to do the insertion).
>
> v2: Just do the guc invalidate unconditionally, (afaict) it has no impact
> without the guc loaded on gen8+

Why do you find it tempting to do it unconditionally? I would rather not 
touch it on gen8 at all and would also prefer the conditional flush in gen9.

Regards,

Tvrtko

> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
> Cc: Daniel Vetter <daniel.vetter@ffwll.ch> # v1
> Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch> # v1
> ---
>  drivers/gpu/drm/i915/i915_gem_gtt.c        | 12 ++++++++----
>  drivers/gpu/drm/i915/i915_guc_submission.c |  3 ---
>  drivers/gpu/drm/i915/intel_guc_loader.c    |  3 ---
>  drivers/gpu/drm/i915/intel_lrc.c           |  6 ------
>  4 files changed, 8 insertions(+), 16 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
> index 02fb063302bf..dccc4d0eef88 100644
> --- a/drivers/gpu/drm/i915/i915_gem_gtt.c
> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
> @@ -2387,6 +2387,12 @@ static void gen8_set_pte(void __iomem *addr, gen8_pte_t pte)
>  	writeq(pte, addr);
>  }
>
> +static void gen8_ggtt_invalidate(struct drm_i915_private *dev_priv)
> +{
> +	I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
> +	I915_WRITE(GEN8_GTCR, GEN8_GTCR_INVALIDATE);
> +}
> +
>  static void gen8_ggtt_insert_page(struct i915_address_space *vm,
>  				  dma_addr_t addr,
>  				  uint64_t offset,
> @@ -2400,8 +2406,7 @@ static void gen8_ggtt_insert_page(struct i915_address_space *vm,
>
>  	gen8_set_pte(pte, gen8_pte_encode(addr, level));
>
> -	I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
> -	POSTING_READ(GFX_FLSH_CNTL_GEN6);
> +	gen8_ggtt_invalidate(dev_priv);
>  }
>
>  static void gen8_ggtt_insert_entries(struct i915_address_space *vm,
> @@ -2438,8 +2443,7 @@ static void gen8_ggtt_insert_entries(struct i915_address_space *vm,
>  	 * want to flush the TLBs only after we're certain all the PTE updates
>  	 * have finished.
>  	 */
> -	I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
> -	POSTING_READ(GFX_FLSH_CNTL_GEN6);
> +	gen8_ggtt_invalidate(dev_priv);
>  }
>
>  struct insert_entries {
> diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c
> index 58413803ba3c..ef8c18d61a0f 100644
> --- a/drivers/gpu/drm/i915/i915_guc_submission.c
> +++ b/drivers/gpu/drm/i915/i915_guc_submission.c
> @@ -589,9 +589,6 @@ static struct i915_vma *guc_allocate_vma(struct intel_guc *guc, u32 size)
>  		goto err;
>  	}
>
> -	/* Invalidate GuC TLB to let GuC take the latest updates to GTT. */
> -	I915_WRITE(GEN8_GTCR, GEN8_GTCR_INVALIDATE);
> -
>  	return vma;
>
>  err:
> diff --git a/drivers/gpu/drm/i915/intel_guc_loader.c b/drivers/gpu/drm/i915/intel_guc_loader.c
> index a330fa499384..4fb1037faad1 100644
> --- a/drivers/gpu/drm/i915/intel_guc_loader.c
> +++ b/drivers/gpu/drm/i915/intel_guc_loader.c
> @@ -366,9 +366,6 @@ static int guc_ucode_xfer(struct drm_i915_private *dev_priv)
>  		return PTR_ERR(vma);
>  	}
>
> -	/* Invalidate GuC TLB to let GuC take the latest updates to GTT. */
> -	I915_WRITE(GEN8_GTCR, GEN8_GTCR_INVALIDATE);
> -
>  	intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
>
>  	/* init WOPCM */
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index b2c0d509e191..48fc93dadb40 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -874,12 +874,6 @@ static int intel_lr_context_pin(struct i915_gem_context *ctx,
>
>  	ce->state->obj->mm.dirty = true;
>
> -	/* Invalidate GuC TLB. */
> -	if (i915.enable_guc_submission) {
> -		struct drm_i915_private *dev_priv = ctx->i915;
> -		I915_WRITE(GEN8_GTCR, GEN8_GTCR_INVALIDATE);
> -	}
> -
>  	i915_gem_context_get(ctx);
>  	return 0;
>
>
Chris Wilson Dec. 1, 2016, 10:26 a.m. UTC | #2
On Thu, Dec 01, 2016 at 10:06:25AM +0000, Tvrtko Ursulin wrote:
> 
> On 01/12/2016 09:46, Chris Wilson wrote:
> >Move the GuC invalidation of its ggtt TLB to where we perform the ggtt
> >modification rather than proliferate it into all the callers of the
> >insert (which may or may not in fact have to do the insertion).
> >
> >v2: Just do the guc invalidate unconditionally, (afaict) it has no impact
> >without the guc loaded on gen8+
> 
> Why do you find it tempting to do it unconditionally? I would rather
> not touch it on gen8 at all and would also prefer the conditional
> flush in gen9.

Because if I add a conditional here, I end up wanting writing a new vfunc
for invalidate (if I can coax the gmch / gen6 / guc usage into something
consistent). And I'm lazy. :)
-Chris
Tvrtko Ursulin Dec. 1, 2016, 10:49 a.m. UTC | #3
On 01/12/2016 10:26, Chris Wilson wrote:
> On Thu, Dec 01, 2016 at 10:06:25AM +0000, Tvrtko Ursulin wrote:
>>
>> On 01/12/2016 09:46, Chris Wilson wrote:
>>> Move the GuC invalidation of its ggtt TLB to where we perform the ggtt
>>> modification rather than proliferate it into all the callers of the
>>> insert (which may or may not in fact have to do the insertion).
>>>
>>> v2: Just do the guc invalidate unconditionally, (afaict) it has no impact
>>> without the guc loaded on gen8+
>>
>> Why do you find it tempting to do it unconditionally? I would rather
>> not touch it on gen8 at all and would also prefer the conditional
>> flush in gen9.
>
> Because if I add a conditional here, I end up wanting writing a new vfunc
> for invalidate (if I can coax the gmch / gen6 / guc usage into something
> consistent). And I'm lazy. :)

To make sure I fully understand - just because you would not like to see 
the conditional in gen8_ggtt_invalidate? So you would add 
gen8_ggtt_invalidate and gen9_ggtt_invalidate with a GuC flush?

I would have thought conditional is less bothersome than making the 
unused piece of the GPU (on gen8) do stuff.

Regards,

Tvrtko
Chris Wilson Dec. 1, 2016, 10:57 a.m. UTC | #4
On Thu, Dec 01, 2016 at 10:49:31AM +0000, Tvrtko Ursulin wrote:
> 
> On 01/12/2016 10:26, Chris Wilson wrote:
> >On Thu, Dec 01, 2016 at 10:06:25AM +0000, Tvrtko Ursulin wrote:
> >>
> >>On 01/12/2016 09:46, Chris Wilson wrote:
> >>>Move the GuC invalidation of its ggtt TLB to where we perform the ggtt
> >>>modification rather than proliferate it into all the callers of the
> >>>insert (which may or may not in fact have to do the insertion).
> >>>
> >>>v2: Just do the guc invalidate unconditionally, (afaict) it has no impact
> >>>without the guc loaded on gen8+
> >>
> >>Why do you find it tempting to do it unconditionally? I would rather
> >>not touch it on gen8 at all and would also prefer the conditional
> >>flush in gen9.
> >
> >Because if I add a conditional here, I end up wanting writing a new vfunc
> >for invalidate (if I can coax the gmch / gen6 / guc usage into something
> >consistent). And I'm lazy. :)
> 
> To make sure I fully understand - just because you would not like to
> see the conditional in gen8_ggtt_invalidate? So you would add
> gen8_ggtt_invalidate and gen9_ggtt_invalidate with a GuC flush?

gen9_guc_ggtt_invalidate. Because I don't like the conditional in
i915_ggtt_flush() - and that shows we have another place missing the guc
invalidate. And also because at one point, I was toying with
the idea of pushing the flush out to the caller of ->insert_page() in
case they wanted to manipulate multiple pages. (However, that looks
like it will remain single pages only.) I
 
> I would have thought conditional is less bothersome than making the
> unused piece of the GPU (on gen8) do stuff.

I thought you wouldn't want to punch a bit on the register if it wasn't
being used at all.
-Chris
Tvrtko Ursulin Dec. 1, 2016, 11:04 a.m. UTC | #5
On 01/12/2016 10:57, Chris Wilson wrote:
> On Thu, Dec 01, 2016 at 10:49:31AM +0000, Tvrtko Ursulin wrote:
>>
>> On 01/12/2016 10:26, Chris Wilson wrote:
>>> On Thu, Dec 01, 2016 at 10:06:25AM +0000, Tvrtko Ursulin wrote:
>>>>
>>>> On 01/12/2016 09:46, Chris Wilson wrote:
>>>>> Move the GuC invalidation of its ggtt TLB to where we perform the ggtt
>>>>> modification rather than proliferate it into all the callers of the
>>>>> insert (which may or may not in fact have to do the insertion).
>>>>>
>>>>> v2: Just do the guc invalidate unconditionally, (afaict) it has no impact
>>>>> without the guc loaded on gen8+
>>>>
>>>> Why do you find it tempting to do it unconditionally? I would rather
>>>> not touch it on gen8 at all and would also prefer the conditional
>>>> flush in gen9.
>>>
>>> Because if I add a conditional here, I end up wanting writing a new vfunc
>>> for invalidate (if I can coax the gmch / gen6 / guc usage into something
>>> consistent). And I'm lazy. :)
>>
>> To make sure I fully understand - just because you would not like to
>> see the conditional in gen8_ggtt_invalidate? So you would add
>> gen8_ggtt_invalidate and gen9_ggtt_invalidate with a GuC flush?
>
> gen9_guc_ggtt_invalidate. Because I don't like the conditional in
> i915_ggtt_flush() - and that shows we have another place missing the guc
> invalidate. And also because at one point, I was toying with
> the idea of pushing the flush out to the caller of ->insert_page() in
> case they wanted to manipulate multiple pages. (However, that looks
> like it will remain single pages only.) I
>
>> I would have thought conditional is less bothersome than making the
>> unused piece of the GPU (on gen8) do stuff.
>
> I thought you wouldn't want to punch a bit on the register if it wasn't
> being used at all.

Yes I wouldn't, and not only unused but not validated probably.

Regards,

Tvrtko
diff mbox

Patch

diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 02fb063302bf..dccc4d0eef88 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -2387,6 +2387,12 @@  static void gen8_set_pte(void __iomem *addr, gen8_pte_t pte)
 	writeq(pte, addr);
 }
 
+static void gen8_ggtt_invalidate(struct drm_i915_private *dev_priv)
+{
+	I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
+	I915_WRITE(GEN8_GTCR, GEN8_GTCR_INVALIDATE);
+}
+
 static void gen8_ggtt_insert_page(struct i915_address_space *vm,
 				  dma_addr_t addr,
 				  uint64_t offset,
@@ -2400,8 +2406,7 @@  static void gen8_ggtt_insert_page(struct i915_address_space *vm,
 
 	gen8_set_pte(pte, gen8_pte_encode(addr, level));
 
-	I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
-	POSTING_READ(GFX_FLSH_CNTL_GEN6);
+	gen8_ggtt_invalidate(dev_priv);
 }
 
 static void gen8_ggtt_insert_entries(struct i915_address_space *vm,
@@ -2438,8 +2443,7 @@  static void gen8_ggtt_insert_entries(struct i915_address_space *vm,
 	 * want to flush the TLBs only after we're certain all the PTE updates
 	 * have finished.
 	 */
-	I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
-	POSTING_READ(GFX_FLSH_CNTL_GEN6);
+	gen8_ggtt_invalidate(dev_priv);
 }
 
 struct insert_entries {
diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c
index 58413803ba3c..ef8c18d61a0f 100644
--- a/drivers/gpu/drm/i915/i915_guc_submission.c
+++ b/drivers/gpu/drm/i915/i915_guc_submission.c
@@ -589,9 +589,6 @@  static struct i915_vma *guc_allocate_vma(struct intel_guc *guc, u32 size)
 		goto err;
 	}
 
-	/* Invalidate GuC TLB to let GuC take the latest updates to GTT. */
-	I915_WRITE(GEN8_GTCR, GEN8_GTCR_INVALIDATE);
-
 	return vma;
 
 err:
diff --git a/drivers/gpu/drm/i915/intel_guc_loader.c b/drivers/gpu/drm/i915/intel_guc_loader.c
index a330fa499384..4fb1037faad1 100644
--- a/drivers/gpu/drm/i915/intel_guc_loader.c
+++ b/drivers/gpu/drm/i915/intel_guc_loader.c
@@ -366,9 +366,6 @@  static int guc_ucode_xfer(struct drm_i915_private *dev_priv)
 		return PTR_ERR(vma);
 	}
 
-	/* Invalidate GuC TLB to let GuC take the latest updates to GTT. */
-	I915_WRITE(GEN8_GTCR, GEN8_GTCR_INVALIDATE);
-
 	intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
 
 	/* init WOPCM */
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index b2c0d509e191..48fc93dadb40 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -874,12 +874,6 @@  static int intel_lr_context_pin(struct i915_gem_context *ctx,
 
 	ce->state->obj->mm.dirty = true;
 
-	/* Invalidate GuC TLB. */
-	if (i915.enable_guc_submission) {
-		struct drm_i915_private *dev_priv = ctx->i915;
-		I915_WRITE(GEN8_GTCR, GEN8_GTCR_INVALIDATE);
-	}
-
 	i915_gem_context_get(ctx);
 	return 0;