[v6,5/6] drm/i915/gen8: Add WaClearSlmSpaceAtContextSwitch workaround
diff mbox

Message ID 1434735435-14728-6-git-send-email-arun.siluvery@linux.intel.com
State New
Headers show

Commit Message

arun.siluvery@linux.intel.com June 19, 2015, 5:37 p.m. UTC
In Indirect context w/a batch buffer,
WaClearSlmSpaceAtContextSwitch

This WA performs writes to scratch page so it must be valid, this check
is performed before initializing the batch with this WA.

v2: s/PIPE_CONTROL_FLUSH_RO_CACHES/PIPE_CONTROL_FLUSH_L3 (Ville)

Cc: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Dave Gordon <david.s.gordon@intel.com>
Signed-off-by: Rafael Barbalho <rafael.barbalho@intel.com>
Signed-off-by: Arun Siluvery <arun.siluvery@linux.intel.com>
---
 drivers/gpu/drm/i915/i915_reg.h  |  1 +
 drivers/gpu/drm/i915/intel_lrc.c | 16 ++++++++++++++++
 2 files changed, 17 insertions(+)

Comments

Chris Wilson June 19, 2015, 6:09 p.m. UTC | #1
On Fri, Jun 19, 2015 at 06:37:14PM +0100, Arun Siluvery wrote:
> In Indirect context w/a batch buffer,
> WaClearSlmSpaceAtContextSwitch
> 
> This WA performs writes to scratch page so it must be valid, this check
> is performed before initializing the batch with this WA.
> 
> v2: s/PIPE_CONTROL_FLUSH_RO_CACHES/PIPE_CONTROL_FLUSH_L3 (Ville)
> 
> Cc: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Dave Gordon <david.s.gordon@intel.com>
> Signed-off-by: Rafael Barbalho <rafael.barbalho@intel.com>
> Signed-off-by: Arun Siluvery <arun.siluvery@linux.intel.com>
> ---
>  drivers/gpu/drm/i915/i915_reg.h  |  1 +
>  drivers/gpu/drm/i915/intel_lrc.c | 16 ++++++++++++++++
>  2 files changed, 17 insertions(+)
> 
> diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
> index d14ad20..7637e64 100644
> --- a/drivers/gpu/drm/i915/i915_reg.h
> +++ b/drivers/gpu/drm/i915/i915_reg.h
> @@ -410,6 +410,7 @@
>  #define   DISPLAY_PLANE_A           (0<<20)
>  #define   DISPLAY_PLANE_B           (1<<20)
>  #define GFX_OP_PIPE_CONTROL(len)	((0x3<<29)|(0x3<<27)|(0x2<<24)|(len-2))
> +#define   PIPE_CONTROL_FLUSH_L3				(1<<27)
>  #define   PIPE_CONTROL_GLOBAL_GTT_IVB			(1<<24) /* gen7+ */
>  #define   PIPE_CONTROL_MMIO_WRITE			(1<<23)
>  #define   PIPE_CONTROL_STORE_DATA_INDEX			(1<<21)
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index 3e7aaa9..664455c 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -1137,6 +1137,7 @@ static int gen8_init_indirectctx_bb(struct intel_engine_cs *ring,
>  				    uint32_t *const batch,
>  				    uint32_t *offset)
>  {
> +	uint32_t scratch_addr;
>  	uint32_t index = wa_ctx_start(wa_ctx, *offset, CACHELINE_DWORDS);
>  
>  	/* WaDisableCtxRestoreArbitration:bdw,chv */
> @@ -1165,6 +1166,21 @@ static int gen8_init_indirectctx_bb(struct intel_engine_cs *ring,
>  		wa_ctx_emit(batch, l3sqc4_flush & ~GEN8_LQSC_FLUSH_COHERENT_LINES);
>  	}
>  
> +	/* WaClearSlmSpaceAtContextSwitch:bdw,chv */
> +	/* Actual scratch location is at 128 bytes offset */
> +	scratch_addr = ring->scratch.gtt_offset + 2*CACHELINE_BYTES;
> +	scratch_addr |= PIPE_CONTROL_GLOBAL_GTT;

I thought this bit was now mbz - that's how we treat it elsewhere e.g.
gen8_emit_flush_render, and that the address has to be naturally aligned
for the target write. (Similar bit in patch 6 fwiw.)
-Chris
arun.siluvery@linux.intel.com June 22, 2015, 11:29 a.m. UTC | #2
On 19/06/2015 19:09, Chris Wilson wrote:
> On Fri, Jun 19, 2015 at 06:37:14PM +0100, Arun Siluvery wrote:
>> In Indirect context w/a batch buffer,
>> WaClearSlmSpaceAtContextSwitch
>>
>> This WA performs writes to scratch page so it must be valid, this check
>> is performed before initializing the batch with this WA.
>>
>> v2: s/PIPE_CONTROL_FLUSH_RO_CACHES/PIPE_CONTROL_FLUSH_L3 (Ville)
>>
>> Cc: Chris Wilson <chris@chris-wilson.co.uk>
>> Cc: Dave Gordon <david.s.gordon@intel.com>
>> Signed-off-by: Rafael Barbalho <rafael.barbalho@intel.com>
>> Signed-off-by: Arun Siluvery <arun.siluvery@linux.intel.com>
>> ---
>>   drivers/gpu/drm/i915/i915_reg.h  |  1 +
>>   drivers/gpu/drm/i915/intel_lrc.c | 16 ++++++++++++++++
>>   2 files changed, 17 insertions(+)
>>
>> diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
>> index d14ad20..7637e64 100644
>> --- a/drivers/gpu/drm/i915/i915_reg.h
>> +++ b/drivers/gpu/drm/i915/i915_reg.h
>> @@ -410,6 +410,7 @@
>>   #define   DISPLAY_PLANE_A           (0<<20)
>>   #define   DISPLAY_PLANE_B           (1<<20)
>>   #define GFX_OP_PIPE_CONTROL(len)	((0x3<<29)|(0x3<<27)|(0x2<<24)|(len-2))
>> +#define   PIPE_CONTROL_FLUSH_L3				(1<<27)
>>   #define   PIPE_CONTROL_GLOBAL_GTT_IVB			(1<<24) /* gen7+ */
>>   #define   PIPE_CONTROL_MMIO_WRITE			(1<<23)
>>   #define   PIPE_CONTROL_STORE_DATA_INDEX			(1<<21)
>> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
>> index 3e7aaa9..664455c 100644
>> --- a/drivers/gpu/drm/i915/intel_lrc.c
>> +++ b/drivers/gpu/drm/i915/intel_lrc.c
>> @@ -1137,6 +1137,7 @@ static int gen8_init_indirectctx_bb(struct intel_engine_cs *ring,
>>   				    uint32_t *const batch,
>>   				    uint32_t *offset)
>>   {
>> +	uint32_t scratch_addr;
>>   	uint32_t index = wa_ctx_start(wa_ctx, *offset, CACHELINE_DWORDS);
>>
>>   	/* WaDisableCtxRestoreArbitration:bdw,chv */
>> @@ -1165,6 +1166,21 @@ static int gen8_init_indirectctx_bb(struct intel_engine_cs *ring,
>>   		wa_ctx_emit(batch, l3sqc4_flush & ~GEN8_LQSC_FLUSH_COHERENT_LINES);
>>   	}
>>
>> +	/* WaClearSlmSpaceAtContextSwitch:bdw,chv */
>> +	/* Actual scratch location is at 128 bytes offset */
>> +	scratch_addr = ring->scratch.gtt_offset + 2*CACHELINE_BYTES;
>> +	scratch_addr |= PIPE_CONTROL_GLOBAL_GTT;
>
> I thought this bit was now mbz - that's how we treat it elsewhere e.g.
> gen8_emit_flush_render, and that the address has to be naturally aligned
> for the target write. (Similar bit in patch 6 fwiw.)
you are correct, this bit is mbz.

Daniel, could you please remove this line when applying patches?
sorry for additional work.

 >> +	scratch_addr |= PIPE_CONTROL_GLOBAL_GTT;

regards
Arun

> -Chris
>
Daniel Vetter June 22, 2015, 3:39 p.m. UTC | #3
On Mon, Jun 22, 2015 at 12:29:05PM +0100, Siluvery, Arun wrote:
> On 19/06/2015 19:09, Chris Wilson wrote:
> >On Fri, Jun 19, 2015 at 06:37:14PM +0100, Arun Siluvery wrote:
> >>In Indirect context w/a batch buffer,
> >>WaClearSlmSpaceAtContextSwitch
> >>
> >>This WA performs writes to scratch page so it must be valid, this check
> >>is performed before initializing the batch with this WA.
> >>
> >>v2: s/PIPE_CONTROL_FLUSH_RO_CACHES/PIPE_CONTROL_FLUSH_L3 (Ville)
> >>
> >>Cc: Chris Wilson <chris@chris-wilson.co.uk>
> >>Cc: Dave Gordon <david.s.gordon@intel.com>
> >>Signed-off-by: Rafael Barbalho <rafael.barbalho@intel.com>
> >>Signed-off-by: Arun Siluvery <arun.siluvery@linux.intel.com>
> >>---
> >>  drivers/gpu/drm/i915/i915_reg.h  |  1 +
> >>  drivers/gpu/drm/i915/intel_lrc.c | 16 ++++++++++++++++
> >>  2 files changed, 17 insertions(+)
> >>
> >>diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
> >>index d14ad20..7637e64 100644
> >>--- a/drivers/gpu/drm/i915/i915_reg.h
> >>+++ b/drivers/gpu/drm/i915/i915_reg.h
> >>@@ -410,6 +410,7 @@
> >>  #define   DISPLAY_PLANE_A           (0<<20)
> >>  #define   DISPLAY_PLANE_B           (1<<20)
> >>  #define GFX_OP_PIPE_CONTROL(len)	((0x3<<29)|(0x3<<27)|(0x2<<24)|(len-2))
> >>+#define   PIPE_CONTROL_FLUSH_L3				(1<<27)
> >>  #define   PIPE_CONTROL_GLOBAL_GTT_IVB			(1<<24) /* gen7+ */
> >>  #define   PIPE_CONTROL_MMIO_WRITE			(1<<23)
> >>  #define   PIPE_CONTROL_STORE_DATA_INDEX			(1<<21)
> >>diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> >>index 3e7aaa9..664455c 100644
> >>--- a/drivers/gpu/drm/i915/intel_lrc.c
> >>+++ b/drivers/gpu/drm/i915/intel_lrc.c
> >>@@ -1137,6 +1137,7 @@ static int gen8_init_indirectctx_bb(struct intel_engine_cs *ring,
> >>  				    uint32_t *const batch,
> >>  				    uint32_t *offset)
> >>  {
> >>+	uint32_t scratch_addr;
> >>  	uint32_t index = wa_ctx_start(wa_ctx, *offset, CACHELINE_DWORDS);
> >>
> >>  	/* WaDisableCtxRestoreArbitration:bdw,chv */
> >>@@ -1165,6 +1166,21 @@ static int gen8_init_indirectctx_bb(struct intel_engine_cs *ring,
> >>  		wa_ctx_emit(batch, l3sqc4_flush & ~GEN8_LQSC_FLUSH_COHERENT_LINES);
> >>  	}
> >>
> >>+	/* WaClearSlmSpaceAtContextSwitch:bdw,chv */
> >>+	/* Actual scratch location is at 128 bytes offset */
> >>+	scratch_addr = ring->scratch.gtt_offset + 2*CACHELINE_BYTES;
> >>+	scratch_addr |= PIPE_CONTROL_GLOBAL_GTT;
> >
> >I thought this bit was now mbz - that's how we treat it elsewhere e.g.
> >gen8_emit_flush_render, and that the address has to be naturally aligned
> >for the target write. (Similar bit in patch 6 fwiw.)
> you are correct, this bit is mbz.
> 
> Daniel, could you please remove this line when applying patches?
> sorry for additional work.
> 
> >> +	scratch_addr |= PIPE_CONTROL_GLOBAL_GTT;

Since you need to send out a follow-up anyway, can you pls do that when
resending? I'm still applying all the other ones.
-Daniel

Patch
diff mbox

diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index d14ad20..7637e64 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -410,6 +410,7 @@ 
 #define   DISPLAY_PLANE_A           (0<<20)
 #define   DISPLAY_PLANE_B           (1<<20)
 #define GFX_OP_PIPE_CONTROL(len)	((0x3<<29)|(0x3<<27)|(0x2<<24)|(len-2))
+#define   PIPE_CONTROL_FLUSH_L3				(1<<27)
 #define   PIPE_CONTROL_GLOBAL_GTT_IVB			(1<<24) /* gen7+ */
 #define   PIPE_CONTROL_MMIO_WRITE			(1<<23)
 #define   PIPE_CONTROL_STORE_DATA_INDEX			(1<<21)
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 3e7aaa9..664455c 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -1137,6 +1137,7 @@  static int gen8_init_indirectctx_bb(struct intel_engine_cs *ring,
 				    uint32_t *const batch,
 				    uint32_t *offset)
 {
+	uint32_t scratch_addr;
 	uint32_t index = wa_ctx_start(wa_ctx, *offset, CACHELINE_DWORDS);
 
 	/* WaDisableCtxRestoreArbitration:bdw,chv */
@@ -1165,6 +1166,21 @@  static int gen8_init_indirectctx_bb(struct intel_engine_cs *ring,
 		wa_ctx_emit(batch, l3sqc4_flush & ~GEN8_LQSC_FLUSH_COHERENT_LINES);
 	}
 
+	/* WaClearSlmSpaceAtContextSwitch:bdw,chv */
+	/* Actual scratch location is at 128 bytes offset */
+	scratch_addr = ring->scratch.gtt_offset + 2*CACHELINE_BYTES;
+	scratch_addr |= PIPE_CONTROL_GLOBAL_GTT;
+
+	wa_ctx_emit(batch, GFX_OP_PIPE_CONTROL(6));
+	wa_ctx_emit(batch, (PIPE_CONTROL_FLUSH_L3 |
+			    PIPE_CONTROL_GLOBAL_GTT_IVB |
+			    PIPE_CONTROL_CS_STALL |
+			    PIPE_CONTROL_QW_WRITE));
+	wa_ctx_emit(batch, scratch_addr);
+	wa_ctx_emit(batch, 0);
+	wa_ctx_emit(batch, 0);
+	wa_ctx_emit(batch, 0);
+
 	/* Pad to end of cacheline */
 	while (index % CACHELINE_DWORDS)
 		wa_ctx_emit(batch, MI_NOOP);