diff mbox series

[v5] drm/i915: Added Wa_18022495364

Message ID 20230914134724.657957-1-dnyaneshwar.bhadane@intel.com (mailing list archive)
State New, archived
Headers show
Series [v5] drm/i915: Added Wa_18022495364 | expand

Commit Message

Bhadane, Dnyaneshwar Sept. 14, 2023, 1:47 p.m. UTC
Set the instruction and state cache invalidate bit using INDIRECT_CTX on
every gpu context switch. 
The goal of this workaround is to actually perform an explicit
invalidation of that cache (by re-writing the register) during every GPU
context switch, which is accomplished via a "workaround batchbuffer"
that's attached to the context via INDIRECT_CTX. (Matt)
BSpec: 11354

v2:
- Removed extra parentheses from the condition (Lucas)
- Fixed spacing and new line (Lucas)

v3:
- Fixed commit message.

v4:
- Only GEN12 changes are kept  (Matt Ropper)
- Fixed the commit message for r-b (Matt Ropper)
- Renamed the register bit in define

v5:
- Moved out the  from golden context init (Matt Roper)
- Use INDIRECT_CTX to set bit on each GPU context switch (Matt Roper)

Cc: Lucas De Marchi <lucas.demarchi@intel.com>
Cc: Matt Roper <matthew.d.roper@intel.com>
Signed-off-by: Dnyaneshwar Bhadane <dnyaneshwar.bhadane@intel.com>

---
 drivers/gpu/drm/i915/gt/intel_gt_regs.h |  2 ++
 drivers/gpu/drm/i915/gt/intel_lrc.c     | 15 +++++++++++++++
 2 files changed, 17 insertions(+)

Comments

Kandpal, Suraj Sept. 14, 2023, 4:11 p.m. UTC | #1
> Subject: [Intel-gfx] [PATCH v5] drm/i915: Added Wa_18022495364
> 

Commit message style should be imperative so the header becomes something
Around the lines of 
"Add Wa_18022495364"

> Set the instruction and state cache invalidate bit using INDIRECT_CTX on every
> gpu context switch.
> The goal of this workaround is to actually perform an explicit invalidation of
> that cache (by re-writing the register) during every GPU context switch, which is
> accomplished via a "workaround batchbuffer"
> that's attached to the context via INDIRECT_CTX. (Matt)
> BSpec: 11354
> 
> v2:
> - Removed extra parentheses from the condition (Lucas)
> - Fixed spacing and new line (Lucas)
> 
> v3:
> - Fixed commit message.
> 
> v4:
> - Only GEN12 changes are kept  (Matt Ropper)
> - Fixed the commit message for r-b (Matt Ropper)
> - Renamed the register bit in define
> 
> v5:
> - Moved out the  from golden context init (Matt Roper)
> - Use INDIRECT_CTX to set bit on each GPU context switch (Matt Roper)
> 

Same thing applies to the above version fix description
Remove, Fix, Rename, Move

Regards,
Suraj Kandpal
> Cc: Lucas De Marchi <lucas.demarchi@intel.com>
> Cc: Matt Roper <matthew.d.roper@intel.com>
> Signed-off-by: Dnyaneshwar Bhadane <dnyaneshwar.bhadane@intel.com>
> 
> ---
>  drivers/gpu/drm/i915/gt/intel_gt_regs.h |  2 ++
>  drivers/gpu/drm/i915/gt/intel_lrc.c     | 15 +++++++++++++++
>  2 files changed, 17 insertions(+)
> 
> diff --git a/drivers/gpu/drm/i915/gt/intel_gt_regs.h
> b/drivers/gpu/drm/i915/gt/intel_gt_regs.h
> index 0e4c638fcbbf..38f02ef8ed01 100644
> --- a/drivers/gpu/drm/i915/gt/intel_gt_regs.h
> +++ b/drivers/gpu/drm/i915/gt/intel_gt_regs.h
> @@ -164,6 +164,8 @@
>  #define GEN9_CSFE_CHICKEN1_RCS			_MMIO(0x20d4)
>  #define   GEN9_PREEMPT_GPGPU_SYNC_SWITCH_DISABLE	(1 << 2)
>  #define   GEN11_ENABLE_32_PLANE_MODE		(1 << 7)
> +#define GEN12_CS_DEBUG_MODE2			_MMIO(0x20d8)
> +#define   INSTRUCTION_STATE_CACHE_INVALIDATE	REG_BIT(6)
> 
>  #define GEN7_FF_SLICE_CS_CHICKEN1		_MMIO(0x20e0)
>  #define   GEN9_FFSC_PERCTX_PREEMPT_CTRL		(1 << 14)
> diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c
> b/drivers/gpu/drm/i915/gt/intel_lrc.c
> index 967fe4d77a87..fe98039499c5 100644
> --- a/drivers/gpu/drm/i915/gt/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
> @@ -1332,6 +1332,15 @@ dg2_emit_draw_watermark_setting(u32 *cs)
>  	return cs;
>  }
> 
> +static u32 *
> +gen12_set_instruction_state_cache_invalid(u32 *cs) {
> +	*cs++ = MI_LOAD_REGISTER_IMM(1);
> +	*cs++ = i915_mmio_reg_offset(GEN12_CS_DEBUG_MODE2);
> +	*cs++ =
> _MASKED_BIT_ENABLE(INSTRUCTION_STATE_CACHE_INVALIDATE);
> +	return cs;
> +}
> +
>  static u32 *
>  gen12_emit_indirect_ctx_rcs(const struct intel_context *ce, u32 *cs)  { @@ -
> 1345,6 +1354,12 @@ gen12_emit_indirect_ctx_rcs(const struct intel_context
> *ce, u32 *cs)
> 
>  	cs = gen12_emit_aux_table_inv(ce->engine, cs);
> 
> +	/* Wa_18022495364 */
> +	if (IS_ALDERLAKE_P(ce->engine->i915) || IS_ALDERLAKE_P_N(ce-
> >engine->i915) ||
> +	    IS_ALDERLAKE_S(ce->engine->i915) || IS_TIGERLAKE(ce->engine-
> >i915) ||
> +		IS_ROCKETLAKE(ce->engine->i915) || IS_DG1(ce->engine-
> >i915))
> +		cs = gen12_set_instruction_state_cache_invalid(cs);
> +
>  	/* Wa_16014892111 */
>  	if (IS_GFX_GT_IP_STEP(ce->engine->gt, IP_VER(12, 70), STEP_A0,
> STEP_B0) ||
>  	    IS_GFX_GT_IP_STEP(ce->engine->gt, IP_VER(12, 71), STEP_A0,
> STEP_B0) ||
> --
> 2.34.1
Matt Roper Sept. 14, 2023, 4:49 p.m. UTC | #2
On Thu, Sep 14, 2023 at 07:17:24PM +0530, Dnyaneshwar Bhadane wrote:
> Set the instruction and state cache invalidate bit using INDIRECT_CTX on
> every gpu context switch. 
> The goal of this workaround is to actually perform an explicit
> invalidation of that cache (by re-writing the register) during every GPU
> context switch, which is accomplished via a "workaround batchbuffer"
> that's attached to the context via INDIRECT_CTX. (Matt)
> BSpec: 11354
> 
> v2:
> - Removed extra parentheses from the condition (Lucas)
> - Fixed spacing and new line (Lucas)
> 
> v3:
> - Fixed commit message.
> 
> v4:
> - Only GEN12 changes are kept  (Matt Ropper)
> - Fixed the commit message for r-b (Matt Ropper)
> - Renamed the register bit in define
> 
> v5:
> - Moved out the  from golden context init (Matt Roper)
> - Use INDIRECT_CTX to set bit on each GPU context switch (Matt Roper)
> 
> Cc: Lucas De Marchi <lucas.demarchi@intel.com>
> Cc: Matt Roper <matthew.d.roper@intel.com>
> Signed-off-by: Dnyaneshwar Bhadane <dnyaneshwar.bhadane@intel.com>
> 
> ---
>  drivers/gpu/drm/i915/gt/intel_gt_regs.h |  2 ++
>  drivers/gpu/drm/i915/gt/intel_lrc.c     | 15 +++++++++++++++
>  2 files changed, 17 insertions(+)
> 
> diff --git a/drivers/gpu/drm/i915/gt/intel_gt_regs.h b/drivers/gpu/drm/i915/gt/intel_gt_regs.h
> index 0e4c638fcbbf..38f02ef8ed01 100644
> --- a/drivers/gpu/drm/i915/gt/intel_gt_regs.h
> +++ b/drivers/gpu/drm/i915/gt/intel_gt_regs.h
> @@ -164,6 +164,8 @@
>  #define GEN9_CSFE_CHICKEN1_RCS			_MMIO(0x20d4)
>  #define   GEN9_PREEMPT_GPGPU_SYNC_SWITCH_DISABLE	(1 << 2)
>  #define   GEN11_ENABLE_32_PLANE_MODE		(1 << 7)
> +#define GEN12_CS_DEBUG_MODE2			_MMIO(0x20d8)
> +#define   INSTRUCTION_STATE_CACHE_INVALIDATE	REG_BIT(6)
>  
>  #define GEN7_FF_SLICE_CS_CHICKEN1		_MMIO(0x20e0)
>  #define   GEN9_FFSC_PERCTX_PREEMPT_CTRL		(1 << 14)
> diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
> index 967fe4d77a87..fe98039499c5 100644
> --- a/drivers/gpu/drm/i915/gt/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
> @@ -1332,6 +1332,15 @@ dg2_emit_draw_watermark_setting(u32 *cs)
>  	return cs;
>  }
>  
> +static u32 *
> +gen12_set_instruction_state_cache_invalid(u32 *cs)

Minor nitpick:  I'd name this "gen12_invalidate_state_cache."  The
general terminology with caches is that we "invalidate" them rather than
"set invalid," and that also matches the terminology used by the
register bit itself.

> +{
> +	*cs++ = MI_LOAD_REGISTER_IMM(1);
> +	*cs++ = i915_mmio_reg_offset(GEN12_CS_DEBUG_MODE2);
> +	*cs++ = _MASKED_BIT_ENABLE(INSTRUCTION_STATE_CACHE_INVALIDATE);
> +	return cs;
> +}
> +
>  static u32 *
>  gen12_emit_indirect_ctx_rcs(const struct intel_context *ce, u32 *cs)
>  {
> @@ -1345,6 +1354,12 @@ gen12_emit_indirect_ctx_rcs(const struct intel_context *ce, u32 *cs)
>  
>  	cs = gen12_emit_aux_table_inv(ce->engine, cs);
>  
> +	/* Wa_18022495364 */
> +	if (IS_ALDERLAKE_P(ce->engine->i915) || IS_ALDERLAKE_P_N(ce->engine->i915) ||

ADL-N is defined as a subplatform of ADL-P in the driver, so ADL-N
platforms will automatically be matched by the IS_ALDERLAKE_P; you don't
need the IS_ALDERLAKE_P_N condition here (that's only for the rare
places where we want to match ADL-N specifically _without_ matching
other ADL-P platforms as well).

> +	    IS_ALDERLAKE_S(ce->engine->i915) || IS_TIGERLAKE(ce->engine->i915) ||
> +		IS_ROCKETLAKE(ce->engine->i915) || IS_DG1(ce->engine->i915))

Since this workaround winds up applying to every single gen12lp platform, it's
probably simpler to just write the condition as

    if (IS_GFX_GT_IP_RANGE(cs->engine->i915), IP_VER(12, 0), IP_VER(12, 10))


Matt

> +		cs = gen12_set_instruction_state_cache_invalid(cs);
> +
>  	/* Wa_16014892111 */
>  	if (IS_GFX_GT_IP_STEP(ce->engine->gt, IP_VER(12, 70), STEP_A0, STEP_B0) ||
>  	    IS_GFX_GT_IP_STEP(ce->engine->gt, IP_VER(12, 71), STEP_A0, STEP_B0) ||
> -- 
> 2.34.1
>
Tvrtko Ursulin Sept. 28, 2023, 12:23 p.m. UTC | #3
On 14/09/2023 17:49, Matt Roper wrote:
> On Thu, Sep 14, 2023 at 07:17:24PM +0530, Dnyaneshwar Bhadane wrote:
>> Set the instruction and state cache invalidate bit using INDIRECT_CTX on
>> every gpu context switch.
>> The goal of this workaround is to actually perform an explicit
>> invalidation of that cache (by re-writing the register) during every GPU
>> context switch, which is accomplished via a "workaround batchbuffer"
>> that's attached to the context via INDIRECT_CTX. (Matt)
>> BSpec: 11354
>>
>> v2:
>> - Removed extra parentheses from the condition (Lucas)
>> - Fixed spacing and new line (Lucas)
>>
>> v3:
>> - Fixed commit message.
>>
>> v4:
>> - Only GEN12 changes are kept  (Matt Ropper)
>> - Fixed the commit message for r-b (Matt Ropper)
>> - Renamed the register bit in define
>>
>> v5:
>> - Moved out the  from golden context init (Matt Roper)
>> - Use INDIRECT_CTX to set bit on each GPU context switch (Matt Roper)
>>
>> Cc: Lucas De Marchi <lucas.demarchi@intel.com>
>> Cc: Matt Roper <matthew.d.roper@intel.com>
>> Signed-off-by: Dnyaneshwar Bhadane <dnyaneshwar.bhadane@intel.com>
>>
>> ---
>>   drivers/gpu/drm/i915/gt/intel_gt_regs.h |  2 ++
>>   drivers/gpu/drm/i915/gt/intel_lrc.c     | 15 +++++++++++++++
>>   2 files changed, 17 insertions(+)
>>
>> diff --git a/drivers/gpu/drm/i915/gt/intel_gt_regs.h b/drivers/gpu/drm/i915/gt/intel_gt_regs.h
>> index 0e4c638fcbbf..38f02ef8ed01 100644
>> --- a/drivers/gpu/drm/i915/gt/intel_gt_regs.h
>> +++ b/drivers/gpu/drm/i915/gt/intel_gt_regs.h
>> @@ -164,6 +164,8 @@
>>   #define GEN9_CSFE_CHICKEN1_RCS			_MMIO(0x20d4)
>>   #define   GEN9_PREEMPT_GPGPU_SYNC_SWITCH_DISABLE	(1 << 2)
>>   #define   GEN11_ENABLE_32_PLANE_MODE		(1 << 7)
>> +#define GEN12_CS_DEBUG_MODE2			_MMIO(0x20d8)
>> +#define   INSTRUCTION_STATE_CACHE_INVALIDATE	REG_BIT(6)
>>   
>>   #define GEN7_FF_SLICE_CS_CHICKEN1		_MMIO(0x20e0)
>>   #define   GEN9_FFSC_PERCTX_PREEMPT_CTRL		(1 << 14)
>> diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
>> index 967fe4d77a87..fe98039499c5 100644
>> --- a/drivers/gpu/drm/i915/gt/intel_lrc.c
>> +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
>> @@ -1332,6 +1332,15 @@ dg2_emit_draw_watermark_setting(u32 *cs)
>>   	return cs;
>>   }
>>   
>> +static u32 *
>> +gen12_set_instruction_state_cache_invalid(u32 *cs)
> 
> Minor nitpick:  I'd name this "gen12_invalidate_state_cache."  The
> general terminology with caches is that we "invalidate" them rather than
> "set invalid," and that also matches the terminology used by the
> register bit itself.
> 
>> +{
>> +	*cs++ = MI_LOAD_REGISTER_IMM(1);
>> +	*cs++ = i915_mmio_reg_offset(GEN12_CS_DEBUG_MODE2);
>> +	*cs++ = _MASKED_BIT_ENABLE(INSTRUCTION_STATE_CACHE_INVALIDATE);
>> +	return cs;
>> +}
>> +
>>   static u32 *
>>   gen12_emit_indirect_ctx_rcs(const struct intel_context *ce, u32 *cs)
>>   {
>> @@ -1345,6 +1354,12 @@ gen12_emit_indirect_ctx_rcs(const struct intel_context *ce, u32 *cs)
>>   
>>   	cs = gen12_emit_aux_table_inv(ce->engine, cs);
>>   
>> +	/* Wa_18022495364 */
>> +	if (IS_ALDERLAKE_P(ce->engine->i915) || IS_ALDERLAKE_P_N(ce->engine->i915) ||
> 
> ADL-N is defined as a subplatform of ADL-P in the driver, so ADL-N
> platforms will automatically be matched by the IS_ALDERLAKE_P; you don't
> need the IS_ALDERLAKE_P_N condition here (that's only for the rare
> places where we want to match ADL-N specifically _without_ matching
> other ADL-P platforms as well).
> 
>> +	    IS_ALDERLAKE_S(ce->engine->i915) || IS_TIGERLAKE(ce->engine->i915) ||
>> +		IS_ROCKETLAKE(ce->engine->i915) || IS_DG1(ce->engine->i915))
> 
> Since this workaround winds up applying to every single gen12lp platform, it's
> probably simpler to just write the condition as
> 
>      if (IS_GFX_GT_IP_RANGE(cs->engine->i915), IP_VER(12, 0), IP_VER(12, 10))

Can we haz a decoder sheet of ip ver to marketing/user platform names? 
It will be harder and harder to know when the thing is read out from the 
GPU register at runtime.

Wishing for platform strings in comments back maybe? :)

Regards,

Tvrtko

> 
> 
> Matt
> 
>> +		cs = gen12_set_instruction_state_cache_invalid(cs);
>> +
>>   	/* Wa_16014892111 */
>>   	if (IS_GFX_GT_IP_STEP(ce->engine->gt, IP_VER(12, 70), STEP_A0, STEP_B0) ||
>>   	    IS_GFX_GT_IP_STEP(ce->engine->gt, IP_VER(12, 71), STEP_A0, STEP_B0) ||
>> -- 
>> 2.34.1
>>
>
diff mbox series

Patch

diff --git a/drivers/gpu/drm/i915/gt/intel_gt_regs.h b/drivers/gpu/drm/i915/gt/intel_gt_regs.h
index 0e4c638fcbbf..38f02ef8ed01 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_regs.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt_regs.h
@@ -164,6 +164,8 @@ 
 #define GEN9_CSFE_CHICKEN1_RCS			_MMIO(0x20d4)
 #define   GEN9_PREEMPT_GPGPU_SYNC_SWITCH_DISABLE	(1 << 2)
 #define   GEN11_ENABLE_32_PLANE_MODE		(1 << 7)
+#define GEN12_CS_DEBUG_MODE2			_MMIO(0x20d8)
+#define   INSTRUCTION_STATE_CACHE_INVALIDATE	REG_BIT(6)
 
 #define GEN7_FF_SLICE_CS_CHICKEN1		_MMIO(0x20e0)
 #define   GEN9_FFSC_PERCTX_PREEMPT_CTRL		(1 << 14)
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index 967fe4d77a87..fe98039499c5 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -1332,6 +1332,15 @@  dg2_emit_draw_watermark_setting(u32 *cs)
 	return cs;
 }
 
+static u32 *
+gen12_set_instruction_state_cache_invalid(u32 *cs)
+{
+	*cs++ = MI_LOAD_REGISTER_IMM(1);
+	*cs++ = i915_mmio_reg_offset(GEN12_CS_DEBUG_MODE2);
+	*cs++ = _MASKED_BIT_ENABLE(INSTRUCTION_STATE_CACHE_INVALIDATE);
+	return cs;
+}
+
 static u32 *
 gen12_emit_indirect_ctx_rcs(const struct intel_context *ce, u32 *cs)
 {
@@ -1345,6 +1354,12 @@  gen12_emit_indirect_ctx_rcs(const struct intel_context *ce, u32 *cs)
 
 	cs = gen12_emit_aux_table_inv(ce->engine, cs);
 
+	/* Wa_18022495364 */
+	if (IS_ALDERLAKE_P(ce->engine->i915) || IS_ALDERLAKE_P_N(ce->engine->i915) ||
+	    IS_ALDERLAKE_S(ce->engine->i915) || IS_TIGERLAKE(ce->engine->i915) ||
+		IS_ROCKETLAKE(ce->engine->i915) || IS_DG1(ce->engine->i915))
+		cs = gen12_set_instruction_state_cache_invalid(cs);
+
 	/* Wa_16014892111 */
 	if (IS_GFX_GT_IP_STEP(ce->engine->gt, IP_VER(12, 70), STEP_A0, STEP_B0) ||
 	    IS_GFX_GT_IP_STEP(ce->engine->gt, IP_VER(12, 71), STEP_A0, STEP_B0) ||