diff mbox series

[v3,11/14] drm/i915/gt: Add MCR-specific workaround initializers

Message ID 20221014230239.1023689-12-matthew.d.roper@intel.com (mailing list archive)
State New, archived
Headers show
Series Explicit MCR handling and MTL steering | expand

Commit Message

Matt Roper Oct. 14, 2022, 11:02 p.m. UTC
Let's be more explicit about which of our workarounds are updating MCR
registers.

Signed-off-by: Matt Roper <matthew.d.roper@intel.com>
---
 drivers/gpu/drm/i915/gt/intel_workarounds.c   | 433 +++++++++++-------
 .../gpu/drm/i915/gt/intel_workarounds_types.h |   4 +-
 2 files changed, 263 insertions(+), 174 deletions(-)

Comments

Vivekanandan, Balasubramani Oct. 17, 2022, 4:54 p.m. UTC | #1
On 14.10.2022 16:02, Matt Roper wrote:
> Let's be more explicit about which of our workarounds are updating MCR
> registers.
> 
> Signed-off-by: Matt Roper <matthew.d.roper@intel.com>
> ---
>  drivers/gpu/drm/i915/gt/intel_workarounds.c   | 433 +++++++++++-------
>  .../gpu/drm/i915/gt/intel_workarounds_types.h |   4 +-
>  2 files changed, 263 insertions(+), 174 deletions(-)

Reviewed-by: Balasubramani Vivekanandan <balasubramani.vivekanandan@intel.com>

Regards,
Bala
> 
> diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c
> index 96b9f02a2284..7671994d5b7a 100644
> --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c
> +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c
> @@ -166,12 +166,33 @@ static void wa_add(struct i915_wa_list *wal, i915_reg_t reg,
>  	_wa_add(wal, &wa);
>  }
>  
> +static void wa_mcr_add(struct i915_wa_list *wal, i915_reg_t reg,
> +		       u32 clear, u32 set, u32 read_mask, bool masked_reg)
> +{
> +	struct i915_wa wa = {
> +		.reg  = reg,
> +		.clr  = clear,
> +		.set  = set,
> +		.read = read_mask,
> +		.masked_reg = masked_reg,
> +		.is_mcr = 1,
> +	};
> +
> +	_wa_add(wal, &wa);
> +}
> +
>  static void
>  wa_write_clr_set(struct i915_wa_list *wal, i915_reg_t reg, u32 clear, u32 set)
>  {
>  	wa_add(wal, reg, clear, set, clear, false);
>  }
>  
> +static void
> +wa_mcr_write_clr_set(struct i915_wa_list *wal, i915_reg_t reg, u32 clear, u32 set)
> +{
> +	wa_mcr_add(wal, reg, clear, set, clear, false);
> +}
> +
>  static void
>  wa_write(struct i915_wa_list *wal, i915_reg_t reg, u32 set)
>  {
> @@ -184,12 +205,24 @@ wa_write_or(struct i915_wa_list *wal, i915_reg_t reg, u32 set)
>  	wa_write_clr_set(wal, reg, set, set);
>  }
>  
> +static void
> +wa_mcr_write_or(struct i915_wa_list *wal, i915_reg_t reg, u32 set)
> +{
> +	wa_mcr_write_clr_set(wal, reg, set, set);
> +}
> +
>  static void
>  wa_write_clr(struct i915_wa_list *wal, i915_reg_t reg, u32 clr)
>  {
>  	wa_write_clr_set(wal, reg, clr, 0);
>  }
>  
> +static void
> +wa_mcr_write_clr(struct i915_wa_list *wal, i915_reg_t reg, u32 clr)
> +{
> +	wa_mcr_write_clr_set(wal, reg, clr, 0);
> +}
> +
>  /*
>   * WA operations on "masked register". A masked register has the upper 16 bits
>   * documented as "masked" in b-spec. Its purpose is to allow writing to just a
> @@ -207,12 +240,24 @@ wa_masked_en(struct i915_wa_list *wal, i915_reg_t reg, u32 val)
>  	wa_add(wal, reg, 0, _MASKED_BIT_ENABLE(val), val, true);
>  }
>  
> +static void
> +wa_mcr_masked_en(struct i915_wa_list *wal, i915_reg_t reg, u32 val)
> +{
> +	wa_mcr_add(wal, reg, 0, _MASKED_BIT_ENABLE(val), val, true);
> +}
> +
>  static void
>  wa_masked_dis(struct i915_wa_list *wal, i915_reg_t reg, u32 val)
>  {
>  	wa_add(wal, reg, 0, _MASKED_BIT_DISABLE(val), val, true);
>  }
>  
> +static void
> +wa_mcr_masked_dis(struct i915_wa_list *wal, i915_reg_t reg, u32 val)
> +{
> +	wa_mcr_add(wal, reg, 0, _MASKED_BIT_DISABLE(val), val, true);
> +}
> +
>  static void
>  wa_masked_field_set(struct i915_wa_list *wal, i915_reg_t reg,
>  		    u32 mask, u32 val)
> @@ -220,6 +265,13 @@ wa_masked_field_set(struct i915_wa_list *wal, i915_reg_t reg,
>  	wa_add(wal, reg, 0, _MASKED_FIELD(mask, val), mask, true);
>  }
>  
> +static void
> +wa_mcr_masked_field_set(struct i915_wa_list *wal, i915_reg_t reg,
> +			u32 mask, u32 val)
> +{
> +	wa_mcr_add(wal, reg, 0, _MASKED_FIELD(mask, val), mask, true);
> +}
> +
>  static void gen6_ctx_workarounds_init(struct intel_engine_cs *engine,
>  				      struct i915_wa_list *wal)
>  {
> @@ -241,8 +293,8 @@ static void gen8_ctx_workarounds_init(struct intel_engine_cs *engine,
>  	wa_masked_en(wal, RING_MI_MODE(RENDER_RING_BASE), ASYNC_FLIP_PERF_DISABLE);
>  
>  	/* WaDisablePartialInstShootdown:bdw,chv */
> -	wa_masked_en(wal, GEN8_ROW_CHICKEN,
> -		     PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE);
> +	wa_mcr_masked_en(wal, GEN8_ROW_CHICKEN,
> +			 PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE);
>  
>  	/* Use Force Non-Coherent whenever executing a 3D context. This is a
>  	 * workaround for a possible hang in the unlikely event a TLB
> @@ -288,18 +340,18 @@ static void bdw_ctx_workarounds_init(struct intel_engine_cs *engine,
>  	gen8_ctx_workarounds_init(engine, wal);
>  
>  	/* WaDisableThreadStallDopClockGating:bdw (pre-production) */
> -	wa_masked_en(wal, GEN8_ROW_CHICKEN, STALL_DOP_GATING_DISABLE);
> +	wa_mcr_masked_en(wal, GEN8_ROW_CHICKEN, STALL_DOP_GATING_DISABLE);
>  
>  	/* WaDisableDopClockGating:bdw
>  	 *
>  	 * Also see the related UCGTCL1 write in bdw_init_clock_gating()
>  	 * to disable EUTC clock gating.
>  	 */
> -	wa_masked_en(wal, GEN8_ROW_CHICKEN2,
> -		     DOP_CLOCK_GATING_DISABLE);
> +	wa_mcr_masked_en(wal, GEN8_ROW_CHICKEN2,
> +			 DOP_CLOCK_GATING_DISABLE);
>  
> -	wa_masked_en(wal, GEN8_HALF_SLICE_CHICKEN3,
> -		     GEN8_SAMPLER_POWER_BYPASS_DIS);
> +	wa_mcr_masked_en(wal, GEN8_HALF_SLICE_CHICKEN3,
> +			 GEN8_SAMPLER_POWER_BYPASS_DIS);
>  
>  	wa_masked_en(wal, HDC_CHICKEN0,
>  		     /* WaForceContextSaveRestoreNonCoherent:bdw */
> @@ -314,7 +366,7 @@ static void chv_ctx_workarounds_init(struct intel_engine_cs *engine,
>  	gen8_ctx_workarounds_init(engine, wal);
>  
>  	/* WaDisableThreadStallDopClockGating:chv */
> -	wa_masked_en(wal, GEN8_ROW_CHICKEN, STALL_DOP_GATING_DISABLE);
> +	wa_mcr_masked_en(wal, GEN8_ROW_CHICKEN, STALL_DOP_GATING_DISABLE);
>  
>  	/* Improve HiZ throughput on CHV. */
>  	wa_masked_en(wal, HIZ_CHICKEN, CHV_HZ_8X8_MODE_IN_1X);
> @@ -333,21 +385,21 @@ static void gen9_ctx_workarounds_init(struct intel_engine_cs *engine,
>  		 */
>  		wa_masked_en(wal, COMMON_SLICE_CHICKEN2,
>  			     GEN9_PBE_COMPRESSED_HASH_SELECTION);
> -		wa_masked_en(wal, GEN9_HALF_SLICE_CHICKEN7,
> -			     GEN9_SAMPLER_HASH_COMPRESSED_READ_ADDR);
> +		wa_mcr_masked_en(wal, GEN9_HALF_SLICE_CHICKEN7,
> +				 GEN9_SAMPLER_HASH_COMPRESSED_READ_ADDR);
>  	}
>  
>  	/* WaClearFlowControlGpgpuContextSave:skl,bxt,kbl,glk,cfl */
>  	/* WaDisablePartialInstShootdown:skl,bxt,kbl,glk,cfl */
> -	wa_masked_en(wal, GEN8_ROW_CHICKEN,
> -		     FLOW_CONTROL_ENABLE |
> -		     PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE);
> +	wa_mcr_masked_en(wal, GEN8_ROW_CHICKEN,
> +			 FLOW_CONTROL_ENABLE |
> +			 PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE);
>  
>  	/* WaEnableYV12BugFixInHalfSliceChicken7:skl,bxt,kbl,glk,cfl */
>  	/* WaEnableSamplerGPGPUPreemptionSupport:skl,bxt,kbl,cfl */
> -	wa_masked_en(wal, GEN9_HALF_SLICE_CHICKEN7,
> -		     GEN9_ENABLE_YV12_BUGFIX |
> -		     GEN9_ENABLE_GPGPU_PREEMPTION);
> +	wa_mcr_masked_en(wal, GEN9_HALF_SLICE_CHICKEN7,
> +			 GEN9_ENABLE_YV12_BUGFIX |
> +			 GEN9_ENABLE_GPGPU_PREEMPTION);
>  
>  	/* Wa4x4STCOptimizationDisable:skl,bxt,kbl,glk,cfl */
>  	/* WaDisablePartialResolveInVc:skl,bxt,kbl,cfl */
> @@ -356,8 +408,8 @@ static void gen9_ctx_workarounds_init(struct intel_engine_cs *engine,
>  		     GEN9_PARTIAL_RESOLVE_IN_VC_DISABLE);
>  
>  	/* WaCcsTlbPrefetchDisable:skl,bxt,kbl,glk,cfl */
> -	wa_masked_dis(wal, GEN9_HALF_SLICE_CHICKEN5,
> -		      GEN9_CCS_TLB_PREFETCH_ENABLE);
> +	wa_mcr_masked_dis(wal, GEN9_HALF_SLICE_CHICKEN5,
> +			  GEN9_CCS_TLB_PREFETCH_ENABLE);
>  
>  	/* WaForceContextSaveRestoreNonCoherent:skl,bxt,kbl,cfl */
>  	wa_masked_en(wal, HDC_CHICKEN0,
> @@ -386,11 +438,11 @@ static void gen9_ctx_workarounds_init(struct intel_engine_cs *engine,
>  	    IS_KABYLAKE(i915) ||
>  	    IS_COFFEELAKE(i915) ||
>  	    IS_COMETLAKE(i915))
> -		wa_masked_en(wal, GEN8_HALF_SLICE_CHICKEN3,
> -			     GEN8_SAMPLER_POWER_BYPASS_DIS);
> +		wa_mcr_masked_en(wal, GEN8_HALF_SLICE_CHICKEN3,
> +				 GEN8_SAMPLER_POWER_BYPASS_DIS);
>  
>  	/* WaDisableSTUnitPowerOptimization:skl,bxt,kbl,glk,cfl */
> -	wa_masked_en(wal, HALF_SLICE_CHICKEN2, GEN8_ST_PO_DISABLE);
> +	wa_mcr_masked_en(wal, HALF_SLICE_CHICKEN2, GEN8_ST_PO_DISABLE);
>  
>  	/*
>  	 * Supporting preemption with fine-granularity requires changes in the
> @@ -469,8 +521,8 @@ static void bxt_ctx_workarounds_init(struct intel_engine_cs *engine,
>  	gen9_ctx_workarounds_init(engine, wal);
>  
>  	/* WaDisableThreadStallDopClockGating:bxt */
> -	wa_masked_en(wal, GEN8_ROW_CHICKEN,
> -		     STALL_DOP_GATING_DISABLE);
> +	wa_mcr_masked_en(wal, GEN8_ROW_CHICKEN,
> +			 STALL_DOP_GATING_DISABLE);
>  
>  	/* WaToEnableHwFixForPushConstHWBug:bxt */
>  	wa_masked_en(wal, COMMON_SLICE_CHICKEN2,
> @@ -490,8 +542,8 @@ static void kbl_ctx_workarounds_init(struct intel_engine_cs *engine,
>  			     GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);
>  
>  	/* WaDisableSbeCacheDispatchPortSharing:kbl */
> -	wa_masked_en(wal, GEN8_HALF_SLICE_CHICKEN1,
> -		     GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE);
> +	wa_mcr_masked_en(wal, GEN8_HALF_SLICE_CHICKEN1,
> +			 GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE);
>  }
>  
>  static void glk_ctx_workarounds_init(struct intel_engine_cs *engine,
> @@ -514,8 +566,8 @@ static void cfl_ctx_workarounds_init(struct intel_engine_cs *engine,
>  		     GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);
>  
>  	/* WaDisableSbeCacheDispatchPortSharing:cfl */
> -	wa_masked_en(wal, GEN8_HALF_SLICE_CHICKEN1,
> -		     GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE);
> +	wa_mcr_masked_en(wal, GEN8_HALF_SLICE_CHICKEN1,
> +			 GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE);
>  }
>  
>  static void icl_ctx_workarounds_init(struct intel_engine_cs *engine,
> @@ -534,13 +586,13 @@ static void icl_ctx_workarounds_init(struct intel_engine_cs *engine,
>  	 * (the register is whitelisted in hardware now, so UMDs can opt in
>  	 * for coherency if they have a good reason).
>  	 */
> -	wa_masked_en(wal, ICL_HDC_MODE, HDC_FORCE_NON_COHERENT);
> +	wa_mcr_masked_en(wal, ICL_HDC_MODE, HDC_FORCE_NON_COHERENT);
>  
>  	/* WaEnableFloatBlendOptimization:icl */
> -	wa_add(wal, GEN10_CACHE_MODE_SS, 0,
> -	       _MASKED_BIT_ENABLE(FLOAT_BLEND_OPTIMIZATION_ENABLE),
> -	       0 /* write-only, so skip validation */,
> -	       true);
> +	wa_mcr_add(wal, GEN10_CACHE_MODE_SS, 0,
> +		   _MASKED_BIT_ENABLE(FLOAT_BLEND_OPTIMIZATION_ENABLE),
> +		   0 /* write-only, so skip validation */,
> +		   true);
>  
>  	/* WaDisableGPGPUMidThreadPreemption:icl */
>  	wa_masked_field_set(wal, GEN8_CS_CHICKEN1,
> @@ -548,8 +600,8 @@ static void icl_ctx_workarounds_init(struct intel_engine_cs *engine,
>  			    GEN9_PREEMPT_GPGPU_THREAD_GROUP_LEVEL);
>  
>  	/* allow headerless messages for preemptible GPGPU context */
> -	wa_masked_en(wal, GEN10_SAMPLER_MODE,
> -		     GEN11_SAMPLER_ENABLE_HEADLESS_MSG);
> +	wa_mcr_masked_en(wal, GEN10_SAMPLER_MODE,
> +			 GEN11_SAMPLER_ENABLE_HEADLESS_MSG);
>  
>  	/* Wa_1604278689:icl,ehl */
>  	wa_write(wal, IVB_FBC_RT_BASE, 0xFFFFFFFF & ~ILK_FBC_RT_VALID);
> @@ -558,7 +610,7 @@ static void icl_ctx_workarounds_init(struct intel_engine_cs *engine,
>  			 0xFFFFFFFF);
>  
>  	/* Wa_1406306137:icl,ehl */
> -	wa_masked_en(wal, GEN9_ROW_CHICKEN4, GEN11_DIS_PICK_2ND_EU);
> +	wa_mcr_masked_en(wal, GEN9_ROW_CHICKEN4, GEN11_DIS_PICK_2ND_EU);
>  }
>  
>  /*
> @@ -569,13 +621,13 @@ static void dg2_ctx_gt_tuning_init(struct intel_engine_cs *engine,
>  				   struct i915_wa_list *wal)
>  {
>  	wa_masked_en(wal, CHICKEN_RASTER_2, TBIMR_FAST_CLIP);
> -	wa_write_clr_set(wal, XEHP_L3SQCREG5, L3_PWM_TIMER_INIT_VAL_MASK,
> -			 REG_FIELD_PREP(L3_PWM_TIMER_INIT_VAL_MASK, 0x7f));
> -	wa_add(wal,
> -	       XEHP_FF_MODE2,
> -	       FF_MODE2_TDS_TIMER_MASK,
> -	       FF_MODE2_TDS_TIMER_128,
> -	       0, false);
> +	wa_mcr_write_clr_set(wal, XEHP_L3SQCREG5, L3_PWM_TIMER_INIT_VAL_MASK,
> +			     REG_FIELD_PREP(L3_PWM_TIMER_INIT_VAL_MASK, 0x7f));
> +	wa_mcr_add(wal,
> +		   XEHP_FF_MODE2,
> +		   FF_MODE2_TDS_TIMER_MASK,
> +		   FF_MODE2_TDS_TIMER_128,
> +		   0, false);
>  }
>  
>  /*
> @@ -664,27 +716,27 @@ static void dg2_ctx_workarounds_init(struct intel_engine_cs *engine,
>  
>  	/* Wa_16011186671:dg2_g11 */
>  	if (IS_DG2_GRAPHICS_STEP(engine->i915, G11, STEP_A0, STEP_B0)) {
> -		wa_masked_dis(wal, VFLSKPD, DIS_MULT_MISS_RD_SQUASH);
> -		wa_masked_en(wal, VFLSKPD, DIS_OVER_FETCH_CACHE);
> +		wa_mcr_masked_dis(wal, VFLSKPD, DIS_MULT_MISS_RD_SQUASH);
> +		wa_mcr_masked_en(wal, VFLSKPD, DIS_OVER_FETCH_CACHE);
>  	}
>  
>  	if (IS_DG2_GRAPHICS_STEP(engine->i915, G10, STEP_A0, STEP_B0)) {
>  		/* Wa_14010469329:dg2_g10 */
> -		wa_masked_en(wal, XEHP_COMMON_SLICE_CHICKEN3,
> -			     XEHP_DUAL_SIMD8_SEQ_MERGE_DISABLE);
> +		wa_mcr_masked_en(wal, XEHP_COMMON_SLICE_CHICKEN3,
> +				 XEHP_DUAL_SIMD8_SEQ_MERGE_DISABLE);
>  
>  		/*
>  		 * Wa_22010465075:dg2_g10
>  		 * Wa_22010613112:dg2_g10
>  		 * Wa_14010698770:dg2_g10
>  		 */
> -		wa_masked_en(wal, XEHP_COMMON_SLICE_CHICKEN3,
> -			     GEN12_DISABLE_CPS_AWARE_COLOR_PIPE);
> +		wa_mcr_masked_en(wal, XEHP_COMMON_SLICE_CHICKEN3,
> +				 GEN12_DISABLE_CPS_AWARE_COLOR_PIPE);
>  	}
>  
>  	/* Wa_16013271637:dg2 */
> -	wa_masked_en(wal, XEHP_SLICE_COMMON_ECO_CHICKEN1,
> -		     MSC_MSAA_REODER_BUF_BYPASS_DISABLE);
> +	wa_mcr_masked_en(wal, XEHP_SLICE_COMMON_ECO_CHICKEN1,
> +			 MSC_MSAA_REODER_BUF_BYPASS_DISABLE);
>  
>  	/* Wa_14014947963:dg2 */
>  	if (IS_DG2_GRAPHICS_STEP(engine->i915, G10, STEP_B0, STEP_FOREVER) ||
> @@ -1264,9 +1316,9 @@ icl_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
>  		    PSDUNIT_CLKGATE_DIS);
>  
>  	/* Wa_1406680159:icl,ehl */
> -	wa_write_or(wal,
> -		    GEN11_SUBSLICE_UNIT_LEVEL_CLKGATE,
> -		    GWUNIT_CLKGATE_DIS);
> +	wa_mcr_write_or(wal,
> +			GEN11_SUBSLICE_UNIT_LEVEL_CLKGATE,
> +			GWUNIT_CLKGATE_DIS);
>  
>  	/* Wa_1607087056:icl,ehl,jsl */
>  	if (IS_ICELAKE(i915) ||
> @@ -1279,7 +1331,7 @@ icl_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
>  	 * This is not a documented workaround, but rather an optimization
>  	 * to reduce sampler power.
>  	 */
> -	wa_write_clr(wal, GEN10_DFR_RATIO_EN_AND_CHICKEN, DFR_DISABLE);
> +	wa_mcr_write_clr(wal, GEN10_DFR_RATIO_EN_AND_CHICKEN, DFR_DISABLE);
>  }
>  
>  /*
> @@ -1313,7 +1365,7 @@ gen12_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
>  	wa_14011060649(gt, wal);
>  
>  	/* Wa_14011059788:tgl,rkl,adl-s,dg1,adl-p */
> -	wa_write_or(wal, GEN10_DFR_RATIO_EN_AND_CHICKEN, DFR_DISABLE);
> +	wa_mcr_write_or(wal, GEN10_DFR_RATIO_EN_AND_CHICKEN, DFR_DISABLE);
>  }
>  
>  static void
> @@ -1325,9 +1377,9 @@ tgl_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
>  
>  	/* Wa_1409420604:tgl */
>  	if (IS_TGL_UY_GRAPHICS_STEP(i915, STEP_A0, STEP_B0))
> -		wa_write_or(wal,
> -			    SUBSLICE_UNIT_LEVEL_CLKGATE2,
> -			    CPSSUNIT_CLKGATE_DIS);
> +		wa_mcr_write_or(wal,
> +				SUBSLICE_UNIT_LEVEL_CLKGATE2,
> +				CPSSUNIT_CLKGATE_DIS);
>  
>  	/* Wa_1607087056:tgl also know as BUG:1409180338 */
>  	if (IS_TGL_UY_GRAPHICS_STEP(i915, STEP_A0, STEP_B0))
> @@ -1356,9 +1408,9 @@ dg1_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
>  
>  	/* Wa_1409420604:dg1 */
>  	if (IS_DG1(i915))
> -		wa_write_or(wal,
> -			    SUBSLICE_UNIT_LEVEL_CLKGATE2,
> -			    CPSSUNIT_CLKGATE_DIS);
> +		wa_mcr_write_or(wal,
> +				SUBSLICE_UNIT_LEVEL_CLKGATE2,
> +				CPSSUNIT_CLKGATE_DIS);
>  
>  	/* Wa_1408615072:dg1 */
>  	/* Empirical testing shows this register is unaffected by engine reset. */
> @@ -1375,7 +1427,7 @@ xehpsdv_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
>  	xehp_init_mcr(gt, wal);
>  
>  	/* Wa_1409757795:xehpsdv */
> -	wa_write_or(wal, SCCGCTL94DC, CG3DDISURB);
> +	wa_mcr_write_or(wal, SCCGCTL94DC, CG3DDISURB);
>  
>  	/* Wa_16011155590:xehpsdv */
>  	if (IS_XEHPSDV_GRAPHICS_STEP(i915, STEP_A0, STEP_B0))
> @@ -1455,8 +1507,8 @@ dg2_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
>  			    CG3DDISCFEG_CLKGATE_DIS);
>  
>  		/* Wa_14011006942:dg2 */
> -		wa_write_or(wal, GEN11_SUBSLICE_UNIT_LEVEL_CLKGATE,
> -			    DSS_ROUTER_CLKGATE_DIS);
> +		wa_mcr_write_or(wal, GEN11_SUBSLICE_UNIT_LEVEL_CLKGATE,
> +				DSS_ROUTER_CLKGATE_DIS);
>  	}
>  
>  	if (IS_DG2_GRAPHICS_STEP(gt->i915, G10, STEP_A0, STEP_B0)) {
> @@ -1467,7 +1519,7 @@ dg2_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
>  		wa_write_or(wal, UNSLCGCTL9444, LTCDD_CLKGATE_DIS);
>  
>  		/* Wa_14011371254:dg2_g10 */
> -		wa_write_or(wal, XEHP_SLICE_UNIT_LEVEL_CLKGATE, NODEDSS_CLKGATE_DIS);
> +		wa_mcr_write_or(wal, XEHP_SLICE_UNIT_LEVEL_CLKGATE, NODEDSS_CLKGATE_DIS);
>  
>  		/* Wa_14011431319:dg2_g10 */
>  		wa_write_or(wal, UNSLCGCTL9440, GAMTLBOACS_CLKGATE_DIS |
> @@ -1503,21 +1555,21 @@ dg2_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
>  			    GAMEDIA_CLKGATE_DIS);
>  
>  		/* Wa_14011028019:dg2_g10 */
> -		wa_write_or(wal, SSMCGCTL9530, RTFUNIT_CLKGATE_DIS);
> +		wa_mcr_write_or(wal, SSMCGCTL9530, RTFUNIT_CLKGATE_DIS);
>  	}
>  
>  	/* Wa_14014830051:dg2 */
> -	wa_write_clr(wal, SARB_CHICKEN1, COMP_CKN_IN);
> +	wa_mcr_write_clr(wal, SARB_CHICKEN1, COMP_CKN_IN);
>  
>  	/*
>  	 * The following are not actually "workarounds" but rather
>  	 * recommended tuning settings documented in the bspec's
>  	 * performance guide section.
>  	 */
> -	wa_write_or(wal, XEHP_SQCM, EN_32B_ACCESS);
> +	wa_mcr_write_or(wal, XEHP_SQCM, EN_32B_ACCESS);
>  
>  	/* Wa_14015795083 */
> -	wa_write_clr(wal, GEN8_MISCCPCTL, GEN12_DOP_CLOCK_GATE_RENDER_ENABLE);
> +	wa_mcr_write_clr(wal, GEN8_MISCCPCTL, GEN12_DOP_CLOCK_GATE_RENDER_ENABLE);
>  }
>  
>  static void
> @@ -1526,7 +1578,7 @@ pvc_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
>  	pvc_init_mcr(gt, wal);
>  
>  	/* Wa_14015795083 */
> -	wa_write_clr(wal, GEN8_MISCCPCTL, GEN12_DOP_CLOCK_GATE_RENDER_ENABLE);
> +	wa_mcr_write_clr(wal, GEN8_MISCCPCTL, GEN12_DOP_CLOCK_GATE_RENDER_ENABLE);
>  }
>  
>  static void
> @@ -1638,14 +1690,25 @@ wa_list_apply(struct intel_gt *gt, const struct i915_wa_list *wal)
>  		u32 val, old = 0;
>  
>  		/* open-coded rmw due to steering */
> -		old = wa->clr ? intel_gt_mcr_read_any_fw(gt, wa->reg) : 0;
> +		if (wa->clr)
> +			old = wa->is_mcr ?
> +				intel_gt_mcr_read_any_fw(gt, wa->reg) :
> +				intel_uncore_read_fw(uncore, wa->reg);
>  		val = (old & ~wa->clr) | wa->set;
> -		if (val != old || !wa->clr)
> -			intel_uncore_write_fw(uncore, wa->reg, val);
> +		if (val != old || !wa->clr) {
> +			if (wa->is_mcr)
> +				intel_gt_mcr_multicast_write_fw(gt, wa->reg, val);
> +			else
> +				intel_uncore_write_fw(uncore, wa->reg, val);
> +		}
> +
> +		if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)) {
> +			u32 val = wa->is_mcr ?
> +				intel_gt_mcr_read_any_fw(gt, wa->reg) :
> +				intel_uncore_read_fw(uncore, wa->reg);
>  
> -		if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
> -			wa_verify(wa, intel_gt_mcr_read_any_fw(gt, wa->reg),
> -				  wal->name, "application");
> +			wa_verify(wa, val, wal->name, "application");
> +		}
>  	}
>  
>  	intel_uncore_forcewake_put__locked(uncore, fw);
> @@ -1674,8 +1737,9 @@ static bool wa_list_verify(struct intel_gt *gt,
>  	intel_uncore_forcewake_get__locked(uncore, fw);
>  
>  	for (i = 0, wa = wal->list; i < wal->count; i++, wa++)
> -		ok &= wa_verify(wa,
> -				intel_gt_mcr_read_any_fw(gt, wa->reg),
> +		ok &= wa_verify(wa, wa->is_mcr ?
> +				intel_gt_mcr_read_any_fw(gt, wa->reg) :
> +				intel_uncore_read_fw(uncore, wa->reg),
>  				wal->name, from);
>  
>  	intel_uncore_forcewake_put__locked(uncore, fw);
> @@ -1721,12 +1785,36 @@ whitelist_reg_ext(struct i915_wa_list *wal, i915_reg_t reg, u32 flags)
>  	_wa_add(wal, &wa);
>  }
>  
> +static void
> +whitelist_mcr_reg_ext(struct i915_wa_list *wal, i915_reg_t reg, u32 flags)
> +{
> +	struct i915_wa wa = {
> +		.reg = reg,
> +		.is_mcr = 1,
> +	};
> +
> +	if (GEM_DEBUG_WARN_ON(wal->count >= RING_MAX_NONPRIV_SLOTS))
> +		return;
> +
> +	if (GEM_DEBUG_WARN_ON(!is_nonpriv_flags_valid(flags)))
> +		return;
> +
> +	wa.reg.reg |= flags;
> +	_wa_add(wal, &wa);
> +}
> +
>  static void
>  whitelist_reg(struct i915_wa_list *wal, i915_reg_t reg)
>  {
>  	whitelist_reg_ext(wal, reg, RING_FORCE_TO_NONPRIV_ACCESS_RW);
>  }
>  
> +static void
> +whitelist_mcr_reg(struct i915_wa_list *wal, i915_reg_t reg)
> +{
> +	whitelist_mcr_reg_ext(wal, reg, RING_FORCE_TO_NONPRIV_ACCESS_RW);
> +}
> +
>  static void gen9_whitelist_build(struct i915_wa_list *w)
>  {
>  	/* WaVFEStateAfterPipeControlwithMediaStateClear:skl,bxt,glk,cfl */
> @@ -1752,7 +1840,7 @@ static void skl_whitelist_build(struct intel_engine_cs *engine)
>  	gen9_whitelist_build(w);
>  
>  	/* WaDisableLSQCROPERFforOCL:skl */
> -	whitelist_reg(w, GEN8_L3SQCREG4);
> +	whitelist_mcr_reg(w, GEN8_L3SQCREG4);
>  }
>  
>  static void bxt_whitelist_build(struct intel_engine_cs *engine)
> @@ -1773,7 +1861,7 @@ static void kbl_whitelist_build(struct intel_engine_cs *engine)
>  	gen9_whitelist_build(w);
>  
>  	/* WaDisableLSQCROPERFforOCL:kbl */
> -	whitelist_reg(w, GEN8_L3SQCREG4);
> +	whitelist_mcr_reg(w, GEN8_L3SQCREG4);
>  }
>  
>  static void glk_whitelist_build(struct intel_engine_cs *engine)
> @@ -1838,10 +1926,10 @@ static void icl_whitelist_build(struct intel_engine_cs *engine)
>  	switch (engine->class) {
>  	case RENDER_CLASS:
>  		/* WaAllowUMDToModifyHalfSliceChicken7:icl */
> -		whitelist_reg(w, GEN9_HALF_SLICE_CHICKEN7);
> +		whitelist_mcr_reg(w, GEN9_HALF_SLICE_CHICKEN7);
>  
>  		/* WaAllowUMDToModifySamplerMode:icl */
> -		whitelist_reg(w, GEN10_SAMPLER_MODE);
> +		whitelist_mcr_reg(w, GEN10_SAMPLER_MODE);
>  
>  		/* WaEnableStateCacheRedirectToCS:icl */
>  		whitelist_reg(w, GEN9_SLICE_COMMON_ECO_CHICKEN1);
> @@ -2117,21 +2205,21 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
>  
>  	if (IS_DG2_GRAPHICS_STEP(i915, G11, STEP_A0, STEP_B0)) {
>  		/* Wa_14013392000:dg2_g11 */
> -		wa_masked_en(wal, GEN8_ROW_CHICKEN2, GEN12_ENABLE_LARGE_GRF_MODE);
> +		wa_mcr_masked_en(wal, GEN8_ROW_CHICKEN2, GEN12_ENABLE_LARGE_GRF_MODE);
>  	}
>  
>  	if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_B0, STEP_FOREVER) ||
>  	    IS_DG2_G11(i915) || IS_DG2_G12(i915)) {
>  		/* Wa_1509727124:dg2 */
> -		wa_masked_en(wal, GEN10_SAMPLER_MODE,
> -			     SC_DISABLE_POWER_OPTIMIZATION_EBB);
> +		wa_mcr_masked_en(wal, GEN10_SAMPLER_MODE,
> +				 SC_DISABLE_POWER_OPTIMIZATION_EBB);
>  	}
>  
>  	if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_A0, STEP_B0) ||
>  	    IS_DG2_GRAPHICS_STEP(i915, G11, STEP_A0, STEP_B0)) {
>  		/* Wa_14012419201:dg2 */
> -		wa_masked_en(wal, GEN9_ROW_CHICKEN4,
> -			     GEN12_DISABLE_HDR_PAST_PAYLOAD_HOLD_FIX);
> +		wa_mcr_masked_en(wal, GEN9_ROW_CHICKEN4,
> +				 GEN12_DISABLE_HDR_PAST_PAYLOAD_HOLD_FIX);
>  	}
>  
>  	if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_B0, STEP_C0) ||
> @@ -2140,13 +2228,13 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
>  		 * Wa_22012826095:dg2
>  		 * Wa_22013059131:dg2
>  		 */
> -		wa_write_clr_set(wal, LSC_CHICKEN_BIT_0_UDW,
> -				 MAXREQS_PER_BANK,
> -				 REG_FIELD_PREP(MAXREQS_PER_BANK, 2));
> +		wa_mcr_write_clr_set(wal, LSC_CHICKEN_BIT_0_UDW,
> +				     MAXREQS_PER_BANK,
> +				     REG_FIELD_PREP(MAXREQS_PER_BANK, 2));
>  
>  		/* Wa_22013059131:dg2 */
> -		wa_write_or(wal, LSC_CHICKEN_BIT_0,
> -			    FORCE_1_SUB_MESSAGE_PER_FRAGMENT);
> +		wa_mcr_write_or(wal, LSC_CHICKEN_BIT_0,
> +				FORCE_1_SUB_MESSAGE_PER_FRAGMENT);
>  	}
>  
>  	/* Wa_1308578152:dg2_g10 when first gslice is fused off */
> @@ -2159,19 +2247,19 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
>  	if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_B0, STEP_FOREVER) ||
>  	    IS_DG2_G11(i915) || IS_DG2_G12(i915)) {
>  		/* Wa_22013037850:dg2 */
> -		wa_write_or(wal, LSC_CHICKEN_BIT_0_UDW,
> -			    DISABLE_128B_EVICTION_COMMAND_UDW);
> +		wa_mcr_write_or(wal, LSC_CHICKEN_BIT_0_UDW,
> +				DISABLE_128B_EVICTION_COMMAND_UDW);
>  
>  		/* Wa_22012856258:dg2 */
> -		wa_masked_en(wal, GEN8_ROW_CHICKEN2,
> -			     GEN12_DISABLE_READ_SUPPRESSION);
> +		wa_mcr_masked_en(wal, GEN8_ROW_CHICKEN2,
> +				 GEN12_DISABLE_READ_SUPPRESSION);
>  
>  		/*
>  		 * Wa_22010960976:dg2
>  		 * Wa_14013347512:dg2
>  		 */
> -		wa_masked_dis(wal, XEHP_HDC_CHICKEN0,
> -			      LSC_L1_FLUSH_CTL_3D_DATAPORT_FLUSH_EVENTS_MASK);
> +		wa_mcr_masked_dis(wal, XEHP_HDC_CHICKEN0,
> +				  LSC_L1_FLUSH_CTL_3D_DATAPORT_FLUSH_EVENTS_MASK);
>  	}
>  
>  	if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_A0, STEP_B0)) {
> @@ -2179,8 +2267,8 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
>  		 * Wa_1608949956:dg2_g10
>  		 * Wa_14010198302:dg2_g10
>  		 */
> -		wa_masked_en(wal, GEN8_ROW_CHICKEN,
> -			     MDQ_ARBITRATION_MODE | UGM_BACKUP_MODE);
> +		wa_mcr_masked_en(wal, GEN8_ROW_CHICKEN,
> +				 MDQ_ARBITRATION_MODE | UGM_BACKUP_MODE);
>  
>  		/*
>  		 * Wa_14010918519:dg2_g10
> @@ -2188,31 +2276,31 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
>  		 * LSC_CHICKEN_BIT_0 always reads back as 0 is this stepping,
>  		 * so ignoring verification.
>  		 */
> -		wa_add(wal, LSC_CHICKEN_BIT_0_UDW, 0,
> -		       FORCE_SLM_FENCE_SCOPE_TO_TILE | FORCE_UGM_FENCE_SCOPE_TO_TILE,
> -		       0, false);
> +		wa_mcr_add(wal, LSC_CHICKEN_BIT_0_UDW, 0,
> +			   FORCE_SLM_FENCE_SCOPE_TO_TILE | FORCE_UGM_FENCE_SCOPE_TO_TILE,
> +			   0, false);
>  	}
>  
>  	if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_A0, STEP_B0)) {
>  		/* Wa_22010430635:dg2 */
> -		wa_masked_en(wal,
> -			     GEN9_ROW_CHICKEN4,
> -			     GEN12_DISABLE_GRF_CLEAR);
> +		wa_mcr_masked_en(wal,
> +				 GEN9_ROW_CHICKEN4,
> +				 GEN12_DISABLE_GRF_CLEAR);
>  
>  		/* Wa_14010648519:dg2 */
> -		wa_write_or(wal, XEHP_L3NODEARBCFG, XEHP_LNESPARE);
> +		wa_mcr_write_or(wal, XEHP_L3NODEARBCFG, XEHP_LNESPARE);
>  	}
>  
>  	/* Wa_14013202645:dg2 */
>  	if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_B0, STEP_C0) ||
>  	    IS_DG2_GRAPHICS_STEP(i915, G11, STEP_A0, STEP_B0))
> -		wa_write_or(wal, RT_CTRL, DIS_NULL_QUERY);
> +		wa_mcr_write_or(wal, RT_CTRL, DIS_NULL_QUERY);
>  
>  	/* Wa_22012532006:dg2 */
>  	if (IS_DG2_GRAPHICS_STEP(engine->i915, G10, STEP_A0, STEP_C0) ||
>  	    IS_DG2_GRAPHICS_STEP(engine->i915, G11, STEP_A0, STEP_B0))
> -		wa_masked_en(wal, GEN9_HALF_SLICE_CHICKEN7,
> -			     DG2_DISABLE_ROUND_ENABLE_ALLOW_FOR_SSLA);
> +		wa_mcr_masked_en(wal, GEN9_HALF_SLICE_CHICKEN7,
> +				 DG2_DISABLE_ROUND_ENABLE_ALLOW_FOR_SSLA);
>  
>  	if (IS_DG2_GRAPHICS_STEP(engine->i915, G10, STEP_A0, STEP_B0)) {
>  		/* Wa_14010680813:dg2_g10 */
> @@ -2223,17 +2311,16 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
>  	if (IS_DG2_GRAPHICS_STEP(engine->i915, G10, STEP_A0, STEP_B0) ||
>  	    IS_DG2_GRAPHICS_STEP(engine->i915, G11, STEP_A0, STEP_B0)) {
>  		/* Wa_14012362059:dg2 */
> -		wa_write_or(wal, XEHP_MERT_MOD_CTRL, FORCE_MISS_FTLB);
> +		wa_mcr_write_or(wal, XEHP_MERT_MOD_CTRL, FORCE_MISS_FTLB);
>  	}
>  
>  	if (IS_DG2_GRAPHICS_STEP(i915, G11, STEP_B0, STEP_FOREVER) ||
>  	    IS_DG2_G10(i915)) {
>  		/* Wa_22014600077:dg2 */
> -		wa_add(wal, GEN10_CACHE_MODE_SS, 0,
> -		       _MASKED_BIT_ENABLE(ENABLE_EU_COUNT_FOR_TDL_FLUSH),
> -		       0 /* Wa_14012342262 :write-only reg, so skip
> -			    verification */,
> -		       true);
> +		wa_mcr_add(wal, GEN10_CACHE_MODE_SS, 0,
> +			   _MASKED_BIT_ENABLE(ENABLE_EU_COUNT_FOR_TDL_FLUSH),
> +			   0 /* Wa_14012342262 write-only reg, so skip verification */,
> +			   true);
>  	}
>  
>  	if (IS_DG1_GRAPHICS_STEP(i915, STEP_A0, STEP_B0) ||
> @@ -2260,7 +2347,7 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
>  	if (IS_ALDERLAKE_P(i915) || IS_ALDERLAKE_S(i915) || IS_DG1(i915) ||
>  	    IS_ROCKETLAKE(i915) || IS_TIGERLAKE(i915)) {
>  		/* Wa_1606931601:tgl,rkl,dg1,adl-s,adl-p */
> -		wa_masked_en(wal, GEN8_ROW_CHICKEN2, GEN12_DISABLE_EARLY_READ);
> +		wa_mcr_masked_en(wal, GEN8_ROW_CHICKEN2, GEN12_DISABLE_EARLY_READ);
>  
>  		/*
>  		 * Wa_1407928979:tgl A*
> @@ -2289,14 +2376,14 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
>  	    IS_DG1_GRAPHICS_STEP(i915, STEP_A0, STEP_B0) ||
>  	    IS_ROCKETLAKE(i915) || IS_TIGERLAKE(i915)) {
>  		/* Wa_1409804808:tgl,rkl,dg1[a0],adl-s,adl-p */
> -		wa_masked_en(wal, GEN8_ROW_CHICKEN2,
> -			     GEN12_PUSH_CONST_DEREF_HOLD_DIS);
> +		wa_mcr_masked_en(wal, GEN8_ROW_CHICKEN2,
> +				 GEN12_PUSH_CONST_DEREF_HOLD_DIS);
>  
>  		/*
>  		 * Wa_1409085225:tgl
>  		 * Wa_14010229206:tgl,rkl,dg1[a0],adl-s,adl-p
>  		 */
> -		wa_masked_en(wal, GEN9_ROW_CHICKEN4, GEN12_DISABLE_TDL_PUSH);
> +		wa_mcr_masked_en(wal, GEN9_ROW_CHICKEN4, GEN12_DISABLE_TDL_PUSH);
>  	}
>  
>  	if (IS_DG1_GRAPHICS_STEP(i915, STEP_A0, STEP_B0) ||
> @@ -2320,9 +2407,9 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
>  	if (IS_DG1(i915) || IS_ROCKETLAKE(i915) || IS_TIGERLAKE(i915) ||
>  	    IS_ALDERLAKE_S(i915) || IS_ALDERLAKE_P(i915)) {
>  		/* Wa_1406941453:tgl,rkl,dg1,adl-s,adl-p */
> -		wa_masked_en(wal,
> -			     GEN10_SAMPLER_MODE,
> -			     ENABLE_SMALLPL);
> +		wa_mcr_masked_en(wal,
> +				 GEN10_SAMPLER_MODE,
> +				 ENABLE_SMALLPL);
>  	}
>  
>  	if (GRAPHICS_VER(i915) == 11) {
> @@ -2356,9 +2443,9 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
>  		 * Wa_1405733216:icl
>  		 * Formerly known as WaDisableCleanEvicts
>  		 */
> -		wa_write_or(wal,
> -			    GEN8_L3SQCREG4,
> -			    GEN11_LQSC_CLEAN_EVICT_DISABLE);
> +		wa_mcr_write_or(wal,
> +				GEN8_L3SQCREG4,
> +				GEN11_LQSC_CLEAN_EVICT_DISABLE);
>  
>  		/* Wa_1606682166:icl */
>  		wa_write_or(wal,
> @@ -2366,10 +2453,10 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
>  			    GEN7_DISABLE_SAMPLER_PREFETCH);
>  
>  		/* Wa_1409178092:icl */
> -		wa_write_clr_set(wal,
> -				 GEN11_SCRATCH2,
> -				 GEN11_COHERENT_PARTIAL_WRITE_MERGE_ENABLE,
> -				 0);
> +		wa_mcr_write_clr_set(wal,
> +				     GEN11_SCRATCH2,
> +				     GEN11_COHERENT_PARTIAL_WRITE_MERGE_ENABLE,
> +				     0);
>  
>  		/* WaEnable32PlaneMode:icl */
>  		wa_masked_en(wal, GEN9_CSFE_CHICKEN1_RCS,
> @@ -2479,30 +2566,30 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
>  			     GEN9_PREEMPT_GPGPU_SYNC_SWITCH_DISABLE);
>  
>  		/* WaEnableLbsSlaRetryTimerDecrement:skl,bxt,kbl,glk,cfl */
> -		wa_write_or(wal,
> -			    BDW_SCRATCH1,
> -			    GEN9_LBS_SLA_RETRY_TIMER_DECREMENT_ENABLE);
> +		wa_mcr_write_or(wal,
> +				BDW_SCRATCH1,
> +				GEN9_LBS_SLA_RETRY_TIMER_DECREMENT_ENABLE);
>  
>  		/* WaProgramL3SqcReg1DefaultForPerf:bxt,glk */
>  		if (IS_GEN9_LP(i915))
> -			wa_write_clr_set(wal,
> -					 GEN8_L3SQCREG1,
> -					 L3_PRIO_CREDITS_MASK,
> -					 L3_GENERAL_PRIO_CREDITS(62) |
> -					 L3_HIGH_PRIO_CREDITS(2));
> +			wa_mcr_write_clr_set(wal,
> +					     GEN8_L3SQCREG1,
> +					     L3_PRIO_CREDITS_MASK,
> +					     L3_GENERAL_PRIO_CREDITS(62) |
> +					     L3_HIGH_PRIO_CREDITS(2));
>  
>  		/* WaOCLCoherentLineFlush:skl,bxt,kbl,cfl */
> -		wa_write_or(wal,
> -			    GEN8_L3SQCREG4,
> -			    GEN8_LQSC_FLUSH_COHERENT_LINES);
> +		wa_mcr_write_or(wal,
> +				GEN8_L3SQCREG4,
> +				GEN8_LQSC_FLUSH_COHERENT_LINES);
>  
>  		/* Disable atomics in L3 to prevent unrecoverable hangs */
>  		wa_write_clr_set(wal, GEN9_SCRATCH_LNCF1,
>  				 GEN9_LNCF_NONIA_COHERENT_ATOMICS_ENABLE, 0);
> -		wa_write_clr_set(wal, GEN8_L3SQCREG4,
> -				 GEN8_LQSQ_NONIA_COHERENT_ATOMICS_ENABLE, 0);
> -		wa_write_clr_set(wal, GEN9_SCRATCH1,
> -				 EVICTION_PERF_FIX_ENABLE, 0);
> +		wa_mcr_write_clr_set(wal, GEN8_L3SQCREG4,
> +				     GEN8_LQSQ_NONIA_COHERENT_ATOMICS_ENABLE, 0);
> +		wa_mcr_write_clr_set(wal, GEN9_SCRATCH1,
> +				     EVICTION_PERF_FIX_ENABLE, 0);
>  	}
>  
>  	if (IS_HASWELL(i915)) {
> @@ -2716,7 +2803,7 @@ ccs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
>  {
>  	if (IS_PVC_CT_STEP(engine->i915, STEP_A0, STEP_C0)) {
>  		/* Wa_14014999345:pvc */
> -		wa_masked_en(wal, GEN10_CACHE_MODE_SS, DISABLE_ECC);
> +		wa_mcr_masked_en(wal, GEN10_CACHE_MODE_SS, DISABLE_ECC);
>  	}
>  }
>  
> @@ -2742,8 +2829,8 @@ add_render_compute_tuning_settings(struct drm_i915_private *i915,
>  	}
>  
>  	if (IS_DG2(i915)) {
> -		wa_write_or(wal, XEHP_L3SCQREG7, BLEND_FILL_CACHING_OPT_DIS);
> -		wa_write_clr_set(wal, RT_CTRL, STACKID_CTRL, STACKID_CTRL_512);
> +		wa_mcr_write_or(wal, XEHP_L3SCQREG7, BLEND_FILL_CACHING_OPT_DIS);
> +		wa_mcr_write_clr_set(wal, RT_CTRL, STACKID_CTRL, STACKID_CTRL_512);
>  
>  		/*
>  		 * This is also listed as Wa_22012654132 for certain DG2
> @@ -2754,10 +2841,10 @@ add_render_compute_tuning_settings(struct drm_i915_private *i915,
>  		 * back for verification on DG2 (due to Wa_14012342262), so
>  		 * we need to explicitly skip the readback.
>  		 */
> -		wa_add(wal, GEN10_CACHE_MODE_SS, 0,
> -		       _MASKED_BIT_ENABLE(ENABLE_PREFETCH_INTO_IC),
> -		       0 /* write-only, so skip validation */,
> -		       true);
> +		wa_mcr_add(wal, GEN10_CACHE_MODE_SS, 0,
> +			   _MASKED_BIT_ENABLE(ENABLE_PREFETCH_INTO_IC),
> +			   0 /* write-only, so skip validation */,
> +			   true);
>  	}
>  
>  	/*
> @@ -2766,8 +2853,8 @@ add_render_compute_tuning_settings(struct drm_i915_private *i915,
>  	 * platforms.
>  	 */
>  	if (INTEL_INFO(i915)->tuning_thread_rr_after_dep)
> -		wa_masked_field_set(wal, GEN9_ROW_CHICKEN4, THREAD_EX_ARB_MODE,
> -				    THREAD_EX_ARB_MODE_RR_AFTER_DEP);
> +		wa_mcr_masked_field_set(wal, GEN9_ROW_CHICKEN4, THREAD_EX_ARB_MODE,
> +					THREAD_EX_ARB_MODE_RR_AFTER_DEP);
>  }
>  
>  /*
> @@ -2793,30 +2880,30 @@ general_render_compute_wa_init(struct intel_engine_cs *engine, struct i915_wa_li
>  
>  	if (IS_XEHPSDV(i915)) {
>  		/* Wa_1409954639 */
> -		wa_masked_en(wal,
> -			     GEN8_ROW_CHICKEN,
> -			     SYSTOLIC_DOP_CLOCK_GATING_DIS);
> +		wa_mcr_masked_en(wal,
> +				 GEN8_ROW_CHICKEN,
> +				 SYSTOLIC_DOP_CLOCK_GATING_DIS);
>  
>  		/* Wa_1607196519 */
> -		wa_masked_en(wal,
> -			     GEN9_ROW_CHICKEN4,
> -			     GEN12_DISABLE_GRF_CLEAR);
> +		wa_mcr_masked_en(wal,
> +				 GEN9_ROW_CHICKEN4,
> +				 GEN12_DISABLE_GRF_CLEAR);
>  
>  		/* Wa_14010670810:xehpsdv */
> -		wa_write_or(wal, XEHP_L3NODEARBCFG, XEHP_LNESPARE);
> +		wa_mcr_write_or(wal, XEHP_L3NODEARBCFG, XEHP_LNESPARE);
>  
>  		/* Wa_14010449647:xehpsdv */
> -		wa_masked_en(wal, GEN8_HALF_SLICE_CHICKEN1,
> -			     GEN7_PSD_SINGLE_PORT_DISPATCH_ENABLE);
> +		wa_mcr_masked_en(wal, GEN8_HALF_SLICE_CHICKEN1,
> +				 GEN7_PSD_SINGLE_PORT_DISPATCH_ENABLE);
>  
>  		/* Wa_18011725039:xehpsdv */
>  		if (IS_XEHPSDV_GRAPHICS_STEP(i915, STEP_A1, STEP_B0)) {
> -			wa_masked_dis(wal, MLTICTXCTL, TDONRENDER);
> -			wa_write_or(wal, L3SQCREG1_CCS0, FLUSHALLNONCOH);
> +			wa_mcr_masked_dis(wal, MLTICTXCTL, TDONRENDER);
> +			wa_mcr_write_or(wal, L3SQCREG1_CCS0, FLUSHALLNONCOH);
>  		}
>  
>  		/* Wa_14012362059:xehpsdv */
> -		wa_write_or(wal, XEHP_MERT_MOD_CTRL, FORCE_MISS_FTLB);
> +		wa_mcr_write_or(wal, XEHP_MERT_MOD_CTRL, FORCE_MISS_FTLB);
>  
>  		/* Wa_14014368820:xehpsdv */
>  		wa_write_or(wal, GEN12_GAMCNTRL_CTRL, INVALIDATION_BROADCAST_MODE_DIS |
> @@ -2825,19 +2912,19 @@ general_render_compute_wa_init(struct intel_engine_cs *engine, struct i915_wa_li
>  
>  	if (IS_DG2(i915) || IS_PONTEVECCHIO(i915)) {
>  		/* Wa_14015227452:dg2,pvc */
> -		wa_masked_en(wal, GEN9_ROW_CHICKEN4, XEHP_DIS_BBL_SYSPIPE);
> +		wa_mcr_masked_en(wal, GEN9_ROW_CHICKEN4, XEHP_DIS_BBL_SYSPIPE);
>  
>  		/* Wa_22014226127:dg2,pvc */
> -		wa_write_or(wal, LSC_CHICKEN_BIT_0, DISABLE_D8_D16_COASLESCE);
> +		wa_mcr_write_or(wal, LSC_CHICKEN_BIT_0, DISABLE_D8_D16_COASLESCE);
>  
>  		/* Wa_16015675438:dg2,pvc */
>  		wa_masked_en(wal, FF_SLICE_CS_CHICKEN2, GEN12_PERF_FIX_BALANCING_CFE_DISABLE);
>  
>  		/* Wa_18018781329:dg2,pvc */
> -		wa_write_or(wal, RENDER_MOD_CTRL, FORCE_MISS_FTLB);
> -		wa_write_or(wal, COMP_MOD_CTRL, FORCE_MISS_FTLB);
> -		wa_write_or(wal, VDBX_MOD_CTRL, FORCE_MISS_FTLB);
> -		wa_write_or(wal, VEBX_MOD_CTRL, FORCE_MISS_FTLB);
> +		wa_mcr_write_or(wal, RENDER_MOD_CTRL, FORCE_MISS_FTLB);
> +		wa_mcr_write_or(wal, COMP_MOD_CTRL, FORCE_MISS_FTLB);
> +		wa_mcr_write_or(wal, VDBX_MOD_CTRL, FORCE_MISS_FTLB);
> +		wa_mcr_write_or(wal, VEBX_MOD_CTRL, FORCE_MISS_FTLB);
>  	}
>  
>  	if (IS_DG2(i915)) {
> @@ -2845,7 +2932,7 @@ general_render_compute_wa_init(struct intel_engine_cs *engine, struct i915_wa_li
>  		 * Wa_16011620976:dg2_g11
>  		 * Wa_22015475538:dg2
>  		 */
> -		wa_write_or(wal, LSC_CHICKEN_BIT_0_UDW, DIS_CHAIN_2XSIMD8);
> +		wa_mcr_write_or(wal, LSC_CHICKEN_BIT_0_UDW, DIS_CHAIN_2XSIMD8);
>  	}
>  }
>  
> diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds_types.h b/drivers/gpu/drm/i915/gt/intel_workarounds_types.h
> index 8a4b6de4e754..f05b37e56fa9 100644
> --- a/drivers/gpu/drm/i915/gt/intel_workarounds_types.h
> +++ b/drivers/gpu/drm/i915/gt/intel_workarounds_types.h
> @@ -15,7 +15,9 @@ struct i915_wa {
>  	u32		clr;
>  	u32		set;
>  	u32		read;
> -	bool		masked_reg;
> +
> +	u32		masked_reg:1;
> +	u32		is_mcr:1;
>  };
>  
>  struct i915_wa_list {
> -- 
> 2.37.3
>
diff mbox series

Patch

diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c
index 96b9f02a2284..7671994d5b7a 100644
--- a/drivers/gpu/drm/i915/gt/intel_workarounds.c
+++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c
@@ -166,12 +166,33 @@  static void wa_add(struct i915_wa_list *wal, i915_reg_t reg,
 	_wa_add(wal, &wa);
 }
 
+static void wa_mcr_add(struct i915_wa_list *wal, i915_reg_t reg,
+		       u32 clear, u32 set, u32 read_mask, bool masked_reg)
+{
+	struct i915_wa wa = {
+		.reg  = reg,
+		.clr  = clear,
+		.set  = set,
+		.read = read_mask,
+		.masked_reg = masked_reg,
+		.is_mcr = 1,
+	};
+
+	_wa_add(wal, &wa);
+}
+
 static void
 wa_write_clr_set(struct i915_wa_list *wal, i915_reg_t reg, u32 clear, u32 set)
 {
 	wa_add(wal, reg, clear, set, clear, false);
 }
 
+static void
+wa_mcr_write_clr_set(struct i915_wa_list *wal, i915_reg_t reg, u32 clear, u32 set)
+{
+	wa_mcr_add(wal, reg, clear, set, clear, false);
+}
+
 static void
 wa_write(struct i915_wa_list *wal, i915_reg_t reg, u32 set)
 {
@@ -184,12 +205,24 @@  wa_write_or(struct i915_wa_list *wal, i915_reg_t reg, u32 set)
 	wa_write_clr_set(wal, reg, set, set);
 }
 
+static void
+wa_mcr_write_or(struct i915_wa_list *wal, i915_reg_t reg, u32 set)
+{
+	wa_mcr_write_clr_set(wal, reg, set, set);
+}
+
 static void
 wa_write_clr(struct i915_wa_list *wal, i915_reg_t reg, u32 clr)
 {
 	wa_write_clr_set(wal, reg, clr, 0);
 }
 
+static void
+wa_mcr_write_clr(struct i915_wa_list *wal, i915_reg_t reg, u32 clr)
+{
+	wa_mcr_write_clr_set(wal, reg, clr, 0);
+}
+
 /*
  * WA operations on "masked register". A masked register has the upper 16 bits
  * documented as "masked" in b-spec. Its purpose is to allow writing to just a
@@ -207,12 +240,24 @@  wa_masked_en(struct i915_wa_list *wal, i915_reg_t reg, u32 val)
 	wa_add(wal, reg, 0, _MASKED_BIT_ENABLE(val), val, true);
 }
 
+static void
+wa_mcr_masked_en(struct i915_wa_list *wal, i915_reg_t reg, u32 val)
+{
+	wa_mcr_add(wal, reg, 0, _MASKED_BIT_ENABLE(val), val, true);
+}
+
 static void
 wa_masked_dis(struct i915_wa_list *wal, i915_reg_t reg, u32 val)
 {
 	wa_add(wal, reg, 0, _MASKED_BIT_DISABLE(val), val, true);
 }
 
+static void
+wa_mcr_masked_dis(struct i915_wa_list *wal, i915_reg_t reg, u32 val)
+{
+	wa_mcr_add(wal, reg, 0, _MASKED_BIT_DISABLE(val), val, true);
+}
+
 static void
 wa_masked_field_set(struct i915_wa_list *wal, i915_reg_t reg,
 		    u32 mask, u32 val)
@@ -220,6 +265,13 @@  wa_masked_field_set(struct i915_wa_list *wal, i915_reg_t reg,
 	wa_add(wal, reg, 0, _MASKED_FIELD(mask, val), mask, true);
 }
 
+static void
+wa_mcr_masked_field_set(struct i915_wa_list *wal, i915_reg_t reg,
+			u32 mask, u32 val)
+{
+	wa_mcr_add(wal, reg, 0, _MASKED_FIELD(mask, val), mask, true);
+}
+
 static void gen6_ctx_workarounds_init(struct intel_engine_cs *engine,
 				      struct i915_wa_list *wal)
 {
@@ -241,8 +293,8 @@  static void gen8_ctx_workarounds_init(struct intel_engine_cs *engine,
 	wa_masked_en(wal, RING_MI_MODE(RENDER_RING_BASE), ASYNC_FLIP_PERF_DISABLE);
 
 	/* WaDisablePartialInstShootdown:bdw,chv */
-	wa_masked_en(wal, GEN8_ROW_CHICKEN,
-		     PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE);
+	wa_mcr_masked_en(wal, GEN8_ROW_CHICKEN,
+			 PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE);
 
 	/* Use Force Non-Coherent whenever executing a 3D context. This is a
 	 * workaround for a possible hang in the unlikely event a TLB
@@ -288,18 +340,18 @@  static void bdw_ctx_workarounds_init(struct intel_engine_cs *engine,
 	gen8_ctx_workarounds_init(engine, wal);
 
 	/* WaDisableThreadStallDopClockGating:bdw (pre-production) */
-	wa_masked_en(wal, GEN8_ROW_CHICKEN, STALL_DOP_GATING_DISABLE);
+	wa_mcr_masked_en(wal, GEN8_ROW_CHICKEN, STALL_DOP_GATING_DISABLE);
 
 	/* WaDisableDopClockGating:bdw
 	 *
 	 * Also see the related UCGTCL1 write in bdw_init_clock_gating()
 	 * to disable EUTC clock gating.
 	 */
-	wa_masked_en(wal, GEN8_ROW_CHICKEN2,
-		     DOP_CLOCK_GATING_DISABLE);
+	wa_mcr_masked_en(wal, GEN8_ROW_CHICKEN2,
+			 DOP_CLOCK_GATING_DISABLE);
 
-	wa_masked_en(wal, GEN8_HALF_SLICE_CHICKEN3,
-		     GEN8_SAMPLER_POWER_BYPASS_DIS);
+	wa_mcr_masked_en(wal, GEN8_HALF_SLICE_CHICKEN3,
+			 GEN8_SAMPLER_POWER_BYPASS_DIS);
 
 	wa_masked_en(wal, HDC_CHICKEN0,
 		     /* WaForceContextSaveRestoreNonCoherent:bdw */
@@ -314,7 +366,7 @@  static void chv_ctx_workarounds_init(struct intel_engine_cs *engine,
 	gen8_ctx_workarounds_init(engine, wal);
 
 	/* WaDisableThreadStallDopClockGating:chv */
-	wa_masked_en(wal, GEN8_ROW_CHICKEN, STALL_DOP_GATING_DISABLE);
+	wa_mcr_masked_en(wal, GEN8_ROW_CHICKEN, STALL_DOP_GATING_DISABLE);
 
 	/* Improve HiZ throughput on CHV. */
 	wa_masked_en(wal, HIZ_CHICKEN, CHV_HZ_8X8_MODE_IN_1X);
@@ -333,21 +385,21 @@  static void gen9_ctx_workarounds_init(struct intel_engine_cs *engine,
 		 */
 		wa_masked_en(wal, COMMON_SLICE_CHICKEN2,
 			     GEN9_PBE_COMPRESSED_HASH_SELECTION);
-		wa_masked_en(wal, GEN9_HALF_SLICE_CHICKEN7,
-			     GEN9_SAMPLER_HASH_COMPRESSED_READ_ADDR);
+		wa_mcr_masked_en(wal, GEN9_HALF_SLICE_CHICKEN7,
+				 GEN9_SAMPLER_HASH_COMPRESSED_READ_ADDR);
 	}
 
 	/* WaClearFlowControlGpgpuContextSave:skl,bxt,kbl,glk,cfl */
 	/* WaDisablePartialInstShootdown:skl,bxt,kbl,glk,cfl */
-	wa_masked_en(wal, GEN8_ROW_CHICKEN,
-		     FLOW_CONTROL_ENABLE |
-		     PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE);
+	wa_mcr_masked_en(wal, GEN8_ROW_CHICKEN,
+			 FLOW_CONTROL_ENABLE |
+			 PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE);
 
 	/* WaEnableYV12BugFixInHalfSliceChicken7:skl,bxt,kbl,glk,cfl */
 	/* WaEnableSamplerGPGPUPreemptionSupport:skl,bxt,kbl,cfl */
-	wa_masked_en(wal, GEN9_HALF_SLICE_CHICKEN7,
-		     GEN9_ENABLE_YV12_BUGFIX |
-		     GEN9_ENABLE_GPGPU_PREEMPTION);
+	wa_mcr_masked_en(wal, GEN9_HALF_SLICE_CHICKEN7,
+			 GEN9_ENABLE_YV12_BUGFIX |
+			 GEN9_ENABLE_GPGPU_PREEMPTION);
 
 	/* Wa4x4STCOptimizationDisable:skl,bxt,kbl,glk,cfl */
 	/* WaDisablePartialResolveInVc:skl,bxt,kbl,cfl */
@@ -356,8 +408,8 @@  static void gen9_ctx_workarounds_init(struct intel_engine_cs *engine,
 		     GEN9_PARTIAL_RESOLVE_IN_VC_DISABLE);
 
 	/* WaCcsTlbPrefetchDisable:skl,bxt,kbl,glk,cfl */
-	wa_masked_dis(wal, GEN9_HALF_SLICE_CHICKEN5,
-		      GEN9_CCS_TLB_PREFETCH_ENABLE);
+	wa_mcr_masked_dis(wal, GEN9_HALF_SLICE_CHICKEN5,
+			  GEN9_CCS_TLB_PREFETCH_ENABLE);
 
 	/* WaForceContextSaveRestoreNonCoherent:skl,bxt,kbl,cfl */
 	wa_masked_en(wal, HDC_CHICKEN0,
@@ -386,11 +438,11 @@  static void gen9_ctx_workarounds_init(struct intel_engine_cs *engine,
 	    IS_KABYLAKE(i915) ||
 	    IS_COFFEELAKE(i915) ||
 	    IS_COMETLAKE(i915))
-		wa_masked_en(wal, GEN8_HALF_SLICE_CHICKEN3,
-			     GEN8_SAMPLER_POWER_BYPASS_DIS);
+		wa_mcr_masked_en(wal, GEN8_HALF_SLICE_CHICKEN3,
+				 GEN8_SAMPLER_POWER_BYPASS_DIS);
 
 	/* WaDisableSTUnitPowerOptimization:skl,bxt,kbl,glk,cfl */
-	wa_masked_en(wal, HALF_SLICE_CHICKEN2, GEN8_ST_PO_DISABLE);
+	wa_mcr_masked_en(wal, HALF_SLICE_CHICKEN2, GEN8_ST_PO_DISABLE);
 
 	/*
 	 * Supporting preemption with fine-granularity requires changes in the
@@ -469,8 +521,8 @@  static void bxt_ctx_workarounds_init(struct intel_engine_cs *engine,
 	gen9_ctx_workarounds_init(engine, wal);
 
 	/* WaDisableThreadStallDopClockGating:bxt */
-	wa_masked_en(wal, GEN8_ROW_CHICKEN,
-		     STALL_DOP_GATING_DISABLE);
+	wa_mcr_masked_en(wal, GEN8_ROW_CHICKEN,
+			 STALL_DOP_GATING_DISABLE);
 
 	/* WaToEnableHwFixForPushConstHWBug:bxt */
 	wa_masked_en(wal, COMMON_SLICE_CHICKEN2,
@@ -490,8 +542,8 @@  static void kbl_ctx_workarounds_init(struct intel_engine_cs *engine,
 			     GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);
 
 	/* WaDisableSbeCacheDispatchPortSharing:kbl */
-	wa_masked_en(wal, GEN8_HALF_SLICE_CHICKEN1,
-		     GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE);
+	wa_mcr_masked_en(wal, GEN8_HALF_SLICE_CHICKEN1,
+			 GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE);
 }
 
 static void glk_ctx_workarounds_init(struct intel_engine_cs *engine,
@@ -514,8 +566,8 @@  static void cfl_ctx_workarounds_init(struct intel_engine_cs *engine,
 		     GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);
 
 	/* WaDisableSbeCacheDispatchPortSharing:cfl */
-	wa_masked_en(wal, GEN8_HALF_SLICE_CHICKEN1,
-		     GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE);
+	wa_mcr_masked_en(wal, GEN8_HALF_SLICE_CHICKEN1,
+			 GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE);
 }
 
 static void icl_ctx_workarounds_init(struct intel_engine_cs *engine,
@@ -534,13 +586,13 @@  static void icl_ctx_workarounds_init(struct intel_engine_cs *engine,
 	 * (the register is whitelisted in hardware now, so UMDs can opt in
 	 * for coherency if they have a good reason).
 	 */
-	wa_masked_en(wal, ICL_HDC_MODE, HDC_FORCE_NON_COHERENT);
+	wa_mcr_masked_en(wal, ICL_HDC_MODE, HDC_FORCE_NON_COHERENT);
 
 	/* WaEnableFloatBlendOptimization:icl */
-	wa_add(wal, GEN10_CACHE_MODE_SS, 0,
-	       _MASKED_BIT_ENABLE(FLOAT_BLEND_OPTIMIZATION_ENABLE),
-	       0 /* write-only, so skip validation */,
-	       true);
+	wa_mcr_add(wal, GEN10_CACHE_MODE_SS, 0,
+		   _MASKED_BIT_ENABLE(FLOAT_BLEND_OPTIMIZATION_ENABLE),
+		   0 /* write-only, so skip validation */,
+		   true);
 
 	/* WaDisableGPGPUMidThreadPreemption:icl */
 	wa_masked_field_set(wal, GEN8_CS_CHICKEN1,
@@ -548,8 +600,8 @@  static void icl_ctx_workarounds_init(struct intel_engine_cs *engine,
 			    GEN9_PREEMPT_GPGPU_THREAD_GROUP_LEVEL);
 
 	/* allow headerless messages for preemptible GPGPU context */
-	wa_masked_en(wal, GEN10_SAMPLER_MODE,
-		     GEN11_SAMPLER_ENABLE_HEADLESS_MSG);
+	wa_mcr_masked_en(wal, GEN10_SAMPLER_MODE,
+			 GEN11_SAMPLER_ENABLE_HEADLESS_MSG);
 
 	/* Wa_1604278689:icl,ehl */
 	wa_write(wal, IVB_FBC_RT_BASE, 0xFFFFFFFF & ~ILK_FBC_RT_VALID);
@@ -558,7 +610,7 @@  static void icl_ctx_workarounds_init(struct intel_engine_cs *engine,
 			 0xFFFFFFFF);
 
 	/* Wa_1406306137:icl,ehl */
-	wa_masked_en(wal, GEN9_ROW_CHICKEN4, GEN11_DIS_PICK_2ND_EU);
+	wa_mcr_masked_en(wal, GEN9_ROW_CHICKEN4, GEN11_DIS_PICK_2ND_EU);
 }
 
 /*
@@ -569,13 +621,13 @@  static void dg2_ctx_gt_tuning_init(struct intel_engine_cs *engine,
 				   struct i915_wa_list *wal)
 {
 	wa_masked_en(wal, CHICKEN_RASTER_2, TBIMR_FAST_CLIP);
-	wa_write_clr_set(wal, XEHP_L3SQCREG5, L3_PWM_TIMER_INIT_VAL_MASK,
-			 REG_FIELD_PREP(L3_PWM_TIMER_INIT_VAL_MASK, 0x7f));
-	wa_add(wal,
-	       XEHP_FF_MODE2,
-	       FF_MODE2_TDS_TIMER_MASK,
-	       FF_MODE2_TDS_TIMER_128,
-	       0, false);
+	wa_mcr_write_clr_set(wal, XEHP_L3SQCREG5, L3_PWM_TIMER_INIT_VAL_MASK,
+			     REG_FIELD_PREP(L3_PWM_TIMER_INIT_VAL_MASK, 0x7f));
+	wa_mcr_add(wal,
+		   XEHP_FF_MODE2,
+		   FF_MODE2_TDS_TIMER_MASK,
+		   FF_MODE2_TDS_TIMER_128,
+		   0, false);
 }
 
 /*
@@ -664,27 +716,27 @@  static void dg2_ctx_workarounds_init(struct intel_engine_cs *engine,
 
 	/* Wa_16011186671:dg2_g11 */
 	if (IS_DG2_GRAPHICS_STEP(engine->i915, G11, STEP_A0, STEP_B0)) {
-		wa_masked_dis(wal, VFLSKPD, DIS_MULT_MISS_RD_SQUASH);
-		wa_masked_en(wal, VFLSKPD, DIS_OVER_FETCH_CACHE);
+		wa_mcr_masked_dis(wal, VFLSKPD, DIS_MULT_MISS_RD_SQUASH);
+		wa_mcr_masked_en(wal, VFLSKPD, DIS_OVER_FETCH_CACHE);
 	}
 
 	if (IS_DG2_GRAPHICS_STEP(engine->i915, G10, STEP_A0, STEP_B0)) {
 		/* Wa_14010469329:dg2_g10 */
-		wa_masked_en(wal, XEHP_COMMON_SLICE_CHICKEN3,
-			     XEHP_DUAL_SIMD8_SEQ_MERGE_DISABLE);
+		wa_mcr_masked_en(wal, XEHP_COMMON_SLICE_CHICKEN3,
+				 XEHP_DUAL_SIMD8_SEQ_MERGE_DISABLE);
 
 		/*
 		 * Wa_22010465075:dg2_g10
 		 * Wa_22010613112:dg2_g10
 		 * Wa_14010698770:dg2_g10
 		 */
-		wa_masked_en(wal, XEHP_COMMON_SLICE_CHICKEN3,
-			     GEN12_DISABLE_CPS_AWARE_COLOR_PIPE);
+		wa_mcr_masked_en(wal, XEHP_COMMON_SLICE_CHICKEN3,
+				 GEN12_DISABLE_CPS_AWARE_COLOR_PIPE);
 	}
 
 	/* Wa_16013271637:dg2 */
-	wa_masked_en(wal, XEHP_SLICE_COMMON_ECO_CHICKEN1,
-		     MSC_MSAA_REODER_BUF_BYPASS_DISABLE);
+	wa_mcr_masked_en(wal, XEHP_SLICE_COMMON_ECO_CHICKEN1,
+			 MSC_MSAA_REODER_BUF_BYPASS_DISABLE);
 
 	/* Wa_14014947963:dg2 */
 	if (IS_DG2_GRAPHICS_STEP(engine->i915, G10, STEP_B0, STEP_FOREVER) ||
@@ -1264,9 +1316,9 @@  icl_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
 		    PSDUNIT_CLKGATE_DIS);
 
 	/* Wa_1406680159:icl,ehl */
-	wa_write_or(wal,
-		    GEN11_SUBSLICE_UNIT_LEVEL_CLKGATE,
-		    GWUNIT_CLKGATE_DIS);
+	wa_mcr_write_or(wal,
+			GEN11_SUBSLICE_UNIT_LEVEL_CLKGATE,
+			GWUNIT_CLKGATE_DIS);
 
 	/* Wa_1607087056:icl,ehl,jsl */
 	if (IS_ICELAKE(i915) ||
@@ -1279,7 +1331,7 @@  icl_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
 	 * This is not a documented workaround, but rather an optimization
 	 * to reduce sampler power.
 	 */
-	wa_write_clr(wal, GEN10_DFR_RATIO_EN_AND_CHICKEN, DFR_DISABLE);
+	wa_mcr_write_clr(wal, GEN10_DFR_RATIO_EN_AND_CHICKEN, DFR_DISABLE);
 }
 
 /*
@@ -1313,7 +1365,7 @@  gen12_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
 	wa_14011060649(gt, wal);
 
 	/* Wa_14011059788:tgl,rkl,adl-s,dg1,adl-p */
-	wa_write_or(wal, GEN10_DFR_RATIO_EN_AND_CHICKEN, DFR_DISABLE);
+	wa_mcr_write_or(wal, GEN10_DFR_RATIO_EN_AND_CHICKEN, DFR_DISABLE);
 }
 
 static void
@@ -1325,9 +1377,9 @@  tgl_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
 
 	/* Wa_1409420604:tgl */
 	if (IS_TGL_UY_GRAPHICS_STEP(i915, STEP_A0, STEP_B0))
-		wa_write_or(wal,
-			    SUBSLICE_UNIT_LEVEL_CLKGATE2,
-			    CPSSUNIT_CLKGATE_DIS);
+		wa_mcr_write_or(wal,
+				SUBSLICE_UNIT_LEVEL_CLKGATE2,
+				CPSSUNIT_CLKGATE_DIS);
 
 	/* Wa_1607087056:tgl also know as BUG:1409180338 */
 	if (IS_TGL_UY_GRAPHICS_STEP(i915, STEP_A0, STEP_B0))
@@ -1356,9 +1408,9 @@  dg1_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
 
 	/* Wa_1409420604:dg1 */
 	if (IS_DG1(i915))
-		wa_write_or(wal,
-			    SUBSLICE_UNIT_LEVEL_CLKGATE2,
-			    CPSSUNIT_CLKGATE_DIS);
+		wa_mcr_write_or(wal,
+				SUBSLICE_UNIT_LEVEL_CLKGATE2,
+				CPSSUNIT_CLKGATE_DIS);
 
 	/* Wa_1408615072:dg1 */
 	/* Empirical testing shows this register is unaffected by engine reset. */
@@ -1375,7 +1427,7 @@  xehpsdv_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
 	xehp_init_mcr(gt, wal);
 
 	/* Wa_1409757795:xehpsdv */
-	wa_write_or(wal, SCCGCTL94DC, CG3DDISURB);
+	wa_mcr_write_or(wal, SCCGCTL94DC, CG3DDISURB);
 
 	/* Wa_16011155590:xehpsdv */
 	if (IS_XEHPSDV_GRAPHICS_STEP(i915, STEP_A0, STEP_B0))
@@ -1455,8 +1507,8 @@  dg2_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
 			    CG3DDISCFEG_CLKGATE_DIS);
 
 		/* Wa_14011006942:dg2 */
-		wa_write_or(wal, GEN11_SUBSLICE_UNIT_LEVEL_CLKGATE,
-			    DSS_ROUTER_CLKGATE_DIS);
+		wa_mcr_write_or(wal, GEN11_SUBSLICE_UNIT_LEVEL_CLKGATE,
+				DSS_ROUTER_CLKGATE_DIS);
 	}
 
 	if (IS_DG2_GRAPHICS_STEP(gt->i915, G10, STEP_A0, STEP_B0)) {
@@ -1467,7 +1519,7 @@  dg2_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
 		wa_write_or(wal, UNSLCGCTL9444, LTCDD_CLKGATE_DIS);
 
 		/* Wa_14011371254:dg2_g10 */
-		wa_write_or(wal, XEHP_SLICE_UNIT_LEVEL_CLKGATE, NODEDSS_CLKGATE_DIS);
+		wa_mcr_write_or(wal, XEHP_SLICE_UNIT_LEVEL_CLKGATE, NODEDSS_CLKGATE_DIS);
 
 		/* Wa_14011431319:dg2_g10 */
 		wa_write_or(wal, UNSLCGCTL9440, GAMTLBOACS_CLKGATE_DIS |
@@ -1503,21 +1555,21 @@  dg2_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
 			    GAMEDIA_CLKGATE_DIS);
 
 		/* Wa_14011028019:dg2_g10 */
-		wa_write_or(wal, SSMCGCTL9530, RTFUNIT_CLKGATE_DIS);
+		wa_mcr_write_or(wal, SSMCGCTL9530, RTFUNIT_CLKGATE_DIS);
 	}
 
 	/* Wa_14014830051:dg2 */
-	wa_write_clr(wal, SARB_CHICKEN1, COMP_CKN_IN);
+	wa_mcr_write_clr(wal, SARB_CHICKEN1, COMP_CKN_IN);
 
 	/*
 	 * The following are not actually "workarounds" but rather
 	 * recommended tuning settings documented in the bspec's
 	 * performance guide section.
 	 */
-	wa_write_or(wal, XEHP_SQCM, EN_32B_ACCESS);
+	wa_mcr_write_or(wal, XEHP_SQCM, EN_32B_ACCESS);
 
 	/* Wa_14015795083 */
-	wa_write_clr(wal, GEN8_MISCCPCTL, GEN12_DOP_CLOCK_GATE_RENDER_ENABLE);
+	wa_mcr_write_clr(wal, GEN8_MISCCPCTL, GEN12_DOP_CLOCK_GATE_RENDER_ENABLE);
 }
 
 static void
@@ -1526,7 +1578,7 @@  pvc_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
 	pvc_init_mcr(gt, wal);
 
 	/* Wa_14015795083 */
-	wa_write_clr(wal, GEN8_MISCCPCTL, GEN12_DOP_CLOCK_GATE_RENDER_ENABLE);
+	wa_mcr_write_clr(wal, GEN8_MISCCPCTL, GEN12_DOP_CLOCK_GATE_RENDER_ENABLE);
 }
 
 static void
@@ -1638,14 +1690,25 @@  wa_list_apply(struct intel_gt *gt, const struct i915_wa_list *wal)
 		u32 val, old = 0;
 
 		/* open-coded rmw due to steering */
-		old = wa->clr ? intel_gt_mcr_read_any_fw(gt, wa->reg) : 0;
+		if (wa->clr)
+			old = wa->is_mcr ?
+				intel_gt_mcr_read_any_fw(gt, wa->reg) :
+				intel_uncore_read_fw(uncore, wa->reg);
 		val = (old & ~wa->clr) | wa->set;
-		if (val != old || !wa->clr)
-			intel_uncore_write_fw(uncore, wa->reg, val);
+		if (val != old || !wa->clr) {
+			if (wa->is_mcr)
+				intel_gt_mcr_multicast_write_fw(gt, wa->reg, val);
+			else
+				intel_uncore_write_fw(uncore, wa->reg, val);
+		}
+
+		if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)) {
+			u32 val = wa->is_mcr ?
+				intel_gt_mcr_read_any_fw(gt, wa->reg) :
+				intel_uncore_read_fw(uncore, wa->reg);
 
-		if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
-			wa_verify(wa, intel_gt_mcr_read_any_fw(gt, wa->reg),
-				  wal->name, "application");
+			wa_verify(wa, val, wal->name, "application");
+		}
 	}
 
 	intel_uncore_forcewake_put__locked(uncore, fw);
@@ -1674,8 +1737,9 @@  static bool wa_list_verify(struct intel_gt *gt,
 	intel_uncore_forcewake_get__locked(uncore, fw);
 
 	for (i = 0, wa = wal->list; i < wal->count; i++, wa++)
-		ok &= wa_verify(wa,
-				intel_gt_mcr_read_any_fw(gt, wa->reg),
+		ok &= wa_verify(wa, wa->is_mcr ?
+				intel_gt_mcr_read_any_fw(gt, wa->reg) :
+				intel_uncore_read_fw(uncore, wa->reg),
 				wal->name, from);
 
 	intel_uncore_forcewake_put__locked(uncore, fw);
@@ -1721,12 +1785,36 @@  whitelist_reg_ext(struct i915_wa_list *wal, i915_reg_t reg, u32 flags)
 	_wa_add(wal, &wa);
 }
 
+static void
+whitelist_mcr_reg_ext(struct i915_wa_list *wal, i915_reg_t reg, u32 flags)
+{
+	struct i915_wa wa = {
+		.reg = reg,
+		.is_mcr = 1,
+	};
+
+	if (GEM_DEBUG_WARN_ON(wal->count >= RING_MAX_NONPRIV_SLOTS))
+		return;
+
+	if (GEM_DEBUG_WARN_ON(!is_nonpriv_flags_valid(flags)))
+		return;
+
+	wa.reg.reg |= flags;
+	_wa_add(wal, &wa);
+}
+
 static void
 whitelist_reg(struct i915_wa_list *wal, i915_reg_t reg)
 {
 	whitelist_reg_ext(wal, reg, RING_FORCE_TO_NONPRIV_ACCESS_RW);
 }
 
+static void
+whitelist_mcr_reg(struct i915_wa_list *wal, i915_reg_t reg)
+{
+	whitelist_mcr_reg_ext(wal, reg, RING_FORCE_TO_NONPRIV_ACCESS_RW);
+}
+
 static void gen9_whitelist_build(struct i915_wa_list *w)
 {
 	/* WaVFEStateAfterPipeControlwithMediaStateClear:skl,bxt,glk,cfl */
@@ -1752,7 +1840,7 @@  static void skl_whitelist_build(struct intel_engine_cs *engine)
 	gen9_whitelist_build(w);
 
 	/* WaDisableLSQCROPERFforOCL:skl */
-	whitelist_reg(w, GEN8_L3SQCREG4);
+	whitelist_mcr_reg(w, GEN8_L3SQCREG4);
 }
 
 static void bxt_whitelist_build(struct intel_engine_cs *engine)
@@ -1773,7 +1861,7 @@  static void kbl_whitelist_build(struct intel_engine_cs *engine)
 	gen9_whitelist_build(w);
 
 	/* WaDisableLSQCROPERFforOCL:kbl */
-	whitelist_reg(w, GEN8_L3SQCREG4);
+	whitelist_mcr_reg(w, GEN8_L3SQCREG4);
 }
 
 static void glk_whitelist_build(struct intel_engine_cs *engine)
@@ -1838,10 +1926,10 @@  static void icl_whitelist_build(struct intel_engine_cs *engine)
 	switch (engine->class) {
 	case RENDER_CLASS:
 		/* WaAllowUMDToModifyHalfSliceChicken7:icl */
-		whitelist_reg(w, GEN9_HALF_SLICE_CHICKEN7);
+		whitelist_mcr_reg(w, GEN9_HALF_SLICE_CHICKEN7);
 
 		/* WaAllowUMDToModifySamplerMode:icl */
-		whitelist_reg(w, GEN10_SAMPLER_MODE);
+		whitelist_mcr_reg(w, GEN10_SAMPLER_MODE);
 
 		/* WaEnableStateCacheRedirectToCS:icl */
 		whitelist_reg(w, GEN9_SLICE_COMMON_ECO_CHICKEN1);
@@ -2117,21 +2205,21 @@  rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
 
 	if (IS_DG2_GRAPHICS_STEP(i915, G11, STEP_A0, STEP_B0)) {
 		/* Wa_14013392000:dg2_g11 */
-		wa_masked_en(wal, GEN8_ROW_CHICKEN2, GEN12_ENABLE_LARGE_GRF_MODE);
+		wa_mcr_masked_en(wal, GEN8_ROW_CHICKEN2, GEN12_ENABLE_LARGE_GRF_MODE);
 	}
 
 	if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_B0, STEP_FOREVER) ||
 	    IS_DG2_G11(i915) || IS_DG2_G12(i915)) {
 		/* Wa_1509727124:dg2 */
-		wa_masked_en(wal, GEN10_SAMPLER_MODE,
-			     SC_DISABLE_POWER_OPTIMIZATION_EBB);
+		wa_mcr_masked_en(wal, GEN10_SAMPLER_MODE,
+				 SC_DISABLE_POWER_OPTIMIZATION_EBB);
 	}
 
 	if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_A0, STEP_B0) ||
 	    IS_DG2_GRAPHICS_STEP(i915, G11, STEP_A0, STEP_B0)) {
 		/* Wa_14012419201:dg2 */
-		wa_masked_en(wal, GEN9_ROW_CHICKEN4,
-			     GEN12_DISABLE_HDR_PAST_PAYLOAD_HOLD_FIX);
+		wa_mcr_masked_en(wal, GEN9_ROW_CHICKEN4,
+				 GEN12_DISABLE_HDR_PAST_PAYLOAD_HOLD_FIX);
 	}
 
 	if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_B0, STEP_C0) ||
@@ -2140,13 +2228,13 @@  rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
 		 * Wa_22012826095:dg2
 		 * Wa_22013059131:dg2
 		 */
-		wa_write_clr_set(wal, LSC_CHICKEN_BIT_0_UDW,
-				 MAXREQS_PER_BANK,
-				 REG_FIELD_PREP(MAXREQS_PER_BANK, 2));
+		wa_mcr_write_clr_set(wal, LSC_CHICKEN_BIT_0_UDW,
+				     MAXREQS_PER_BANK,
+				     REG_FIELD_PREP(MAXREQS_PER_BANK, 2));
 
 		/* Wa_22013059131:dg2 */
-		wa_write_or(wal, LSC_CHICKEN_BIT_0,
-			    FORCE_1_SUB_MESSAGE_PER_FRAGMENT);
+		wa_mcr_write_or(wal, LSC_CHICKEN_BIT_0,
+				FORCE_1_SUB_MESSAGE_PER_FRAGMENT);
 	}
 
 	/* Wa_1308578152:dg2_g10 when first gslice is fused off */
@@ -2159,19 +2247,19 @@  rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
 	if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_B0, STEP_FOREVER) ||
 	    IS_DG2_G11(i915) || IS_DG2_G12(i915)) {
 		/* Wa_22013037850:dg2 */
-		wa_write_or(wal, LSC_CHICKEN_BIT_0_UDW,
-			    DISABLE_128B_EVICTION_COMMAND_UDW);
+		wa_mcr_write_or(wal, LSC_CHICKEN_BIT_0_UDW,
+				DISABLE_128B_EVICTION_COMMAND_UDW);
 
 		/* Wa_22012856258:dg2 */
-		wa_masked_en(wal, GEN8_ROW_CHICKEN2,
-			     GEN12_DISABLE_READ_SUPPRESSION);
+		wa_mcr_masked_en(wal, GEN8_ROW_CHICKEN2,
+				 GEN12_DISABLE_READ_SUPPRESSION);
 
 		/*
 		 * Wa_22010960976:dg2
 		 * Wa_14013347512:dg2
 		 */
-		wa_masked_dis(wal, XEHP_HDC_CHICKEN0,
-			      LSC_L1_FLUSH_CTL_3D_DATAPORT_FLUSH_EVENTS_MASK);
+		wa_mcr_masked_dis(wal, XEHP_HDC_CHICKEN0,
+				  LSC_L1_FLUSH_CTL_3D_DATAPORT_FLUSH_EVENTS_MASK);
 	}
 
 	if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_A0, STEP_B0)) {
@@ -2179,8 +2267,8 @@  rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
 		 * Wa_1608949956:dg2_g10
 		 * Wa_14010198302:dg2_g10
 		 */
-		wa_masked_en(wal, GEN8_ROW_CHICKEN,
-			     MDQ_ARBITRATION_MODE | UGM_BACKUP_MODE);
+		wa_mcr_masked_en(wal, GEN8_ROW_CHICKEN,
+				 MDQ_ARBITRATION_MODE | UGM_BACKUP_MODE);
 
 		/*
 		 * Wa_14010918519:dg2_g10
@@ -2188,31 +2276,31 @@  rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
 		 * LSC_CHICKEN_BIT_0 always reads back as 0 is this stepping,
 		 * so ignoring verification.
 		 */
-		wa_add(wal, LSC_CHICKEN_BIT_0_UDW, 0,
-		       FORCE_SLM_FENCE_SCOPE_TO_TILE | FORCE_UGM_FENCE_SCOPE_TO_TILE,
-		       0, false);
+		wa_mcr_add(wal, LSC_CHICKEN_BIT_0_UDW, 0,
+			   FORCE_SLM_FENCE_SCOPE_TO_TILE | FORCE_UGM_FENCE_SCOPE_TO_TILE,
+			   0, false);
 	}
 
 	if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_A0, STEP_B0)) {
 		/* Wa_22010430635:dg2 */
-		wa_masked_en(wal,
-			     GEN9_ROW_CHICKEN4,
-			     GEN12_DISABLE_GRF_CLEAR);
+		wa_mcr_masked_en(wal,
+				 GEN9_ROW_CHICKEN4,
+				 GEN12_DISABLE_GRF_CLEAR);
 
 		/* Wa_14010648519:dg2 */
-		wa_write_or(wal, XEHP_L3NODEARBCFG, XEHP_LNESPARE);
+		wa_mcr_write_or(wal, XEHP_L3NODEARBCFG, XEHP_LNESPARE);
 	}
 
 	/* Wa_14013202645:dg2 */
 	if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_B0, STEP_C0) ||
 	    IS_DG2_GRAPHICS_STEP(i915, G11, STEP_A0, STEP_B0))
-		wa_write_or(wal, RT_CTRL, DIS_NULL_QUERY);
+		wa_mcr_write_or(wal, RT_CTRL, DIS_NULL_QUERY);
 
 	/* Wa_22012532006:dg2 */
 	if (IS_DG2_GRAPHICS_STEP(engine->i915, G10, STEP_A0, STEP_C0) ||
 	    IS_DG2_GRAPHICS_STEP(engine->i915, G11, STEP_A0, STEP_B0))
-		wa_masked_en(wal, GEN9_HALF_SLICE_CHICKEN7,
-			     DG2_DISABLE_ROUND_ENABLE_ALLOW_FOR_SSLA);
+		wa_mcr_masked_en(wal, GEN9_HALF_SLICE_CHICKEN7,
+				 DG2_DISABLE_ROUND_ENABLE_ALLOW_FOR_SSLA);
 
 	if (IS_DG2_GRAPHICS_STEP(engine->i915, G10, STEP_A0, STEP_B0)) {
 		/* Wa_14010680813:dg2_g10 */
@@ -2223,17 +2311,16 @@  rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
 	if (IS_DG2_GRAPHICS_STEP(engine->i915, G10, STEP_A0, STEP_B0) ||
 	    IS_DG2_GRAPHICS_STEP(engine->i915, G11, STEP_A0, STEP_B0)) {
 		/* Wa_14012362059:dg2 */
-		wa_write_or(wal, XEHP_MERT_MOD_CTRL, FORCE_MISS_FTLB);
+		wa_mcr_write_or(wal, XEHP_MERT_MOD_CTRL, FORCE_MISS_FTLB);
 	}
 
 	if (IS_DG2_GRAPHICS_STEP(i915, G11, STEP_B0, STEP_FOREVER) ||
 	    IS_DG2_G10(i915)) {
 		/* Wa_22014600077:dg2 */
-		wa_add(wal, GEN10_CACHE_MODE_SS, 0,
-		       _MASKED_BIT_ENABLE(ENABLE_EU_COUNT_FOR_TDL_FLUSH),
-		       0 /* Wa_14012342262 :write-only reg, so skip
-			    verification */,
-		       true);
+		wa_mcr_add(wal, GEN10_CACHE_MODE_SS, 0,
+			   _MASKED_BIT_ENABLE(ENABLE_EU_COUNT_FOR_TDL_FLUSH),
+			   0 /* Wa_14012342262 write-only reg, so skip verification */,
+			   true);
 	}
 
 	if (IS_DG1_GRAPHICS_STEP(i915, STEP_A0, STEP_B0) ||
@@ -2260,7 +2347,7 @@  rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
 	if (IS_ALDERLAKE_P(i915) || IS_ALDERLAKE_S(i915) || IS_DG1(i915) ||
 	    IS_ROCKETLAKE(i915) || IS_TIGERLAKE(i915)) {
 		/* Wa_1606931601:tgl,rkl,dg1,adl-s,adl-p */
-		wa_masked_en(wal, GEN8_ROW_CHICKEN2, GEN12_DISABLE_EARLY_READ);
+		wa_mcr_masked_en(wal, GEN8_ROW_CHICKEN2, GEN12_DISABLE_EARLY_READ);
 
 		/*
 		 * Wa_1407928979:tgl A*
@@ -2289,14 +2376,14 @@  rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
 	    IS_DG1_GRAPHICS_STEP(i915, STEP_A0, STEP_B0) ||
 	    IS_ROCKETLAKE(i915) || IS_TIGERLAKE(i915)) {
 		/* Wa_1409804808:tgl,rkl,dg1[a0],adl-s,adl-p */
-		wa_masked_en(wal, GEN8_ROW_CHICKEN2,
-			     GEN12_PUSH_CONST_DEREF_HOLD_DIS);
+		wa_mcr_masked_en(wal, GEN8_ROW_CHICKEN2,
+				 GEN12_PUSH_CONST_DEREF_HOLD_DIS);
 
 		/*
 		 * Wa_1409085225:tgl
 		 * Wa_14010229206:tgl,rkl,dg1[a0],adl-s,adl-p
 		 */
-		wa_masked_en(wal, GEN9_ROW_CHICKEN4, GEN12_DISABLE_TDL_PUSH);
+		wa_mcr_masked_en(wal, GEN9_ROW_CHICKEN4, GEN12_DISABLE_TDL_PUSH);
 	}
 
 	if (IS_DG1_GRAPHICS_STEP(i915, STEP_A0, STEP_B0) ||
@@ -2320,9 +2407,9 @@  rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
 	if (IS_DG1(i915) || IS_ROCKETLAKE(i915) || IS_TIGERLAKE(i915) ||
 	    IS_ALDERLAKE_S(i915) || IS_ALDERLAKE_P(i915)) {
 		/* Wa_1406941453:tgl,rkl,dg1,adl-s,adl-p */
-		wa_masked_en(wal,
-			     GEN10_SAMPLER_MODE,
-			     ENABLE_SMALLPL);
+		wa_mcr_masked_en(wal,
+				 GEN10_SAMPLER_MODE,
+				 ENABLE_SMALLPL);
 	}
 
 	if (GRAPHICS_VER(i915) == 11) {
@@ -2356,9 +2443,9 @@  rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
 		 * Wa_1405733216:icl
 		 * Formerly known as WaDisableCleanEvicts
 		 */
-		wa_write_or(wal,
-			    GEN8_L3SQCREG4,
-			    GEN11_LQSC_CLEAN_EVICT_DISABLE);
+		wa_mcr_write_or(wal,
+				GEN8_L3SQCREG4,
+				GEN11_LQSC_CLEAN_EVICT_DISABLE);
 
 		/* Wa_1606682166:icl */
 		wa_write_or(wal,
@@ -2366,10 +2453,10 @@  rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
 			    GEN7_DISABLE_SAMPLER_PREFETCH);
 
 		/* Wa_1409178092:icl */
-		wa_write_clr_set(wal,
-				 GEN11_SCRATCH2,
-				 GEN11_COHERENT_PARTIAL_WRITE_MERGE_ENABLE,
-				 0);
+		wa_mcr_write_clr_set(wal,
+				     GEN11_SCRATCH2,
+				     GEN11_COHERENT_PARTIAL_WRITE_MERGE_ENABLE,
+				     0);
 
 		/* WaEnable32PlaneMode:icl */
 		wa_masked_en(wal, GEN9_CSFE_CHICKEN1_RCS,
@@ -2479,30 +2566,30 @@  rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
 			     GEN9_PREEMPT_GPGPU_SYNC_SWITCH_DISABLE);
 
 		/* WaEnableLbsSlaRetryTimerDecrement:skl,bxt,kbl,glk,cfl */
-		wa_write_or(wal,
-			    BDW_SCRATCH1,
-			    GEN9_LBS_SLA_RETRY_TIMER_DECREMENT_ENABLE);
+		wa_mcr_write_or(wal,
+				BDW_SCRATCH1,
+				GEN9_LBS_SLA_RETRY_TIMER_DECREMENT_ENABLE);
 
 		/* WaProgramL3SqcReg1DefaultForPerf:bxt,glk */
 		if (IS_GEN9_LP(i915))
-			wa_write_clr_set(wal,
-					 GEN8_L3SQCREG1,
-					 L3_PRIO_CREDITS_MASK,
-					 L3_GENERAL_PRIO_CREDITS(62) |
-					 L3_HIGH_PRIO_CREDITS(2));
+			wa_mcr_write_clr_set(wal,
+					     GEN8_L3SQCREG1,
+					     L3_PRIO_CREDITS_MASK,
+					     L3_GENERAL_PRIO_CREDITS(62) |
+					     L3_HIGH_PRIO_CREDITS(2));
 
 		/* WaOCLCoherentLineFlush:skl,bxt,kbl,cfl */
-		wa_write_or(wal,
-			    GEN8_L3SQCREG4,
-			    GEN8_LQSC_FLUSH_COHERENT_LINES);
+		wa_mcr_write_or(wal,
+				GEN8_L3SQCREG4,
+				GEN8_LQSC_FLUSH_COHERENT_LINES);
 
 		/* Disable atomics in L3 to prevent unrecoverable hangs */
 		wa_write_clr_set(wal, GEN9_SCRATCH_LNCF1,
 				 GEN9_LNCF_NONIA_COHERENT_ATOMICS_ENABLE, 0);
-		wa_write_clr_set(wal, GEN8_L3SQCREG4,
-				 GEN8_LQSQ_NONIA_COHERENT_ATOMICS_ENABLE, 0);
-		wa_write_clr_set(wal, GEN9_SCRATCH1,
-				 EVICTION_PERF_FIX_ENABLE, 0);
+		wa_mcr_write_clr_set(wal, GEN8_L3SQCREG4,
+				     GEN8_LQSQ_NONIA_COHERENT_ATOMICS_ENABLE, 0);
+		wa_mcr_write_clr_set(wal, GEN9_SCRATCH1,
+				     EVICTION_PERF_FIX_ENABLE, 0);
 	}
 
 	if (IS_HASWELL(i915)) {
@@ -2716,7 +2803,7 @@  ccs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
 {
 	if (IS_PVC_CT_STEP(engine->i915, STEP_A0, STEP_C0)) {
 		/* Wa_14014999345:pvc */
-		wa_masked_en(wal, GEN10_CACHE_MODE_SS, DISABLE_ECC);
+		wa_mcr_masked_en(wal, GEN10_CACHE_MODE_SS, DISABLE_ECC);
 	}
 }
 
@@ -2742,8 +2829,8 @@  add_render_compute_tuning_settings(struct drm_i915_private *i915,
 	}
 
 	if (IS_DG2(i915)) {
-		wa_write_or(wal, XEHP_L3SCQREG7, BLEND_FILL_CACHING_OPT_DIS);
-		wa_write_clr_set(wal, RT_CTRL, STACKID_CTRL, STACKID_CTRL_512);
+		wa_mcr_write_or(wal, XEHP_L3SCQREG7, BLEND_FILL_CACHING_OPT_DIS);
+		wa_mcr_write_clr_set(wal, RT_CTRL, STACKID_CTRL, STACKID_CTRL_512);
 
 		/*
 		 * This is also listed as Wa_22012654132 for certain DG2
@@ -2754,10 +2841,10 @@  add_render_compute_tuning_settings(struct drm_i915_private *i915,
 		 * back for verification on DG2 (due to Wa_14012342262), so
 		 * we need to explicitly skip the readback.
 		 */
-		wa_add(wal, GEN10_CACHE_MODE_SS, 0,
-		       _MASKED_BIT_ENABLE(ENABLE_PREFETCH_INTO_IC),
-		       0 /* write-only, so skip validation */,
-		       true);
+		wa_mcr_add(wal, GEN10_CACHE_MODE_SS, 0,
+			   _MASKED_BIT_ENABLE(ENABLE_PREFETCH_INTO_IC),
+			   0 /* write-only, so skip validation */,
+			   true);
 	}
 
 	/*
@@ -2766,8 +2853,8 @@  add_render_compute_tuning_settings(struct drm_i915_private *i915,
 	 * platforms.
 	 */
 	if (INTEL_INFO(i915)->tuning_thread_rr_after_dep)
-		wa_masked_field_set(wal, GEN9_ROW_CHICKEN4, THREAD_EX_ARB_MODE,
-				    THREAD_EX_ARB_MODE_RR_AFTER_DEP);
+		wa_mcr_masked_field_set(wal, GEN9_ROW_CHICKEN4, THREAD_EX_ARB_MODE,
+					THREAD_EX_ARB_MODE_RR_AFTER_DEP);
 }
 
 /*
@@ -2793,30 +2880,30 @@  general_render_compute_wa_init(struct intel_engine_cs *engine, struct i915_wa_li
 
 	if (IS_XEHPSDV(i915)) {
 		/* Wa_1409954639 */
-		wa_masked_en(wal,
-			     GEN8_ROW_CHICKEN,
-			     SYSTOLIC_DOP_CLOCK_GATING_DIS);
+		wa_mcr_masked_en(wal,
+				 GEN8_ROW_CHICKEN,
+				 SYSTOLIC_DOP_CLOCK_GATING_DIS);
 
 		/* Wa_1607196519 */
-		wa_masked_en(wal,
-			     GEN9_ROW_CHICKEN4,
-			     GEN12_DISABLE_GRF_CLEAR);
+		wa_mcr_masked_en(wal,
+				 GEN9_ROW_CHICKEN4,
+				 GEN12_DISABLE_GRF_CLEAR);
 
 		/* Wa_14010670810:xehpsdv */
-		wa_write_or(wal, XEHP_L3NODEARBCFG, XEHP_LNESPARE);
+		wa_mcr_write_or(wal, XEHP_L3NODEARBCFG, XEHP_LNESPARE);
 
 		/* Wa_14010449647:xehpsdv */
-		wa_masked_en(wal, GEN8_HALF_SLICE_CHICKEN1,
-			     GEN7_PSD_SINGLE_PORT_DISPATCH_ENABLE);
+		wa_mcr_masked_en(wal, GEN8_HALF_SLICE_CHICKEN1,
+				 GEN7_PSD_SINGLE_PORT_DISPATCH_ENABLE);
 
 		/* Wa_18011725039:xehpsdv */
 		if (IS_XEHPSDV_GRAPHICS_STEP(i915, STEP_A1, STEP_B0)) {
-			wa_masked_dis(wal, MLTICTXCTL, TDONRENDER);
-			wa_write_or(wal, L3SQCREG1_CCS0, FLUSHALLNONCOH);
+			wa_mcr_masked_dis(wal, MLTICTXCTL, TDONRENDER);
+			wa_mcr_write_or(wal, L3SQCREG1_CCS0, FLUSHALLNONCOH);
 		}
 
 		/* Wa_14012362059:xehpsdv */
-		wa_write_or(wal, XEHP_MERT_MOD_CTRL, FORCE_MISS_FTLB);
+		wa_mcr_write_or(wal, XEHP_MERT_MOD_CTRL, FORCE_MISS_FTLB);
 
 		/* Wa_14014368820:xehpsdv */
 		wa_write_or(wal, GEN12_GAMCNTRL_CTRL, INVALIDATION_BROADCAST_MODE_DIS |
@@ -2825,19 +2912,19 @@  general_render_compute_wa_init(struct intel_engine_cs *engine, struct i915_wa_li
 
 	if (IS_DG2(i915) || IS_PONTEVECCHIO(i915)) {
 		/* Wa_14015227452:dg2,pvc */
-		wa_masked_en(wal, GEN9_ROW_CHICKEN4, XEHP_DIS_BBL_SYSPIPE);
+		wa_mcr_masked_en(wal, GEN9_ROW_CHICKEN4, XEHP_DIS_BBL_SYSPIPE);
 
 		/* Wa_22014226127:dg2,pvc */
-		wa_write_or(wal, LSC_CHICKEN_BIT_0, DISABLE_D8_D16_COASLESCE);
+		wa_mcr_write_or(wal, LSC_CHICKEN_BIT_0, DISABLE_D8_D16_COASLESCE);
 
 		/* Wa_16015675438:dg2,pvc */
 		wa_masked_en(wal, FF_SLICE_CS_CHICKEN2, GEN12_PERF_FIX_BALANCING_CFE_DISABLE);
 
 		/* Wa_18018781329:dg2,pvc */
-		wa_write_or(wal, RENDER_MOD_CTRL, FORCE_MISS_FTLB);
-		wa_write_or(wal, COMP_MOD_CTRL, FORCE_MISS_FTLB);
-		wa_write_or(wal, VDBX_MOD_CTRL, FORCE_MISS_FTLB);
-		wa_write_or(wal, VEBX_MOD_CTRL, FORCE_MISS_FTLB);
+		wa_mcr_write_or(wal, RENDER_MOD_CTRL, FORCE_MISS_FTLB);
+		wa_mcr_write_or(wal, COMP_MOD_CTRL, FORCE_MISS_FTLB);
+		wa_mcr_write_or(wal, VDBX_MOD_CTRL, FORCE_MISS_FTLB);
+		wa_mcr_write_or(wal, VEBX_MOD_CTRL, FORCE_MISS_FTLB);
 	}
 
 	if (IS_DG2(i915)) {
@@ -2845,7 +2932,7 @@  general_render_compute_wa_init(struct intel_engine_cs *engine, struct i915_wa_li
 		 * Wa_16011620976:dg2_g11
 		 * Wa_22015475538:dg2
 		 */
-		wa_write_or(wal, LSC_CHICKEN_BIT_0_UDW, DIS_CHAIN_2XSIMD8);
+		wa_mcr_write_or(wal, LSC_CHICKEN_BIT_0_UDW, DIS_CHAIN_2XSIMD8);
 	}
 }
 
diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds_types.h b/drivers/gpu/drm/i915/gt/intel_workarounds_types.h
index 8a4b6de4e754..f05b37e56fa9 100644
--- a/drivers/gpu/drm/i915/gt/intel_workarounds_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_workarounds_types.h
@@ -15,7 +15,9 @@  struct i915_wa {
 	u32		clr;
 	u32		set;
 	u32		read;
-	bool		masked_reg;
+
+	u32		masked_reg:1;
+	u32		is_mcr:1;
 };
 
 struct i915_wa_list {