Message ID | 20221014230239.1023689-12-matthew.d.roper@intel.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | Explicit MCR handling and MTL steering | expand |
On 14.10.2022 16:02, Matt Roper wrote: > Let's be more explicit about which of our workarounds are updating MCR > registers. > > Signed-off-by: Matt Roper <matthew.d.roper@intel.com> > --- > drivers/gpu/drm/i915/gt/intel_workarounds.c | 433 +++++++++++------- > .../gpu/drm/i915/gt/intel_workarounds_types.h | 4 +- > 2 files changed, 263 insertions(+), 174 deletions(-) Reviewed-by: Balasubramani Vivekanandan <balasubramani.vivekanandan@intel.com> Regards, Bala > > diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c > index 96b9f02a2284..7671994d5b7a 100644 > --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c > +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c > @@ -166,12 +166,33 @@ static void wa_add(struct i915_wa_list *wal, i915_reg_t reg, > _wa_add(wal, &wa); > } > > +static void wa_mcr_add(struct i915_wa_list *wal, i915_reg_t reg, > + u32 clear, u32 set, u32 read_mask, bool masked_reg) > +{ > + struct i915_wa wa = { > + .reg = reg, > + .clr = clear, > + .set = set, > + .read = read_mask, > + .masked_reg = masked_reg, > + .is_mcr = 1, > + }; > + > + _wa_add(wal, &wa); > +} > + > static void > wa_write_clr_set(struct i915_wa_list *wal, i915_reg_t reg, u32 clear, u32 set) > { > wa_add(wal, reg, clear, set, clear, false); > } > > +static void > +wa_mcr_write_clr_set(struct i915_wa_list *wal, i915_reg_t reg, u32 clear, u32 set) > +{ > + wa_mcr_add(wal, reg, clear, set, clear, false); > +} > + > static void > wa_write(struct i915_wa_list *wal, i915_reg_t reg, u32 set) > { > @@ -184,12 +205,24 @@ wa_write_or(struct i915_wa_list *wal, i915_reg_t reg, u32 set) > wa_write_clr_set(wal, reg, set, set); > } > > +static void > +wa_mcr_write_or(struct i915_wa_list *wal, i915_reg_t reg, u32 set) > +{ > + wa_mcr_write_clr_set(wal, reg, set, set); > +} > + > static void > wa_write_clr(struct i915_wa_list *wal, i915_reg_t reg, u32 clr) > { > wa_write_clr_set(wal, reg, clr, 0); > } > > +static void > +wa_mcr_write_clr(struct i915_wa_list *wal, i915_reg_t reg, u32 clr) > +{ > + wa_mcr_write_clr_set(wal, reg, clr, 0); > +} > + > /* > * WA operations on "masked register". A masked register has the upper 16 bits > * documented as "masked" in b-spec. Its purpose is to allow writing to just a > @@ -207,12 +240,24 @@ wa_masked_en(struct i915_wa_list *wal, i915_reg_t reg, u32 val) > wa_add(wal, reg, 0, _MASKED_BIT_ENABLE(val), val, true); > } > > +static void > +wa_mcr_masked_en(struct i915_wa_list *wal, i915_reg_t reg, u32 val) > +{ > + wa_mcr_add(wal, reg, 0, _MASKED_BIT_ENABLE(val), val, true); > +} > + > static void > wa_masked_dis(struct i915_wa_list *wal, i915_reg_t reg, u32 val) > { > wa_add(wal, reg, 0, _MASKED_BIT_DISABLE(val), val, true); > } > > +static void > +wa_mcr_masked_dis(struct i915_wa_list *wal, i915_reg_t reg, u32 val) > +{ > + wa_mcr_add(wal, reg, 0, _MASKED_BIT_DISABLE(val), val, true); > +} > + > static void > wa_masked_field_set(struct i915_wa_list *wal, i915_reg_t reg, > u32 mask, u32 val) > @@ -220,6 +265,13 @@ wa_masked_field_set(struct i915_wa_list *wal, i915_reg_t reg, > wa_add(wal, reg, 0, _MASKED_FIELD(mask, val), mask, true); > } > > +static void > +wa_mcr_masked_field_set(struct i915_wa_list *wal, i915_reg_t reg, > + u32 mask, u32 val) > +{ > + wa_mcr_add(wal, reg, 0, _MASKED_FIELD(mask, val), mask, true); > +} > + > static void gen6_ctx_workarounds_init(struct intel_engine_cs *engine, > struct i915_wa_list *wal) > { > @@ -241,8 +293,8 @@ static void gen8_ctx_workarounds_init(struct intel_engine_cs *engine, > wa_masked_en(wal, RING_MI_MODE(RENDER_RING_BASE), ASYNC_FLIP_PERF_DISABLE); > > /* WaDisablePartialInstShootdown:bdw,chv */ > - wa_masked_en(wal, GEN8_ROW_CHICKEN, > - PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE); > + wa_mcr_masked_en(wal, GEN8_ROW_CHICKEN, > + PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE); > > /* Use Force Non-Coherent whenever executing a 3D context. This is a > * workaround for a possible hang in the unlikely event a TLB > @@ -288,18 +340,18 @@ static void bdw_ctx_workarounds_init(struct intel_engine_cs *engine, > gen8_ctx_workarounds_init(engine, wal); > > /* WaDisableThreadStallDopClockGating:bdw (pre-production) */ > - wa_masked_en(wal, GEN8_ROW_CHICKEN, STALL_DOP_GATING_DISABLE); > + wa_mcr_masked_en(wal, GEN8_ROW_CHICKEN, STALL_DOP_GATING_DISABLE); > > /* WaDisableDopClockGating:bdw > * > * Also see the related UCGTCL1 write in bdw_init_clock_gating() > * to disable EUTC clock gating. > */ > - wa_masked_en(wal, GEN8_ROW_CHICKEN2, > - DOP_CLOCK_GATING_DISABLE); > + wa_mcr_masked_en(wal, GEN8_ROW_CHICKEN2, > + DOP_CLOCK_GATING_DISABLE); > > - wa_masked_en(wal, GEN8_HALF_SLICE_CHICKEN3, > - GEN8_SAMPLER_POWER_BYPASS_DIS); > + wa_mcr_masked_en(wal, GEN8_HALF_SLICE_CHICKEN3, > + GEN8_SAMPLER_POWER_BYPASS_DIS); > > wa_masked_en(wal, HDC_CHICKEN0, > /* WaForceContextSaveRestoreNonCoherent:bdw */ > @@ -314,7 +366,7 @@ static void chv_ctx_workarounds_init(struct intel_engine_cs *engine, > gen8_ctx_workarounds_init(engine, wal); > > /* WaDisableThreadStallDopClockGating:chv */ > - wa_masked_en(wal, GEN8_ROW_CHICKEN, STALL_DOP_GATING_DISABLE); > + wa_mcr_masked_en(wal, GEN8_ROW_CHICKEN, STALL_DOP_GATING_DISABLE); > > /* Improve HiZ throughput on CHV. */ > wa_masked_en(wal, HIZ_CHICKEN, CHV_HZ_8X8_MODE_IN_1X); > @@ -333,21 +385,21 @@ static void gen9_ctx_workarounds_init(struct intel_engine_cs *engine, > */ > wa_masked_en(wal, COMMON_SLICE_CHICKEN2, > GEN9_PBE_COMPRESSED_HASH_SELECTION); > - wa_masked_en(wal, GEN9_HALF_SLICE_CHICKEN7, > - GEN9_SAMPLER_HASH_COMPRESSED_READ_ADDR); > + wa_mcr_masked_en(wal, GEN9_HALF_SLICE_CHICKEN7, > + GEN9_SAMPLER_HASH_COMPRESSED_READ_ADDR); > } > > /* WaClearFlowControlGpgpuContextSave:skl,bxt,kbl,glk,cfl */ > /* WaDisablePartialInstShootdown:skl,bxt,kbl,glk,cfl */ > - wa_masked_en(wal, GEN8_ROW_CHICKEN, > - FLOW_CONTROL_ENABLE | > - PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE); > + wa_mcr_masked_en(wal, GEN8_ROW_CHICKEN, > + FLOW_CONTROL_ENABLE | > + PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE); > > /* WaEnableYV12BugFixInHalfSliceChicken7:skl,bxt,kbl,glk,cfl */ > /* WaEnableSamplerGPGPUPreemptionSupport:skl,bxt,kbl,cfl */ > - wa_masked_en(wal, GEN9_HALF_SLICE_CHICKEN7, > - GEN9_ENABLE_YV12_BUGFIX | > - GEN9_ENABLE_GPGPU_PREEMPTION); > + wa_mcr_masked_en(wal, GEN9_HALF_SLICE_CHICKEN7, > + GEN9_ENABLE_YV12_BUGFIX | > + GEN9_ENABLE_GPGPU_PREEMPTION); > > /* Wa4x4STCOptimizationDisable:skl,bxt,kbl,glk,cfl */ > /* WaDisablePartialResolveInVc:skl,bxt,kbl,cfl */ > @@ -356,8 +408,8 @@ static void gen9_ctx_workarounds_init(struct intel_engine_cs *engine, > GEN9_PARTIAL_RESOLVE_IN_VC_DISABLE); > > /* WaCcsTlbPrefetchDisable:skl,bxt,kbl,glk,cfl */ > - wa_masked_dis(wal, GEN9_HALF_SLICE_CHICKEN5, > - GEN9_CCS_TLB_PREFETCH_ENABLE); > + wa_mcr_masked_dis(wal, GEN9_HALF_SLICE_CHICKEN5, > + GEN9_CCS_TLB_PREFETCH_ENABLE); > > /* WaForceContextSaveRestoreNonCoherent:skl,bxt,kbl,cfl */ > wa_masked_en(wal, HDC_CHICKEN0, > @@ -386,11 +438,11 @@ static void gen9_ctx_workarounds_init(struct intel_engine_cs *engine, > IS_KABYLAKE(i915) || > IS_COFFEELAKE(i915) || > IS_COMETLAKE(i915)) > - wa_masked_en(wal, GEN8_HALF_SLICE_CHICKEN3, > - GEN8_SAMPLER_POWER_BYPASS_DIS); > + wa_mcr_masked_en(wal, GEN8_HALF_SLICE_CHICKEN3, > + GEN8_SAMPLER_POWER_BYPASS_DIS); > > /* WaDisableSTUnitPowerOptimization:skl,bxt,kbl,glk,cfl */ > - wa_masked_en(wal, HALF_SLICE_CHICKEN2, GEN8_ST_PO_DISABLE); > + wa_mcr_masked_en(wal, HALF_SLICE_CHICKEN2, GEN8_ST_PO_DISABLE); > > /* > * Supporting preemption with fine-granularity requires changes in the > @@ -469,8 +521,8 @@ static void bxt_ctx_workarounds_init(struct intel_engine_cs *engine, > gen9_ctx_workarounds_init(engine, wal); > > /* WaDisableThreadStallDopClockGating:bxt */ > - wa_masked_en(wal, GEN8_ROW_CHICKEN, > - STALL_DOP_GATING_DISABLE); > + wa_mcr_masked_en(wal, GEN8_ROW_CHICKEN, > + STALL_DOP_GATING_DISABLE); > > /* WaToEnableHwFixForPushConstHWBug:bxt */ > wa_masked_en(wal, COMMON_SLICE_CHICKEN2, > @@ -490,8 +542,8 @@ static void kbl_ctx_workarounds_init(struct intel_engine_cs *engine, > GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION); > > /* WaDisableSbeCacheDispatchPortSharing:kbl */ > - wa_masked_en(wal, GEN8_HALF_SLICE_CHICKEN1, > - GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE); > + wa_mcr_masked_en(wal, GEN8_HALF_SLICE_CHICKEN1, > + GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE); > } > > static void glk_ctx_workarounds_init(struct intel_engine_cs *engine, > @@ -514,8 +566,8 @@ static void cfl_ctx_workarounds_init(struct intel_engine_cs *engine, > GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION); > > /* WaDisableSbeCacheDispatchPortSharing:cfl */ > - wa_masked_en(wal, GEN8_HALF_SLICE_CHICKEN1, > - GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE); > + wa_mcr_masked_en(wal, GEN8_HALF_SLICE_CHICKEN1, > + GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE); > } > > static void icl_ctx_workarounds_init(struct intel_engine_cs *engine, > @@ -534,13 +586,13 @@ static void icl_ctx_workarounds_init(struct intel_engine_cs *engine, > * (the register is whitelisted in hardware now, so UMDs can opt in > * for coherency if they have a good reason). > */ > - wa_masked_en(wal, ICL_HDC_MODE, HDC_FORCE_NON_COHERENT); > + wa_mcr_masked_en(wal, ICL_HDC_MODE, HDC_FORCE_NON_COHERENT); > > /* WaEnableFloatBlendOptimization:icl */ > - wa_add(wal, GEN10_CACHE_MODE_SS, 0, > - _MASKED_BIT_ENABLE(FLOAT_BLEND_OPTIMIZATION_ENABLE), > - 0 /* write-only, so skip validation */, > - true); > + wa_mcr_add(wal, GEN10_CACHE_MODE_SS, 0, > + _MASKED_BIT_ENABLE(FLOAT_BLEND_OPTIMIZATION_ENABLE), > + 0 /* write-only, so skip validation */, > + true); > > /* WaDisableGPGPUMidThreadPreemption:icl */ > wa_masked_field_set(wal, GEN8_CS_CHICKEN1, > @@ -548,8 +600,8 @@ static void icl_ctx_workarounds_init(struct intel_engine_cs *engine, > GEN9_PREEMPT_GPGPU_THREAD_GROUP_LEVEL); > > /* allow headerless messages for preemptible GPGPU context */ > - wa_masked_en(wal, GEN10_SAMPLER_MODE, > - GEN11_SAMPLER_ENABLE_HEADLESS_MSG); > + wa_mcr_masked_en(wal, GEN10_SAMPLER_MODE, > + GEN11_SAMPLER_ENABLE_HEADLESS_MSG); > > /* Wa_1604278689:icl,ehl */ > wa_write(wal, IVB_FBC_RT_BASE, 0xFFFFFFFF & ~ILK_FBC_RT_VALID); > @@ -558,7 +610,7 @@ static void icl_ctx_workarounds_init(struct intel_engine_cs *engine, > 0xFFFFFFFF); > > /* Wa_1406306137:icl,ehl */ > - wa_masked_en(wal, GEN9_ROW_CHICKEN4, GEN11_DIS_PICK_2ND_EU); > + wa_mcr_masked_en(wal, GEN9_ROW_CHICKEN4, GEN11_DIS_PICK_2ND_EU); > } > > /* > @@ -569,13 +621,13 @@ static void dg2_ctx_gt_tuning_init(struct intel_engine_cs *engine, > struct i915_wa_list *wal) > { > wa_masked_en(wal, CHICKEN_RASTER_2, TBIMR_FAST_CLIP); > - wa_write_clr_set(wal, XEHP_L3SQCREG5, L3_PWM_TIMER_INIT_VAL_MASK, > - REG_FIELD_PREP(L3_PWM_TIMER_INIT_VAL_MASK, 0x7f)); > - wa_add(wal, > - XEHP_FF_MODE2, > - FF_MODE2_TDS_TIMER_MASK, > - FF_MODE2_TDS_TIMER_128, > - 0, false); > + wa_mcr_write_clr_set(wal, XEHP_L3SQCREG5, L3_PWM_TIMER_INIT_VAL_MASK, > + REG_FIELD_PREP(L3_PWM_TIMER_INIT_VAL_MASK, 0x7f)); > + wa_mcr_add(wal, > + XEHP_FF_MODE2, > + FF_MODE2_TDS_TIMER_MASK, > + FF_MODE2_TDS_TIMER_128, > + 0, false); > } > > /* > @@ -664,27 +716,27 @@ static void dg2_ctx_workarounds_init(struct intel_engine_cs *engine, > > /* Wa_16011186671:dg2_g11 */ > if (IS_DG2_GRAPHICS_STEP(engine->i915, G11, STEP_A0, STEP_B0)) { > - wa_masked_dis(wal, VFLSKPD, DIS_MULT_MISS_RD_SQUASH); > - wa_masked_en(wal, VFLSKPD, DIS_OVER_FETCH_CACHE); > + wa_mcr_masked_dis(wal, VFLSKPD, DIS_MULT_MISS_RD_SQUASH); > + wa_mcr_masked_en(wal, VFLSKPD, DIS_OVER_FETCH_CACHE); > } > > if (IS_DG2_GRAPHICS_STEP(engine->i915, G10, STEP_A0, STEP_B0)) { > /* Wa_14010469329:dg2_g10 */ > - wa_masked_en(wal, XEHP_COMMON_SLICE_CHICKEN3, > - XEHP_DUAL_SIMD8_SEQ_MERGE_DISABLE); > + wa_mcr_masked_en(wal, XEHP_COMMON_SLICE_CHICKEN3, > + XEHP_DUAL_SIMD8_SEQ_MERGE_DISABLE); > > /* > * Wa_22010465075:dg2_g10 > * Wa_22010613112:dg2_g10 > * Wa_14010698770:dg2_g10 > */ > - wa_masked_en(wal, XEHP_COMMON_SLICE_CHICKEN3, > - GEN12_DISABLE_CPS_AWARE_COLOR_PIPE); > + wa_mcr_masked_en(wal, XEHP_COMMON_SLICE_CHICKEN3, > + GEN12_DISABLE_CPS_AWARE_COLOR_PIPE); > } > > /* Wa_16013271637:dg2 */ > - wa_masked_en(wal, XEHP_SLICE_COMMON_ECO_CHICKEN1, > - MSC_MSAA_REODER_BUF_BYPASS_DISABLE); > + wa_mcr_masked_en(wal, XEHP_SLICE_COMMON_ECO_CHICKEN1, > + MSC_MSAA_REODER_BUF_BYPASS_DISABLE); > > /* Wa_14014947963:dg2 */ > if (IS_DG2_GRAPHICS_STEP(engine->i915, G10, STEP_B0, STEP_FOREVER) || > @@ -1264,9 +1316,9 @@ icl_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal) > PSDUNIT_CLKGATE_DIS); > > /* Wa_1406680159:icl,ehl */ > - wa_write_or(wal, > - GEN11_SUBSLICE_UNIT_LEVEL_CLKGATE, > - GWUNIT_CLKGATE_DIS); > + wa_mcr_write_or(wal, > + GEN11_SUBSLICE_UNIT_LEVEL_CLKGATE, > + GWUNIT_CLKGATE_DIS); > > /* Wa_1607087056:icl,ehl,jsl */ > if (IS_ICELAKE(i915) || > @@ -1279,7 +1331,7 @@ icl_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal) > * This is not a documented workaround, but rather an optimization > * to reduce sampler power. > */ > - wa_write_clr(wal, GEN10_DFR_RATIO_EN_AND_CHICKEN, DFR_DISABLE); > + wa_mcr_write_clr(wal, GEN10_DFR_RATIO_EN_AND_CHICKEN, DFR_DISABLE); > } > > /* > @@ -1313,7 +1365,7 @@ gen12_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal) > wa_14011060649(gt, wal); > > /* Wa_14011059788:tgl,rkl,adl-s,dg1,adl-p */ > - wa_write_or(wal, GEN10_DFR_RATIO_EN_AND_CHICKEN, DFR_DISABLE); > + wa_mcr_write_or(wal, GEN10_DFR_RATIO_EN_AND_CHICKEN, DFR_DISABLE); > } > > static void > @@ -1325,9 +1377,9 @@ tgl_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal) > > /* Wa_1409420604:tgl */ > if (IS_TGL_UY_GRAPHICS_STEP(i915, STEP_A0, STEP_B0)) > - wa_write_or(wal, > - SUBSLICE_UNIT_LEVEL_CLKGATE2, > - CPSSUNIT_CLKGATE_DIS); > + wa_mcr_write_or(wal, > + SUBSLICE_UNIT_LEVEL_CLKGATE2, > + CPSSUNIT_CLKGATE_DIS); > > /* Wa_1607087056:tgl also know as BUG:1409180338 */ > if (IS_TGL_UY_GRAPHICS_STEP(i915, STEP_A0, STEP_B0)) > @@ -1356,9 +1408,9 @@ dg1_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal) > > /* Wa_1409420604:dg1 */ > if (IS_DG1(i915)) > - wa_write_or(wal, > - SUBSLICE_UNIT_LEVEL_CLKGATE2, > - CPSSUNIT_CLKGATE_DIS); > + wa_mcr_write_or(wal, > + SUBSLICE_UNIT_LEVEL_CLKGATE2, > + CPSSUNIT_CLKGATE_DIS); > > /* Wa_1408615072:dg1 */ > /* Empirical testing shows this register is unaffected by engine reset. */ > @@ -1375,7 +1427,7 @@ xehpsdv_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal) > xehp_init_mcr(gt, wal); > > /* Wa_1409757795:xehpsdv */ > - wa_write_or(wal, SCCGCTL94DC, CG3DDISURB); > + wa_mcr_write_or(wal, SCCGCTL94DC, CG3DDISURB); > > /* Wa_16011155590:xehpsdv */ > if (IS_XEHPSDV_GRAPHICS_STEP(i915, STEP_A0, STEP_B0)) > @@ -1455,8 +1507,8 @@ dg2_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal) > CG3DDISCFEG_CLKGATE_DIS); > > /* Wa_14011006942:dg2 */ > - wa_write_or(wal, GEN11_SUBSLICE_UNIT_LEVEL_CLKGATE, > - DSS_ROUTER_CLKGATE_DIS); > + wa_mcr_write_or(wal, GEN11_SUBSLICE_UNIT_LEVEL_CLKGATE, > + DSS_ROUTER_CLKGATE_DIS); > } > > if (IS_DG2_GRAPHICS_STEP(gt->i915, G10, STEP_A0, STEP_B0)) { > @@ -1467,7 +1519,7 @@ dg2_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal) > wa_write_or(wal, UNSLCGCTL9444, LTCDD_CLKGATE_DIS); > > /* Wa_14011371254:dg2_g10 */ > - wa_write_or(wal, XEHP_SLICE_UNIT_LEVEL_CLKGATE, NODEDSS_CLKGATE_DIS); > + wa_mcr_write_or(wal, XEHP_SLICE_UNIT_LEVEL_CLKGATE, NODEDSS_CLKGATE_DIS); > > /* Wa_14011431319:dg2_g10 */ > wa_write_or(wal, UNSLCGCTL9440, GAMTLBOACS_CLKGATE_DIS | > @@ -1503,21 +1555,21 @@ dg2_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal) > GAMEDIA_CLKGATE_DIS); > > /* Wa_14011028019:dg2_g10 */ > - wa_write_or(wal, SSMCGCTL9530, RTFUNIT_CLKGATE_DIS); > + wa_mcr_write_or(wal, SSMCGCTL9530, RTFUNIT_CLKGATE_DIS); > } > > /* Wa_14014830051:dg2 */ > - wa_write_clr(wal, SARB_CHICKEN1, COMP_CKN_IN); > + wa_mcr_write_clr(wal, SARB_CHICKEN1, COMP_CKN_IN); > > /* > * The following are not actually "workarounds" but rather > * recommended tuning settings documented in the bspec's > * performance guide section. > */ > - wa_write_or(wal, XEHP_SQCM, EN_32B_ACCESS); > + wa_mcr_write_or(wal, XEHP_SQCM, EN_32B_ACCESS); > > /* Wa_14015795083 */ > - wa_write_clr(wal, GEN8_MISCCPCTL, GEN12_DOP_CLOCK_GATE_RENDER_ENABLE); > + wa_mcr_write_clr(wal, GEN8_MISCCPCTL, GEN12_DOP_CLOCK_GATE_RENDER_ENABLE); > } > > static void > @@ -1526,7 +1578,7 @@ pvc_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal) > pvc_init_mcr(gt, wal); > > /* Wa_14015795083 */ > - wa_write_clr(wal, GEN8_MISCCPCTL, GEN12_DOP_CLOCK_GATE_RENDER_ENABLE); > + wa_mcr_write_clr(wal, GEN8_MISCCPCTL, GEN12_DOP_CLOCK_GATE_RENDER_ENABLE); > } > > static void > @@ -1638,14 +1690,25 @@ wa_list_apply(struct intel_gt *gt, const struct i915_wa_list *wal) > u32 val, old = 0; > > /* open-coded rmw due to steering */ > - old = wa->clr ? intel_gt_mcr_read_any_fw(gt, wa->reg) : 0; > + if (wa->clr) > + old = wa->is_mcr ? > + intel_gt_mcr_read_any_fw(gt, wa->reg) : > + intel_uncore_read_fw(uncore, wa->reg); > val = (old & ~wa->clr) | wa->set; > - if (val != old || !wa->clr) > - intel_uncore_write_fw(uncore, wa->reg, val); > + if (val != old || !wa->clr) { > + if (wa->is_mcr) > + intel_gt_mcr_multicast_write_fw(gt, wa->reg, val); > + else > + intel_uncore_write_fw(uncore, wa->reg, val); > + } > + > + if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)) { > + u32 val = wa->is_mcr ? > + intel_gt_mcr_read_any_fw(gt, wa->reg) : > + intel_uncore_read_fw(uncore, wa->reg); > > - if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)) > - wa_verify(wa, intel_gt_mcr_read_any_fw(gt, wa->reg), > - wal->name, "application"); > + wa_verify(wa, val, wal->name, "application"); > + } > } > > intel_uncore_forcewake_put__locked(uncore, fw); > @@ -1674,8 +1737,9 @@ static bool wa_list_verify(struct intel_gt *gt, > intel_uncore_forcewake_get__locked(uncore, fw); > > for (i = 0, wa = wal->list; i < wal->count; i++, wa++) > - ok &= wa_verify(wa, > - intel_gt_mcr_read_any_fw(gt, wa->reg), > + ok &= wa_verify(wa, wa->is_mcr ? > + intel_gt_mcr_read_any_fw(gt, wa->reg) : > + intel_uncore_read_fw(uncore, wa->reg), > wal->name, from); > > intel_uncore_forcewake_put__locked(uncore, fw); > @@ -1721,12 +1785,36 @@ whitelist_reg_ext(struct i915_wa_list *wal, i915_reg_t reg, u32 flags) > _wa_add(wal, &wa); > } > > +static void > +whitelist_mcr_reg_ext(struct i915_wa_list *wal, i915_reg_t reg, u32 flags) > +{ > + struct i915_wa wa = { > + .reg = reg, > + .is_mcr = 1, > + }; > + > + if (GEM_DEBUG_WARN_ON(wal->count >= RING_MAX_NONPRIV_SLOTS)) > + return; > + > + if (GEM_DEBUG_WARN_ON(!is_nonpriv_flags_valid(flags))) > + return; > + > + wa.reg.reg |= flags; > + _wa_add(wal, &wa); > +} > + > static void > whitelist_reg(struct i915_wa_list *wal, i915_reg_t reg) > { > whitelist_reg_ext(wal, reg, RING_FORCE_TO_NONPRIV_ACCESS_RW); > } > > +static void > +whitelist_mcr_reg(struct i915_wa_list *wal, i915_reg_t reg) > +{ > + whitelist_mcr_reg_ext(wal, reg, RING_FORCE_TO_NONPRIV_ACCESS_RW); > +} > + > static void gen9_whitelist_build(struct i915_wa_list *w) > { > /* WaVFEStateAfterPipeControlwithMediaStateClear:skl,bxt,glk,cfl */ > @@ -1752,7 +1840,7 @@ static void skl_whitelist_build(struct intel_engine_cs *engine) > gen9_whitelist_build(w); > > /* WaDisableLSQCROPERFforOCL:skl */ > - whitelist_reg(w, GEN8_L3SQCREG4); > + whitelist_mcr_reg(w, GEN8_L3SQCREG4); > } > > static void bxt_whitelist_build(struct intel_engine_cs *engine) > @@ -1773,7 +1861,7 @@ static void kbl_whitelist_build(struct intel_engine_cs *engine) > gen9_whitelist_build(w); > > /* WaDisableLSQCROPERFforOCL:kbl */ > - whitelist_reg(w, GEN8_L3SQCREG4); > + whitelist_mcr_reg(w, GEN8_L3SQCREG4); > } > > static void glk_whitelist_build(struct intel_engine_cs *engine) > @@ -1838,10 +1926,10 @@ static void icl_whitelist_build(struct intel_engine_cs *engine) > switch (engine->class) { > case RENDER_CLASS: > /* WaAllowUMDToModifyHalfSliceChicken7:icl */ > - whitelist_reg(w, GEN9_HALF_SLICE_CHICKEN7); > + whitelist_mcr_reg(w, GEN9_HALF_SLICE_CHICKEN7); > > /* WaAllowUMDToModifySamplerMode:icl */ > - whitelist_reg(w, GEN10_SAMPLER_MODE); > + whitelist_mcr_reg(w, GEN10_SAMPLER_MODE); > > /* WaEnableStateCacheRedirectToCS:icl */ > whitelist_reg(w, GEN9_SLICE_COMMON_ECO_CHICKEN1); > @@ -2117,21 +2205,21 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) > > if (IS_DG2_GRAPHICS_STEP(i915, G11, STEP_A0, STEP_B0)) { > /* Wa_14013392000:dg2_g11 */ > - wa_masked_en(wal, GEN8_ROW_CHICKEN2, GEN12_ENABLE_LARGE_GRF_MODE); > + wa_mcr_masked_en(wal, GEN8_ROW_CHICKEN2, GEN12_ENABLE_LARGE_GRF_MODE); > } > > if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_B0, STEP_FOREVER) || > IS_DG2_G11(i915) || IS_DG2_G12(i915)) { > /* Wa_1509727124:dg2 */ > - wa_masked_en(wal, GEN10_SAMPLER_MODE, > - SC_DISABLE_POWER_OPTIMIZATION_EBB); > + wa_mcr_masked_en(wal, GEN10_SAMPLER_MODE, > + SC_DISABLE_POWER_OPTIMIZATION_EBB); > } > > if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_A0, STEP_B0) || > IS_DG2_GRAPHICS_STEP(i915, G11, STEP_A0, STEP_B0)) { > /* Wa_14012419201:dg2 */ > - wa_masked_en(wal, GEN9_ROW_CHICKEN4, > - GEN12_DISABLE_HDR_PAST_PAYLOAD_HOLD_FIX); > + wa_mcr_masked_en(wal, GEN9_ROW_CHICKEN4, > + GEN12_DISABLE_HDR_PAST_PAYLOAD_HOLD_FIX); > } > > if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_B0, STEP_C0) || > @@ -2140,13 +2228,13 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) > * Wa_22012826095:dg2 > * Wa_22013059131:dg2 > */ > - wa_write_clr_set(wal, LSC_CHICKEN_BIT_0_UDW, > - MAXREQS_PER_BANK, > - REG_FIELD_PREP(MAXREQS_PER_BANK, 2)); > + wa_mcr_write_clr_set(wal, LSC_CHICKEN_BIT_0_UDW, > + MAXREQS_PER_BANK, > + REG_FIELD_PREP(MAXREQS_PER_BANK, 2)); > > /* Wa_22013059131:dg2 */ > - wa_write_or(wal, LSC_CHICKEN_BIT_0, > - FORCE_1_SUB_MESSAGE_PER_FRAGMENT); > + wa_mcr_write_or(wal, LSC_CHICKEN_BIT_0, > + FORCE_1_SUB_MESSAGE_PER_FRAGMENT); > } > > /* Wa_1308578152:dg2_g10 when first gslice is fused off */ > @@ -2159,19 +2247,19 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) > if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_B0, STEP_FOREVER) || > IS_DG2_G11(i915) || IS_DG2_G12(i915)) { > /* Wa_22013037850:dg2 */ > - wa_write_or(wal, LSC_CHICKEN_BIT_0_UDW, > - DISABLE_128B_EVICTION_COMMAND_UDW); > + wa_mcr_write_or(wal, LSC_CHICKEN_BIT_0_UDW, > + DISABLE_128B_EVICTION_COMMAND_UDW); > > /* Wa_22012856258:dg2 */ > - wa_masked_en(wal, GEN8_ROW_CHICKEN2, > - GEN12_DISABLE_READ_SUPPRESSION); > + wa_mcr_masked_en(wal, GEN8_ROW_CHICKEN2, > + GEN12_DISABLE_READ_SUPPRESSION); > > /* > * Wa_22010960976:dg2 > * Wa_14013347512:dg2 > */ > - wa_masked_dis(wal, XEHP_HDC_CHICKEN0, > - LSC_L1_FLUSH_CTL_3D_DATAPORT_FLUSH_EVENTS_MASK); > + wa_mcr_masked_dis(wal, XEHP_HDC_CHICKEN0, > + LSC_L1_FLUSH_CTL_3D_DATAPORT_FLUSH_EVENTS_MASK); > } > > if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_A0, STEP_B0)) { > @@ -2179,8 +2267,8 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) > * Wa_1608949956:dg2_g10 > * Wa_14010198302:dg2_g10 > */ > - wa_masked_en(wal, GEN8_ROW_CHICKEN, > - MDQ_ARBITRATION_MODE | UGM_BACKUP_MODE); > + wa_mcr_masked_en(wal, GEN8_ROW_CHICKEN, > + MDQ_ARBITRATION_MODE | UGM_BACKUP_MODE); > > /* > * Wa_14010918519:dg2_g10 > @@ -2188,31 +2276,31 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) > * LSC_CHICKEN_BIT_0 always reads back as 0 is this stepping, > * so ignoring verification. > */ > - wa_add(wal, LSC_CHICKEN_BIT_0_UDW, 0, > - FORCE_SLM_FENCE_SCOPE_TO_TILE | FORCE_UGM_FENCE_SCOPE_TO_TILE, > - 0, false); > + wa_mcr_add(wal, LSC_CHICKEN_BIT_0_UDW, 0, > + FORCE_SLM_FENCE_SCOPE_TO_TILE | FORCE_UGM_FENCE_SCOPE_TO_TILE, > + 0, false); > } > > if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_A0, STEP_B0)) { > /* Wa_22010430635:dg2 */ > - wa_masked_en(wal, > - GEN9_ROW_CHICKEN4, > - GEN12_DISABLE_GRF_CLEAR); > + wa_mcr_masked_en(wal, > + GEN9_ROW_CHICKEN4, > + GEN12_DISABLE_GRF_CLEAR); > > /* Wa_14010648519:dg2 */ > - wa_write_or(wal, XEHP_L3NODEARBCFG, XEHP_LNESPARE); > + wa_mcr_write_or(wal, XEHP_L3NODEARBCFG, XEHP_LNESPARE); > } > > /* Wa_14013202645:dg2 */ > if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_B0, STEP_C0) || > IS_DG2_GRAPHICS_STEP(i915, G11, STEP_A0, STEP_B0)) > - wa_write_or(wal, RT_CTRL, DIS_NULL_QUERY); > + wa_mcr_write_or(wal, RT_CTRL, DIS_NULL_QUERY); > > /* Wa_22012532006:dg2 */ > if (IS_DG2_GRAPHICS_STEP(engine->i915, G10, STEP_A0, STEP_C0) || > IS_DG2_GRAPHICS_STEP(engine->i915, G11, STEP_A0, STEP_B0)) > - wa_masked_en(wal, GEN9_HALF_SLICE_CHICKEN7, > - DG2_DISABLE_ROUND_ENABLE_ALLOW_FOR_SSLA); > + wa_mcr_masked_en(wal, GEN9_HALF_SLICE_CHICKEN7, > + DG2_DISABLE_ROUND_ENABLE_ALLOW_FOR_SSLA); > > if (IS_DG2_GRAPHICS_STEP(engine->i915, G10, STEP_A0, STEP_B0)) { > /* Wa_14010680813:dg2_g10 */ > @@ -2223,17 +2311,16 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) > if (IS_DG2_GRAPHICS_STEP(engine->i915, G10, STEP_A0, STEP_B0) || > IS_DG2_GRAPHICS_STEP(engine->i915, G11, STEP_A0, STEP_B0)) { > /* Wa_14012362059:dg2 */ > - wa_write_or(wal, XEHP_MERT_MOD_CTRL, FORCE_MISS_FTLB); > + wa_mcr_write_or(wal, XEHP_MERT_MOD_CTRL, FORCE_MISS_FTLB); > } > > if (IS_DG2_GRAPHICS_STEP(i915, G11, STEP_B0, STEP_FOREVER) || > IS_DG2_G10(i915)) { > /* Wa_22014600077:dg2 */ > - wa_add(wal, GEN10_CACHE_MODE_SS, 0, > - _MASKED_BIT_ENABLE(ENABLE_EU_COUNT_FOR_TDL_FLUSH), > - 0 /* Wa_14012342262 :write-only reg, so skip > - verification */, > - true); > + wa_mcr_add(wal, GEN10_CACHE_MODE_SS, 0, > + _MASKED_BIT_ENABLE(ENABLE_EU_COUNT_FOR_TDL_FLUSH), > + 0 /* Wa_14012342262 write-only reg, so skip verification */, > + true); > } > > if (IS_DG1_GRAPHICS_STEP(i915, STEP_A0, STEP_B0) || > @@ -2260,7 +2347,7 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) > if (IS_ALDERLAKE_P(i915) || IS_ALDERLAKE_S(i915) || IS_DG1(i915) || > IS_ROCKETLAKE(i915) || IS_TIGERLAKE(i915)) { > /* Wa_1606931601:tgl,rkl,dg1,adl-s,adl-p */ > - wa_masked_en(wal, GEN8_ROW_CHICKEN2, GEN12_DISABLE_EARLY_READ); > + wa_mcr_masked_en(wal, GEN8_ROW_CHICKEN2, GEN12_DISABLE_EARLY_READ); > > /* > * Wa_1407928979:tgl A* > @@ -2289,14 +2376,14 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) > IS_DG1_GRAPHICS_STEP(i915, STEP_A0, STEP_B0) || > IS_ROCKETLAKE(i915) || IS_TIGERLAKE(i915)) { > /* Wa_1409804808:tgl,rkl,dg1[a0],adl-s,adl-p */ > - wa_masked_en(wal, GEN8_ROW_CHICKEN2, > - GEN12_PUSH_CONST_DEREF_HOLD_DIS); > + wa_mcr_masked_en(wal, GEN8_ROW_CHICKEN2, > + GEN12_PUSH_CONST_DEREF_HOLD_DIS); > > /* > * Wa_1409085225:tgl > * Wa_14010229206:tgl,rkl,dg1[a0],adl-s,adl-p > */ > - wa_masked_en(wal, GEN9_ROW_CHICKEN4, GEN12_DISABLE_TDL_PUSH); > + wa_mcr_masked_en(wal, GEN9_ROW_CHICKEN4, GEN12_DISABLE_TDL_PUSH); > } > > if (IS_DG1_GRAPHICS_STEP(i915, STEP_A0, STEP_B0) || > @@ -2320,9 +2407,9 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) > if (IS_DG1(i915) || IS_ROCKETLAKE(i915) || IS_TIGERLAKE(i915) || > IS_ALDERLAKE_S(i915) || IS_ALDERLAKE_P(i915)) { > /* Wa_1406941453:tgl,rkl,dg1,adl-s,adl-p */ > - wa_masked_en(wal, > - GEN10_SAMPLER_MODE, > - ENABLE_SMALLPL); > + wa_mcr_masked_en(wal, > + GEN10_SAMPLER_MODE, > + ENABLE_SMALLPL); > } > > if (GRAPHICS_VER(i915) == 11) { > @@ -2356,9 +2443,9 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) > * Wa_1405733216:icl > * Formerly known as WaDisableCleanEvicts > */ > - wa_write_or(wal, > - GEN8_L3SQCREG4, > - GEN11_LQSC_CLEAN_EVICT_DISABLE); > + wa_mcr_write_or(wal, > + GEN8_L3SQCREG4, > + GEN11_LQSC_CLEAN_EVICT_DISABLE); > > /* Wa_1606682166:icl */ > wa_write_or(wal, > @@ -2366,10 +2453,10 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) > GEN7_DISABLE_SAMPLER_PREFETCH); > > /* Wa_1409178092:icl */ > - wa_write_clr_set(wal, > - GEN11_SCRATCH2, > - GEN11_COHERENT_PARTIAL_WRITE_MERGE_ENABLE, > - 0); > + wa_mcr_write_clr_set(wal, > + GEN11_SCRATCH2, > + GEN11_COHERENT_PARTIAL_WRITE_MERGE_ENABLE, > + 0); > > /* WaEnable32PlaneMode:icl */ > wa_masked_en(wal, GEN9_CSFE_CHICKEN1_RCS, > @@ -2479,30 +2566,30 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) > GEN9_PREEMPT_GPGPU_SYNC_SWITCH_DISABLE); > > /* WaEnableLbsSlaRetryTimerDecrement:skl,bxt,kbl,glk,cfl */ > - wa_write_or(wal, > - BDW_SCRATCH1, > - GEN9_LBS_SLA_RETRY_TIMER_DECREMENT_ENABLE); > + wa_mcr_write_or(wal, > + BDW_SCRATCH1, > + GEN9_LBS_SLA_RETRY_TIMER_DECREMENT_ENABLE); > > /* WaProgramL3SqcReg1DefaultForPerf:bxt,glk */ > if (IS_GEN9_LP(i915)) > - wa_write_clr_set(wal, > - GEN8_L3SQCREG1, > - L3_PRIO_CREDITS_MASK, > - L3_GENERAL_PRIO_CREDITS(62) | > - L3_HIGH_PRIO_CREDITS(2)); > + wa_mcr_write_clr_set(wal, > + GEN8_L3SQCREG1, > + L3_PRIO_CREDITS_MASK, > + L3_GENERAL_PRIO_CREDITS(62) | > + L3_HIGH_PRIO_CREDITS(2)); > > /* WaOCLCoherentLineFlush:skl,bxt,kbl,cfl */ > - wa_write_or(wal, > - GEN8_L3SQCREG4, > - GEN8_LQSC_FLUSH_COHERENT_LINES); > + wa_mcr_write_or(wal, > + GEN8_L3SQCREG4, > + GEN8_LQSC_FLUSH_COHERENT_LINES); > > /* Disable atomics in L3 to prevent unrecoverable hangs */ > wa_write_clr_set(wal, GEN9_SCRATCH_LNCF1, > GEN9_LNCF_NONIA_COHERENT_ATOMICS_ENABLE, 0); > - wa_write_clr_set(wal, GEN8_L3SQCREG4, > - GEN8_LQSQ_NONIA_COHERENT_ATOMICS_ENABLE, 0); > - wa_write_clr_set(wal, GEN9_SCRATCH1, > - EVICTION_PERF_FIX_ENABLE, 0); > + wa_mcr_write_clr_set(wal, GEN8_L3SQCREG4, > + GEN8_LQSQ_NONIA_COHERENT_ATOMICS_ENABLE, 0); > + wa_mcr_write_clr_set(wal, GEN9_SCRATCH1, > + EVICTION_PERF_FIX_ENABLE, 0); > } > > if (IS_HASWELL(i915)) { > @@ -2716,7 +2803,7 @@ ccs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) > { > if (IS_PVC_CT_STEP(engine->i915, STEP_A0, STEP_C0)) { > /* Wa_14014999345:pvc */ > - wa_masked_en(wal, GEN10_CACHE_MODE_SS, DISABLE_ECC); > + wa_mcr_masked_en(wal, GEN10_CACHE_MODE_SS, DISABLE_ECC); > } > } > > @@ -2742,8 +2829,8 @@ add_render_compute_tuning_settings(struct drm_i915_private *i915, > } > > if (IS_DG2(i915)) { > - wa_write_or(wal, XEHP_L3SCQREG7, BLEND_FILL_CACHING_OPT_DIS); > - wa_write_clr_set(wal, RT_CTRL, STACKID_CTRL, STACKID_CTRL_512); > + wa_mcr_write_or(wal, XEHP_L3SCQREG7, BLEND_FILL_CACHING_OPT_DIS); > + wa_mcr_write_clr_set(wal, RT_CTRL, STACKID_CTRL, STACKID_CTRL_512); > > /* > * This is also listed as Wa_22012654132 for certain DG2 > @@ -2754,10 +2841,10 @@ add_render_compute_tuning_settings(struct drm_i915_private *i915, > * back for verification on DG2 (due to Wa_14012342262), so > * we need to explicitly skip the readback. > */ > - wa_add(wal, GEN10_CACHE_MODE_SS, 0, > - _MASKED_BIT_ENABLE(ENABLE_PREFETCH_INTO_IC), > - 0 /* write-only, so skip validation */, > - true); > + wa_mcr_add(wal, GEN10_CACHE_MODE_SS, 0, > + _MASKED_BIT_ENABLE(ENABLE_PREFETCH_INTO_IC), > + 0 /* write-only, so skip validation */, > + true); > } > > /* > @@ -2766,8 +2853,8 @@ add_render_compute_tuning_settings(struct drm_i915_private *i915, > * platforms. > */ > if (INTEL_INFO(i915)->tuning_thread_rr_after_dep) > - wa_masked_field_set(wal, GEN9_ROW_CHICKEN4, THREAD_EX_ARB_MODE, > - THREAD_EX_ARB_MODE_RR_AFTER_DEP); > + wa_mcr_masked_field_set(wal, GEN9_ROW_CHICKEN4, THREAD_EX_ARB_MODE, > + THREAD_EX_ARB_MODE_RR_AFTER_DEP); > } > > /* > @@ -2793,30 +2880,30 @@ general_render_compute_wa_init(struct intel_engine_cs *engine, struct i915_wa_li > > if (IS_XEHPSDV(i915)) { > /* Wa_1409954639 */ > - wa_masked_en(wal, > - GEN8_ROW_CHICKEN, > - SYSTOLIC_DOP_CLOCK_GATING_DIS); > + wa_mcr_masked_en(wal, > + GEN8_ROW_CHICKEN, > + SYSTOLIC_DOP_CLOCK_GATING_DIS); > > /* Wa_1607196519 */ > - wa_masked_en(wal, > - GEN9_ROW_CHICKEN4, > - GEN12_DISABLE_GRF_CLEAR); > + wa_mcr_masked_en(wal, > + GEN9_ROW_CHICKEN4, > + GEN12_DISABLE_GRF_CLEAR); > > /* Wa_14010670810:xehpsdv */ > - wa_write_or(wal, XEHP_L3NODEARBCFG, XEHP_LNESPARE); > + wa_mcr_write_or(wal, XEHP_L3NODEARBCFG, XEHP_LNESPARE); > > /* Wa_14010449647:xehpsdv */ > - wa_masked_en(wal, GEN8_HALF_SLICE_CHICKEN1, > - GEN7_PSD_SINGLE_PORT_DISPATCH_ENABLE); > + wa_mcr_masked_en(wal, GEN8_HALF_SLICE_CHICKEN1, > + GEN7_PSD_SINGLE_PORT_DISPATCH_ENABLE); > > /* Wa_18011725039:xehpsdv */ > if (IS_XEHPSDV_GRAPHICS_STEP(i915, STEP_A1, STEP_B0)) { > - wa_masked_dis(wal, MLTICTXCTL, TDONRENDER); > - wa_write_or(wal, L3SQCREG1_CCS0, FLUSHALLNONCOH); > + wa_mcr_masked_dis(wal, MLTICTXCTL, TDONRENDER); > + wa_mcr_write_or(wal, L3SQCREG1_CCS0, FLUSHALLNONCOH); > } > > /* Wa_14012362059:xehpsdv */ > - wa_write_or(wal, XEHP_MERT_MOD_CTRL, FORCE_MISS_FTLB); > + wa_mcr_write_or(wal, XEHP_MERT_MOD_CTRL, FORCE_MISS_FTLB); > > /* Wa_14014368820:xehpsdv */ > wa_write_or(wal, GEN12_GAMCNTRL_CTRL, INVALIDATION_BROADCAST_MODE_DIS | > @@ -2825,19 +2912,19 @@ general_render_compute_wa_init(struct intel_engine_cs *engine, struct i915_wa_li > > if (IS_DG2(i915) || IS_PONTEVECCHIO(i915)) { > /* Wa_14015227452:dg2,pvc */ > - wa_masked_en(wal, GEN9_ROW_CHICKEN4, XEHP_DIS_BBL_SYSPIPE); > + wa_mcr_masked_en(wal, GEN9_ROW_CHICKEN4, XEHP_DIS_BBL_SYSPIPE); > > /* Wa_22014226127:dg2,pvc */ > - wa_write_or(wal, LSC_CHICKEN_BIT_0, DISABLE_D8_D16_COASLESCE); > + wa_mcr_write_or(wal, LSC_CHICKEN_BIT_0, DISABLE_D8_D16_COASLESCE); > > /* Wa_16015675438:dg2,pvc */ > wa_masked_en(wal, FF_SLICE_CS_CHICKEN2, GEN12_PERF_FIX_BALANCING_CFE_DISABLE); > > /* Wa_18018781329:dg2,pvc */ > - wa_write_or(wal, RENDER_MOD_CTRL, FORCE_MISS_FTLB); > - wa_write_or(wal, COMP_MOD_CTRL, FORCE_MISS_FTLB); > - wa_write_or(wal, VDBX_MOD_CTRL, FORCE_MISS_FTLB); > - wa_write_or(wal, VEBX_MOD_CTRL, FORCE_MISS_FTLB); > + wa_mcr_write_or(wal, RENDER_MOD_CTRL, FORCE_MISS_FTLB); > + wa_mcr_write_or(wal, COMP_MOD_CTRL, FORCE_MISS_FTLB); > + wa_mcr_write_or(wal, VDBX_MOD_CTRL, FORCE_MISS_FTLB); > + wa_mcr_write_or(wal, VEBX_MOD_CTRL, FORCE_MISS_FTLB); > } > > if (IS_DG2(i915)) { > @@ -2845,7 +2932,7 @@ general_render_compute_wa_init(struct intel_engine_cs *engine, struct i915_wa_li > * Wa_16011620976:dg2_g11 > * Wa_22015475538:dg2 > */ > - wa_write_or(wal, LSC_CHICKEN_BIT_0_UDW, DIS_CHAIN_2XSIMD8); > + wa_mcr_write_or(wal, LSC_CHICKEN_BIT_0_UDW, DIS_CHAIN_2XSIMD8); > } > } > > diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds_types.h b/drivers/gpu/drm/i915/gt/intel_workarounds_types.h > index 8a4b6de4e754..f05b37e56fa9 100644 > --- a/drivers/gpu/drm/i915/gt/intel_workarounds_types.h > +++ b/drivers/gpu/drm/i915/gt/intel_workarounds_types.h > @@ -15,7 +15,9 @@ struct i915_wa { > u32 clr; > u32 set; > u32 read; > - bool masked_reg; > + > + u32 masked_reg:1; > + u32 is_mcr:1; > }; > > struct i915_wa_list { > -- > 2.37.3 >
diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c index 96b9f02a2284..7671994d5b7a 100644 --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c @@ -166,12 +166,33 @@ static void wa_add(struct i915_wa_list *wal, i915_reg_t reg, _wa_add(wal, &wa); } +static void wa_mcr_add(struct i915_wa_list *wal, i915_reg_t reg, + u32 clear, u32 set, u32 read_mask, bool masked_reg) +{ + struct i915_wa wa = { + .reg = reg, + .clr = clear, + .set = set, + .read = read_mask, + .masked_reg = masked_reg, + .is_mcr = 1, + }; + + _wa_add(wal, &wa); +} + static void wa_write_clr_set(struct i915_wa_list *wal, i915_reg_t reg, u32 clear, u32 set) { wa_add(wal, reg, clear, set, clear, false); } +static void +wa_mcr_write_clr_set(struct i915_wa_list *wal, i915_reg_t reg, u32 clear, u32 set) +{ + wa_mcr_add(wal, reg, clear, set, clear, false); +} + static void wa_write(struct i915_wa_list *wal, i915_reg_t reg, u32 set) { @@ -184,12 +205,24 @@ wa_write_or(struct i915_wa_list *wal, i915_reg_t reg, u32 set) wa_write_clr_set(wal, reg, set, set); } +static void +wa_mcr_write_or(struct i915_wa_list *wal, i915_reg_t reg, u32 set) +{ + wa_mcr_write_clr_set(wal, reg, set, set); +} + static void wa_write_clr(struct i915_wa_list *wal, i915_reg_t reg, u32 clr) { wa_write_clr_set(wal, reg, clr, 0); } +static void +wa_mcr_write_clr(struct i915_wa_list *wal, i915_reg_t reg, u32 clr) +{ + wa_mcr_write_clr_set(wal, reg, clr, 0); +} + /* * WA operations on "masked register". A masked register has the upper 16 bits * documented as "masked" in b-spec. Its purpose is to allow writing to just a @@ -207,12 +240,24 @@ wa_masked_en(struct i915_wa_list *wal, i915_reg_t reg, u32 val) wa_add(wal, reg, 0, _MASKED_BIT_ENABLE(val), val, true); } +static void +wa_mcr_masked_en(struct i915_wa_list *wal, i915_reg_t reg, u32 val) +{ + wa_mcr_add(wal, reg, 0, _MASKED_BIT_ENABLE(val), val, true); +} + static void wa_masked_dis(struct i915_wa_list *wal, i915_reg_t reg, u32 val) { wa_add(wal, reg, 0, _MASKED_BIT_DISABLE(val), val, true); } +static void +wa_mcr_masked_dis(struct i915_wa_list *wal, i915_reg_t reg, u32 val) +{ + wa_mcr_add(wal, reg, 0, _MASKED_BIT_DISABLE(val), val, true); +} + static void wa_masked_field_set(struct i915_wa_list *wal, i915_reg_t reg, u32 mask, u32 val) @@ -220,6 +265,13 @@ wa_masked_field_set(struct i915_wa_list *wal, i915_reg_t reg, wa_add(wal, reg, 0, _MASKED_FIELD(mask, val), mask, true); } +static void +wa_mcr_masked_field_set(struct i915_wa_list *wal, i915_reg_t reg, + u32 mask, u32 val) +{ + wa_mcr_add(wal, reg, 0, _MASKED_FIELD(mask, val), mask, true); +} + static void gen6_ctx_workarounds_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) { @@ -241,8 +293,8 @@ static void gen8_ctx_workarounds_init(struct intel_engine_cs *engine, wa_masked_en(wal, RING_MI_MODE(RENDER_RING_BASE), ASYNC_FLIP_PERF_DISABLE); /* WaDisablePartialInstShootdown:bdw,chv */ - wa_masked_en(wal, GEN8_ROW_CHICKEN, - PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE); + wa_mcr_masked_en(wal, GEN8_ROW_CHICKEN, + PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE); /* Use Force Non-Coherent whenever executing a 3D context. This is a * workaround for a possible hang in the unlikely event a TLB @@ -288,18 +340,18 @@ static void bdw_ctx_workarounds_init(struct intel_engine_cs *engine, gen8_ctx_workarounds_init(engine, wal); /* WaDisableThreadStallDopClockGating:bdw (pre-production) */ - wa_masked_en(wal, GEN8_ROW_CHICKEN, STALL_DOP_GATING_DISABLE); + wa_mcr_masked_en(wal, GEN8_ROW_CHICKEN, STALL_DOP_GATING_DISABLE); /* WaDisableDopClockGating:bdw * * Also see the related UCGTCL1 write in bdw_init_clock_gating() * to disable EUTC clock gating. */ - wa_masked_en(wal, GEN8_ROW_CHICKEN2, - DOP_CLOCK_GATING_DISABLE); + wa_mcr_masked_en(wal, GEN8_ROW_CHICKEN2, + DOP_CLOCK_GATING_DISABLE); - wa_masked_en(wal, GEN8_HALF_SLICE_CHICKEN3, - GEN8_SAMPLER_POWER_BYPASS_DIS); + wa_mcr_masked_en(wal, GEN8_HALF_SLICE_CHICKEN3, + GEN8_SAMPLER_POWER_BYPASS_DIS); wa_masked_en(wal, HDC_CHICKEN0, /* WaForceContextSaveRestoreNonCoherent:bdw */ @@ -314,7 +366,7 @@ static void chv_ctx_workarounds_init(struct intel_engine_cs *engine, gen8_ctx_workarounds_init(engine, wal); /* WaDisableThreadStallDopClockGating:chv */ - wa_masked_en(wal, GEN8_ROW_CHICKEN, STALL_DOP_GATING_DISABLE); + wa_mcr_masked_en(wal, GEN8_ROW_CHICKEN, STALL_DOP_GATING_DISABLE); /* Improve HiZ throughput on CHV. */ wa_masked_en(wal, HIZ_CHICKEN, CHV_HZ_8X8_MODE_IN_1X); @@ -333,21 +385,21 @@ static void gen9_ctx_workarounds_init(struct intel_engine_cs *engine, */ wa_masked_en(wal, COMMON_SLICE_CHICKEN2, GEN9_PBE_COMPRESSED_HASH_SELECTION); - wa_masked_en(wal, GEN9_HALF_SLICE_CHICKEN7, - GEN9_SAMPLER_HASH_COMPRESSED_READ_ADDR); + wa_mcr_masked_en(wal, GEN9_HALF_SLICE_CHICKEN7, + GEN9_SAMPLER_HASH_COMPRESSED_READ_ADDR); } /* WaClearFlowControlGpgpuContextSave:skl,bxt,kbl,glk,cfl */ /* WaDisablePartialInstShootdown:skl,bxt,kbl,glk,cfl */ - wa_masked_en(wal, GEN8_ROW_CHICKEN, - FLOW_CONTROL_ENABLE | - PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE); + wa_mcr_masked_en(wal, GEN8_ROW_CHICKEN, + FLOW_CONTROL_ENABLE | + PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE); /* WaEnableYV12BugFixInHalfSliceChicken7:skl,bxt,kbl,glk,cfl */ /* WaEnableSamplerGPGPUPreemptionSupport:skl,bxt,kbl,cfl */ - wa_masked_en(wal, GEN9_HALF_SLICE_CHICKEN7, - GEN9_ENABLE_YV12_BUGFIX | - GEN9_ENABLE_GPGPU_PREEMPTION); + wa_mcr_masked_en(wal, GEN9_HALF_SLICE_CHICKEN7, + GEN9_ENABLE_YV12_BUGFIX | + GEN9_ENABLE_GPGPU_PREEMPTION); /* Wa4x4STCOptimizationDisable:skl,bxt,kbl,glk,cfl */ /* WaDisablePartialResolveInVc:skl,bxt,kbl,cfl */ @@ -356,8 +408,8 @@ static void gen9_ctx_workarounds_init(struct intel_engine_cs *engine, GEN9_PARTIAL_RESOLVE_IN_VC_DISABLE); /* WaCcsTlbPrefetchDisable:skl,bxt,kbl,glk,cfl */ - wa_masked_dis(wal, GEN9_HALF_SLICE_CHICKEN5, - GEN9_CCS_TLB_PREFETCH_ENABLE); + wa_mcr_masked_dis(wal, GEN9_HALF_SLICE_CHICKEN5, + GEN9_CCS_TLB_PREFETCH_ENABLE); /* WaForceContextSaveRestoreNonCoherent:skl,bxt,kbl,cfl */ wa_masked_en(wal, HDC_CHICKEN0, @@ -386,11 +438,11 @@ static void gen9_ctx_workarounds_init(struct intel_engine_cs *engine, IS_KABYLAKE(i915) || IS_COFFEELAKE(i915) || IS_COMETLAKE(i915)) - wa_masked_en(wal, GEN8_HALF_SLICE_CHICKEN3, - GEN8_SAMPLER_POWER_BYPASS_DIS); + wa_mcr_masked_en(wal, GEN8_HALF_SLICE_CHICKEN3, + GEN8_SAMPLER_POWER_BYPASS_DIS); /* WaDisableSTUnitPowerOptimization:skl,bxt,kbl,glk,cfl */ - wa_masked_en(wal, HALF_SLICE_CHICKEN2, GEN8_ST_PO_DISABLE); + wa_mcr_masked_en(wal, HALF_SLICE_CHICKEN2, GEN8_ST_PO_DISABLE); /* * Supporting preemption with fine-granularity requires changes in the @@ -469,8 +521,8 @@ static void bxt_ctx_workarounds_init(struct intel_engine_cs *engine, gen9_ctx_workarounds_init(engine, wal); /* WaDisableThreadStallDopClockGating:bxt */ - wa_masked_en(wal, GEN8_ROW_CHICKEN, - STALL_DOP_GATING_DISABLE); + wa_mcr_masked_en(wal, GEN8_ROW_CHICKEN, + STALL_DOP_GATING_DISABLE); /* WaToEnableHwFixForPushConstHWBug:bxt */ wa_masked_en(wal, COMMON_SLICE_CHICKEN2, @@ -490,8 +542,8 @@ static void kbl_ctx_workarounds_init(struct intel_engine_cs *engine, GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION); /* WaDisableSbeCacheDispatchPortSharing:kbl */ - wa_masked_en(wal, GEN8_HALF_SLICE_CHICKEN1, - GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE); + wa_mcr_masked_en(wal, GEN8_HALF_SLICE_CHICKEN1, + GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE); } static void glk_ctx_workarounds_init(struct intel_engine_cs *engine, @@ -514,8 +566,8 @@ static void cfl_ctx_workarounds_init(struct intel_engine_cs *engine, GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION); /* WaDisableSbeCacheDispatchPortSharing:cfl */ - wa_masked_en(wal, GEN8_HALF_SLICE_CHICKEN1, - GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE); + wa_mcr_masked_en(wal, GEN8_HALF_SLICE_CHICKEN1, + GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE); } static void icl_ctx_workarounds_init(struct intel_engine_cs *engine, @@ -534,13 +586,13 @@ static void icl_ctx_workarounds_init(struct intel_engine_cs *engine, * (the register is whitelisted in hardware now, so UMDs can opt in * for coherency if they have a good reason). */ - wa_masked_en(wal, ICL_HDC_MODE, HDC_FORCE_NON_COHERENT); + wa_mcr_masked_en(wal, ICL_HDC_MODE, HDC_FORCE_NON_COHERENT); /* WaEnableFloatBlendOptimization:icl */ - wa_add(wal, GEN10_CACHE_MODE_SS, 0, - _MASKED_BIT_ENABLE(FLOAT_BLEND_OPTIMIZATION_ENABLE), - 0 /* write-only, so skip validation */, - true); + wa_mcr_add(wal, GEN10_CACHE_MODE_SS, 0, + _MASKED_BIT_ENABLE(FLOAT_BLEND_OPTIMIZATION_ENABLE), + 0 /* write-only, so skip validation */, + true); /* WaDisableGPGPUMidThreadPreemption:icl */ wa_masked_field_set(wal, GEN8_CS_CHICKEN1, @@ -548,8 +600,8 @@ static void icl_ctx_workarounds_init(struct intel_engine_cs *engine, GEN9_PREEMPT_GPGPU_THREAD_GROUP_LEVEL); /* allow headerless messages for preemptible GPGPU context */ - wa_masked_en(wal, GEN10_SAMPLER_MODE, - GEN11_SAMPLER_ENABLE_HEADLESS_MSG); + wa_mcr_masked_en(wal, GEN10_SAMPLER_MODE, + GEN11_SAMPLER_ENABLE_HEADLESS_MSG); /* Wa_1604278689:icl,ehl */ wa_write(wal, IVB_FBC_RT_BASE, 0xFFFFFFFF & ~ILK_FBC_RT_VALID); @@ -558,7 +610,7 @@ static void icl_ctx_workarounds_init(struct intel_engine_cs *engine, 0xFFFFFFFF); /* Wa_1406306137:icl,ehl */ - wa_masked_en(wal, GEN9_ROW_CHICKEN4, GEN11_DIS_PICK_2ND_EU); + wa_mcr_masked_en(wal, GEN9_ROW_CHICKEN4, GEN11_DIS_PICK_2ND_EU); } /* @@ -569,13 +621,13 @@ static void dg2_ctx_gt_tuning_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) { wa_masked_en(wal, CHICKEN_RASTER_2, TBIMR_FAST_CLIP); - wa_write_clr_set(wal, XEHP_L3SQCREG5, L3_PWM_TIMER_INIT_VAL_MASK, - REG_FIELD_PREP(L3_PWM_TIMER_INIT_VAL_MASK, 0x7f)); - wa_add(wal, - XEHP_FF_MODE2, - FF_MODE2_TDS_TIMER_MASK, - FF_MODE2_TDS_TIMER_128, - 0, false); + wa_mcr_write_clr_set(wal, XEHP_L3SQCREG5, L3_PWM_TIMER_INIT_VAL_MASK, + REG_FIELD_PREP(L3_PWM_TIMER_INIT_VAL_MASK, 0x7f)); + wa_mcr_add(wal, + XEHP_FF_MODE2, + FF_MODE2_TDS_TIMER_MASK, + FF_MODE2_TDS_TIMER_128, + 0, false); } /* @@ -664,27 +716,27 @@ static void dg2_ctx_workarounds_init(struct intel_engine_cs *engine, /* Wa_16011186671:dg2_g11 */ if (IS_DG2_GRAPHICS_STEP(engine->i915, G11, STEP_A0, STEP_B0)) { - wa_masked_dis(wal, VFLSKPD, DIS_MULT_MISS_RD_SQUASH); - wa_masked_en(wal, VFLSKPD, DIS_OVER_FETCH_CACHE); + wa_mcr_masked_dis(wal, VFLSKPD, DIS_MULT_MISS_RD_SQUASH); + wa_mcr_masked_en(wal, VFLSKPD, DIS_OVER_FETCH_CACHE); } if (IS_DG2_GRAPHICS_STEP(engine->i915, G10, STEP_A0, STEP_B0)) { /* Wa_14010469329:dg2_g10 */ - wa_masked_en(wal, XEHP_COMMON_SLICE_CHICKEN3, - XEHP_DUAL_SIMD8_SEQ_MERGE_DISABLE); + wa_mcr_masked_en(wal, XEHP_COMMON_SLICE_CHICKEN3, + XEHP_DUAL_SIMD8_SEQ_MERGE_DISABLE); /* * Wa_22010465075:dg2_g10 * Wa_22010613112:dg2_g10 * Wa_14010698770:dg2_g10 */ - wa_masked_en(wal, XEHP_COMMON_SLICE_CHICKEN3, - GEN12_DISABLE_CPS_AWARE_COLOR_PIPE); + wa_mcr_masked_en(wal, XEHP_COMMON_SLICE_CHICKEN3, + GEN12_DISABLE_CPS_AWARE_COLOR_PIPE); } /* Wa_16013271637:dg2 */ - wa_masked_en(wal, XEHP_SLICE_COMMON_ECO_CHICKEN1, - MSC_MSAA_REODER_BUF_BYPASS_DISABLE); + wa_mcr_masked_en(wal, XEHP_SLICE_COMMON_ECO_CHICKEN1, + MSC_MSAA_REODER_BUF_BYPASS_DISABLE); /* Wa_14014947963:dg2 */ if (IS_DG2_GRAPHICS_STEP(engine->i915, G10, STEP_B0, STEP_FOREVER) || @@ -1264,9 +1316,9 @@ icl_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal) PSDUNIT_CLKGATE_DIS); /* Wa_1406680159:icl,ehl */ - wa_write_or(wal, - GEN11_SUBSLICE_UNIT_LEVEL_CLKGATE, - GWUNIT_CLKGATE_DIS); + wa_mcr_write_or(wal, + GEN11_SUBSLICE_UNIT_LEVEL_CLKGATE, + GWUNIT_CLKGATE_DIS); /* Wa_1607087056:icl,ehl,jsl */ if (IS_ICELAKE(i915) || @@ -1279,7 +1331,7 @@ icl_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal) * This is not a documented workaround, but rather an optimization * to reduce sampler power. */ - wa_write_clr(wal, GEN10_DFR_RATIO_EN_AND_CHICKEN, DFR_DISABLE); + wa_mcr_write_clr(wal, GEN10_DFR_RATIO_EN_AND_CHICKEN, DFR_DISABLE); } /* @@ -1313,7 +1365,7 @@ gen12_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal) wa_14011060649(gt, wal); /* Wa_14011059788:tgl,rkl,adl-s,dg1,adl-p */ - wa_write_or(wal, GEN10_DFR_RATIO_EN_AND_CHICKEN, DFR_DISABLE); + wa_mcr_write_or(wal, GEN10_DFR_RATIO_EN_AND_CHICKEN, DFR_DISABLE); } static void @@ -1325,9 +1377,9 @@ tgl_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal) /* Wa_1409420604:tgl */ if (IS_TGL_UY_GRAPHICS_STEP(i915, STEP_A0, STEP_B0)) - wa_write_or(wal, - SUBSLICE_UNIT_LEVEL_CLKGATE2, - CPSSUNIT_CLKGATE_DIS); + wa_mcr_write_or(wal, + SUBSLICE_UNIT_LEVEL_CLKGATE2, + CPSSUNIT_CLKGATE_DIS); /* Wa_1607087056:tgl also know as BUG:1409180338 */ if (IS_TGL_UY_GRAPHICS_STEP(i915, STEP_A0, STEP_B0)) @@ -1356,9 +1408,9 @@ dg1_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal) /* Wa_1409420604:dg1 */ if (IS_DG1(i915)) - wa_write_or(wal, - SUBSLICE_UNIT_LEVEL_CLKGATE2, - CPSSUNIT_CLKGATE_DIS); + wa_mcr_write_or(wal, + SUBSLICE_UNIT_LEVEL_CLKGATE2, + CPSSUNIT_CLKGATE_DIS); /* Wa_1408615072:dg1 */ /* Empirical testing shows this register is unaffected by engine reset. */ @@ -1375,7 +1427,7 @@ xehpsdv_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal) xehp_init_mcr(gt, wal); /* Wa_1409757795:xehpsdv */ - wa_write_or(wal, SCCGCTL94DC, CG3DDISURB); + wa_mcr_write_or(wal, SCCGCTL94DC, CG3DDISURB); /* Wa_16011155590:xehpsdv */ if (IS_XEHPSDV_GRAPHICS_STEP(i915, STEP_A0, STEP_B0)) @@ -1455,8 +1507,8 @@ dg2_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal) CG3DDISCFEG_CLKGATE_DIS); /* Wa_14011006942:dg2 */ - wa_write_or(wal, GEN11_SUBSLICE_UNIT_LEVEL_CLKGATE, - DSS_ROUTER_CLKGATE_DIS); + wa_mcr_write_or(wal, GEN11_SUBSLICE_UNIT_LEVEL_CLKGATE, + DSS_ROUTER_CLKGATE_DIS); } if (IS_DG2_GRAPHICS_STEP(gt->i915, G10, STEP_A0, STEP_B0)) { @@ -1467,7 +1519,7 @@ dg2_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal) wa_write_or(wal, UNSLCGCTL9444, LTCDD_CLKGATE_DIS); /* Wa_14011371254:dg2_g10 */ - wa_write_or(wal, XEHP_SLICE_UNIT_LEVEL_CLKGATE, NODEDSS_CLKGATE_DIS); + wa_mcr_write_or(wal, XEHP_SLICE_UNIT_LEVEL_CLKGATE, NODEDSS_CLKGATE_DIS); /* Wa_14011431319:dg2_g10 */ wa_write_or(wal, UNSLCGCTL9440, GAMTLBOACS_CLKGATE_DIS | @@ -1503,21 +1555,21 @@ dg2_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal) GAMEDIA_CLKGATE_DIS); /* Wa_14011028019:dg2_g10 */ - wa_write_or(wal, SSMCGCTL9530, RTFUNIT_CLKGATE_DIS); + wa_mcr_write_or(wal, SSMCGCTL9530, RTFUNIT_CLKGATE_DIS); } /* Wa_14014830051:dg2 */ - wa_write_clr(wal, SARB_CHICKEN1, COMP_CKN_IN); + wa_mcr_write_clr(wal, SARB_CHICKEN1, COMP_CKN_IN); /* * The following are not actually "workarounds" but rather * recommended tuning settings documented in the bspec's * performance guide section. */ - wa_write_or(wal, XEHP_SQCM, EN_32B_ACCESS); + wa_mcr_write_or(wal, XEHP_SQCM, EN_32B_ACCESS); /* Wa_14015795083 */ - wa_write_clr(wal, GEN8_MISCCPCTL, GEN12_DOP_CLOCK_GATE_RENDER_ENABLE); + wa_mcr_write_clr(wal, GEN8_MISCCPCTL, GEN12_DOP_CLOCK_GATE_RENDER_ENABLE); } static void @@ -1526,7 +1578,7 @@ pvc_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal) pvc_init_mcr(gt, wal); /* Wa_14015795083 */ - wa_write_clr(wal, GEN8_MISCCPCTL, GEN12_DOP_CLOCK_GATE_RENDER_ENABLE); + wa_mcr_write_clr(wal, GEN8_MISCCPCTL, GEN12_DOP_CLOCK_GATE_RENDER_ENABLE); } static void @@ -1638,14 +1690,25 @@ wa_list_apply(struct intel_gt *gt, const struct i915_wa_list *wal) u32 val, old = 0; /* open-coded rmw due to steering */ - old = wa->clr ? intel_gt_mcr_read_any_fw(gt, wa->reg) : 0; + if (wa->clr) + old = wa->is_mcr ? + intel_gt_mcr_read_any_fw(gt, wa->reg) : + intel_uncore_read_fw(uncore, wa->reg); val = (old & ~wa->clr) | wa->set; - if (val != old || !wa->clr) - intel_uncore_write_fw(uncore, wa->reg, val); + if (val != old || !wa->clr) { + if (wa->is_mcr) + intel_gt_mcr_multicast_write_fw(gt, wa->reg, val); + else + intel_uncore_write_fw(uncore, wa->reg, val); + } + + if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)) { + u32 val = wa->is_mcr ? + intel_gt_mcr_read_any_fw(gt, wa->reg) : + intel_uncore_read_fw(uncore, wa->reg); - if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)) - wa_verify(wa, intel_gt_mcr_read_any_fw(gt, wa->reg), - wal->name, "application"); + wa_verify(wa, val, wal->name, "application"); + } } intel_uncore_forcewake_put__locked(uncore, fw); @@ -1674,8 +1737,9 @@ static bool wa_list_verify(struct intel_gt *gt, intel_uncore_forcewake_get__locked(uncore, fw); for (i = 0, wa = wal->list; i < wal->count; i++, wa++) - ok &= wa_verify(wa, - intel_gt_mcr_read_any_fw(gt, wa->reg), + ok &= wa_verify(wa, wa->is_mcr ? + intel_gt_mcr_read_any_fw(gt, wa->reg) : + intel_uncore_read_fw(uncore, wa->reg), wal->name, from); intel_uncore_forcewake_put__locked(uncore, fw); @@ -1721,12 +1785,36 @@ whitelist_reg_ext(struct i915_wa_list *wal, i915_reg_t reg, u32 flags) _wa_add(wal, &wa); } +static void +whitelist_mcr_reg_ext(struct i915_wa_list *wal, i915_reg_t reg, u32 flags) +{ + struct i915_wa wa = { + .reg = reg, + .is_mcr = 1, + }; + + if (GEM_DEBUG_WARN_ON(wal->count >= RING_MAX_NONPRIV_SLOTS)) + return; + + if (GEM_DEBUG_WARN_ON(!is_nonpriv_flags_valid(flags))) + return; + + wa.reg.reg |= flags; + _wa_add(wal, &wa); +} + static void whitelist_reg(struct i915_wa_list *wal, i915_reg_t reg) { whitelist_reg_ext(wal, reg, RING_FORCE_TO_NONPRIV_ACCESS_RW); } +static void +whitelist_mcr_reg(struct i915_wa_list *wal, i915_reg_t reg) +{ + whitelist_mcr_reg_ext(wal, reg, RING_FORCE_TO_NONPRIV_ACCESS_RW); +} + static void gen9_whitelist_build(struct i915_wa_list *w) { /* WaVFEStateAfterPipeControlwithMediaStateClear:skl,bxt,glk,cfl */ @@ -1752,7 +1840,7 @@ static void skl_whitelist_build(struct intel_engine_cs *engine) gen9_whitelist_build(w); /* WaDisableLSQCROPERFforOCL:skl */ - whitelist_reg(w, GEN8_L3SQCREG4); + whitelist_mcr_reg(w, GEN8_L3SQCREG4); } static void bxt_whitelist_build(struct intel_engine_cs *engine) @@ -1773,7 +1861,7 @@ static void kbl_whitelist_build(struct intel_engine_cs *engine) gen9_whitelist_build(w); /* WaDisableLSQCROPERFforOCL:kbl */ - whitelist_reg(w, GEN8_L3SQCREG4); + whitelist_mcr_reg(w, GEN8_L3SQCREG4); } static void glk_whitelist_build(struct intel_engine_cs *engine) @@ -1838,10 +1926,10 @@ static void icl_whitelist_build(struct intel_engine_cs *engine) switch (engine->class) { case RENDER_CLASS: /* WaAllowUMDToModifyHalfSliceChicken7:icl */ - whitelist_reg(w, GEN9_HALF_SLICE_CHICKEN7); + whitelist_mcr_reg(w, GEN9_HALF_SLICE_CHICKEN7); /* WaAllowUMDToModifySamplerMode:icl */ - whitelist_reg(w, GEN10_SAMPLER_MODE); + whitelist_mcr_reg(w, GEN10_SAMPLER_MODE); /* WaEnableStateCacheRedirectToCS:icl */ whitelist_reg(w, GEN9_SLICE_COMMON_ECO_CHICKEN1); @@ -2117,21 +2205,21 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) if (IS_DG2_GRAPHICS_STEP(i915, G11, STEP_A0, STEP_B0)) { /* Wa_14013392000:dg2_g11 */ - wa_masked_en(wal, GEN8_ROW_CHICKEN2, GEN12_ENABLE_LARGE_GRF_MODE); + wa_mcr_masked_en(wal, GEN8_ROW_CHICKEN2, GEN12_ENABLE_LARGE_GRF_MODE); } if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_B0, STEP_FOREVER) || IS_DG2_G11(i915) || IS_DG2_G12(i915)) { /* Wa_1509727124:dg2 */ - wa_masked_en(wal, GEN10_SAMPLER_MODE, - SC_DISABLE_POWER_OPTIMIZATION_EBB); + wa_mcr_masked_en(wal, GEN10_SAMPLER_MODE, + SC_DISABLE_POWER_OPTIMIZATION_EBB); } if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_A0, STEP_B0) || IS_DG2_GRAPHICS_STEP(i915, G11, STEP_A0, STEP_B0)) { /* Wa_14012419201:dg2 */ - wa_masked_en(wal, GEN9_ROW_CHICKEN4, - GEN12_DISABLE_HDR_PAST_PAYLOAD_HOLD_FIX); + wa_mcr_masked_en(wal, GEN9_ROW_CHICKEN4, + GEN12_DISABLE_HDR_PAST_PAYLOAD_HOLD_FIX); } if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_B0, STEP_C0) || @@ -2140,13 +2228,13 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) * Wa_22012826095:dg2 * Wa_22013059131:dg2 */ - wa_write_clr_set(wal, LSC_CHICKEN_BIT_0_UDW, - MAXREQS_PER_BANK, - REG_FIELD_PREP(MAXREQS_PER_BANK, 2)); + wa_mcr_write_clr_set(wal, LSC_CHICKEN_BIT_0_UDW, + MAXREQS_PER_BANK, + REG_FIELD_PREP(MAXREQS_PER_BANK, 2)); /* Wa_22013059131:dg2 */ - wa_write_or(wal, LSC_CHICKEN_BIT_0, - FORCE_1_SUB_MESSAGE_PER_FRAGMENT); + wa_mcr_write_or(wal, LSC_CHICKEN_BIT_0, + FORCE_1_SUB_MESSAGE_PER_FRAGMENT); } /* Wa_1308578152:dg2_g10 when first gslice is fused off */ @@ -2159,19 +2247,19 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_B0, STEP_FOREVER) || IS_DG2_G11(i915) || IS_DG2_G12(i915)) { /* Wa_22013037850:dg2 */ - wa_write_or(wal, LSC_CHICKEN_BIT_0_UDW, - DISABLE_128B_EVICTION_COMMAND_UDW); + wa_mcr_write_or(wal, LSC_CHICKEN_BIT_0_UDW, + DISABLE_128B_EVICTION_COMMAND_UDW); /* Wa_22012856258:dg2 */ - wa_masked_en(wal, GEN8_ROW_CHICKEN2, - GEN12_DISABLE_READ_SUPPRESSION); + wa_mcr_masked_en(wal, GEN8_ROW_CHICKEN2, + GEN12_DISABLE_READ_SUPPRESSION); /* * Wa_22010960976:dg2 * Wa_14013347512:dg2 */ - wa_masked_dis(wal, XEHP_HDC_CHICKEN0, - LSC_L1_FLUSH_CTL_3D_DATAPORT_FLUSH_EVENTS_MASK); + wa_mcr_masked_dis(wal, XEHP_HDC_CHICKEN0, + LSC_L1_FLUSH_CTL_3D_DATAPORT_FLUSH_EVENTS_MASK); } if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_A0, STEP_B0)) { @@ -2179,8 +2267,8 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) * Wa_1608949956:dg2_g10 * Wa_14010198302:dg2_g10 */ - wa_masked_en(wal, GEN8_ROW_CHICKEN, - MDQ_ARBITRATION_MODE | UGM_BACKUP_MODE); + wa_mcr_masked_en(wal, GEN8_ROW_CHICKEN, + MDQ_ARBITRATION_MODE | UGM_BACKUP_MODE); /* * Wa_14010918519:dg2_g10 @@ -2188,31 +2276,31 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) * LSC_CHICKEN_BIT_0 always reads back as 0 is this stepping, * so ignoring verification. */ - wa_add(wal, LSC_CHICKEN_BIT_0_UDW, 0, - FORCE_SLM_FENCE_SCOPE_TO_TILE | FORCE_UGM_FENCE_SCOPE_TO_TILE, - 0, false); + wa_mcr_add(wal, LSC_CHICKEN_BIT_0_UDW, 0, + FORCE_SLM_FENCE_SCOPE_TO_TILE | FORCE_UGM_FENCE_SCOPE_TO_TILE, + 0, false); } if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_A0, STEP_B0)) { /* Wa_22010430635:dg2 */ - wa_masked_en(wal, - GEN9_ROW_CHICKEN4, - GEN12_DISABLE_GRF_CLEAR); + wa_mcr_masked_en(wal, + GEN9_ROW_CHICKEN4, + GEN12_DISABLE_GRF_CLEAR); /* Wa_14010648519:dg2 */ - wa_write_or(wal, XEHP_L3NODEARBCFG, XEHP_LNESPARE); + wa_mcr_write_or(wal, XEHP_L3NODEARBCFG, XEHP_LNESPARE); } /* Wa_14013202645:dg2 */ if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_B0, STEP_C0) || IS_DG2_GRAPHICS_STEP(i915, G11, STEP_A0, STEP_B0)) - wa_write_or(wal, RT_CTRL, DIS_NULL_QUERY); + wa_mcr_write_or(wal, RT_CTRL, DIS_NULL_QUERY); /* Wa_22012532006:dg2 */ if (IS_DG2_GRAPHICS_STEP(engine->i915, G10, STEP_A0, STEP_C0) || IS_DG2_GRAPHICS_STEP(engine->i915, G11, STEP_A0, STEP_B0)) - wa_masked_en(wal, GEN9_HALF_SLICE_CHICKEN7, - DG2_DISABLE_ROUND_ENABLE_ALLOW_FOR_SSLA); + wa_mcr_masked_en(wal, GEN9_HALF_SLICE_CHICKEN7, + DG2_DISABLE_ROUND_ENABLE_ALLOW_FOR_SSLA); if (IS_DG2_GRAPHICS_STEP(engine->i915, G10, STEP_A0, STEP_B0)) { /* Wa_14010680813:dg2_g10 */ @@ -2223,17 +2311,16 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) if (IS_DG2_GRAPHICS_STEP(engine->i915, G10, STEP_A0, STEP_B0) || IS_DG2_GRAPHICS_STEP(engine->i915, G11, STEP_A0, STEP_B0)) { /* Wa_14012362059:dg2 */ - wa_write_or(wal, XEHP_MERT_MOD_CTRL, FORCE_MISS_FTLB); + wa_mcr_write_or(wal, XEHP_MERT_MOD_CTRL, FORCE_MISS_FTLB); } if (IS_DG2_GRAPHICS_STEP(i915, G11, STEP_B0, STEP_FOREVER) || IS_DG2_G10(i915)) { /* Wa_22014600077:dg2 */ - wa_add(wal, GEN10_CACHE_MODE_SS, 0, - _MASKED_BIT_ENABLE(ENABLE_EU_COUNT_FOR_TDL_FLUSH), - 0 /* Wa_14012342262 :write-only reg, so skip - verification */, - true); + wa_mcr_add(wal, GEN10_CACHE_MODE_SS, 0, + _MASKED_BIT_ENABLE(ENABLE_EU_COUNT_FOR_TDL_FLUSH), + 0 /* Wa_14012342262 write-only reg, so skip verification */, + true); } if (IS_DG1_GRAPHICS_STEP(i915, STEP_A0, STEP_B0) || @@ -2260,7 +2347,7 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) if (IS_ALDERLAKE_P(i915) || IS_ALDERLAKE_S(i915) || IS_DG1(i915) || IS_ROCKETLAKE(i915) || IS_TIGERLAKE(i915)) { /* Wa_1606931601:tgl,rkl,dg1,adl-s,adl-p */ - wa_masked_en(wal, GEN8_ROW_CHICKEN2, GEN12_DISABLE_EARLY_READ); + wa_mcr_masked_en(wal, GEN8_ROW_CHICKEN2, GEN12_DISABLE_EARLY_READ); /* * Wa_1407928979:tgl A* @@ -2289,14 +2376,14 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) IS_DG1_GRAPHICS_STEP(i915, STEP_A0, STEP_B0) || IS_ROCKETLAKE(i915) || IS_TIGERLAKE(i915)) { /* Wa_1409804808:tgl,rkl,dg1[a0],adl-s,adl-p */ - wa_masked_en(wal, GEN8_ROW_CHICKEN2, - GEN12_PUSH_CONST_DEREF_HOLD_DIS); + wa_mcr_masked_en(wal, GEN8_ROW_CHICKEN2, + GEN12_PUSH_CONST_DEREF_HOLD_DIS); /* * Wa_1409085225:tgl * Wa_14010229206:tgl,rkl,dg1[a0],adl-s,adl-p */ - wa_masked_en(wal, GEN9_ROW_CHICKEN4, GEN12_DISABLE_TDL_PUSH); + wa_mcr_masked_en(wal, GEN9_ROW_CHICKEN4, GEN12_DISABLE_TDL_PUSH); } if (IS_DG1_GRAPHICS_STEP(i915, STEP_A0, STEP_B0) || @@ -2320,9 +2407,9 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) if (IS_DG1(i915) || IS_ROCKETLAKE(i915) || IS_TIGERLAKE(i915) || IS_ALDERLAKE_S(i915) || IS_ALDERLAKE_P(i915)) { /* Wa_1406941453:tgl,rkl,dg1,adl-s,adl-p */ - wa_masked_en(wal, - GEN10_SAMPLER_MODE, - ENABLE_SMALLPL); + wa_mcr_masked_en(wal, + GEN10_SAMPLER_MODE, + ENABLE_SMALLPL); } if (GRAPHICS_VER(i915) == 11) { @@ -2356,9 +2443,9 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) * Wa_1405733216:icl * Formerly known as WaDisableCleanEvicts */ - wa_write_or(wal, - GEN8_L3SQCREG4, - GEN11_LQSC_CLEAN_EVICT_DISABLE); + wa_mcr_write_or(wal, + GEN8_L3SQCREG4, + GEN11_LQSC_CLEAN_EVICT_DISABLE); /* Wa_1606682166:icl */ wa_write_or(wal, @@ -2366,10 +2453,10 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) GEN7_DISABLE_SAMPLER_PREFETCH); /* Wa_1409178092:icl */ - wa_write_clr_set(wal, - GEN11_SCRATCH2, - GEN11_COHERENT_PARTIAL_WRITE_MERGE_ENABLE, - 0); + wa_mcr_write_clr_set(wal, + GEN11_SCRATCH2, + GEN11_COHERENT_PARTIAL_WRITE_MERGE_ENABLE, + 0); /* WaEnable32PlaneMode:icl */ wa_masked_en(wal, GEN9_CSFE_CHICKEN1_RCS, @@ -2479,30 +2566,30 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) GEN9_PREEMPT_GPGPU_SYNC_SWITCH_DISABLE); /* WaEnableLbsSlaRetryTimerDecrement:skl,bxt,kbl,glk,cfl */ - wa_write_or(wal, - BDW_SCRATCH1, - GEN9_LBS_SLA_RETRY_TIMER_DECREMENT_ENABLE); + wa_mcr_write_or(wal, + BDW_SCRATCH1, + GEN9_LBS_SLA_RETRY_TIMER_DECREMENT_ENABLE); /* WaProgramL3SqcReg1DefaultForPerf:bxt,glk */ if (IS_GEN9_LP(i915)) - wa_write_clr_set(wal, - GEN8_L3SQCREG1, - L3_PRIO_CREDITS_MASK, - L3_GENERAL_PRIO_CREDITS(62) | - L3_HIGH_PRIO_CREDITS(2)); + wa_mcr_write_clr_set(wal, + GEN8_L3SQCREG1, + L3_PRIO_CREDITS_MASK, + L3_GENERAL_PRIO_CREDITS(62) | + L3_HIGH_PRIO_CREDITS(2)); /* WaOCLCoherentLineFlush:skl,bxt,kbl,cfl */ - wa_write_or(wal, - GEN8_L3SQCREG4, - GEN8_LQSC_FLUSH_COHERENT_LINES); + wa_mcr_write_or(wal, + GEN8_L3SQCREG4, + GEN8_LQSC_FLUSH_COHERENT_LINES); /* Disable atomics in L3 to prevent unrecoverable hangs */ wa_write_clr_set(wal, GEN9_SCRATCH_LNCF1, GEN9_LNCF_NONIA_COHERENT_ATOMICS_ENABLE, 0); - wa_write_clr_set(wal, GEN8_L3SQCREG4, - GEN8_LQSQ_NONIA_COHERENT_ATOMICS_ENABLE, 0); - wa_write_clr_set(wal, GEN9_SCRATCH1, - EVICTION_PERF_FIX_ENABLE, 0); + wa_mcr_write_clr_set(wal, GEN8_L3SQCREG4, + GEN8_LQSQ_NONIA_COHERENT_ATOMICS_ENABLE, 0); + wa_mcr_write_clr_set(wal, GEN9_SCRATCH1, + EVICTION_PERF_FIX_ENABLE, 0); } if (IS_HASWELL(i915)) { @@ -2716,7 +2803,7 @@ ccs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) { if (IS_PVC_CT_STEP(engine->i915, STEP_A0, STEP_C0)) { /* Wa_14014999345:pvc */ - wa_masked_en(wal, GEN10_CACHE_MODE_SS, DISABLE_ECC); + wa_mcr_masked_en(wal, GEN10_CACHE_MODE_SS, DISABLE_ECC); } } @@ -2742,8 +2829,8 @@ add_render_compute_tuning_settings(struct drm_i915_private *i915, } if (IS_DG2(i915)) { - wa_write_or(wal, XEHP_L3SCQREG7, BLEND_FILL_CACHING_OPT_DIS); - wa_write_clr_set(wal, RT_CTRL, STACKID_CTRL, STACKID_CTRL_512); + wa_mcr_write_or(wal, XEHP_L3SCQREG7, BLEND_FILL_CACHING_OPT_DIS); + wa_mcr_write_clr_set(wal, RT_CTRL, STACKID_CTRL, STACKID_CTRL_512); /* * This is also listed as Wa_22012654132 for certain DG2 @@ -2754,10 +2841,10 @@ add_render_compute_tuning_settings(struct drm_i915_private *i915, * back for verification on DG2 (due to Wa_14012342262), so * we need to explicitly skip the readback. */ - wa_add(wal, GEN10_CACHE_MODE_SS, 0, - _MASKED_BIT_ENABLE(ENABLE_PREFETCH_INTO_IC), - 0 /* write-only, so skip validation */, - true); + wa_mcr_add(wal, GEN10_CACHE_MODE_SS, 0, + _MASKED_BIT_ENABLE(ENABLE_PREFETCH_INTO_IC), + 0 /* write-only, so skip validation */, + true); } /* @@ -2766,8 +2853,8 @@ add_render_compute_tuning_settings(struct drm_i915_private *i915, * platforms. */ if (INTEL_INFO(i915)->tuning_thread_rr_after_dep) - wa_masked_field_set(wal, GEN9_ROW_CHICKEN4, THREAD_EX_ARB_MODE, - THREAD_EX_ARB_MODE_RR_AFTER_DEP); + wa_mcr_masked_field_set(wal, GEN9_ROW_CHICKEN4, THREAD_EX_ARB_MODE, + THREAD_EX_ARB_MODE_RR_AFTER_DEP); } /* @@ -2793,30 +2880,30 @@ general_render_compute_wa_init(struct intel_engine_cs *engine, struct i915_wa_li if (IS_XEHPSDV(i915)) { /* Wa_1409954639 */ - wa_masked_en(wal, - GEN8_ROW_CHICKEN, - SYSTOLIC_DOP_CLOCK_GATING_DIS); + wa_mcr_masked_en(wal, + GEN8_ROW_CHICKEN, + SYSTOLIC_DOP_CLOCK_GATING_DIS); /* Wa_1607196519 */ - wa_masked_en(wal, - GEN9_ROW_CHICKEN4, - GEN12_DISABLE_GRF_CLEAR); + wa_mcr_masked_en(wal, + GEN9_ROW_CHICKEN4, + GEN12_DISABLE_GRF_CLEAR); /* Wa_14010670810:xehpsdv */ - wa_write_or(wal, XEHP_L3NODEARBCFG, XEHP_LNESPARE); + wa_mcr_write_or(wal, XEHP_L3NODEARBCFG, XEHP_LNESPARE); /* Wa_14010449647:xehpsdv */ - wa_masked_en(wal, GEN8_HALF_SLICE_CHICKEN1, - GEN7_PSD_SINGLE_PORT_DISPATCH_ENABLE); + wa_mcr_masked_en(wal, GEN8_HALF_SLICE_CHICKEN1, + GEN7_PSD_SINGLE_PORT_DISPATCH_ENABLE); /* Wa_18011725039:xehpsdv */ if (IS_XEHPSDV_GRAPHICS_STEP(i915, STEP_A1, STEP_B0)) { - wa_masked_dis(wal, MLTICTXCTL, TDONRENDER); - wa_write_or(wal, L3SQCREG1_CCS0, FLUSHALLNONCOH); + wa_mcr_masked_dis(wal, MLTICTXCTL, TDONRENDER); + wa_mcr_write_or(wal, L3SQCREG1_CCS0, FLUSHALLNONCOH); } /* Wa_14012362059:xehpsdv */ - wa_write_or(wal, XEHP_MERT_MOD_CTRL, FORCE_MISS_FTLB); + wa_mcr_write_or(wal, XEHP_MERT_MOD_CTRL, FORCE_MISS_FTLB); /* Wa_14014368820:xehpsdv */ wa_write_or(wal, GEN12_GAMCNTRL_CTRL, INVALIDATION_BROADCAST_MODE_DIS | @@ -2825,19 +2912,19 @@ general_render_compute_wa_init(struct intel_engine_cs *engine, struct i915_wa_li if (IS_DG2(i915) || IS_PONTEVECCHIO(i915)) { /* Wa_14015227452:dg2,pvc */ - wa_masked_en(wal, GEN9_ROW_CHICKEN4, XEHP_DIS_BBL_SYSPIPE); + wa_mcr_masked_en(wal, GEN9_ROW_CHICKEN4, XEHP_DIS_BBL_SYSPIPE); /* Wa_22014226127:dg2,pvc */ - wa_write_or(wal, LSC_CHICKEN_BIT_0, DISABLE_D8_D16_COASLESCE); + wa_mcr_write_or(wal, LSC_CHICKEN_BIT_0, DISABLE_D8_D16_COASLESCE); /* Wa_16015675438:dg2,pvc */ wa_masked_en(wal, FF_SLICE_CS_CHICKEN2, GEN12_PERF_FIX_BALANCING_CFE_DISABLE); /* Wa_18018781329:dg2,pvc */ - wa_write_or(wal, RENDER_MOD_CTRL, FORCE_MISS_FTLB); - wa_write_or(wal, COMP_MOD_CTRL, FORCE_MISS_FTLB); - wa_write_or(wal, VDBX_MOD_CTRL, FORCE_MISS_FTLB); - wa_write_or(wal, VEBX_MOD_CTRL, FORCE_MISS_FTLB); + wa_mcr_write_or(wal, RENDER_MOD_CTRL, FORCE_MISS_FTLB); + wa_mcr_write_or(wal, COMP_MOD_CTRL, FORCE_MISS_FTLB); + wa_mcr_write_or(wal, VDBX_MOD_CTRL, FORCE_MISS_FTLB); + wa_mcr_write_or(wal, VEBX_MOD_CTRL, FORCE_MISS_FTLB); } if (IS_DG2(i915)) { @@ -2845,7 +2932,7 @@ general_render_compute_wa_init(struct intel_engine_cs *engine, struct i915_wa_li * Wa_16011620976:dg2_g11 * Wa_22015475538:dg2 */ - wa_write_or(wal, LSC_CHICKEN_BIT_0_UDW, DIS_CHAIN_2XSIMD8); + wa_mcr_write_or(wal, LSC_CHICKEN_BIT_0_UDW, DIS_CHAIN_2XSIMD8); } } diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds_types.h b/drivers/gpu/drm/i915/gt/intel_workarounds_types.h index 8a4b6de4e754..f05b37e56fa9 100644 --- a/drivers/gpu/drm/i915/gt/intel_workarounds_types.h +++ b/drivers/gpu/drm/i915/gt/intel_workarounds_types.h @@ -15,7 +15,9 @@ struct i915_wa { u32 clr; u32 set; u32 read; - bool masked_reg; + + u32 masked_reg:1; + u32 is_mcr:1; }; struct i915_wa_list {
Let's be more explicit about which of our workarounds are updating MCR registers. Signed-off-by: Matt Roper <matthew.d.roper@intel.com> --- drivers/gpu/drm/i915/gt/intel_workarounds.c | 433 +++++++++++------- .../gpu/drm/i915/gt/intel_workarounds_types.h | 4 +- 2 files changed, 263 insertions(+), 174 deletions(-)