@@ -5513,6 +5513,7 @@ int i915_gem_init(struct drm_i915_private *dev_priv)
* FIXME: break up the workarounds and apply them at the right time!
*/
intel_init_clock_gating(dev_priv);
+ intel_ctx_workarounds_apply(dev_priv);
ret = __intel_engines_record_defaults(dev_priv);
if (ret)
@@ -470,16 +470,11 @@ static bool needs_preempt_context(struct drm_i915_private *i915)
int i915_gem_contexts_init(struct drm_i915_private *dev_priv)
{
struct i915_gem_context *ctx;
- int ret;
/* Reassure ourselves we are only called once */
GEM_BUG_ON(dev_priv->kernel_context);
GEM_BUG_ON(dev_priv->preempt_context);
- ret = intel_ctx_workarounds_init(dev_priv);
- if (ret)
- return ret;
-
INIT_LIST_HEAD(&dev_priv->contexts.list);
INIT_WORK(&dev_priv->contexts.free_work, contexts_free_worker);
init_llist_head(&dev_priv->contexts.free_list);
@@ -2283,10 +2283,6 @@ static int gen8_init_rcs_context(struct i915_request *rq)
{
int ret;
- ret = intel_ctx_workarounds_emit(rq);
- if (ret)
- return ret;
-
ret = intel_rcs_context_init_mocs(rq);
/*
* Failing to program the MOCS is non-fatal.The system will not
@@ -601,10 +601,6 @@ static int intel_rcs_ctx_init(struct i915_request *rq)
{
int ret;
- ret = intel_ctx_workarounds_emit(rq);
- if (ret != 0)
- return ret;
-
ret = i915_gem_render_state_emit(rq);
if (ret)
return ret;
@@ -48,14 +48,16 @@
* - Public functions to init or apply the given workaround type.
*/
-static void wa_add(struct drm_i915_private *i915,
+static void wa_add(struct drm_i915_private *dev_priv,
i915_reg_t reg, const u32 mask, const u32 val)
{
- struct i915_workarounds *wa = &i915->workarounds;
+ struct i915_workarounds *wa = &dev_priv->workarounds;
unsigned int start = 0, end = wa->count;
unsigned int addr = i915_mmio_reg_offset(reg);
struct i915_wa_reg *r;
+ I915_WRITE(reg, val);
+
while (start < end) {
unsigned int mid = start + (end - start) / 2;
@@ -110,7 +112,7 @@ static void wa_add(struct drm_i915_private *i915,
#define WA_SET_FIELD_MASKED(addr, mask, value) \
WA_REG(addr, (mask), _MASKED_FIELD(mask, value))
-static int gen8_ctx_workarounds_init(struct drm_i915_private *dev_priv)
+static void gen8_ctx_workarounds_apply(struct drm_i915_private *dev_priv)
{
WA_SET_BIT_MASKED(INSTPM, INSTPM_FORCE_ORDERING);
@@ -121,17 +123,20 @@ static int gen8_ctx_workarounds_init(struct drm_i915_private *dev_priv)
WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN,
PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE);
- /* Use Force Non-Coherent whenever executing a 3D context. This is a
+ /*
+ * Use Force Non-Coherent whenever executing a 3D context. This is a
* workaround for for a possible hang in the unlikely event a TLB
* invalidation occurs during a PSD flush.
+ *
+ * WaForceEnableNonCoherent:bdw,chv
+ * WaHdcDisableFetchWhenMasked:bdw,chv
*/
- /* WaForceEnableNonCoherent:bdw,chv */
- /* WaHdcDisableFetchWhenMasked:bdw,chv */
WA_SET_BIT_MASKED(HDC_CHICKEN0,
HDC_DONOT_FETCH_MEM_WHEN_MASKED |
HDC_FORCE_NON_COHERENT);
- /* From the Haswell PRM, Command Reference: Registers, CACHE_MODE_0:
+ /*
+ * From the Haswell PRM, Command Reference: Registers, CACHE_MODE_0:
* "The Hierarchical Z RAW Stall Optimization allows non-overlapping
* polygons in the same 8x4 pixel/sample area to be processed without
* stalling waiting for the earlier ones to write to Hierarchical Z
@@ -155,22 +160,17 @@ static int gen8_ctx_workarounds_init(struct drm_i915_private *dev_priv)
WA_SET_FIELD_MASKED(GEN7_GT_MODE,
GEN6_WIZ_HASHING_MASK,
GEN6_WIZ_HASHING_16x4);
-
- return 0;
}
-static int bdw_ctx_workarounds_init(struct drm_i915_private *dev_priv)
+static void bdw_ctx_workarounds_apply(struct drm_i915_private *dev_priv)
{
- int ret;
-
- ret = gen8_ctx_workarounds_init(dev_priv);
- if (ret)
- return ret;
+ gen8_ctx_workarounds_apply(dev_priv);
/* WaDisableThreadStallDopClockGating:bdw (pre-production) */
WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, STALL_DOP_GATING_DISABLE);
- /* WaDisableDopClockGating:bdw
+ /*
+ * WaDisableDopClockGating:bdw
*
* Also see the related UCGTCL1 write in broadwell_init_clock_gating()
* to disable EUTC clock gating.
@@ -186,31 +186,24 @@ static int bdw_ctx_workarounds_init(struct drm_i915_private *dev_priv)
HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT |
/* WaDisableFenceDestinationToSLM:bdw (pre-prod) */
(IS_BDW_GT3(dev_priv) ? HDC_FENCE_DEST_SLM_DISABLE : 0));
-
- return 0;
}
-static int chv_ctx_workarounds_init(struct drm_i915_private *dev_priv)
+static void chv_ctx_workarounds_apply(struct drm_i915_private *dev_priv)
{
- int ret;
-
- ret = gen8_ctx_workarounds_init(dev_priv);
- if (ret)
- return ret;
+ gen8_ctx_workarounds_apply(dev_priv);
/* WaDisableThreadStallDopClockGating:chv */
WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, STALL_DOP_GATING_DISABLE);
/* Improve HiZ throughput on CHV. */
WA_SET_BIT_MASKED(HIZ_CHICKEN, CHV_HZ_8X8_MODE_IN_1X);
-
- return 0;
}
-static int gen9_ctx_workarounds_init(struct drm_i915_private *dev_priv)
+static void gen9_ctx_workarounds_apply(struct drm_i915_private *dev_priv)
{
if (HAS_LLC(dev_priv)) {
- /* WaCompressedResourceSamplerPbeMediaNewHashMode:skl,kbl
+ /*
+ * WaCompressedResourceSamplerPbeMediaNewHashMode:skl,kbl
*
* Must match Display Engine. See
* WaCompressedResourceDisplayNewHashMode.
@@ -253,7 +246,8 @@ static int gen9_ctx_workarounds_init(struct drm_i915_private *dev_priv)
HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT |
HDC_FORCE_CSR_NON_COHERENT_OVR_DISABLE);
- /* WaForceEnableNonCoherent and WaDisableHDCInvalidation are
+ /*
+ * WaForceEnableNonCoherent and WaDisableHDCInvalidation are
* both tied to WaForceContextSaveRestoreNonCoherent
* in some hsds for skl. We keep the tie for all gen9. The
* documentation is a bit hazy and so we want to get common behaviour,
@@ -302,11 +296,9 @@ static int gen9_ctx_workarounds_init(struct drm_i915_private *dev_priv)
/* WaClearHIZ_WM_CHICKEN3:bxt,glk */
if (IS_GEN9_LP(dev_priv))
WA_SET_BIT_MASKED(GEN9_WM_CHICKEN3, GEN9_FACTOR_IN_CLR_VAL_HIZ);
-
- return 0;
}
-static int skl_tune_iz_hashing(struct drm_i915_private *dev_priv)
+static void skl_tune_iz_hashing(struct drm_i915_private *dev_priv)
{
u8 vals[3] = { 0, 0, 0 };
unsigned int i;
@@ -332,7 +324,7 @@ static int skl_tune_iz_hashing(struct drm_i915_private *dev_priv)
}
if (vals[0] == 0 && vals[1] == 0 && vals[2] == 0)
- return 0;
+ return;
/* Tune IZ hashing. See intel_device_info_runtime_init() */
WA_SET_FIELD_MASKED(GEN7_GT_MODE,
@@ -342,28 +334,18 @@ static int skl_tune_iz_hashing(struct drm_i915_private *dev_priv)
GEN9_IZ_HASHING(2, vals[2]) |
GEN9_IZ_HASHING(1, vals[1]) |
GEN9_IZ_HASHING(0, vals[0]));
-
- return 0;
}
-static int skl_ctx_workarounds_init(struct drm_i915_private *dev_priv)
+static void skl_ctx_workarounds_apply(struct drm_i915_private *dev_priv)
{
- int ret;
-
- ret = gen9_ctx_workarounds_init(dev_priv);
- if (ret)
- return ret;
+ gen9_ctx_workarounds_apply(dev_priv);
- return skl_tune_iz_hashing(dev_priv);
+ skl_tune_iz_hashing(dev_priv);
}
-static int bxt_ctx_workarounds_init(struct drm_i915_private *dev_priv)
+static void bxt_ctx_workarounds_apply(struct drm_i915_private *dev_priv)
{
- int ret;
-
- ret = gen9_ctx_workarounds_init(dev_priv);
- if (ret)
- return ret;
+ gen9_ctx_workarounds_apply(dev_priv);
/* WaDisableThreadStallDopClockGating:bxt */
WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN,
@@ -372,17 +354,11 @@ static int bxt_ctx_workarounds_init(struct drm_i915_private *dev_priv)
/* WaToEnableHwFixForPushConstHWBug:bxt */
WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);
-
- return 0;
}
-static int kbl_ctx_workarounds_init(struct drm_i915_private *dev_priv)
+static void kbl_ctx_workarounds_apply(struct drm_i915_private *dev_priv)
{
- int ret;
-
- ret = gen9_ctx_workarounds_init(dev_priv);
- if (ret)
- return ret;
+ gen9_ctx_workarounds_apply(dev_priv);
/* WaDisableFenceDestinationToSLM:kbl (pre-prod) */
if (IS_KBL_REVID(dev_priv, KBL_REVID_A0, KBL_REVID_A0))
@@ -397,32 +373,20 @@ static int kbl_ctx_workarounds_init(struct drm_i915_private *dev_priv)
/* WaDisableSbeCacheDispatchPortSharing:kbl */
WA_SET_BIT_MASKED(GEN7_HALF_SLICE_CHICKEN1,
GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE);
-
- return 0;
}
-static int glk_ctx_workarounds_init(struct drm_i915_private *dev_priv)
+static void glk_ctx_workarounds_apply(struct drm_i915_private *dev_priv)
{
- int ret;
-
- ret = gen9_ctx_workarounds_init(dev_priv);
- if (ret)
- return ret;
+ gen9_ctx_workarounds_apply(dev_priv);
/* WaToEnableHwFixForPushConstHWBug:glk */
WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);
-
- return 0;
}
-static int cfl_ctx_workarounds_init(struct drm_i915_private *dev_priv)
+static void cfl_ctx_workarounds_apply(struct drm_i915_private *dev_priv)
{
- int ret;
-
- ret = gen9_ctx_workarounds_init(dev_priv);
- if (ret)
- return ret;
+ gen9_ctx_workarounds_apply(dev_priv);
/* WaToEnableHwFixForPushConstHWBug:cfl */
WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
@@ -431,11 +395,9 @@ static int cfl_ctx_workarounds_init(struct drm_i915_private *dev_priv)
/* WaDisableSbeCacheDispatchPortSharing:cfl */
WA_SET_BIT_MASKED(GEN7_HALF_SLICE_CHICKEN1,
GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE);
-
- return 0;
}
-static int cnl_ctx_workarounds_init(struct drm_i915_private *dev_priv)
+static void cnl_ctx_workarounds_apply(struct drm_i915_private *dev_priv)
{
/* WaForceContextSaveRestoreNonCoherent:cnl */
WA_SET_BIT_MASKED(CNL_HDC_CHICKEN0,
@@ -470,20 +432,20 @@ static int cnl_ctx_workarounds_init(struct drm_i915_private *dev_priv)
/* WaDisableEarlyEOT:cnl */
WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, DISABLE_EARLY_EOT);
-
- return 0;
}
-static int icl_ctx_workarounds_init(struct drm_i915_private *dev_priv)
+static void icl_ctx_workarounds_apply(struct drm_i915_private *dev_priv)
{
- /* Wa_1604370585:icl (pre-prod)
+ /*
+ * Wa_1604370585:icl (pre-prod)
* Formerly known as WaPushConstantDereferenceHoldDisable
*/
if (IS_ICL_REVID(dev_priv, ICL_REVID_A0, ICL_REVID_B0))
WA_SET_BIT_MASKED(GEN7_ROW_CHICKEN2,
PUSH_CONSTANT_DEREF_DISABLE);
- /* WaForceEnableNonCoherent:icl
+ /*
+ * WaForceEnableNonCoherent:icl
* This is not the same workaround as in early Gen9 platforms, where
* lacking this could cause system hangs, but coherency performance
* overhead is high and only a few compute workloads really need it
@@ -492,7 +454,8 @@ static int icl_ctx_workarounds_init(struct drm_i915_private *dev_priv)
*/
WA_SET_BIT_MASKED(ICL_HDC_MODE, HDC_FORCE_NON_COHERENT);
- /* Wa_2006611047:icl (pre-prod)
+ /*
+ * Wa_2006611047:icl (pre-prod)
* Formerly known as WaDisableImprovedTdlClkGating
*/
if (IS_ICL_REVID(dev_priv, ICL_REVID_A0, ICL_REVID_A0))
@@ -510,77 +473,48 @@ static int icl_ctx_workarounds_init(struct drm_i915_private *dev_priv)
/* WaEnableFloatBlendOptimization:icl */
WA_SET_BIT_MASKED(GEN10_CACHE_MODE_SS, FLOAT_BLEND_OPTIMIZATION_ENABLE);
-
- return 0;
}
-int intel_ctx_workarounds_init(struct drm_i915_private *dev_priv)
+void intel_ctx_workarounds_apply(struct drm_i915_private *dev_priv)
{
- int err = 0;
+ /*
+ * We rely on not losing the active state before we save these
+ * register writes into the default context image, so we must be
+ * sure to keep forcewake held at all times. We use immediate MMIO
+ * writes to update the current register state, which are then
+ * inherited by the first context and saved into the default
+ * context image; after which this register state is reused for all
+ * user contexts.
+ */
+ assert_forcewakes_active(dev_priv, FORCEWAKE_ALL);
dev_priv->workarounds.count = 0;
if (INTEL_GEN(dev_priv) < 8)
- err = 0;
+ ;
else if (IS_BROADWELL(dev_priv))
- err = bdw_ctx_workarounds_init(dev_priv);
+ bdw_ctx_workarounds_apply(dev_priv);
else if (IS_CHERRYVIEW(dev_priv))
- err = chv_ctx_workarounds_init(dev_priv);
+ chv_ctx_workarounds_apply(dev_priv);
else if (IS_SKYLAKE(dev_priv))
- err = skl_ctx_workarounds_init(dev_priv);
+ skl_ctx_workarounds_apply(dev_priv);
else if (IS_BROXTON(dev_priv))
- err = bxt_ctx_workarounds_init(dev_priv);
+ bxt_ctx_workarounds_apply(dev_priv);
else if (IS_KABYLAKE(dev_priv))
- err = kbl_ctx_workarounds_init(dev_priv);
+ kbl_ctx_workarounds_apply(dev_priv);
else if (IS_GEMINILAKE(dev_priv))
- err = glk_ctx_workarounds_init(dev_priv);
+ glk_ctx_workarounds_apply(dev_priv);
else if (IS_COFFEELAKE(dev_priv))
- err = cfl_ctx_workarounds_init(dev_priv);
+ cfl_ctx_workarounds_apply(dev_priv);
else if (IS_CANNONLAKE(dev_priv))
- err = cnl_ctx_workarounds_init(dev_priv);
+ cnl_ctx_workarounds_apply(dev_priv);
else if (IS_ICELAKE(dev_priv))
- err = icl_ctx_workarounds_init(dev_priv);
+ icl_ctx_workarounds_apply(dev_priv);
else
MISSING_CASE(INTEL_GEN(dev_priv));
- if (err)
- return err;
DRM_DEBUG_DRIVER("Number of context specific w/a: %d\n",
dev_priv->workarounds.count);
- return 0;
-}
-
-int intel_ctx_workarounds_emit(struct i915_request *rq)
-{
- struct i915_workarounds *w = &rq->i915->workarounds;
- u32 *cs;
- int ret, i;
-
- if (w->count == 0)
- return 0;
-
- ret = rq->engine->emit_flush(rq, EMIT_BARRIER);
- if (ret)
- return ret;
-
- cs = intel_ring_begin(rq, (w->count * 2 + 2));
- if (IS_ERR(cs))
- return PTR_ERR(cs);
-
- *cs++ = MI_LOAD_REGISTER_IMM(w->count);
- for (i = 0; i < w->count; i++) {
- *cs++ = w->reg[i].addr;
- *cs++ = w->reg[i].value;
- }
- *cs++ = MI_NOOP;
-
- intel_ring_advance(rq, cs);
-
- ret = rq->engine->emit_flush(rq, EMIT_BARRIER);
- if (ret)
- return ret;
-
- return 0;
}
static void bdw_gt_workarounds_apply(struct drm_i915_private *dev_priv)
@@ -7,11 +7,8 @@
#ifndef _I915_WORKAROUNDS_H_
#define _I915_WORKAROUNDS_H_
-int intel_ctx_workarounds_init(struct drm_i915_private *dev_priv);
-int intel_ctx_workarounds_emit(struct i915_request *rq);
-
+void intel_ctx_workarounds_apply(struct drm_i915_private *dev_priv);
void intel_gt_workarounds_apply(struct drm_i915_private *dev_priv);
-
void intel_whitelist_workarounds_apply(struct intel_engine_cs *engine);
#endif