diff mbox series

[2/2] drm/i915/icl: Add WaDisableBankHangMode

Message ID 20190520082816.32137-2-tvrtko.ursulin@linux.intel.com (mailing list archive)
State New, archived
Headers show
Series [1/2] drm/i915/selftests: Add live_context_workarounds | expand

Commit Message

Tvrtko Ursulin May 20, 2019, 8:28 a.m. UTC
From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Disable GPU hang by default on unrecoverable ECC cache errors.

Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/gt/intel_workarounds.c | 6 ++++++
 drivers/gpu/drm/i915/i915_reg.h             | 3 +++
 2 files changed, 9 insertions(+)

Comments

Chris Wilson May 20, 2019, 9:22 a.m. UTC | #1
Quoting Tvrtko Ursulin (2019-05-20 09:28:16)
> From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> 
> Disable GPU hang by default on unrecoverable ECC cache errors.

   uint32_t l3cr;
   anv_pack_struct(&l3cr, GENX(L3CNTLREG),
                   .SLMEnable = has_slm,
#if GEN_GEN == 11
   /* WA_1406697149: Bit 9 "Error Detection Behavior Control" must be set
    * in L3CNTLREG register. The default setting of the bit is not the
    * desirable behavior.
   */
                   .ErrorDetectionBehaviorControl = true,
                   .UseFullWays = true,
#endif
                   .URBAllocation = cfg->n[GEN_L3P_URB],
                   .ROAllocation = cfg->n[GEN_L3P_RO],
                   .DCAllocation = cfg->n[GEN_L3P_DC],
                   .AllAllocation = cfg->n[GEN_L3P_ALL]);

   /* Set up the L3 partitioning. */
   emit_lri(&cmd_buffer->batch, GENX(L3CNTLREG_num), l3cr);

Concurs.

> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> ---
>  drivers/gpu/drm/i915/gt/intel_workarounds.c | 6 ++++++
>  drivers/gpu/drm/i915/i915_reg.h             | 3 +++
>  2 files changed, 9 insertions(+)
> 
> diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c
> index 4494bc917084..dea7df01e0dc 100644
> --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c
> +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c
> @@ -532,6 +532,12 @@ static void icl_ctx_workarounds_init(struct intel_engine_cs *engine)
>  {
>         struct drm_i915_private *i915 = engine->i915;
>         struct i915_wa_list *wal = &engine->ctx_wa_list;
> +       struct drm_i915_private *dev_priv = i915;
> +
> +       /* WaDisableBankHangMode:icl */
> +       wa_write(wal,
> +                GEN8_L3CNTLREG,
> +                I915_READ(GEN8_L3CNTLREG) | GEN8_ERRDETBCTRL);

Do you have any clue as to what HW is doing that means we can't set this
in gt_workarounds and just rely on it being part of default context
state? Could be a magical power context register which doesn't take
effect until the GPU is active. Ok, that seems believable, just ugly to
have a context register that is not masked.

Acked-by: Chris Wilson <chris@chris-wilson.co.uk>
-Chris
Chris Wilson May 20, 2019, 9:26 a.m. UTC | #2
Quoting Chris Wilson (2019-05-20 10:22:36)
> Quoting Tvrtko Ursulin (2019-05-20 09:28:16)
> > From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> > 
> > Disable GPU hang by default on unrecoverable ECC cache errors.
> 
>    uint32_t l3cr;
>    anv_pack_struct(&l3cr, GENX(L3CNTLREG),
>                    .SLMEnable = has_slm,
> #if GEN_GEN == 11
>    /* WA_1406697149: Bit 9 "Error Detection Behavior Control" must be set
>     * in L3CNTLREG register. The default setting of the bit is not the
>     * desirable behavior.
>    */
>                    .ErrorDetectionBehaviorControl = true,
>                    .UseFullWays = true,
> #endif
>                    .URBAllocation = cfg->n[GEN_L3P_URB],
>                    .ROAllocation = cfg->n[GEN_L3P_RO],
>                    .DCAllocation = cfg->n[GEN_L3P_DC],
>                    .AllAllocation = cfg->n[GEN_L3P_ALL]);
> 
>    /* Set up the L3 partitioning. */
>    emit_lri(&cmd_buffer->batch, GENX(L3CNTLREG_num), l3cr);
> 
> Concurs.
> 
> > Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> > ---
> >  drivers/gpu/drm/i915/gt/intel_workarounds.c | 6 ++++++
> >  drivers/gpu/drm/i915/i915_reg.h             | 3 +++
> >  2 files changed, 9 insertions(+)
> > 
> > diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c
> > index 4494bc917084..dea7df01e0dc 100644
> > --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c
> > +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c
> > @@ -532,6 +532,12 @@ static void icl_ctx_workarounds_init(struct intel_engine_cs *engine)
> >  {
> >         struct drm_i915_private *i915 = engine->i915;
> >         struct i915_wa_list *wal = &engine->ctx_wa_list;
> > +       struct drm_i915_private *dev_priv = i915;
> > +
> > +       /* WaDisableBankHangMode:icl */
> > +       wa_write(wal,
> > +                GEN8_L3CNTLREG,
> > +                I915_READ(GEN8_L3CNTLREG) | GEN8_ERRDETBCTRL);
> 
> Do you have any clue as to what HW is doing that means we can't set this
> in gt_workarounds and just rely on it being part of default context
> state? Could be a magical power context register which doesn't take
> effect until the GPU is active. Ok, that seems believable, just ugly to
> have a context register that is not masked.
> 
> Acked-by: Chris Wilson <chris@chris-wilson.co.uk>

And this should probably be cc:stable if the recommendation changed and
it prevents a hang.
-Chris
diff mbox series

Patch

diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c
index 4494bc917084..dea7df01e0dc 100644
--- a/drivers/gpu/drm/i915/gt/intel_workarounds.c
+++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c
@@ -532,6 +532,12 @@  static void icl_ctx_workarounds_init(struct intel_engine_cs *engine)
 {
 	struct drm_i915_private *i915 = engine->i915;
 	struct i915_wa_list *wal = &engine->ctx_wa_list;
+	struct drm_i915_private *dev_priv = i915;
+
+	/* WaDisableBankHangMode:icl */
+	wa_write(wal,
+		 GEN8_L3CNTLREG,
+		 I915_READ(GEN8_L3CNTLREG) | GEN8_ERRDETBCTRL);
 
 	/* Wa_1604370585:icl (pre-prod)
 	 * Formerly known as WaPushConstantDereferenceHoldDisable
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index e97c47fca645..87e8780711d7 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -7621,6 +7621,9 @@  enum {
   #define GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION		(1 << 8)
   #define GEN8_CSC2_SBE_VUE_CACHE_CONSERVATIVE			(1 << 0)
 
+#define GEN8_L3CNTLREG	_MMIO(0x7034)
+  #define GEN8_ERRDETBCTRL (1 << 9)
+
 #define GEN11_COMMON_SLICE_CHICKEN3		_MMIO(0x7304)
   #define GEN11_BLEND_EMB_FIX_DISABLE_IN_RCC	(1 << 11)