Message ID | 20181130113201.13007-4-tvrtko.ursulin@linux.intel.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | Restore workarounds after engine reset and unify their handling | expand |
Quoting Tvrtko Ursulin (2018-11-30 11:31:56) > From: Tvrtko Ursulin <tvrtko.ursulin@intel.com> > > Since we now have all the GT workarounds in a table, by adding a simple > shared helper function we can now verify that their values are still > applied after some interesting events in the lifetime of the driver. > > At this stage these are the driver initialization and engine reset. > > Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com> > --- > drivers/gpu/drm/i915/i915_drv.c | 4 +++ > drivers/gpu/drm/i915/i915_gem.c | 3 ++ > drivers/gpu/drm/i915/intel_workarounds.c | 46 ++++++++++++++++++++++++ > drivers/gpu/drm/i915/intel_workarounds.h | 2 ++ > 4 files changed, 55 insertions(+) > > diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c > index 2f3dc1cf83a6..14d019c9455b 100644 > --- a/drivers/gpu/drm/i915/i915_drv.c > +++ b/drivers/gpu/drm/i915/i915_drv.c > @@ -53,6 +53,7 @@ > #include "i915_vgpu.h" > #include "intel_drv.h" > #include "intel_uc.h" > +#include "intel_workarounds.h" > > static struct drm_driver driver; > > @@ -2362,6 +2363,9 @@ int i915_reset_engine(struct intel_engine_cs *engine, const char *msg) > goto out; > } > > + /* Catch GT workarounds affected by engine reset. */ > + intel_gt_workarounds_verify(engine->i915, engine->name); I'd rather, quite strongly, not have this inside i915_reset_engine() itself. i915_reset_engine() is [designed to be] called from atomic context. Adding I915_READ() here is questionable. My preference is to have all this inside selftests/intel_workarounds where we can cross check the gt/all-engines after device reset and other engine reset. -Chris
Quoting Tvrtko Ursulin (2018-11-30 11:31:56) > From: Tvrtko Ursulin <tvrtko.ursulin@intel.com> > > Since we now have all the GT workarounds in a table, by adding a simple > shared helper function we can now verify that their values are still > applied after some interesting events in the lifetime of the driver. > > At this stage these are the driver initialization and engine reset. > > Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com> > --- > drivers/gpu/drm/i915/i915_drv.c | 4 +++ > drivers/gpu/drm/i915/i915_gem.c | 3 ++ > drivers/gpu/drm/i915/intel_workarounds.c | 46 ++++++++++++++++++++++++ > drivers/gpu/drm/i915/intel_workarounds.h | 2 ++ > 4 files changed, 55 insertions(+) > > diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c > index 2f3dc1cf83a6..14d019c9455b 100644 > --- a/drivers/gpu/drm/i915/i915_drv.c > +++ b/drivers/gpu/drm/i915/i915_drv.c > @@ -53,6 +53,7 @@ > #include "i915_vgpu.h" > #include "intel_drv.h" > #include "intel_uc.h" > +#include "intel_workarounds.h" > > static struct drm_driver driver; > > @@ -2362,6 +2363,9 @@ int i915_reset_engine(struct intel_engine_cs *engine, const char *msg) > goto out; > } > > + /* Catch GT workarounds affected by engine reset. */ > + intel_gt_workarounds_verify(engine->i915, engine->name); > + > /* > * The request that caused the hang is stuck on elsp, we know the > * active request and can drop it, adjust head to skip the offending > diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c > index 18adb3dd1fcd..1eff471d4366 100644 > --- a/drivers/gpu/drm/i915/i915_gem.c > +++ b/drivers/gpu/drm/i915/i915_gem.c > @@ -5334,7 +5334,10 @@ int i915_gem_init_hw(struct drm_i915_private *dev_priv) > I915_WRITE(MI_PREDICATE_RESULT_2, IS_HSW_GT3(dev_priv) ? > LOWER_SLICE_ENABLED : LOWER_SLICE_DISABLED); > > + /* Apply the GT workarounds... */ > intel_gt_workarounds_apply(dev_priv); > + /* ...and determine whether they are sticking. */ > + intel_gt_workarounds_verify(dev_priv, "init"); > > i915_gem_init_swizzling(dev_priv); > > diff --git a/drivers/gpu/drm/i915/intel_workarounds.c b/drivers/gpu/drm/i915/intel_workarounds.c > index be63a2af3481..a5c0d206b2a4 100644 > --- a/drivers/gpu/drm/i915/intel_workarounds.c > +++ b/drivers/gpu/drm/i915/intel_workarounds.c > @@ -981,6 +981,52 @@ void intel_gt_workarounds_apply(struct drm_i915_private *dev_priv) > wa_list_apply(dev_priv, &dev_priv->gt_wa_list); > } > > +static void > +wa_fail(const struct i915_wa *wa, u32 cur, const char *name, const char *from) > +{ > + DRM_ERROR("%s workaround lost on %s! (%x=%x/%x, expected %x, mask=%x)\n", > + name, from, > + i915_mmio_reg_offset(wa->reg), > + cur, cur & wa->mask, wa->val, wa->mask); > +} > + > +static void > +wa_verify_bits(const struct i915_wa *wa, u32 cur, const char *name, > + const char *from) > +{ > + u32 bits = wa->mask; > + u32 cur_ = cur; > + u32 val_ = wa->val; Make sure wa->mask is set to ~0u for whole registers, which it must be for the wa_fail. if ((cur ^ wa->val) & wa->mask) { wa_fail(wa, cur, name, whom); return false; /* make it bool now to save churn later? */ } -Chris
On 30/11/2018 11:38, Chris Wilson wrote: > Quoting Tvrtko Ursulin (2018-11-30 11:31:56) >> From: Tvrtko Ursulin <tvrtko.ursulin@intel.com> >> >> Since we now have all the GT workarounds in a table, by adding a simple >> shared helper function we can now verify that their values are still >> applied after some interesting events in the lifetime of the driver. >> >> At this stage these are the driver initialization and engine reset. >> >> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com> >> --- >> drivers/gpu/drm/i915/i915_drv.c | 4 +++ >> drivers/gpu/drm/i915/i915_gem.c | 3 ++ >> drivers/gpu/drm/i915/intel_workarounds.c | 46 ++++++++++++++++++++++++ >> drivers/gpu/drm/i915/intel_workarounds.h | 2 ++ >> 4 files changed, 55 insertions(+) >> >> diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c >> index 2f3dc1cf83a6..14d019c9455b 100644 >> --- a/drivers/gpu/drm/i915/i915_drv.c >> +++ b/drivers/gpu/drm/i915/i915_drv.c >> @@ -53,6 +53,7 @@ >> #include "i915_vgpu.h" >> #include "intel_drv.h" >> #include "intel_uc.h" >> +#include "intel_workarounds.h" >> >> static struct drm_driver driver; >> >> @@ -2362,6 +2363,9 @@ int i915_reset_engine(struct intel_engine_cs *engine, const char *msg) >> goto out; >> } >> >> + /* Catch GT workarounds affected by engine reset. */ >> + intel_gt_workarounds_verify(engine->i915, engine->name); > > I'd rather, quite strongly, not have this inside i915_reset_engine() > itself. i915_reset_engine() is [designed to be] called from atomic > context. Adding I915_READ() here is questionable. My preference is to > have all this inside selftests/intel_workarounds where we can cross > check the gt/all-engines after device reset and other engine reset. I wasn't sure myself how much value checking it here adds, but regarding I915_READs, they are happening during engine->init_hw anyway. So I don't think a few more would harm. But yes, as said, since I was unsure myself I am happy to drop this hunk. Regards, Tvrtko
Quoting Tvrtko Ursulin (2018-11-30 12:02:56) > > On 30/11/2018 11:38, Chris Wilson wrote: > > Quoting Tvrtko Ursulin (2018-11-30 11:31:56) > >> From: Tvrtko Ursulin <tvrtko.ursulin@intel.com> > >> > >> Since we now have all the GT workarounds in a table, by adding a simple > >> shared helper function we can now verify that their values are still > >> applied after some interesting events in the lifetime of the driver. > >> > >> At this stage these are the driver initialization and engine reset. > >> > >> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com> > >> --- > >> drivers/gpu/drm/i915/i915_drv.c | 4 +++ > >> drivers/gpu/drm/i915/i915_gem.c | 3 ++ > >> drivers/gpu/drm/i915/intel_workarounds.c | 46 ++++++++++++++++++++++++ > >> drivers/gpu/drm/i915/intel_workarounds.h | 2 ++ > >> 4 files changed, 55 insertions(+) > >> > >> diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c > >> index 2f3dc1cf83a6..14d019c9455b 100644 > >> --- a/drivers/gpu/drm/i915/i915_drv.c > >> +++ b/drivers/gpu/drm/i915/i915_drv.c > >> @@ -53,6 +53,7 @@ > >> #include "i915_vgpu.h" > >> #include "intel_drv.h" > >> #include "intel_uc.h" > >> +#include "intel_workarounds.h" > >> > >> static struct drm_driver driver; > >> > >> @@ -2362,6 +2363,9 @@ int i915_reset_engine(struct intel_engine_cs *engine, const char *msg) > >> goto out; > >> } > >> > >> + /* Catch GT workarounds affected by engine reset. */ > >> + intel_gt_workarounds_verify(engine->i915, engine->name); > > > > I'd rather, quite strongly, not have this inside i915_reset_engine() > > itself. i915_reset_engine() is [designed to be] called from atomic > > context. Adding I915_READ() here is questionable. My preference is to > > have all this inside selftests/intel_workarounds where we can cross > > check the gt/all-engines after device reset and other engine reset. > > I wasn't sure myself how much value checking it here adds, but regarding > I915_READs, they are happening during engine->init_hw anyway. So I don't > think a few more would harm. But yes, as said, since I was unsure myself > I am happy to drop this hunk. Adding the I915_READ isn't the end-of-the-world, I am more wary of the slipperly slope and suddenly finding ourselves with a per-engine reset that is no longer suitable for fast resets, possibly from hardirq context. (Although that's more likely to be softirq really.) In this case, I think we can satisfy ourselves with a comprehensive yet fast test suite. And there's no harm in having the checks on idle for runtime consistency checking (we can even inject SRM before idling for completeness). -Chris
On 30/11/2018 11:54, Chris Wilson wrote: > Quoting Tvrtko Ursulin (2018-11-30 11:31:56) >> From: Tvrtko Ursulin <tvrtko.ursulin@intel.com> >> >> Since we now have all the GT workarounds in a table, by adding a simple >> shared helper function we can now verify that their values are still >> applied after some interesting events in the lifetime of the driver. >> >> At this stage these are the driver initialization and engine reset. >> >> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com> >> --- >> drivers/gpu/drm/i915/i915_drv.c | 4 +++ >> drivers/gpu/drm/i915/i915_gem.c | 3 ++ >> drivers/gpu/drm/i915/intel_workarounds.c | 46 ++++++++++++++++++++++++ >> drivers/gpu/drm/i915/intel_workarounds.h | 2 ++ >> 4 files changed, 55 insertions(+) >> >> diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c >> index 2f3dc1cf83a6..14d019c9455b 100644 >> --- a/drivers/gpu/drm/i915/i915_drv.c >> +++ b/drivers/gpu/drm/i915/i915_drv.c >> @@ -53,6 +53,7 @@ >> #include "i915_vgpu.h" >> #include "intel_drv.h" >> #include "intel_uc.h" >> +#include "intel_workarounds.h" >> >> static struct drm_driver driver; >> >> @@ -2362,6 +2363,9 @@ int i915_reset_engine(struct intel_engine_cs *engine, const char *msg) >> goto out; >> } >> >> + /* Catch GT workarounds affected by engine reset. */ >> + intel_gt_workarounds_verify(engine->i915, engine->name); >> + >> /* >> * The request that caused the hang is stuck on elsp, we know the >> * active request and can drop it, adjust head to skip the offending >> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c >> index 18adb3dd1fcd..1eff471d4366 100644 >> --- a/drivers/gpu/drm/i915/i915_gem.c >> +++ b/drivers/gpu/drm/i915/i915_gem.c >> @@ -5334,7 +5334,10 @@ int i915_gem_init_hw(struct drm_i915_private *dev_priv) >> I915_WRITE(MI_PREDICATE_RESULT_2, IS_HSW_GT3(dev_priv) ? >> LOWER_SLICE_ENABLED : LOWER_SLICE_DISABLED); >> >> + /* Apply the GT workarounds... */ >> intel_gt_workarounds_apply(dev_priv); >> + /* ...and determine whether they are sticking. */ >> + intel_gt_workarounds_verify(dev_priv, "init"); >> >> i915_gem_init_swizzling(dev_priv); >> >> diff --git a/drivers/gpu/drm/i915/intel_workarounds.c b/drivers/gpu/drm/i915/intel_workarounds.c >> index be63a2af3481..a5c0d206b2a4 100644 >> --- a/drivers/gpu/drm/i915/intel_workarounds.c >> +++ b/drivers/gpu/drm/i915/intel_workarounds.c >> @@ -981,6 +981,52 @@ void intel_gt_workarounds_apply(struct drm_i915_private *dev_priv) >> wa_list_apply(dev_priv, &dev_priv->gt_wa_list); >> } >> >> +static void >> +wa_fail(const struct i915_wa *wa, u32 cur, const char *name, const char *from) >> +{ >> + DRM_ERROR("%s workaround lost on %s! (%x=%x/%x, expected %x, mask=%x)\n", >> + name, from, >> + i915_mmio_reg_offset(wa->reg), >> + cur, cur & wa->mask, wa->val, wa->mask); >> +} >> + >> +static void >> +wa_verify_bits(const struct i915_wa *wa, u32 cur, const char *name, >> + const char *from) >> +{ >> + u32 bits = wa->mask; >> + u32 cur_ = cur; >> + u32 val_ = wa->val; > > Make sure wa->mask is set to ~0u for whole registers, which it must be > for the wa_fail. It is (wa_write), unless you spotted a miss? > if ((cur ^ wa->val) & wa->mask) { > wa_fail(wa, cur, name, whom); Not sure if I feel like laughing or crying at my implementation. :) I was cleaning up the lot when I realized it all reduces to simplicity, but obviously missed this one.. > return false; /* make it bool now to save churn later? */ > } Okay. Regards, Tvrtko
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 2f3dc1cf83a6..14d019c9455b 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -53,6 +53,7 @@ #include "i915_vgpu.h" #include "intel_drv.h" #include "intel_uc.h" +#include "intel_workarounds.h" static struct drm_driver driver; @@ -2362,6 +2363,9 @@ int i915_reset_engine(struct intel_engine_cs *engine, const char *msg) goto out; } + /* Catch GT workarounds affected by engine reset. */ + intel_gt_workarounds_verify(engine->i915, engine->name); + /* * The request that caused the hang is stuck on elsp, we know the * active request and can drop it, adjust head to skip the offending diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 18adb3dd1fcd..1eff471d4366 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -5334,7 +5334,10 @@ int i915_gem_init_hw(struct drm_i915_private *dev_priv) I915_WRITE(MI_PREDICATE_RESULT_2, IS_HSW_GT3(dev_priv) ? LOWER_SLICE_ENABLED : LOWER_SLICE_DISABLED); + /* Apply the GT workarounds... */ intel_gt_workarounds_apply(dev_priv); + /* ...and determine whether they are sticking. */ + intel_gt_workarounds_verify(dev_priv, "init"); i915_gem_init_swizzling(dev_priv); diff --git a/drivers/gpu/drm/i915/intel_workarounds.c b/drivers/gpu/drm/i915/intel_workarounds.c index be63a2af3481..a5c0d206b2a4 100644 --- a/drivers/gpu/drm/i915/intel_workarounds.c +++ b/drivers/gpu/drm/i915/intel_workarounds.c @@ -981,6 +981,52 @@ void intel_gt_workarounds_apply(struct drm_i915_private *dev_priv) wa_list_apply(dev_priv, &dev_priv->gt_wa_list); } +static void +wa_fail(const struct i915_wa *wa, u32 cur, const char *name, const char *from) +{ + DRM_ERROR("%s workaround lost on %s! (%x=%x/%x, expected %x, mask=%x)\n", + name, from, + i915_mmio_reg_offset(wa->reg), + cur, cur & wa->mask, wa->val, wa->mask); +} + +static void +wa_verify_bits(const struct i915_wa *wa, u32 cur, const char *name, + const char *from) +{ + u32 bits = wa->mask; + u32 cur_ = cur; + u32 val_ = wa->val; + + while (bits) { + if ((bits & 1) && ((cur_ & 1) != (val_ & 1))) { + wa_fail(wa, cur, name, from); + break; + } + + bits >>= 1; + cur_ >>= 1; + val_ >>= 1; + } +} + +static void wa_list_verify(struct drm_i915_private *dev_priv, + const struct i915_wa_list *wal, + const char *from) +{ + struct i915_wa *wa; + unsigned int i; + + for (i = 0, wa = wal->list; i < wal->count; i++, wa++) + wa_verify_bits(wa, I915_READ(wa->reg), wal->name, from); +} + +void intel_gt_workarounds_verify(struct drm_i915_private *dev_priv, + const char *from) +{ + wa_list_verify(dev_priv, &dev_priv->gt_wa_list, from); +} + struct whitelist { i915_reg_t reg[RING_MAX_NONPRIV_SLOTS]; unsigned int count; diff --git a/drivers/gpu/drm/i915/intel_workarounds.h b/drivers/gpu/drm/i915/intel_workarounds.h index 2998767d51ca..845c18dc110d 100644 --- a/drivers/gpu/drm/i915/intel_workarounds.h +++ b/drivers/gpu/drm/i915/intel_workarounds.h @@ -33,6 +33,8 @@ int intel_ctx_workarounds_emit(struct i915_request *rq); void intel_gt_workarounds_init(struct drm_i915_private *dev_priv); void intel_gt_workarounds_apply(struct drm_i915_private *dev_priv); +void intel_gt_workarounds_verify(struct drm_i915_private *dev_priv, + const char *from); void intel_whitelist_workarounds_apply(struct intel_engine_cs *engine);