diff mbox

drm/i915/execlists: Pull the w/a LRI emission into a helper

Message ID 20180618094150.30895-1-chris@chris-wilson.co.uk (mailing list archive)
State New, archived
Headers show

Commit Message

Chris Wilson June 18, 2018, 9:41 a.m. UTC
Having the w/a registers as an open-coded table leaves a trap for the
unwary; it would be easy to miss incrementing the LRI counter when
adding a new register to the list. Instead, pull the list of registers
into a table, so that we only need add new registers to that table
rather than try and remember important side-effects of earlier chunks of
GPU instructions.

Suggested-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
---
 drivers/gpu/drm/i915/i915_reg.h  |  3 +-
 drivers/gpu/drm/i915/intel_lrc.c | 60 +++++++++++++++++++++++---------
 2 files changed, 45 insertions(+), 18 deletions(-)

Comments

Joonas Lahtinen June 18, 2018, 11:01 a.m. UTC | #1
Quoting Chris Wilson (2018-06-18 12:41:50)
> Having the w/a registers as an open-coded table leaves a trap for the
> unwary; it would be easy to miss incrementing the LRI counter when
> adding a new register to the list. Instead, pull the list of registers
> into a table, so that we only need add new registers to that table
> rather than try and remember important side-effects of earlier chunks of
> GPU instructions.
> 
> Suggested-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>

Not related to this patch, but the lack of OOB check for batch makes
one itch a little bit.

Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>

Regards, Joonas
Chris Wilson June 18, 2018, 11:05 a.m. UTC | #2
Quoting Joonas Lahtinen (2018-06-18 12:01:16)
> Quoting Chris Wilson (2018-06-18 12:41:50)
> > Having the w/a registers as an open-coded table leaves a trap for the
> > unwary; it would be easy to miss incrementing the LRI counter when
> > adding a new register to the list. Instead, pull the list of registers
> > into a table, so that we only need add new registers to that table
> > rather than try and remember important side-effects of earlier chunks of
> > GPU instructions.
> > 
> > Suggested-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
> 
> Not related to this patch, but the lack of OOB check for batch makes
> one itch a little bit.

We check for the overflow afterwards, if we get that far before the
machine dies. As it's static, that seems reasonable as we'll cover it
entirely in CI.
-Chris
Chris Wilson June 18, 2018, 2:16 p.m. UTC | #3
Quoting Joonas Lahtinen (2018-06-18 12:01:16)
> Quoting Chris Wilson (2018-06-18 12:41:50)
> > Having the w/a registers as an open-coded table leaves a trap for the
> > unwary; it would be easy to miss incrementing the LRI counter when
> > adding a new register to the list. Instead, pull the list of registers
> > into a table, so that we only need add new registers to that table
> > rather than try and remember important side-effects of earlier chunks of
> > GPU instructions.
> > 
> > Suggested-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
> 
> Not related to this patch, but the lack of OOB check for batch makes
> one itch a little bit.
> 
> Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>

Thanks for the suggestion and review, pushed.
-Chris
diff mbox

Patch

diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index 54ec7ab57ce8..1f928bac2532 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -156,6 +156,7 @@  static inline bool i915_mmio_reg_valid(i915_reg_t reg)
 #define _PHY3(phy, ...) _PICK(phy, __VA_ARGS__)
 #define _MMIO_PHY3(phy, a, b, c) _MMIO(_PHY3(phy, a, b, c))
 
+#define __MASKED_FIELD(mask, value) ((mask) << 16 | (value))
 #define _MASKED_FIELD(mask, value) ({					   \
 	if (__builtin_constant_p(mask))					   \
 		BUILD_BUG_ON_MSG(((mask) & 0xffff0000), "Incorrect mask"); \
@@ -164,7 +165,7 @@  static inline bool i915_mmio_reg_valid(i915_reg_t reg)
 	if (__builtin_constant_p(mask) && __builtin_constant_p(value))	   \
 		BUILD_BUG_ON_MSG((value) & ~(mask),			   \
 				 "Incorrect value for mask");		   \
-	(mask) << 16 | (value); })
+	__MASKED_FIELD(mask, value); })
 #define _MASKED_BIT_ENABLE(a)	({ typeof(a) _a = (a); _MASKED_FIELD(_a, _a); })
 #define _MASKED_BIT_DISABLE(a)	(_MASKED_FIELD((a), 0))
 
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index edb21f331eee..58773071787f 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -1565,29 +1565,55 @@  static u32 *gen8_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch)
 	return batch;
 }
 
-static u32 *gen9_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch)
-{
-	*batch++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
+struct lri {
+	i915_reg_t reg;
+	u32 value;
+};
 
-	/* WaFlushCoherentL3CacheLinesAtContextSwitch:skl,bxt,glk */
-	batch = gen8_emit_flush_coherentl3_wa(engine, batch);
+static u32 *emit_lri(u32 *batch, const struct lri *lri, unsigned int count)
+{
+	GEM_BUG_ON(!count || count > 63);
 
-	*batch++ = MI_LOAD_REGISTER_IMM(3);
+	*batch++ = MI_LOAD_REGISTER_IMM(count);
+	do {
+		*batch++ = i915_mmio_reg_offset(lri->reg);
+		*batch++ = lri->value;
+	} while (lri++, --count);
+	*batch++ = MI_NOOP;
 
-	/* WaDisableGatherAtSetShaderCommonSlice:skl,bxt,kbl,glk */
-	*batch++ = i915_mmio_reg_offset(COMMON_SLICE_CHICKEN2);
-	*batch++ = _MASKED_BIT_DISABLE(
-			GEN9_DISABLE_GATHER_AT_SET_SHADER_COMMON_SLICE);
+	return batch;
+}
 
-	/* BSpec: 11391 */
-	*batch++ = i915_mmio_reg_offset(FF_SLICE_CHICKEN);
-	*batch++ = _MASKED_BIT_ENABLE(FF_SLICE_CHICKEN_CL_PROVOKING_VERTEX_FIX);
+static u32 *gen9_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch)
+{
+	static const struct lri lri[] = {
+		/* WaDisableGatherAtSetShaderCommonSlice:skl,bxt,kbl,glk */
+		{
+			COMMON_SLICE_CHICKEN2,
+			__MASKED_FIELD(GEN9_DISABLE_GATHER_AT_SET_SHADER_COMMON_SLICE,
+				       0),
+		},
+
+		/* BSpec: 11391 */
+		{
+			FF_SLICE_CHICKEN,
+			__MASKED_FIELD(FF_SLICE_CHICKEN_CL_PROVOKING_VERTEX_FIX,
+				       FF_SLICE_CHICKEN_CL_PROVOKING_VERTEX_FIX),
+		},
+
+		/* BSpec: 11299 */
+		{
+			_3D_CHICKEN3,
+			__MASKED_FIELD(_3D_CHICKEN_SF_PROVOKING_VERTEX_FIX,
+				       _3D_CHICKEN_SF_PROVOKING_VERTEX_FIX),
+		}
+	};
 
-	/* BSpec: 11299 */
-	*batch++ = i915_mmio_reg_offset(_3D_CHICKEN3);
-	*batch++ = _MASKED_BIT_ENABLE(_3D_CHICKEN_SF_PROVOKING_VERTEX_FIX);
+	*batch++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
 
-	*batch++ = MI_NOOP;
+	/* WaFlushCoherentL3CacheLinesAtContextSwitch:skl,bxt,glk */
+	batch = gen8_emit_flush_coherentl3_wa(engine, batch);
+	batch = emit_lri(batch, lri, ARRAY_SIZE(lri));
 
 	/* WaClearSlmSpaceAtContextSwitch:kbl */
 	/* Actual scratch location is at 128 bytes offset */