[2/2] drm/i915/tgl: Register state context definition for Gen12

Message ID 20190906122314.2146-2-mika.kuoppala@linux.intel.com (mailing list archive)
State: New, archived
Series: [1/2] drm/i915: Use engine relative LRIs on context setup

Commit Message

Mika Kuoppala Sept. 6, 2019, 12:23 p.m. UTC
From: Michel Thierry <michel.thierry@intel.com>

Gen12 has subtle changes in the reg state context offsets (some fields
are gone, some are in a different location), compared to previous Gens.

The simplest approach seems to be keeping Gen12 (and future platform)
changes apart from the previous gens, while keeping the registers that
are contiguous in functions we can reuse.
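
Concretely, the top level becomes a per-gen dispatch over shared helpers;
a sketch of the structure this patch introduces (the full register lists
are in the diff below):

static void execlists_init_reg_state(u32 *regs,
				     struct intel_context *ce,
				     struct intel_engine_cs *engine,
				     struct intel_ring *ring)
{
	if (INTEL_GEN(engine->i915) >= 12)
		gen12_init_reg_state(regs, ce, engine, ring);
	else
		gen8_init_reg_state(regs, ce, engine, ring);
}

with init_common_reg_state(), init_wa_bb_reg_state() and
init_ppgtt_reg_state() shared between the two paths.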

v2: alias, virtual engine, rpcs, prune unused regs
v3: use engine base (Daniele), take ctx_bb for all

Bspec: 46255
Cc: Michal Wajdeczko <michal.wajdeczko@intel.com>
Cc: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
Cc: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Cc: José Roberto de Souza <jose.souza@intel.com>
Signed-off-by: Michel Thierry <michel.thierry@intel.com>
Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
Signed-off-by: Mika Kuoppala <mika.kuoppala@linux.intel.com>
---
 drivers/gpu/drm/i915/gt/intel_lrc.c     | 196 +++++++++++++++++-------
 drivers/gpu/drm/i915/gt/intel_lrc_reg.h |   6 +-
 2 files changed, 147 insertions(+), 55 deletions(-)

Comments

Daniele Ceraolo Spurio Sept. 6, 2019, 3:42 p.m. UTC | #1
On 9/6/19 5:23 AM, Mika Kuoppala wrote:
> From: Michel Thierry <michel.thierry@intel.com>
> 
> Gen12 has subtle changes in the reg state context offsets (some fields
> are gone, some are in a different location), compared to previous Gens.
> 
> The simplest approach seems to be keeping Gen12 (and future platform)
> changes apart from the previous gens, while keeping the registers that
> are contiguous in functions we can reuse.
> 
> v2: alias, virtual engine, rpcs, prune unused regs
> v3: use engine base (Daniele), take ctx_bb for all
> 
> Bspec: 46255
> Cc: Michal Wajdeczko <michal.wajdeczko@intel.com>
> Cc: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
> Cc: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
> Cc: José Roberto de Souza <jose.souza@intel.com>
> Signed-off-by: Michel Thierry <michel.thierry@intel.com>
> Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
> Signed-off-by: Mika Kuoppala <mika.kuoppala@linux.intel.com>

I've tested this on TGL: both the gem_ctx_switch test that failed on ICL
and exec_balancer@nop passed for me.

Reviewed-by: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
Tested-by: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>

A couple of non-blocking nits below.

> ---
>   drivers/gpu/drm/i915/gt/intel_lrc.c     | 196 +++++++++++++++++-------
>   drivers/gpu/drm/i915/gt/intel_lrc_reg.h |   6 +-
>   2 files changed, 147 insertions(+), 55 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
> index 6c68ed2bf3d2..e9c873877253 100644
> --- a/drivers/gpu/drm/i915/gt/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
> @@ -808,8 +808,11 @@ static void virtual_update_register_offsets(u32 *regs,
>   {
>   	u32 base = engine->mmio_base;
>   
> +	GEM_WARN_ON(engine->class == COPY_ENGINE_CLASS);

Could use a comment up here to explain why, something like:

/* HW doesn't support relative MMIO on COPY_ENGINE and we don't
implement offset remap for all gens in SW because there is only 1
instance */
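
i.e. something along the lines of (placement sketch only, wording up to
you):

	/*
	 * HW doesn't support relative MMIO on COPY_ENGINE and we don't
	 * implement the offset remap for all gens in SW because there is
	 * only 1 instance.
	 */
	GEM_WARN_ON(engine->class == COPY_ENGINE_CLASS);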

> +
>   	/* Must match execlists_init_reg_state()! */
>   
> +	/* Common part */
>   	regs[CTX_CONTEXT_CONTROL] =
>   		i915_mmio_reg_offset(RING_CONTEXT_CONTROL(base));
>   	regs[CTX_RING_HEAD] = i915_mmio_reg_offset(RING_HEAD(base));
> @@ -820,13 +823,16 @@ static void virtual_update_register_offsets(u32 *regs,
>   	regs[CTX_BB_HEAD_U] = i915_mmio_reg_offset(RING_BBADDR_UDW(base));
>   	regs[CTX_BB_HEAD_L] = i915_mmio_reg_offset(RING_BBADDR(base));
>   	regs[CTX_BB_STATE] = i915_mmio_reg_offset(RING_BBSTATE(base));
> +
>   	regs[CTX_SECOND_BB_HEAD_U] =
>   		i915_mmio_reg_offset(RING_SBBADDR_UDW(base));
>   	regs[CTX_SECOND_BB_HEAD_L] = i915_mmio_reg_offset(RING_SBBADDR(base));
>   	regs[CTX_SECOND_BB_STATE] = i915_mmio_reg_offset(RING_SBBSTATE(base));
>   
> +	/* PPGTT part */
>   	regs[CTX_CTX_TIMESTAMP] =
>   		i915_mmio_reg_offset(RING_CTX_TIMESTAMP(base));
> +
>   	regs[CTX_PDP3_UDW] = i915_mmio_reg_offset(GEN8_RING_PDP_UDW(base, 3));
>   	regs[CTX_PDP3_LDW] = i915_mmio_reg_offset(GEN8_RING_PDP_LDW(base, 3));
>   	regs[CTX_PDP2_UDW] = i915_mmio_reg_offset(GEN8_RING_PDP_UDW(base, 2));
> @@ -3123,37 +3129,13 @@ static u32 intel_lr_indirect_ctx_offset(struct intel_engine_cs *engine)
>   	return indirect_ctx_offset;
>   }
>   
> -static struct i915_ppgtt *vm_alias(struct i915_address_space *vm)
> -{
> -	if (i915_is_ggtt(vm))
> -		return i915_vm_to_ggtt(vm)->alias;
> -	else
> -		return i915_vm_to_ppgtt(vm);
> -}
>   
> -static void execlists_init_reg_state(u32 *regs,
> -				     struct intel_context *ce,
> -				     struct intel_engine_cs *engine,
> -				     struct intel_ring *ring)
> +static void init_common_reg_state(u32 * const regs,
> +				  struct i915_ppgtt * const ppgtt,
> +				  struct intel_engine_cs *engine,
> +				  struct intel_ring *ring)
>   {
> -	struct i915_ppgtt *ppgtt = vm_alias(ce->vm);
> -	const bool rcs = engine->class == RENDER_CLASS;
>   	const u32 base = engine->mmio_base;
> -	const u32 lri_base = intel_engine_has_relative_mmio(engine) ?
> -		MI_LRI_CS_MMIO : 0;
> -
> -	/*
> -	 * A context is actually a big batch buffer with several
> -	 * MI_LOAD_REGISTER_IMM commands followed by (reg, value) pairs. The
> -	 * values we are setting here are only for the first context restore:
> -	 * on a subsequent save, the GPU will recreate this batchbuffer with new
> -	 * values (including all the missing MI_LOAD_REGISTER_IMM commands that
> -	 * we are not initializing here).
> -	 *
> -	 * Must keep consistent with virtual_update_register_offsets().
> -	 */
> -	regs[CTX_LRI_HEADER_0] = MI_LOAD_REGISTER_IMM(rcs ? 14 : 11) |
> -		MI_LRI_FORCE_POSTED | lri_base;
>   
>   	CTX_REG(regs, CTX_CONTEXT_CONTROL, RING_CONTEXT_CONTROL(base),
>   		_MASKED_BIT_DISABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT) |
> @@ -3171,39 +3153,43 @@ static void execlists_init_reg_state(u32 *regs,
>   	CTX_REG(regs, CTX_BB_HEAD_U, RING_BBADDR_UDW(base), 0);
>   	CTX_REG(regs, CTX_BB_HEAD_L, RING_BBADDR(base), 0);
>   	CTX_REG(regs, CTX_BB_STATE, RING_BBSTATE(base), RING_BB_PPGTT);
> -	CTX_REG(regs, CTX_SECOND_BB_HEAD_U, RING_SBBADDR_UDW(base), 0);
> -	CTX_REG(regs, CTX_SECOND_BB_HEAD_L, RING_SBBADDR(base), 0);
> -	CTX_REG(regs, CTX_SECOND_BB_STATE, RING_SBBSTATE(base), 0);
> -	if (rcs) {
> -		struct i915_ctx_workarounds *wa_ctx = &engine->wa_ctx;
> -
> -		CTX_REG(regs, CTX_RCS_INDIRECT_CTX, RING_INDIRECT_CTX(base), 0);
> -		CTX_REG(regs, CTX_RCS_INDIRECT_CTX_OFFSET,
> -			RING_INDIRECT_CTX_OFFSET(base), 0);
> -		if (wa_ctx->indirect_ctx.size) {
> -			u32 ggtt_offset = i915_ggtt_offset(wa_ctx->vma);
> +}
>   
> -			regs[CTX_RCS_INDIRECT_CTX + 1] =
> -				(ggtt_offset + wa_ctx->indirect_ctx.offset) |
> -				(wa_ctx->indirect_ctx.size / CACHELINE_BYTES);
> +static void init_wa_bb_reg_state(u32 * const regs,
> +				 struct intel_engine_cs *engine,
> +				 u32 pos_bb_per_ctx)
> +{
> +	struct i915_ctx_workarounds * const wa_ctx = &engine->wa_ctx;
> +	const u32 base = engine->mmio_base;
> +	const u32 pos_indirect_ctx = pos_bb_per_ctx + 2;
> +	const u32 pos_indirect_ctx_offset = pos_indirect_ctx + 2;
>   
> -			regs[CTX_RCS_INDIRECT_CTX_OFFSET + 1] =
> -				intel_lr_indirect_ctx_offset(engine) << 6;
> -		}
> +	CTX_REG(regs, pos_indirect_ctx, RING_INDIRECT_CTX(base), 0);
> +	CTX_REG(regs, pos_indirect_ctx_offset,
> +		RING_INDIRECT_CTX_OFFSET(base), 0);
> +	if (wa_ctx->indirect_ctx.size) {
> +		const u32 ggtt_offset = i915_ggtt_offset(wa_ctx->vma);
>   
> -		CTX_REG(regs, CTX_BB_PER_CTX_PTR, RING_BB_PER_CTX_PTR(base), 0);
> -		if (wa_ctx->per_ctx.size) {
> -			u32 ggtt_offset = i915_ggtt_offset(wa_ctx->vma);
> +		regs[pos_indirect_ctx + 1] =
> +			(ggtt_offset + wa_ctx->indirect_ctx.offset) |
> +			(wa_ctx->indirect_ctx.size / CACHELINE_BYTES);
>   
> -			regs[CTX_BB_PER_CTX_PTR + 1] =
> -				(ggtt_offset + wa_ctx->per_ctx.offset) | 0x01;
> -		}
> +		regs[pos_indirect_ctx_offset + 1] =
> +			intel_lr_indirect_ctx_offset(engine) << 6;
>   	}
>   
> -	regs[CTX_LRI_HEADER_1] =
> -		MI_LOAD_REGISTER_IMM(9) | MI_LRI_FORCE_POSTED | lri_base;
> +	CTX_REG(regs, pos_bb_per_ctx, RING_BB_PER_CTX_PTR(base), 0);
> +	if (wa_ctx->per_ctx.size) {
> +		const u32 ggtt_offset = i915_ggtt_offset(wa_ctx->vma);
>   
> -	CTX_REG(regs, CTX_CTX_TIMESTAMP, RING_CTX_TIMESTAMP(base), 0);
> +		regs[pos_bb_per_ctx + 1] =
> +			(ggtt_offset + wa_ctx->per_ctx.offset) | 0x01;
> +	}
> +}
> +
> +static void init_ppgtt_reg_state(u32 *regs, u32 base,
> +				 struct i915_ppgtt *ppgtt)
> +{
>   	/* PDP values well be assigned later if needed */
>   	CTX_REG(regs, CTX_PDP3_UDW, GEN8_RING_PDP_UDW(base, 3), 0);
>   	CTX_REG(regs, CTX_PDP3_LDW, GEN8_RING_PDP_LDW(base, 3), 0);
> @@ -3226,6 +3212,53 @@ static void execlists_init_reg_state(u32 *regs,
>   		ASSIGN_CTX_PDP(ppgtt, regs, 1);
>   		ASSIGN_CTX_PDP(ppgtt, regs, 0);
>   	}
> +}
> +
> +static struct i915_ppgtt *vm_alias(struct i915_address_space *vm)
> +{
> +	if (i915_is_ggtt(vm))
> +		return i915_vm_to_ggtt(vm)->alias;
> +	else
> +		return i915_vm_to_ppgtt(vm);
> +}
> +
> +static void gen8_init_reg_state(u32 * const regs,
> +				struct intel_context *ce,
> +				struct intel_engine_cs *engine,
> +				struct intel_ring *ring)
> +{
> +	struct i915_ppgtt * const ppgtt = vm_alias(ce->vm);
> +	const bool rcs = engine->class == RENDER_CLASS;
> +	const u32 base = engine->mmio_base;
> +	const u32 lri_base = intel_engine_has_relative_mmio(engine) ?
> +		MI_LRI_CS_MMIO : 0;
> +
> +	/*
> +	 * A context is actually a big batch buffer with several
> +	 * MI_LOAD_REGISTER_IMM commands followed by (reg, value) pairs. The
> +	 * values we are setting here are only for the first context restore:
> +	 * on a subsequent save, the GPU will recreate this batchbuffer with new
> +	 * values (including all the missing MI_LOAD_REGISTER_IMM commands that
> +	 * we are not initializing here).
> +	 *
> +	 * Must keep consistent with virtual_update_register_offsets().
> +	 */
> +	regs[CTX_LRI_HEADER_0] = MI_LOAD_REGISTER_IMM(rcs ? 14 : 11) |
> +		MI_LRI_FORCE_POSTED | lri_base;
> +
> +	init_common_reg_state(regs, ppgtt, engine, ring);
> +	CTX_REG(regs, CTX_SECOND_BB_HEAD_U, RING_SBBADDR_UDW(base), 0);
> +	CTX_REG(regs, CTX_SECOND_BB_HEAD_L, RING_SBBADDR(base), 0);
> +	CTX_REG(regs, CTX_SECOND_BB_STATE, RING_SBBSTATE(base), 0);
> +	if (rcs)
> +		init_wa_bb_reg_state(regs, engine, CTX_BB_PER_CTX_PTR);
> +
> +	regs[CTX_LRI_HEADER_1] =
> +		MI_LOAD_REGISTER_IMM(9) | MI_LRI_FORCE_POSTED | lri_base;
> +
> +	CTX_REG(regs, CTX_CTX_TIMESTAMP, RING_CTX_TIMESTAMP(base), 0);
> +
> +	init_ppgtt_reg_state(regs, base, ppgtt);
>   
>   	if (rcs) {
>   		regs[CTX_LRI_HEADER_2] = MI_LOAD_REGISTER_IMM(1) | lri_base;
> @@ -3237,6 +3270,61 @@ static void execlists_init_reg_state(u32 *regs,
>   		regs[CTX_END] |= BIT(0);
>   }
>   
> +static void gen12_init_reg_state(u32 * const regs,
> +				 struct intel_context *ce,
> +				 struct intel_engine_cs *engine,
> +				 struct intel_ring *ring)
> +{
> +	struct i915_ppgtt * const ppgtt = i915_vm_to_ppgtt(ce->vm);
> +	const bool rcs = engine->class == RENDER_CLASS;
> +	const u32 base = engine->mmio_base;
> +	const u32 lri_base = intel_engine_has_relative_mmio(engine) ?
> +		MI_LRI_CS_MMIO : 0;
> +
> +	GEM_DEBUG_EXEC(DRM_INFO_ONCE("Using GEN12 Register State Context\n"));
> +
> +	regs[CTX_LRI_HEADER_0] = MI_LOAD_REGISTER_IMM(rcs ? 11 : 9) |

I would've kept a constant 13 here since we'll need to set the 13th 
register (that's the semaphore reg you had in the previous revision), 
but anyway we can bump it when that support is added in.
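
For illustration only (not something this patch needs to change), the
constant-count variant would look roughly like:

	/* 13 regs, leaving room for the semaphore register */
	regs[CTX_LRI_HEADER_0] = MI_LOAD_REGISTER_IMM(13) |
		MI_LRI_FORCE_POSTED | lri_base;

with the extra register/value pairs filled in once that support lands.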

Daniele

> +		MI_LRI_FORCE_POSTED | lri_base;
> +
> +	init_common_reg_state(regs, ppgtt, engine, ring);
> +
> +	/* We want ctx_ptr for all engines to be set */
> +	init_wa_bb_reg_state(regs, engine, GEN12_CTX_BB_PER_CTX_PTR);
> +
> +	regs[CTX_LRI_HEADER_1] = MI_LOAD_REGISTER_IMM(9) |
> +		MI_LRI_FORCE_POSTED | lri_base;
> +
> +	CTX_REG(regs, CTX_CTX_TIMESTAMP, RING_CTX_TIMESTAMP(base), 0);
> +
> +	init_ppgtt_reg_state(regs, base, ppgtt);
> +
> +	if (rcs) {
> +		regs[GEN12_CTX_LRI_HEADER_3] = MI_LOAD_REGISTER_IMM(1) |
> +			lri_base;
> +		CTX_REG(regs, CTX_R_PWR_CLK_STATE, GEN8_R_PWR_CLK_STATE, 0);
> +
> +		/* TODO: oa_init_reg_state ? */
> +	}
> +}
> +
> +static void execlists_init_reg_state(u32 *regs,
> +				     struct intel_context *ce,
> +				     struct intel_engine_cs *engine,
> +				     struct intel_ring *ring)
> +{
> +	/* A context is actually a big batch buffer with several
> +	 * MI_LOAD_REGISTER_IMM commands followed by (reg, value) pairs. The
> +	 * values we are setting here are only for the first context restore:
> +	 * on a subsequent save, the GPU will recreate this batchbuffer with new
> +	 * values (including all the missing MI_LOAD_REGISTER_IMM commands that
> +	 * we are not initializing here).
> +	 */
> +	if (INTEL_GEN(engine->i915) >= 12)
> +		gen12_init_reg_state(regs, ce, engine, ring);
> +	else
> +		gen8_init_reg_state(regs, ce, engine, ring);
> +}
> +
>   static int
>   populate_lr_context(struct intel_context *ce,
>   		    struct drm_i915_gem_object *ctx_obj,
> diff --git a/drivers/gpu/drm/i915/gt/intel_lrc_reg.h b/drivers/gpu/drm/i915/gt/intel_lrc_reg.h
> index b8f20ad71169..68caf8541866 100644
> --- a/drivers/gpu/drm/i915/gt/intel_lrc_reg.h
> +++ b/drivers/gpu/drm/i915/gt/intel_lrc_reg.h
> @@ -9,7 +9,7 @@
>   
>   #include <linux/types.h>
>   
> -/* GEN8+ Reg State Context */
> +/* GEN8 to GEN11 Reg State Context */
>   #define CTX_LRI_HEADER_0		0x01
>   #define CTX_CONTEXT_CONTROL		0x02
>   #define CTX_RING_HEAD			0x04
> @@ -39,6 +39,10 @@
>   #define CTX_R_PWR_CLK_STATE		0x42
>   #define CTX_END				0x44
>   
> +/* GEN12+ Reg State Context */
> +#define GEN12_CTX_BB_PER_CTX_PTR		0x12
> +#define GEN12_CTX_LRI_HEADER_3			0x41
> +
>   #define CTX_REG(reg_state, pos, reg, val) do { \
>   	u32 *reg_state__ = (reg_state); \
>   	const u32 pos__ = (pos); \
>
Chris Wilson Sept. 6, 2019, 4:29 p.m. UTC | #2
Quoting Daniele Ceraolo Spurio (2019-09-06 16:42:50)
> 
> On 9/6/19 5:23 AM, Mika Kuoppala wrote:
> > --- a/drivers/gpu/drm/i915/gt/intel_lrc.c
> > +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
> > @@ -808,8 +808,11 @@ static void virtual_update_register_offsets(u32 *regs,
> >   {
> >       u32 base = engine->mmio_base;
> >   
> > +     GEM_WARN_ON(engine->class == COPY_ENGINE_CLASS);
> 
> Could use a comment up here to explain why, something like:
> 
> /* HW doesn't support relative MMIO on COPY_ENGINE and we don't
> implement offset remap for all gens in SW because there is only 1
> instance */

What's the point of the check anyway? If the LRI are not using
relative addressing, we need to fixup the offsets. Aiui, it should just
be GEM_BUG_ON(intel_engine_has_relative_mmio(engine)). That we have only
a single instance in a particular class just means we never even call
the update function currently.
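
i.e. roughly:

	/* The fixup is only needed (and valid) for absolute LRI offsets */
	GEM_BUG_ON(intel_engine_has_relative_mmio(engine));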
-Chris
Chris Wilson Sept. 6, 2019, 4:55 p.m. UTC | #3
Quoting Daniele Ceraolo Spurio (2019-09-06 16:42:50)
> 
> > +static void gen12_init_reg_state(u32 * const regs,
> > +                              struct intel_context *ce,
> > +                              struct intel_engine_cs *engine,
> > +                              struct intel_ring *ring)
> > +{
> > +     struct i915_ppgtt * const ppgtt = i915_vm_to_ppgtt(ce->vm);
> > +     const bool rcs = engine->class == RENDER_CLASS;
> > +     const u32 base = engine->mmio_base;
> > +     const u32 lri_base = intel_engine_has_relative_mmio(engine) ?
> > +             MI_LRI_CS_MMIO : 0;
> > +
> > +     GEM_DEBUG_EXEC(DRM_INFO_ONCE("Using GEN12 Register State Context\n"));
> > +
> > +     regs[CTX_LRI_HEADER_0] = MI_LOAD_REGISTER_IMM(rcs ? 11 : 9) |
> 
> I would've kept a constant 13 here since we'll need to set the 13th 
> register (that's the semaphore reg you had in the previous revision), 
> but anyway we can bump it when that support is added in.

I left this for a future task. Early next week I hope to have a new
selftest ready that enforces that our init_reg_state() matches the HW
layout. For now, this gets us onto the next error we need to debug.

Thanks for the patches and reviewing,
-Chris

Patch

diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index 6c68ed2bf3d2..e9c873877253 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -808,8 +808,11 @@  static void virtual_update_register_offsets(u32 *regs,
 {
 	u32 base = engine->mmio_base;
 
+	GEM_WARN_ON(engine->class == COPY_ENGINE_CLASS);
+
 	/* Must match execlists_init_reg_state()! */
 
+	/* Common part */
 	regs[CTX_CONTEXT_CONTROL] =
 		i915_mmio_reg_offset(RING_CONTEXT_CONTROL(base));
 	regs[CTX_RING_HEAD] = i915_mmio_reg_offset(RING_HEAD(base));
@@ -820,13 +823,16 @@  static void virtual_update_register_offsets(u32 *regs,
 	regs[CTX_BB_HEAD_U] = i915_mmio_reg_offset(RING_BBADDR_UDW(base));
 	regs[CTX_BB_HEAD_L] = i915_mmio_reg_offset(RING_BBADDR(base));
 	regs[CTX_BB_STATE] = i915_mmio_reg_offset(RING_BBSTATE(base));
+
 	regs[CTX_SECOND_BB_HEAD_U] =
 		i915_mmio_reg_offset(RING_SBBADDR_UDW(base));
 	regs[CTX_SECOND_BB_HEAD_L] = i915_mmio_reg_offset(RING_SBBADDR(base));
 	regs[CTX_SECOND_BB_STATE] = i915_mmio_reg_offset(RING_SBBSTATE(base));
 
+	/* PPGTT part */
 	regs[CTX_CTX_TIMESTAMP] =
 		i915_mmio_reg_offset(RING_CTX_TIMESTAMP(base));
+
 	regs[CTX_PDP3_UDW] = i915_mmio_reg_offset(GEN8_RING_PDP_UDW(base, 3));
 	regs[CTX_PDP3_LDW] = i915_mmio_reg_offset(GEN8_RING_PDP_LDW(base, 3));
 	regs[CTX_PDP2_UDW] = i915_mmio_reg_offset(GEN8_RING_PDP_UDW(base, 2));
@@ -3123,37 +3129,13 @@  static u32 intel_lr_indirect_ctx_offset(struct intel_engine_cs *engine)
 	return indirect_ctx_offset;
 }
 
-static struct i915_ppgtt *vm_alias(struct i915_address_space *vm)
-{
-	if (i915_is_ggtt(vm))
-		return i915_vm_to_ggtt(vm)->alias;
-	else
-		return i915_vm_to_ppgtt(vm);
-}
 
-static void execlists_init_reg_state(u32 *regs,
-				     struct intel_context *ce,
-				     struct intel_engine_cs *engine,
-				     struct intel_ring *ring)
+static void init_common_reg_state(u32 * const regs,
+				  struct i915_ppgtt * const ppgtt,
+				  struct intel_engine_cs *engine,
+				  struct intel_ring *ring)
 {
-	struct i915_ppgtt *ppgtt = vm_alias(ce->vm);
-	const bool rcs = engine->class == RENDER_CLASS;
 	const u32 base = engine->mmio_base;
-	const u32 lri_base = intel_engine_has_relative_mmio(engine) ?
-		MI_LRI_CS_MMIO : 0;
-
-	/*
-	 * A context is actually a big batch buffer with several
-	 * MI_LOAD_REGISTER_IMM commands followed by (reg, value) pairs. The
-	 * values we are setting here are only for the first context restore:
-	 * on a subsequent save, the GPU will recreate this batchbuffer with new
-	 * values (including all the missing MI_LOAD_REGISTER_IMM commands that
-	 * we are not initializing here).
-	 *
-	 * Must keep consistent with virtual_update_register_offsets().
-	 */
-	regs[CTX_LRI_HEADER_0] = MI_LOAD_REGISTER_IMM(rcs ? 14 : 11) |
-		MI_LRI_FORCE_POSTED | lri_base;
 
 	CTX_REG(regs, CTX_CONTEXT_CONTROL, RING_CONTEXT_CONTROL(base),
 		_MASKED_BIT_DISABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT) |
@@ -3171,39 +3153,43 @@  static void execlists_init_reg_state(u32 *regs,
 	CTX_REG(regs, CTX_BB_HEAD_U, RING_BBADDR_UDW(base), 0);
 	CTX_REG(regs, CTX_BB_HEAD_L, RING_BBADDR(base), 0);
 	CTX_REG(regs, CTX_BB_STATE, RING_BBSTATE(base), RING_BB_PPGTT);
-	CTX_REG(regs, CTX_SECOND_BB_HEAD_U, RING_SBBADDR_UDW(base), 0);
-	CTX_REG(regs, CTX_SECOND_BB_HEAD_L, RING_SBBADDR(base), 0);
-	CTX_REG(regs, CTX_SECOND_BB_STATE, RING_SBBSTATE(base), 0);
-	if (rcs) {
-		struct i915_ctx_workarounds *wa_ctx = &engine->wa_ctx;
-
-		CTX_REG(regs, CTX_RCS_INDIRECT_CTX, RING_INDIRECT_CTX(base), 0);
-		CTX_REG(regs, CTX_RCS_INDIRECT_CTX_OFFSET,
-			RING_INDIRECT_CTX_OFFSET(base), 0);
-		if (wa_ctx->indirect_ctx.size) {
-			u32 ggtt_offset = i915_ggtt_offset(wa_ctx->vma);
+}
 
-			regs[CTX_RCS_INDIRECT_CTX + 1] =
-				(ggtt_offset + wa_ctx->indirect_ctx.offset) |
-				(wa_ctx->indirect_ctx.size / CACHELINE_BYTES);
+static void init_wa_bb_reg_state(u32 * const regs,
+				 struct intel_engine_cs *engine,
+				 u32 pos_bb_per_ctx)
+{
+	struct i915_ctx_workarounds * const wa_ctx = &engine->wa_ctx;
+	const u32 base = engine->mmio_base;
+	const u32 pos_indirect_ctx = pos_bb_per_ctx + 2;
+	const u32 pos_indirect_ctx_offset = pos_indirect_ctx + 2;
 
-			regs[CTX_RCS_INDIRECT_CTX_OFFSET + 1] =
-				intel_lr_indirect_ctx_offset(engine) << 6;
-		}
+	CTX_REG(regs, pos_indirect_ctx, RING_INDIRECT_CTX(base), 0);
+	CTX_REG(regs, pos_indirect_ctx_offset,
+		RING_INDIRECT_CTX_OFFSET(base), 0);
+	if (wa_ctx->indirect_ctx.size) {
+		const u32 ggtt_offset = i915_ggtt_offset(wa_ctx->vma);
 
-		CTX_REG(regs, CTX_BB_PER_CTX_PTR, RING_BB_PER_CTX_PTR(base), 0);
-		if (wa_ctx->per_ctx.size) {
-			u32 ggtt_offset = i915_ggtt_offset(wa_ctx->vma);
+		regs[pos_indirect_ctx + 1] =
+			(ggtt_offset + wa_ctx->indirect_ctx.offset) |
+			(wa_ctx->indirect_ctx.size / CACHELINE_BYTES);
 
-			regs[CTX_BB_PER_CTX_PTR + 1] =
-				(ggtt_offset + wa_ctx->per_ctx.offset) | 0x01;
-		}
+		regs[pos_indirect_ctx_offset + 1] =
+			intel_lr_indirect_ctx_offset(engine) << 6;
 	}
 
-	regs[CTX_LRI_HEADER_1] =
-		MI_LOAD_REGISTER_IMM(9) | MI_LRI_FORCE_POSTED | lri_base;
+	CTX_REG(regs, pos_bb_per_ctx, RING_BB_PER_CTX_PTR(base), 0);
+	if (wa_ctx->per_ctx.size) {
+		const u32 ggtt_offset = i915_ggtt_offset(wa_ctx->vma);
 
-	CTX_REG(regs, CTX_CTX_TIMESTAMP, RING_CTX_TIMESTAMP(base), 0);
+		regs[pos_bb_per_ctx + 1] =
+			(ggtt_offset + wa_ctx->per_ctx.offset) | 0x01;
+	}
+}
+
+static void init_ppgtt_reg_state(u32 *regs, u32 base,
+				 struct i915_ppgtt *ppgtt)
+{
 	/* PDP values well be assigned later if needed */
 	CTX_REG(regs, CTX_PDP3_UDW, GEN8_RING_PDP_UDW(base, 3), 0);
 	CTX_REG(regs, CTX_PDP3_LDW, GEN8_RING_PDP_LDW(base, 3), 0);
@@ -3226,6 +3212,53 @@  static void execlists_init_reg_state(u32 *regs,
 		ASSIGN_CTX_PDP(ppgtt, regs, 1);
 		ASSIGN_CTX_PDP(ppgtt, regs, 0);
 	}
+}
+
+static struct i915_ppgtt *vm_alias(struct i915_address_space *vm)
+{
+	if (i915_is_ggtt(vm))
+		return i915_vm_to_ggtt(vm)->alias;
+	else
+		return i915_vm_to_ppgtt(vm);
+}
+
+static void gen8_init_reg_state(u32 * const regs,
+				struct intel_context *ce,
+				struct intel_engine_cs *engine,
+				struct intel_ring *ring)
+{
+	struct i915_ppgtt * const ppgtt = vm_alias(ce->vm);
+	const bool rcs = engine->class == RENDER_CLASS;
+	const u32 base = engine->mmio_base;
+	const u32 lri_base = intel_engine_has_relative_mmio(engine) ?
+		MI_LRI_CS_MMIO : 0;
+
+	/*
+	 * A context is actually a big batch buffer with several
+	 * MI_LOAD_REGISTER_IMM commands followed by (reg, value) pairs. The
+	 * values we are setting here are only for the first context restore:
+	 * on a subsequent save, the GPU will recreate this batchbuffer with new
+	 * values (including all the missing MI_LOAD_REGISTER_IMM commands that
+	 * we are not initializing here).
+	 *
+	 * Must keep consistent with virtual_update_register_offsets().
+	 */
+	regs[CTX_LRI_HEADER_0] = MI_LOAD_REGISTER_IMM(rcs ? 14 : 11) |
+		MI_LRI_FORCE_POSTED | lri_base;
+
+	init_common_reg_state(regs, ppgtt, engine, ring);
+	CTX_REG(regs, CTX_SECOND_BB_HEAD_U, RING_SBBADDR_UDW(base), 0);
+	CTX_REG(regs, CTX_SECOND_BB_HEAD_L, RING_SBBADDR(base), 0);
+	CTX_REG(regs, CTX_SECOND_BB_STATE, RING_SBBSTATE(base), 0);
+	if (rcs)
+		init_wa_bb_reg_state(regs, engine, CTX_BB_PER_CTX_PTR);
+
+	regs[CTX_LRI_HEADER_1] =
+		MI_LOAD_REGISTER_IMM(9) | MI_LRI_FORCE_POSTED | lri_base;
+
+	CTX_REG(regs, CTX_CTX_TIMESTAMP, RING_CTX_TIMESTAMP(base), 0);
+
+	init_ppgtt_reg_state(regs, base, ppgtt);
 
 	if (rcs) {
 		regs[CTX_LRI_HEADER_2] = MI_LOAD_REGISTER_IMM(1) | lri_base;
@@ -3237,6 +3270,61 @@  static void execlists_init_reg_state(u32 *regs,
 		regs[CTX_END] |= BIT(0);
 }
 
+static void gen12_init_reg_state(u32 * const regs,
+				 struct intel_context *ce,
+				 struct intel_engine_cs *engine,
+				 struct intel_ring *ring)
+{
+	struct i915_ppgtt * const ppgtt = i915_vm_to_ppgtt(ce->vm);
+	const bool rcs = engine->class == RENDER_CLASS;
+	const u32 base = engine->mmio_base;
+	const u32 lri_base = intel_engine_has_relative_mmio(engine) ?
+		MI_LRI_CS_MMIO : 0;
+
+	GEM_DEBUG_EXEC(DRM_INFO_ONCE("Using GEN12 Register State Context\n"));
+
+	regs[CTX_LRI_HEADER_0] = MI_LOAD_REGISTER_IMM(rcs ? 11 : 9) |
+		MI_LRI_FORCE_POSTED | lri_base;
+
+	init_common_reg_state(regs, ppgtt, engine, ring);
+
+	/* We want ctx_ptr for all engines to be set */
+	init_wa_bb_reg_state(regs, engine, GEN12_CTX_BB_PER_CTX_PTR);
+
+	regs[CTX_LRI_HEADER_1] = MI_LOAD_REGISTER_IMM(9) |
+		MI_LRI_FORCE_POSTED | lri_base;
+
+	CTX_REG(regs, CTX_CTX_TIMESTAMP, RING_CTX_TIMESTAMP(base), 0);
+
+	init_ppgtt_reg_state(regs, base, ppgtt);
+
+	if (rcs) {
+		regs[GEN12_CTX_LRI_HEADER_3] = MI_LOAD_REGISTER_IMM(1) |
+			lri_base;
+		CTX_REG(regs, CTX_R_PWR_CLK_STATE, GEN8_R_PWR_CLK_STATE, 0);
+
+		/* TODO: oa_init_reg_state ? */
+	}
+}
+
+static void execlists_init_reg_state(u32 *regs,
+				     struct intel_context *ce,
+				     struct intel_engine_cs *engine,
+				     struct intel_ring *ring)
+{
+	/* A context is actually a big batch buffer with several
+	 * MI_LOAD_REGISTER_IMM commands followed by (reg, value) pairs. The
+	 * values we are setting here are only for the first context restore:
+	 * on a subsequent save, the GPU will recreate this batchbuffer with new
+	 * values (including all the missing MI_LOAD_REGISTER_IMM commands that
+	 * we are not initializing here).
+	 */
+	if (INTEL_GEN(engine->i915) >= 12)
+		gen12_init_reg_state(regs, ce, engine, ring);
+	else
+		gen8_init_reg_state(regs, ce, engine, ring);
+}
+
 static int
 populate_lr_context(struct intel_context *ce,
 		    struct drm_i915_gem_object *ctx_obj,
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc_reg.h b/drivers/gpu/drm/i915/gt/intel_lrc_reg.h
index b8f20ad71169..68caf8541866 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc_reg.h
+++ b/drivers/gpu/drm/i915/gt/intel_lrc_reg.h
@@ -9,7 +9,7 @@ 
 
 #include <linux/types.h>
 
-/* GEN8+ Reg State Context */
+/* GEN8 to GEN11 Reg State Context */
 #define CTX_LRI_HEADER_0		0x01
 #define CTX_CONTEXT_CONTROL		0x02
 #define CTX_RING_HEAD			0x04
@@ -39,6 +39,10 @@ 
 #define CTX_R_PWR_CLK_STATE		0x42
 #define CTX_END				0x44
 
+/* GEN12+ Reg State Context */
+#define GEN12_CTX_BB_PER_CTX_PTR		0x12
+#define GEN12_CTX_LRI_HEADER_3			0x41
+
 #define CTX_REG(reg_state, pos, reg, val) do { \
 	u32 *reg_state__ = (reg_state); \
 	const u32 pos__ = (pos); \