@@ -2999,28 +2999,12 @@ static u32 intel_lr_indirect_ctx_offset(struct intel_engine_cs *engine)
return indirect_ctx_offset;
}
-static void execlists_init_reg_state(u32 *regs,
- struct intel_context *ce,
- struct intel_engine_cs *engine,
- struct intel_ring *ring)
+static void init_common_reg_state(u32 *regs,
+ struct intel_engine_cs *engine,
+ struct intel_ring *ring)
{
- struct i915_ppgtt *ppgtt = i915_vm_to_ppgtt(ce->vm);
- bool rcs = engine->class == RENDER_CLASS;
u32 base = engine->mmio_base;
- /*
- * A context is actually a big batch buffer with several
- * MI_LOAD_REGISTER_IMM commands followed by (reg, value) pairs. The
- * values we are setting here are only for the first context restore:
- * on a subsequent save, the GPU will recreate this batchbuffer with new
- * values (including all the missing MI_LOAD_REGISTER_IMM commands that
- * we are not initializing here).
- *
- * Must keep consistent with virtual_update_register_offsets().
- */
- regs[CTX_LRI_HEADER_0] = MI_LOAD_REGISTER_IMM(rcs ? 14 : 11) |
- MI_LRI_FORCE_POSTED;
-
CTX_REG(regs, CTX_CONTEXT_CONTROL, RING_CONTEXT_CONTROL(base),
_MASKED_BIT_DISABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT) |
_MASKED_BIT_ENABLE(CTX_CTRL_INHIBIT_SYN_CTX_SWITCH));
@@ -3037,38 +3021,44 @@ static void execlists_init_reg_state(u32 *regs,
CTX_REG(regs, CTX_BB_HEAD_U, RING_BBADDR_UDW(base), 0);
CTX_REG(regs, CTX_BB_HEAD_L, RING_BBADDR(base), 0);
CTX_REG(regs, CTX_BB_STATE, RING_BBSTATE(base), RING_BB_PPGTT);
- CTX_REG(regs, CTX_SECOND_BB_HEAD_U, RING_SBBADDR_UDW(base), 0);
- CTX_REG(regs, CTX_SECOND_BB_HEAD_L, RING_SBBADDR(base), 0);
- CTX_REG(regs, CTX_SECOND_BB_STATE, RING_SBBSTATE(base), 0);
- if (rcs) {
- struct i915_ctx_workarounds *wa_ctx = &engine->wa_ctx;
-
- CTX_REG(regs, CTX_RCS_INDIRECT_CTX, RING_INDIRECT_CTX(base), 0);
- CTX_REG(regs, CTX_RCS_INDIRECT_CTX_OFFSET,
- RING_INDIRECT_CTX_OFFSET(base), 0);
- if (wa_ctx->indirect_ctx.size) {
- u32 ggtt_offset = i915_ggtt_offset(wa_ctx->vma);
+}
- regs[CTX_RCS_INDIRECT_CTX + 1] =
- (ggtt_offset + wa_ctx->indirect_ctx.offset) |
- (wa_ctx->indirect_ctx.size / CACHELINE_BYTES);
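+
+/*
+ * Each CTX_REG() entry in the register state image is a (reg, value) dword
+ * pair, so consecutive entries sit two dwords apart. On both the gen8 and
+ * gen12 layouts the INDIRECT_CTX and INDIRECT_CTX_OFFSET slots immediately
+ * follow the BB_PER_CTX_PTR slot, so only that position needs to be passed
+ * in and the other two offsets are derived from it.
+ */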
+static void init_wa_bb_reg_state(u32 *regs,
+ struct intel_engine_cs *engine,
+ u32 pos_bb_per_ctx)
+{
+ struct i915_ctx_workarounds *wa_ctx = &engine->wa_ctx;
+ u32 base = engine->mmio_base;
+ u32 pos_indirect_ctx = pos_bb_per_ctx + 2;
+ u32 pos_indirect_ctx_offset = pos_indirect_ctx + 2;
- regs[CTX_RCS_INDIRECT_CTX_OFFSET + 1] =
- intel_lr_indirect_ctx_offset(engine) << 6;
- }
+ GEM_BUG_ON(engine->id != RCS0);
+ CTX_REG(regs, pos_indirect_ctx, RING_INDIRECT_CTX(base), 0);
+ CTX_REG(regs, pos_indirect_ctx_offset,
+ RING_INDIRECT_CTX_OFFSET(base), 0);
+ if (wa_ctx->indirect_ctx.size) {
+ u32 ggtt_offset = i915_ggtt_offset(wa_ctx->vma);
- CTX_REG(regs, CTX_BB_PER_CTX_PTR, RING_BB_PER_CTX_PTR(base), 0);
- if (wa_ctx->per_ctx.size) {
- u32 ggtt_offset = i915_ggtt_offset(wa_ctx->vma);
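+
+ /*
+ * The batch is cacheline aligned, so the low bits of its GGTT
+ * address double as the size field, expressed in cachelines,
+ * and the two can simply be or'ed together.
+ */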
+ regs[pos_indirect_ctx + 1] =
+ (ggtt_offset + wa_ctx->indirect_ctx.offset) |
+ (wa_ctx->indirect_ctx.size / CACHELINE_BYTES);
- regs[CTX_BB_PER_CTX_PTR + 1] =
- (ggtt_offset + wa_ctx->per_ctx.offset) | 0x01;
- }
+ regs[pos_indirect_ctx_offset + 1] =
+ intel_lr_indirect_ctx_offset(engine) << 6;
}
- regs[CTX_LRI_HEADER_1] = MI_LOAD_REGISTER_IMM(9) | MI_LRI_FORCE_POSTED;
+ CTX_REG(regs, pos_bb_per_ctx, RING_BB_PER_CTX_PTR(base), 0);
+ if (wa_ctx->per_ctx.size) {
+ u32 ggtt_offset = i915_ggtt_offset(wa_ctx->vma);
- CTX_REG(regs, CTX_CTX_TIMESTAMP, RING_CTX_TIMESTAMP(base), 0);
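+
+ /* Bit 0 marks the per-context batch buffer pointer as valid. */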
+ regs[pos_bb_per_ctx + 1] =
+ (ggtt_offset + wa_ctx->per_ctx.offset) | 0x01;
+ }
+}
+
+static void init_ppgtt_reg_state(u32 *regs, u32 base,
+ struct i915_ppgtt *ppgtt)
+{
/* PDP values will be assigned later if needed */
CTX_REG(regs, CTX_PDP3_UDW, GEN8_RING_PDP_UDW(base, 3), 0);
CTX_REG(regs, CTX_PDP3_LDW, GEN8_RING_PDP_LDW(base, 3), 0);
@@ -3091,6 +3081,32 @@ static void execlists_init_reg_state(u32 *regs,
ASSIGN_CTX_PDP(ppgtt, regs, 1);
ASSIGN_CTX_PDP(ppgtt, regs, 0);
}
+}
+
+static void gen8_init_reg_state(u32 *regs,
+ struct intel_context *ce,
+ struct intel_engine_cs *engine,
+ struct intel_ring *ring)
+{
+ struct i915_ppgtt *ppgtt = i915_vm_to_ppgtt(ce->vm);
+ bool rcs = engine->class == RENDER_CLASS;
+ u32 base = engine->mmio_base;
+
+ regs[CTX_LRI_HEADER_0] = MI_LOAD_REGISTER_IMM(rcs ? 14 : 11) |
+ MI_LRI_FORCE_POSTED;
+
+ init_common_reg_state(regs, engine, ring);
+ CTX_REG(regs, CTX_SECOND_BB_HEAD_U, RING_SBBADDR_UDW(base), 0);
+ CTX_REG(regs, CTX_SECOND_BB_HEAD_L, RING_SBBADDR(base), 0);
+ CTX_REG(regs, CTX_SECOND_BB_STATE, RING_SBBSTATE(base), 0);
+ if (rcs)
+ init_wa_bb_reg_state(regs, engine, CTX_BB_PER_CTX_PTR);
+
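+ /* Second LRI: CTX_TIMESTAMP plus the four PDP UDW/LDW pairs (9 regs). */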
+ regs[CTX_LRI_HEADER_1] = MI_LOAD_REGISTER_IMM(9) | MI_LRI_FORCE_POSTED;
+
+ CTX_REG(regs, CTX_CTX_TIMESTAMP, RING_CTX_TIMESTAMP(base), 0);
+
+ init_ppgtt_reg_state(regs, base, ppgtt);
if (rcs) {
regs[CTX_LRI_HEADER_2] = MI_LOAD_REGISTER_IMM(1);
@@ -3102,6 +3118,58 @@ static void execlists_init_reg_state(u32 *regs,
regs[CTX_END] |= BIT(0);
}
+static void gen12_init_reg_state(u32 *regs,
+ struct intel_context *ce,
+ struct intel_engine_cs *engine,
+ struct intel_ring *ring)
+{
+ struct i915_ppgtt *ppgtt = i915_vm_to_ppgtt(ce->vm);
+ bool rcs = engine->class == RENDER_CLASS;
+ u32 base = engine->mmio_base;
+
+ GEM_DEBUG_EXEC(DRM_INFO_ONCE("Using GEN12 Register State Context\n"));
+
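+ /*
+ * Unlike gen8 there is no RCS special case here: the first LRI is
+ * programmed with the same register count for every engine class,
+ * and only the wa_bb pointers below remain RCS-specific.
+ */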
+ regs[GEN12_CTX_LRI_HEADER_0] = MI_LOAD_REGISTER_IMM(13) |
+ MI_LRI_FORCE_POSTED;
+
+ init_common_reg_state(regs, engine, ring);
+ if (rcs)
+ init_wa_bb_reg_state(regs, engine, GEN12_CTX_BB_PER_CTX_PTR);
+
+ regs[GEN12_CTX_LRI_HEADER_1] = MI_LOAD_REGISTER_IMM(9) |
+ MI_LRI_FORCE_POSTED;
+
+ CTX_REG(regs, GEN12_CTX_CTX_TIMESTAMP, RING_CTX_TIMESTAMP(base), 0);
+
+ init_ppgtt_reg_state(regs, base, ppgtt);
+
+ if (rcs) {
+ regs[GEN12_CTX_LRI_HEADER_3] = MI_LOAD_REGISTER_IMM(1);
+ CTX_REG(regs, GEN12_CTX_R_PWR_CLK_STATE, GEN8_R_PWR_CLK_STATE,
+ 0);
+
+ /* TODO: oa_init_reg_state ? */
+ }
+}
+
+static void execlists_init_reg_state(u32 *regs,
+ struct intel_context *ce,
+ struct intel_engine_cs *engine,
+ struct intel_ring *ring)
+{
+ /*
+ * A context is actually a big batch buffer with several
+ * MI_LOAD_REGISTER_IMM commands followed by (reg, value) pairs. The
+ * values we are setting here are only for the first context restore:
+ * on a subsequent save, the GPU will recreate this batchbuffer with new
+ * values (including all the missing MI_LOAD_REGISTER_IMM commands that
+ * we are not initializing here).
+ *
+ * Must keep consistent with virtual_update_register_offsets().
+ */
+ if (INTEL_GEN(engine->i915) >= 12)
+ gen12_init_reg_state(regs, ce, engine, ring);
+ else
+ gen8_init_reg_state(regs, ce, engine, ring);
+}
+
static int
populate_lr_context(struct intel_context *ce,
struct drm_i915_gem_object *ctx_obj,
@@ -49,6 +49,8 @@ struct intel_engine_cs;
#define EL_CTRL_LOAD (1 << 0)
+#define GEN12_ENGINE_SEMAPHORE_TOKEN(engine) _MMIO((engine)->mmio_base + 0x2b4)
+
/* The docs specify that the write pointer wraps around after 5h, "After status
* is written out to the last available status QW at offset 5h, this pointer
* wraps to 0."
@@ -9,7 +9,7 @@
#include <linux/types.h>
-/* GEN8+ Reg State Context */
+/* GEN8 to GEN11 Reg State Context */
#define CTX_LRI_HEADER_0 0x01
#define CTX_CONTEXT_CONTROL 0x02
#define CTX_RING_HEAD 0x04
@@ -39,6 +39,34 @@
#define CTX_R_PWR_CLK_STATE 0x42
#define CTX_END 0x44
+/* GEN12+ Reg State Context */
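+/*
+ * Largely mirrors the gen8 layout above: the aliases below are identical
+ * offsets, but the SECOND_BB slots are gone, the per-context and indirect
+ * batch buffer entries move up to 0x12-0x16, an extra LRI header appears,
+ * and GPGPU_CSR_BASE_ADDRESS takes the 0x44 slot that gen8 uses for
+ * CTX_END.
+ */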
+#define GEN12_CTX_LRI_HEADER_0 CTX_LRI_HEADER_0
+#define GEN12_CTX_CONTEXT_CONTROL CTX_CONTEXT_CONTROL
+#define GEN12_CTX_RING_HEAD CTX_RING_HEAD
+#define GEN12_CTX_RING_TAIL CTX_RING_TAIL
+#define GEN12_CTX_RING_BUFFER_START CTX_RING_BUFFER_START
+#define GEN12_CTX_RING_BUFFER_CONTROL CTX_RING_BUFFER_CONTROL
+#define GEN12_CTX_BB_HEAD_U CTX_BB_HEAD_U
+#define GEN12_CTX_BB_HEAD_L CTX_BB_HEAD_L
+#define GEN12_CTX_BB_STATE CTX_BB_STATE
+#define GEN12_CTX_BB_PER_CTX_PTR 0x12
+#define GEN12_CTX_RCS_INDIRECT_CTX 0x14
+#define GEN12_CTX_RCS_INDIRECT_CTX_OFFSET 0x16
+#define GEN12_CTX_LRI_HEADER_1 CTX_LRI_HEADER_1
+#define GEN12_CTX_CTX_TIMESTAMP CTX_CTX_TIMESTAMP
+#define GEN12_CTX_PDP3_UDW CTX_PDP3_UDW
+#define GEN12_CTX_PDP3_LDW CTX_PDP3_LDW
+#define GEN12_CTX_PDP2_UDW CTX_PDP2_UDW
+#define GEN12_CTX_PDP2_LDW CTX_PDP2_LDW
+#define GEN12_CTX_PDP1_UDW CTX_PDP1_UDW
+#define GEN12_CTX_PDP1_LDW CTX_PDP1_LDW
+#define GEN12_CTX_PDP0_UDW CTX_PDP0_UDW
+#define GEN12_CTX_PDP0_LDW CTX_PDP0_LDW
+#define GEN12_CTX_LRI_HEADER_2 0x34
+#define GEN12_CTX_LRI_HEADER_3 0x41
+#define GEN12_CTX_R_PWR_CLK_STATE 0x42
+#define GEN12_CTX_GPGPU_CSR_BASE_ADDRESS 0x44
+
#define CTX_REG(reg_state, pos, reg, val) do { \
u32 *reg_state__ = (reg_state); \
const u32 pos__ = (pos); \