@@ -199,14 +199,14 @@ restore_context_mmio_for_inhibit(struct intel_vgpu *vgpu,
if (IS_ERR(cs))
return PTR_ERR(cs);
- *cs++ = MI_LOAD_REGISTER_IMM(count);
+ *cs++ = i915_get_lri_cmd(req->engine, count);
for (mmio = gvt->engine_mmio_list.mmio;
i915_mmio_reg_valid(mmio->reg); mmio++) {
if (mmio->ring_id != ring_id ||
!mmio->in_context)
continue;
- *cs++ = i915_mmio_reg_offset(mmio->reg);
+ *cs++ = i915_get_lri_reg(req->engine, mmio->reg);
*cs++ = vgpu_vreg_t(vgpu, mmio->reg) |
(mmio->mask << 16);
gvt_dbg_core("add lri reg pair 0x%x:0x%x in inhibit ctx, vgpu:%d, rind_id:%d\n",
@@ -234,7 +234,11 @@ restore_render_mocs_control_for_inhibit(struct intel_vgpu *vgpu,
if (IS_ERR(cs))
return PTR_ERR(cs);
- *cs++ = MI_LOAD_REGISTER_IMM(GEN9_MOCS_SIZE);
+ /*
+ * GEN9_GFX_MOCS is not engine relative, therefore there is no
+ * need for relative addressing.
+ */
+ *cs++ = __MI_LOAD_REGISTER_IMM(GEN9_MOCS_SIZE);
for (index = 0; index < GEN9_MOCS_SIZE; index++) {
*cs++ = i915_mmio_reg_offset(GEN9_GFX_MOCS(index));
@@ -261,7 +265,11 @@ restore_render_mocs_l3cc_for_inhibit(struct intel_vgpu *vgpu,
if (IS_ERR(cs))
return PTR_ERR(cs);
- *cs++ = MI_LOAD_REGISTER_IMM(GEN9_MOCS_SIZE / 2);
+ /*
+ * GEN9_LNCFCMOCS is not engine relative, therefore there is no
+ * need for relative addressing.
+ */
+ *cs++ = __MI_LOAD_REGISTER_IMM(GEN9_MOCS_SIZE / 2);
for (index = 0; index < GEN9_MOCS_SIZE / 2; index++) {
*cs++ = i915_mmio_reg_offset(GEN9_LNCFCMOCS(index));
@@ -220,7 +220,7 @@ static const struct drm_i915_cmd_descriptor common_cmds[] = {
CMD( MI_SUSPEND_FLUSH, SMI, F, 1, S ),
CMD( MI_SEMAPHORE_MBOX, SMI, !F, 0xFF, R ),
CMD( MI_STORE_DWORD_INDEX, SMI, !F, 0xFF, R ),
- CMD( MI_LOAD_REGISTER_IMM(1), SMI, !F, 0xFF, W,
+ CMD( __MI_LOAD_REGISTER_IMM(1), SMI, !F, 0xFF, W,
.reg = { .offset = 1, .mask = 0x007FFFFC, .step = 2 } ),
CMD( MI_STORE_REGISTER_MEM, SMI, F, 3, W | B,
.reg = { .offset = 1, .mask = 0x007FFFFC },
@@ -1182,7 +1182,7 @@ static bool check_cmd(const struct intel_engine_cs *engine,
return false;
}
- if (desc->cmd.value == MI_LOAD_REGISTER_IMM(1) &&
+ if (desc->cmd.value == __MI_LOAD_REGISTER_IMM(1) &&
(offset + 2 > length ||
(cmd[offset + 1] & reg->mask) != reg->value)) {
DRM_DEBUG_DRIVER("CMD: Rejected LRI to masked register 0x%08X\n",
@@ -1038,11 +1038,11 @@ static int emit_ppgtt_update(struct i915_request *rq, void *data)
if (IS_ERR(cs))
return PTR_ERR(cs);
- *cs++ = MI_LOAD_REGISTER_IMM(2);
+ *cs++ = i915_get_lri_cmd(engine, 2);
- *cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_UDW(engine, 0));
+ *cs++ = i915_get_lri_reg(engine, GEN8_RING_PDP_UDW(engine, 0));
*cs++ = upper_32_bits(pd_daddr);
- *cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_LDW(engine, 0));
+ *cs++ = i915_get_lri_reg(engine, GEN8_RING_PDP_LDW(engine, 0));
*cs++ = lower_32_bits(pd_daddr);
*cs++ = MI_NOOP;
@@ -1052,13 +1052,13 @@ static int emit_ppgtt_update(struct i915_request *rq, void *data)
if (IS_ERR(cs))
return PTR_ERR(cs);
- *cs++ = MI_LOAD_REGISTER_IMM(2 * GEN8_3LVL_PDPES);
+ *cs++ = i915_get_lri_cmd(engine, 2 * GEN8_3LVL_PDPES);
for (i = GEN8_3LVL_PDPES; i--; ) {
const dma_addr_t pd_daddr = i915_page_dir_dma_addr(ppgtt, i);
- *cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_UDW(engine, i));
+ *cs++ = i915_get_lri_reg(engine, GEN8_RING_PDP_UDW(engine, i));
*cs++ = upper_32_bits(pd_daddr);
- *cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_LDW(engine, i));
+ *cs++ = i915_get_lri_reg(engine, GEN8_RING_PDP_LDW(engine, i));
*cs++ = lower_32_bits(pd_daddr);
}
*cs++ = MI_NOOP;
@@ -1965,9 +1965,9 @@ static int i915_reset_gen7_sol_offsets(struct i915_request *rq)
if (IS_ERR(cs))
return PTR_ERR(cs);
- *cs++ = MI_LOAD_REGISTER_IMM(4);
+ *cs++ = i915_get_lri_cmd(rq->engine, 4);
for (i = 0; i < 4; i++) {
- *cs++ = i915_mmio_reg_offset(GEN7_SO_WRITE_OFFSET(i));
+ *cs++ = i915_get_lri_reg(rq->engine, GEN7_SO_WRITE_OFFSET(i));
*cs++ = 0;
}
*cs++ = MI_NOOP;
@@ -1628,7 +1628,8 @@ static void hsw_disable_metric_set(struct drm_i915_private *dev_priv)
* in the case that the OA unit has been disabled.
*/
static void
-gen8_update_reg_state_unlocked(struct intel_context *ce,
+gen8_update_reg_state_unlocked(struct intel_engine_cs *engine,
+ struct intel_context *ce,
u32 *reg_state,
const struct i915_oa_config *oa_config)
{
@@ -1647,7 +1648,12 @@ gen8_update_reg_state_unlocked(struct intel_context *ce,
};
int i;
- CTX_REG(reg_state, ctx_oactxctrl, GEN8_OACTXCONTROL,
+	/*
+	 * NB: the enclosing LRI header here is generated by the hardware,
+	 * not by this code. FIXME: should we read it back and assert that
+	 * the MI_LRI_ADD_CS_MMIO_START flag is set?
+	 */
+
+ CTX_REG(engine, reg_state, ctx_oactxctrl, GEN8_OACTXCONTROL,
(i915->perf.oa.period_exponent << GEN8_OA_TIMER_PERIOD_SHIFT) |
(i915->perf.oa.periodic ? GEN8_OA_TIMER_ENABLE : 0) |
GEN8_OA_COUNTER_RESUME);
@@ -1674,10 +1680,10 @@ gen8_update_reg_state_unlocked(struct intel_context *ce,
}
}
- CTX_REG(reg_state, state_offset, flex_regs[i], value);
+ CTX_REG(engine, reg_state, state_offset, flex_regs[i], value);
}
- CTX_REG(reg_state,
+ CTX_REG(engine, reg_state,
CTX_R_PWR_CLK_STATE, GEN8_R_PWR_CLK_STATE,
gen8_make_rpcs(i915, &ce->sseu));
}
@@ -1752,7 +1758,7 @@ static int gen8_configure_all_contexts(struct drm_i915_private *dev_priv,
ce->state->obj->mm.dirty = true;
regs += LRC_STATE_PN * PAGE_SIZE / sizeof(*regs);
- gen8_update_reg_state_unlocked(ce, regs, oa_config);
+ gen8_update_reg_state_unlocked(engine, ce, regs, oa_config);
i915_gem_object_unpin_map(ce->state->obj);
}
@@ -2146,7 +2152,8 @@ void i915_oa_init_reg_state(struct intel_engine_cs *engine,
stream = engine->i915->perf.oa.exclusive_stream;
if (stream)
- gen8_update_reg_state_unlocked(ce, regs, stream->oa_config);
+ gen8_update_reg_state_unlocked(engine, ce, regs,
+ stream->oa_config);
}
/**
@@ -246,6 +246,17 @@ static u32 __engine_mmio_base(struct drm_i915_private *i915,
return bases[i].base;
}
+bool i915_engine_has_relative_lri(const struct intel_engine_cs *engine)
+{
+ if (INTEL_GEN(engine->i915) < 11)
+ return false;
+
+ if (engine->id == BCS0)
+ return false;
+
+ return true;
+}
+
static void __sprint_engine_name(char *name, const struct engine_info *info)
{
WARN_ON(snprintf(name, INTEL_ENGINE_CS_MAX_NAME, "%s%u",
@@ -123,9 +123,13 @@
* simply ignores the register load under certain conditions.
* - One can actually load arbitrary many arbitrary registers: Simply issue x
* address/value pairs. Don't overdue it, though, x <= 2^4 must hold!
+ * - Newer hardware supports engine relative addresses but older hardware does
+ * not. So never call MI_LRI directly, always use the i915_get_lri_cmd()
+ * and i915_get_lri_reg() helper functions.
*/
-#define MI_LOAD_REGISTER_IMM(x) MI_INSTR(0x22, 2*(x)-1)
+#define __MI_LOAD_REGISTER_IMM(x) MI_INSTR(0x22, 2*(x)-1)
#define MI_LRI_FORCE_POSTED (1<<12)
+#define MI_LRI_ADD_CS_MMIO_START (1<<19)
#define MI_STORE_REGISTER_MEM MI_INSTR(0x24, 1)
#define MI_STORE_REGISTER_MEM_GEN8 MI_INSTR(0x24, 2)
#define MI_SRM_LRM_GLOBAL_GTT (1<<22)
@@ -1448,13 +1448,14 @@ static int emit_pdps(struct i915_request *rq)
return PTR_ERR(cs);
/* Ensure the LRI have landed before we invalidate & continue */
- *cs++ = MI_LOAD_REGISTER_IMM(2 * GEN8_3LVL_PDPES) | MI_LRI_FORCE_POSTED;
+ *cs++ = i915_get_lri_cmd(engine, 2 * GEN8_3LVL_PDPES) |
+ MI_LRI_FORCE_POSTED;
for (i = GEN8_3LVL_PDPES; i--; ) {
const dma_addr_t pd_daddr = i915_page_dir_dma_addr(ppgtt, i);
- *cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_UDW(engine, i));
+ *cs++ = i915_get_lri_reg(engine, GEN8_RING_PDP_UDW(engine, i));
*cs++ = upper_32_bits(pd_daddr);
- *cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_LDW(engine, i));
+ *cs++ = i915_get_lri_reg(engine, GEN8_RING_PDP_LDW(engine, i));
*cs++ = lower_32_bits(pd_daddr);
}
*cs++ = MI_NOOP;
@@ -1528,8 +1529,8 @@ gen8_emit_flush_coherentl3_wa(struct intel_engine_cs *engine, u32 *batch)
*batch++ = i915_scratch_offset(engine->i915) + 256;
*batch++ = 0;
- *batch++ = MI_LOAD_REGISTER_IMM(1);
- *batch++ = i915_mmio_reg_offset(GEN8_L3SQCREG4);
+ *batch++ = i915_get_lri_cmd(engine, 1);
+ *batch++ = i915_get_lri_reg(engine, GEN8_L3SQCREG4);
*batch++ = 0x40400000 | GEN8_LQSC_FLUSH_COHERENT_LINES;
batch = gen8_emit_pipe_control(batch,
@@ -1599,13 +1600,14 @@ struct lri {
u32 value;
};
-static u32 *emit_lri(u32 *batch, const struct lri *lri, unsigned int count)
+static u32 *emit_lri(struct intel_engine_cs *engine, u32 *batch,
+ const struct lri *lri, unsigned int count)
{
GEM_BUG_ON(!count || count > 63);
- *batch++ = MI_LOAD_REGISTER_IMM(count);
+ *batch++ = i915_get_lri_cmd(engine, count);
do {
- *batch++ = i915_mmio_reg_offset(lri->reg);
+ *batch++ = i915_get_lri_reg(engine, lri->reg);
*batch++ = lri->value;
} while (lri++, --count);
*batch++ = MI_NOOP;
@@ -1643,7 +1645,7 @@ static u32 *gen9_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch)
/* WaFlushCoherentL3CacheLinesAtContextSwitch:skl,bxt,glk */
batch = gen8_emit_flush_coherentl3_wa(engine, batch);
- batch = emit_lri(batch, lri, ARRAY_SIZE(lri));
+ batch = emit_lri(engine, batch, lri, ARRAY_SIZE(lri));
/* WaMediaPoolStateCmdInWABB:bxt,glk */
if (HAS_POOLED_EU(engine->i915)) {
@@ -2655,10 +2657,10 @@ static void execlists_init_reg_state(u32 *regs,
* values (including all the missing MI_LOAD_REGISTER_IMM commands that
* we are not initializing here).
*/
- regs[CTX_LRI_HEADER_0] = MI_LOAD_REGISTER_IMM(rcs ? 14 : 11) |
- MI_LRI_FORCE_POSTED;
+ regs[CTX_LRI_HEADER_0] = i915_get_lri_cmd(engine, rcs ? 14 : 11) |
+ MI_LRI_FORCE_POSTED;
- CTX_REG(regs, CTX_CONTEXT_CONTROL, RING_CONTEXT_CONTROL(base),
+ CTX_REG(engine, regs, CTX_CONTEXT_CONTROL, RING_CONTEXT_CONTROL(base),
_MASKED_BIT_DISABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT) |
_MASKED_BIT_ENABLE(CTX_CTRL_INHIBIT_SYN_CTX_SWITCH));
if (INTEL_GEN(engine->i915) < 11) {
@@ -2666,22 +2668,23 @@ static void execlists_init_reg_state(u32 *regs,
_MASKED_BIT_DISABLE(CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT |
CTX_CTRL_RS_CTX_ENABLE);
}
- CTX_REG(regs, CTX_RING_HEAD, RING_HEAD(base), 0);
- CTX_REG(regs, CTX_RING_TAIL, RING_TAIL(base), 0);
- CTX_REG(regs, CTX_RING_BUFFER_START, RING_START(base), 0);
- CTX_REG(regs, CTX_RING_BUFFER_CONTROL, RING_CTL(base),
+ CTX_REG(engine, regs, CTX_RING_HEAD, RING_HEAD(base), 0);
+ CTX_REG(engine, regs, CTX_RING_TAIL, RING_TAIL(base), 0);
+ CTX_REG(engine, regs, CTX_RING_BUFFER_START, RING_START(base), 0);
+ CTX_REG(engine, regs, CTX_RING_BUFFER_CONTROL, RING_CTL(base),
RING_CTL_SIZE(ring->size) | RING_VALID);
- CTX_REG(regs, CTX_BB_HEAD_U, RING_BBADDR_UDW(base), 0);
- CTX_REG(regs, CTX_BB_HEAD_L, RING_BBADDR(base), 0);
- CTX_REG(regs, CTX_BB_STATE, RING_BBSTATE(base), RING_BB_PPGTT);
- CTX_REG(regs, CTX_SECOND_BB_HEAD_U, RING_SBBADDR_UDW(base), 0);
- CTX_REG(regs, CTX_SECOND_BB_HEAD_L, RING_SBBADDR(base), 0);
- CTX_REG(regs, CTX_SECOND_BB_STATE, RING_SBBSTATE(base), 0);
+ CTX_REG(engine, regs, CTX_BB_HEAD_U, RING_BBADDR_UDW(base), 0);
+ CTX_REG(engine, regs, CTX_BB_HEAD_L, RING_BBADDR(base), 0);
+ CTX_REG(engine, regs, CTX_BB_STATE, RING_BBSTATE(base), RING_BB_PPGTT);
+ CTX_REG(engine, regs, CTX_SECOND_BB_HEAD_U, RING_SBBADDR_UDW(base), 0);
+ CTX_REG(engine, regs, CTX_SECOND_BB_HEAD_L, RING_SBBADDR(base), 0);
+ CTX_REG(engine, regs, CTX_SECOND_BB_STATE, RING_SBBSTATE(base), 0);
if (rcs) {
struct i915_ctx_workarounds *wa_ctx = &engine->wa_ctx;
- CTX_REG(regs, CTX_RCS_INDIRECT_CTX, RING_INDIRECT_CTX(base), 0);
- CTX_REG(regs, CTX_RCS_INDIRECT_CTX_OFFSET,
+ CTX_REG(engine, regs, CTX_RCS_INDIRECT_CTX,
+ RING_INDIRECT_CTX(base), 0);
+ CTX_REG(engine, regs, CTX_RCS_INDIRECT_CTX_OFFSET,
RING_INDIRECT_CTX_OFFSET(base), 0);
if (wa_ctx->indirect_ctx.size) {
u32 ggtt_offset = i915_ggtt_offset(wa_ctx->vma);
@@ -2694,7 +2697,8 @@ static void execlists_init_reg_state(u32 *regs,
intel_lr_indirect_ctx_offset(engine) << 6;
}
- CTX_REG(regs, CTX_BB_PER_CTX_PTR, RING_BB_PER_CTX_PTR(base), 0);
+ CTX_REG(engine, regs, CTX_BB_PER_CTX_PTR,
+ RING_BB_PER_CTX_PTR(base), 0);
if (wa_ctx->per_ctx.size) {
u32 ggtt_offset = i915_ggtt_offset(wa_ctx->vma);
@@ -2703,18 +2707,19 @@ static void execlists_init_reg_state(u32 *regs,
}
}
- regs[CTX_LRI_HEADER_1] = MI_LOAD_REGISTER_IMM(9) | MI_LRI_FORCE_POSTED;
+ regs[CTX_LRI_HEADER_1] = i915_get_lri_cmd(engine, 9) |
+ MI_LRI_FORCE_POSTED;
- CTX_REG(regs, CTX_CTX_TIMESTAMP, RING_CTX_TIMESTAMP(base), 0);
+ CTX_REG(engine, regs, CTX_CTX_TIMESTAMP, RING_CTX_TIMESTAMP(base), 0);
/* PDP values well be assigned later if needed */
- CTX_REG(regs, CTX_PDP3_UDW, GEN8_RING_PDP_UDW(engine, 3), 0);
- CTX_REG(regs, CTX_PDP3_LDW, GEN8_RING_PDP_LDW(engine, 3), 0);
- CTX_REG(regs, CTX_PDP2_UDW, GEN8_RING_PDP_UDW(engine, 2), 0);
- CTX_REG(regs, CTX_PDP2_LDW, GEN8_RING_PDP_LDW(engine, 2), 0);
- CTX_REG(regs, CTX_PDP1_UDW, GEN8_RING_PDP_UDW(engine, 1), 0);
- CTX_REG(regs, CTX_PDP1_LDW, GEN8_RING_PDP_LDW(engine, 1), 0);
- CTX_REG(regs, CTX_PDP0_UDW, GEN8_RING_PDP_UDW(engine, 0), 0);
- CTX_REG(regs, CTX_PDP0_LDW, GEN8_RING_PDP_LDW(engine, 0), 0);
+ CTX_REG(engine, regs, CTX_PDP3_UDW, GEN8_RING_PDP_UDW(engine, 3), 0);
+ CTX_REG(engine, regs, CTX_PDP3_LDW, GEN8_RING_PDP_LDW(engine, 3), 0);
+ CTX_REG(engine, regs, CTX_PDP2_UDW, GEN8_RING_PDP_UDW(engine, 2), 0);
+ CTX_REG(engine, regs, CTX_PDP2_LDW, GEN8_RING_PDP_LDW(engine, 2), 0);
+ CTX_REG(engine, regs, CTX_PDP1_UDW, GEN8_RING_PDP_UDW(engine, 1), 0);
+ CTX_REG(engine, regs, CTX_PDP1_LDW, GEN8_RING_PDP_LDW(engine, 1), 0);
+ CTX_REG(engine, regs, CTX_PDP0_UDW, GEN8_RING_PDP_UDW(engine, 0), 0);
+ CTX_REG(engine, regs, CTX_PDP0_LDW, GEN8_RING_PDP_LDW(engine, 0), 0);
if (i915_vm_is_4lvl(&ppgtt->vm)) {
/* 64b PPGTT (48bit canonical)
@@ -2730,8 +2735,9 @@ static void execlists_init_reg_state(u32 *regs,
}
if (rcs) {
- regs[CTX_LRI_HEADER_2] = MI_LOAD_REGISTER_IMM(1);
- CTX_REG(regs, CTX_R_PWR_CLK_STATE, GEN8_R_PWR_CLK_STATE, 0);
+ regs[CTX_LRI_HEADER_2] = i915_get_lri_cmd(engine, 1);
+ CTX_REG(engine, regs, CTX_R_PWR_CLK_STATE,
+ GEN8_R_PWR_CLK_STATE, 0);
i915_oa_init_reg_state(engine, ce, regs);
}
@@ -39,10 +39,10 @@
#define CTX_R_PWR_CLK_STATE 0x42
#define CTX_END 0x44
-#define CTX_REG(reg_state, pos, reg, val) do { \
+#define CTX_REG(engine, reg_state, pos, reg, val) do { \
u32 *reg_state__ = (reg_state); \
const u32 pos__ = (pos); \
- (reg_state__)[(pos__) + 0] = i915_mmio_reg_offset(reg); \
+ (reg_state__)[(pos__) + 0] = i915_get_lri_reg((engine), (reg)); \
(reg_state__)[(pos__) + 1] = (val); \
} while (0)
@@ -322,9 +322,6 @@ static u32 get_entry_control(const struct drm_i915_mocs_table *table,
/**
* intel_mocs_init_engine() - emit the mocs control table
* @engine: The engine for whom to emit the registers.
- *
- * This function simply emits a MI_LOAD_REGISTER_IMM command for the
- * given table starting at the given address.
*/
void intel_mocs_init_engine(struct intel_engine_cs *engine)
{
@@ -378,18 +375,20 @@ static int emit_mocs_control_table(struct i915_request *rq,
if (IS_ERR(cs))
return PTR_ERR(cs);
- *cs++ = MI_LOAD_REGISTER_IMM(table->n_entries);
+ *cs++ = i915_get_lri_cmd(rq->engine, table->n_entries);
for (index = 0; index < table->size; index++) {
u32 value = get_entry_control(table, index);
- *cs++ = i915_mmio_reg_offset(mocs_register(engine, index));
+ *cs++ = i915_get_lri_reg(rq->engine,
+ mocs_register(engine, index));
*cs++ = value;
}
/* All remaining entries are also unused */
for (; index < table->n_entries; index++) {
- *cs++ = i915_mmio_reg_offset(mocs_register(engine, index));
+ *cs++ = i915_get_lri_reg(rq->engine,
+ mocs_register(engine, index));
*cs++ = unused_value;
}
@@ -447,7 +446,11 @@ static int emit_mocs_l3cc_table(struct i915_request *rq,
if (IS_ERR(cs))
return PTR_ERR(cs);
- *cs++ = MI_LOAD_REGISTER_IMM(table->n_entries / 2);
+ /*
+ * GEN9_LNCFCMOCS is not engine relative, therefore there is no
+	 * need for relative addressing.
+ */
+ *cs++ = __MI_LOAD_REGISTER_IMM(table->n_entries / 2);
for (i = 0; i < table->size / 2; i++) {
u16 low = get_entry_l3cc(table, 2 * i);
@@ -1601,12 +1601,13 @@ static int load_pd_dir(struct i915_request *rq,
if (IS_ERR(cs))
return PTR_ERR(cs);
- *cs++ = MI_LOAD_REGISTER_IMM(1);
- *cs++ = i915_mmio_reg_offset(RING_PP_DIR_DCLV(engine->mmio_base));
+	/* FIXME: can these two LRIs be merged into a single LRI? */
+ *cs++ = i915_get_lri_cmd(engine, 1);
+ *cs++ = i915_get_lri_reg(engine, RING_PP_DIR_DCLV(engine->mmio_base));
*cs++ = PP_DIR_DCLV_2G;
- *cs++ = MI_LOAD_REGISTER_IMM(1);
- *cs++ = i915_mmio_reg_offset(RING_PP_DIR_BASE(engine->mmio_base));
+ *cs++ = i915_get_lri_cmd(engine, 1);
+ *cs++ = i915_get_lri_reg(engine, RING_PP_DIR_BASE(engine->mmio_base));
*cs++ = ppgtt->pd.base.ggtt_offset << 10;
intel_ring_advance(rq, cs);
@@ -1671,7 +1672,11 @@ static inline int mi_set_context(struct i915_request *rq, u32 flags)
if (num_engines) {
struct intel_engine_cs *signaller;
- *cs++ = MI_LOAD_REGISTER_IMM(num_engines);
+ /*
+ * Must use absolute engine address as the register
+ * write is targeting a different engine.
+ */
+ *cs++ = __MI_LOAD_REGISTER_IMM(num_engines);
for_each_engine(signaller, i915, id) {
if (signaller == engine)
continue;
@@ -1717,7 +1722,11 @@ static inline int mi_set_context(struct i915_request *rq, u32 flags)
struct intel_engine_cs *signaller;
i915_reg_t last_reg = {}; /* keep gcc quiet */
- *cs++ = MI_LOAD_REGISTER_IMM(num_engines);
+ /*
+ * Must use absolute engine address as the register
+ * write is targeting a different engine.
+ */
+ *cs++ = __MI_LOAD_REGISTER_IMM(num_engines);
for_each_engine(signaller, i915, id) {
if (signaller == engine)
continue;
@@ -1759,9 +1768,9 @@ static int remap_l3(struct i915_request *rq, int slice)
* here because no other code should access these registers other than
* at initialization time.
*/
- *cs++ = MI_LOAD_REGISTER_IMM(GEN7_L3LOG_SIZE/4);
+ *cs++ = i915_get_lri_cmd(rq->engine, GEN7_L3LOG_SIZE/4);
for (i = 0; i < GEN7_L3LOG_SIZE/4; i++) {
- *cs++ = i915_mmio_reg_offset(GEN7_L3LOG(slice, i));
+ *cs++ = i915_get_lri_reg(rq->engine, GEN7_L3LOG(slice, i));
*cs++ = remap_info[i];
}
*cs++ = MI_NOOP;
@@ -2346,3 +2355,23 @@ int intel_init_vebox_ring_buffer(struct intel_engine_cs *engine)
return intel_init_ring_buffer(engine);
}
+
+u32 i915_get_lri_cmd(const struct intel_engine_cs *engine, u32 word_count)
+{
+ u32 word;
+
+ word = __MI_LOAD_REGISTER_IMM(word_count);
+
+ if (i915_engine_has_relative_lri(engine))
+ word |= MI_LRI_ADD_CS_MMIO_START;
+
+ return word;
+}
+
+u32 i915_get_lri_reg(const struct intel_engine_cs *engine, i915_reg_t reg)
+{
+ if (!i915_engine_has_relative_lri(engine))
+ return i915_mmio_reg_offset(reg);
+
+ return i915_mmio_reg_offset(reg) - engine->mmio_base;
+}
@@ -581,4 +581,8 @@ intel_engine_get_hangcheck_seqno(struct intel_engine_cs *engine)
return intel_read_status_page(engine, I915_GEM_HWS_HANGCHECK);
}
+bool i915_engine_has_relative_lri(const struct intel_engine_cs *engine);
+u32 i915_get_lri_cmd(const struct intel_engine_cs *engine, u32 word_count);
+u32 i915_get_lri_reg(const struct intel_engine_cs *engine, i915_reg_t reg);
+
#endif /* _INTEL_RINGBUFFER_H_ */
@@ -614,9 +614,9 @@ int intel_engine_emit_ctx_wa(struct i915_request *rq)
if (IS_ERR(cs))
return PTR_ERR(cs);
- *cs++ = MI_LOAD_REGISTER_IMM(wal->count);
+ *cs++ = i915_get_lri_cmd(rq->engine, wal->count);
for (i = 0, wa = wal->list; i < wal->count; i++, wa++) {
- *cs++ = i915_mmio_reg_offset(wa->reg);
+ *cs++ = i915_get_lri_reg(rq->engine, wa->reg);
*cs++ = wa->val;
}
*cs++ = MI_NOOP;
@@ -487,6 +487,7 @@ static int check_dirty_whitelist(struct i915_gem_context *ctx,
for (i = 0; i < engine->whitelist.count; i++) {
u32 reg = i915_mmio_reg_offset(engine->whitelist.list[i].reg);
+ u32 regLRI = i915_get_lri_reg(engine, engine->whitelist.list[i].reg);
u64 addr = scratch->node.start;
struct i915_request *rq;
u32 srm, lrm, rsvd;
@@ -519,8 +520,8 @@ static int check_dirty_whitelist(struct i915_gem_context *ctx,
idx = 1;
for (v = 0; v < ARRAY_SIZE(values); v++) {
/* LRI garbage */
- *cs++ = MI_LOAD_REGISTER_IMM(1);
- *cs++ = reg;
+ *cs++ = i915_get_lri_cmd(engine, 1);
+ *cs++ = regLRI;
*cs++ = values[v];
/* SRM result */
@@ -532,8 +533,8 @@ static int check_dirty_whitelist(struct i915_gem_context *ctx,
}
for (v = 0; v < ARRAY_SIZE(values); v++) {
/* LRI garbage */
- *cs++ = MI_LOAD_REGISTER_IMM(1);
- *cs++ = reg;
+ *cs++ = i915_get_lri_cmd(engine, 1);
+ *cs++ = regLRI;
*cs++ = ~values[v];
/* SRM result */