@@ -104,6 +104,7 @@ gt-y += \
gt/intel_gt_requests.o \
gt/intel_gtt.o \
gt/intel_llc.o \
+ gt/intel_lrc.o \
gt/intel_mocs.o \
gt/intel_ppgtt.o \
gt/intel_rc6.o \
@@ -5,7 +5,7 @@
#include "gen8_engine_cs.h"
#include "i915_drv.h"
-#include "intel_execlists_submission.h" /* XXX */
+#include "intel_lrc.h"
#include "intel_gpu_commands.h"
#include "intel_ring.h"
@@ -8,8 +8,7 @@
#include "intel_context.h"
#include "intel_engine_pm.h"
#include "intel_gpu_commands.h"
-#include "intel_execlists_submission.h"
-#include "intel_lrc_reg.h"
+#include "intel_lrc.h"
#include "intel_ring.h"
#include "intel_sseu.h"
@@ -109,7 +109,6 @@
#include <linux/interrupt.h>
#include "i915_drv.h"
-#include "i915_perf.h"
#include "i915_trace.h"
#include "i915_vgpu.h"
#include "gen8_engine_cs.h"
@@ -120,6 +119,7 @@
#include "intel_gt.h"
#include "intel_gt_pm.h"
#include "intel_gt_requests.h"
+#include "intel_lrc.h"
#include "intel_lrc_reg.h"
#include "intel_mocs.h"
#include "intel_reset.h"
@@ -144,8 +144,6 @@
#define GEN8_CTX_STATUS_COMPLETED_MASK \
(GEN8_CTX_STATUS_COMPLETE | GEN8_CTX_STATUS_PREEMPTED)
-#define CTX_DESC_FORCE_RESTORE BIT_ULL(2)
-
#define GEN12_CTX_STATUS_SWITCHED_TO_NEW_QUEUE (0x1) /* lower csb dword */
#define GEN12_CTX_SWITCH_DETAIL(csb_dw) ((csb_dw) & 0xF) /* upper csb dword */
#define GEN12_CSB_SW_CTX_ID_MASK GENMASK(25, 15)
@@ -208,133 +206,6 @@ static struct virtual_engine *to_virtual_engine(struct intel_engine_cs *engine)
static int __execlists_context_alloc(struct intel_context *ce,
struct intel_engine_cs *engine);
-static void execlists_init_reg_state(u32 *reg_state,
- const struct intel_context *ce,
- const struct intel_engine_cs *engine,
- const struct intel_ring *ring,
- bool close);
-static void
-__execlists_update_reg_state(const struct intel_context *ce,
- const struct intel_engine_cs *engine,
- u32 head);
-
-static int lrc_ring_mi_mode(const struct intel_engine_cs *engine)
-{
- if (INTEL_GEN(engine->i915) >= 12)
- return 0x60;
- else if (INTEL_GEN(engine->i915) >= 9)
- return 0x54;
- else if (engine->class == RENDER_CLASS)
- return 0x58;
- else
- return -1;
-}
-
-static int lrc_ring_gpr0(const struct intel_engine_cs *engine)
-{
- if (INTEL_GEN(engine->i915) >= 12)
- return 0x74;
- else if (INTEL_GEN(engine->i915) >= 9)
- return 0x68;
- else if (engine->class == RENDER_CLASS)
- return 0xd8;
- else
- return -1;
-}
-
-static int lrc_ring_wa_bb_per_ctx(const struct intel_engine_cs *engine)
-{
- if (INTEL_GEN(engine->i915) >= 12)
- return 0x12;
- else if (INTEL_GEN(engine->i915) >= 9 || engine->class == RENDER_CLASS)
- return 0x18;
- else
- return -1;
-}
-
-static int lrc_ring_indirect_ptr(const struct intel_engine_cs *engine)
-{
- int x;
-
- x = lrc_ring_wa_bb_per_ctx(engine);
- if (x < 0)
- return x;
-
- return x + 2;
-}
-
-static int lrc_ring_indirect_offset(const struct intel_engine_cs *engine)
-{
- int x;
-
- x = lrc_ring_indirect_ptr(engine);
- if (x < 0)
- return x;
-
- return x + 2;
-}
-
-static int lrc_ring_cmd_buf_cctl(const struct intel_engine_cs *engine)
-{
- if (engine->class != RENDER_CLASS)
- return -1;
-
- if (INTEL_GEN(engine->i915) >= 12)
- return 0xb6;
- else if (INTEL_GEN(engine->i915) >= 11)
- return 0xaa;
- else
- return -1;
-}
-
-static u32
-lrc_ring_indirect_offset_default(const struct intel_engine_cs *engine)
-{
- switch (INTEL_GEN(engine->i915)) {
- default:
- MISSING_CASE(INTEL_GEN(engine->i915));
- fallthrough;
- case 12:
- return GEN12_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
- case 11:
- return GEN11_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
- case 10:
- return GEN10_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
- case 9:
- return GEN9_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
- case 8:
- return GEN8_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
- }
-}
-
-static void
-lrc_ring_setup_indirect_ctx(u32 *regs,
- const struct intel_engine_cs *engine,
- u32 ctx_bb_ggtt_addr,
- u32 size)
-{
- GEM_BUG_ON(!size);
- GEM_BUG_ON(!IS_ALIGNED(size, CACHELINE_BYTES));
- GEM_BUG_ON(lrc_ring_indirect_ptr(engine) == -1);
- regs[lrc_ring_indirect_ptr(engine) + 1] =
- ctx_bb_ggtt_addr | (size / CACHELINE_BYTES);
-
- GEM_BUG_ON(lrc_ring_indirect_offset(engine) == -1);
- regs[lrc_ring_indirect_offset(engine) + 1] =
- lrc_ring_indirect_offset_default(engine) << 6;
-}
-
-static u32 intel_context_get_runtime(const struct intel_context *ce)
-{
- /*
- * We can use either ppHWSP[16] which is recorded before the context
- * switch (and so excludes the cost of context switches) or use the
- * value from the context image itself, which is saved/restored earlier
- * and so includes the cost of the save.
- */
- return READ_ONCE(ce->lrc_reg_state[CTX_TIMESTAMP]);
-}
-
static void mark_eio(struct i915_request *rq)
{
if (i915_request_completed(rq))
@@ -513,568 +384,6 @@ assert_priority_queue(const struct i915_request *prev,
return rq_prio(prev) >= rq_prio(next);
}
-/*
- * The context descriptor encodes various attributes of a context,
- * including its GTT address and some flags. Because it's fairly
- * expensive to calculate, we'll just do it once and cache the result,
- * which remains valid until the context is unpinned.
- *
- * This is what a descriptor looks like, from LSB to MSB::
- *
- * bits 0-11: flags, GEN8_CTX_* (cached in ctx->desc_template)
- * bits 12-31: LRCA, GTT address of (the HWSP of) this context
- * bits 32-52: ctx ID, a globally unique tag (highest bit used by GuC)
- * bits 53-54: mbz, reserved for use by hardware
- * bits 55-63: group ID, currently unused and set to 0
- *
- * Starting from Gen11, the upper dword of the descriptor has a new format:
- *
- * bits 32-36: reserved
- * bits 37-47: SW context ID
- * bits 48:53: engine instance
- * bit 54: mbz, reserved for use by hardware
- * bits 55-60: SW counter
- * bits 61-63: engine class
- *
- * engine info, SW context ID and SW counter need to form a unique number
- * (Context ID) per lrc.
- */
-static u32
-lrc_descriptor(struct intel_context *ce, struct intel_engine_cs *engine)
-{
- u32 desc;
-
- desc = INTEL_LEGACY_32B_CONTEXT;
- if (i915_vm_is_4lvl(ce->vm))
- desc = INTEL_LEGACY_64B_CONTEXT;
- desc <<= GEN8_CTX_ADDRESSING_MODE_SHIFT;
-
- desc |= GEN8_CTX_VALID | GEN8_CTX_PRIVILEGE;
- if (IS_GEN(engine->i915, 8))
- desc |= GEN8_CTX_L3LLC_COHERENT;
-
- return i915_ggtt_offset(ce->state) | desc;
-}
-
-static inline unsigned int dword_in_page(void *addr)
-{
- return offset_in_page(addr) / sizeof(u32);
-}
-
-static void set_offsets(u32 *regs,
- const u8 *data,
- const struct intel_engine_cs *engine,
- bool clear)
-#define NOP(x) (BIT(7) | (x))
-#define LRI(count, flags) ((flags) << 6 | (count) | BUILD_BUG_ON_ZERO(count >= BIT(6)))
-#define POSTED BIT(0)
-#define REG(x) (((x) >> 2) | BUILD_BUG_ON_ZERO(x >= 0x200))
-#define REG16(x) \
- (((x) >> 9) | BIT(7) | BUILD_BUG_ON_ZERO(x >= 0x10000)), \
- (((x) >> 2) & 0x7f)
-#define END(total_state_size) 0, (total_state_size)
-{
- const u32 base = engine->mmio_base;
-
- while (*data) {
- u8 count, flags;
-
- if (*data & BIT(7)) { /* skip */
- count = *data++ & ~BIT(7);
- if (clear)
- memset32(regs, MI_NOOP, count);
- regs += count;
- continue;
- }
-
- count = *data & 0x3f;
- flags = *data >> 6;
- data++;
-
- *regs = MI_LOAD_REGISTER_IMM(count);
- if (flags & POSTED)
- *regs |= MI_LRI_FORCE_POSTED;
- if (INTEL_GEN(engine->i915) >= 11)
- *regs |= MI_LRI_LRM_CS_MMIO;
- regs++;
-
- GEM_BUG_ON(!count);
- do {
- u32 offset = 0;
- u8 v;
-
- do {
- v = *data++;
- offset <<= 7;
- offset |= v & ~BIT(7);
- } while (v & BIT(7));
-
- regs[0] = base + (offset << 2);
- if (clear)
- regs[1] = 0;
- regs += 2;
- } while (--count);
- }
-
- if (clear) {
- u8 count = *++data;
-
- /* Clear past the tail for HW access */
- GEM_BUG_ON(dword_in_page(regs) > count);
- memset32(regs, MI_NOOP, count - dword_in_page(regs));
-
- /* Close the batch; used mainly by live_lrc_layout() */
- *regs = MI_BATCH_BUFFER_END;
- if (INTEL_GEN(engine->i915) >= 10)
- *regs |= BIT(0);
- }
-}
-
-static const u8 gen8_xcs_offsets[] = {
- NOP(1),
- LRI(11, 0),
- REG16(0x244),
- REG(0x034),
- REG(0x030),
- REG(0x038),
- REG(0x03c),
- REG(0x168),
- REG(0x140),
- REG(0x110),
- REG(0x11c),
- REG(0x114),
- REG(0x118),
-
- NOP(9),
- LRI(9, 0),
- REG16(0x3a8),
- REG16(0x28c),
- REG16(0x288),
- REG16(0x284),
- REG16(0x280),
- REG16(0x27c),
- REG16(0x278),
- REG16(0x274),
- REG16(0x270),
-
- NOP(13),
- LRI(2, 0),
- REG16(0x200),
- REG(0x028),
-
- END(80)
-};
-
-static const u8 gen9_xcs_offsets[] = {
- NOP(1),
- LRI(14, POSTED),
- REG16(0x244),
- REG(0x034),
- REG(0x030),
- REG(0x038),
- REG(0x03c),
- REG(0x168),
- REG(0x140),
- REG(0x110),
- REG(0x11c),
- REG(0x114),
- REG(0x118),
- REG(0x1c0),
- REG(0x1c4),
- REG(0x1c8),
-
- NOP(3),
- LRI(9, POSTED),
- REG16(0x3a8),
- REG16(0x28c),
- REG16(0x288),
- REG16(0x284),
- REG16(0x280),
- REG16(0x27c),
- REG16(0x278),
- REG16(0x274),
- REG16(0x270),
-
- NOP(13),
- LRI(1, POSTED),
- REG16(0x200),
-
- NOP(13),
- LRI(44, POSTED),
- REG(0x028),
- REG(0x09c),
- REG(0x0c0),
- REG(0x178),
- REG(0x17c),
- REG16(0x358),
- REG(0x170),
- REG(0x150),
- REG(0x154),
- REG(0x158),
- REG16(0x41c),
- REG16(0x600),
- REG16(0x604),
- REG16(0x608),
- REG16(0x60c),
- REG16(0x610),
- REG16(0x614),
- REG16(0x618),
- REG16(0x61c),
- REG16(0x620),
- REG16(0x624),
- REG16(0x628),
- REG16(0x62c),
- REG16(0x630),
- REG16(0x634),
- REG16(0x638),
- REG16(0x63c),
- REG16(0x640),
- REG16(0x644),
- REG16(0x648),
- REG16(0x64c),
- REG16(0x650),
- REG16(0x654),
- REG16(0x658),
- REG16(0x65c),
- REG16(0x660),
- REG16(0x664),
- REG16(0x668),
- REG16(0x66c),
- REG16(0x670),
- REG16(0x674),
- REG16(0x678),
- REG16(0x67c),
- REG(0x068),
-
- END(176)
-};
-
-static const u8 gen12_xcs_offsets[] = {
- NOP(1),
- LRI(13, POSTED),
- REG16(0x244),
- REG(0x034),
- REG(0x030),
- REG(0x038),
- REG(0x03c),
- REG(0x168),
- REG(0x140),
- REG(0x110),
- REG(0x1c0),
- REG(0x1c4),
- REG(0x1c8),
- REG(0x180),
- REG16(0x2b4),
-
- NOP(5),
- LRI(9, POSTED),
- REG16(0x3a8),
- REG16(0x28c),
- REG16(0x288),
- REG16(0x284),
- REG16(0x280),
- REG16(0x27c),
- REG16(0x278),
- REG16(0x274),
- REG16(0x270),
-
- END(80)
-};
-
-static const u8 gen8_rcs_offsets[] = {
- NOP(1),
- LRI(14, POSTED),
- REG16(0x244),
- REG(0x034),
- REG(0x030),
- REG(0x038),
- REG(0x03c),
- REG(0x168),
- REG(0x140),
- REG(0x110),
- REG(0x11c),
- REG(0x114),
- REG(0x118),
- REG(0x1c0),
- REG(0x1c4),
- REG(0x1c8),
-
- NOP(3),
- LRI(9, POSTED),
- REG16(0x3a8),
- REG16(0x28c),
- REG16(0x288),
- REG16(0x284),
- REG16(0x280),
- REG16(0x27c),
- REG16(0x278),
- REG16(0x274),
- REG16(0x270),
-
- NOP(13),
- LRI(1, 0),
- REG(0x0c8),
-
- END(80)
-};
-
-static const u8 gen9_rcs_offsets[] = {
- NOP(1),
- LRI(14, POSTED),
- REG16(0x244),
- REG(0x34),
- REG(0x30),
- REG(0x38),
- REG(0x3c),
- REG(0x168),
- REG(0x140),
- REG(0x110),
- REG(0x11c),
- REG(0x114),
- REG(0x118),
- REG(0x1c0),
- REG(0x1c4),
- REG(0x1c8),
-
- NOP(3),
- LRI(9, POSTED),
- REG16(0x3a8),
- REG16(0x28c),
- REG16(0x288),
- REG16(0x284),
- REG16(0x280),
- REG16(0x27c),
- REG16(0x278),
- REG16(0x274),
- REG16(0x270),
-
- NOP(13),
- LRI(1, 0),
- REG(0xc8),
-
- NOP(13),
- LRI(44, POSTED),
- REG(0x28),
- REG(0x9c),
- REG(0xc0),
- REG(0x178),
- REG(0x17c),
- REG16(0x358),
- REG(0x170),
- REG(0x150),
- REG(0x154),
- REG(0x158),
- REG16(0x41c),
- REG16(0x600),
- REG16(0x604),
- REG16(0x608),
- REG16(0x60c),
- REG16(0x610),
- REG16(0x614),
- REG16(0x618),
- REG16(0x61c),
- REG16(0x620),
- REG16(0x624),
- REG16(0x628),
- REG16(0x62c),
- REG16(0x630),
- REG16(0x634),
- REG16(0x638),
- REG16(0x63c),
- REG16(0x640),
- REG16(0x644),
- REG16(0x648),
- REG16(0x64c),
- REG16(0x650),
- REG16(0x654),
- REG16(0x658),
- REG16(0x65c),
- REG16(0x660),
- REG16(0x664),
- REG16(0x668),
- REG16(0x66c),
- REG16(0x670),
- REG16(0x674),
- REG16(0x678),
- REG16(0x67c),
- REG(0x68),
-
- END(176)
-};
-
-static const u8 gen11_rcs_offsets[] = {
- NOP(1),
- LRI(15, POSTED),
- REG16(0x244),
- REG(0x034),
- REG(0x030),
- REG(0x038),
- REG(0x03c),
- REG(0x168),
- REG(0x140),
- REG(0x110),
- REG(0x11c),
- REG(0x114),
- REG(0x118),
- REG(0x1c0),
- REG(0x1c4),
- REG(0x1c8),
- REG(0x180),
-
- NOP(1),
- LRI(9, POSTED),
- REG16(0x3a8),
- REG16(0x28c),
- REG16(0x288),
- REG16(0x284),
- REG16(0x280),
- REG16(0x27c),
- REG16(0x278),
- REG16(0x274),
- REG16(0x270),
-
- LRI(1, POSTED),
- REG(0x1b0),
-
- NOP(10),
- LRI(1, 0),
- REG(0x0c8),
-
- END(80)
-};
-
-static const u8 gen12_rcs_offsets[] = {
- NOP(1),
- LRI(13, POSTED),
- REG16(0x244),
- REG(0x034),
- REG(0x030),
- REG(0x038),
- REG(0x03c),
- REG(0x168),
- REG(0x140),
- REG(0x110),
- REG(0x1c0),
- REG(0x1c4),
- REG(0x1c8),
- REG(0x180),
- REG16(0x2b4),
-
- NOP(5),
- LRI(9, POSTED),
- REG16(0x3a8),
- REG16(0x28c),
- REG16(0x288),
- REG16(0x284),
- REG16(0x280),
- REG16(0x27c),
- REG16(0x278),
- REG16(0x274),
- REG16(0x270),
-
- LRI(3, POSTED),
- REG(0x1b0),
- REG16(0x5a8),
- REG16(0x5ac),
-
- NOP(6),
- LRI(1, 0),
- REG(0x0c8),
- NOP(3 + 9 + 1),
-
- LRI(51, POSTED),
- REG16(0x588),
- REG16(0x588),
- REG16(0x588),
- REG16(0x588),
- REG16(0x588),
- REG16(0x588),
- REG(0x028),
- REG(0x09c),
- REG(0x0c0),
- REG(0x178),
- REG(0x17c),
- REG16(0x358),
- REG(0x170),
- REG(0x150),
- REG(0x154),
- REG(0x158),
- REG16(0x41c),
- REG16(0x600),
- REG16(0x604),
- REG16(0x608),
- REG16(0x60c),
- REG16(0x610),
- REG16(0x614),
- REG16(0x618),
- REG16(0x61c),
- REG16(0x620),
- REG16(0x624),
- REG16(0x628),
- REG16(0x62c),
- REG16(0x630),
- REG16(0x634),
- REG16(0x638),
- REG16(0x63c),
- REG16(0x640),
- REG16(0x644),
- REG16(0x648),
- REG16(0x64c),
- REG16(0x650),
- REG16(0x654),
- REG16(0x658),
- REG16(0x65c),
- REG16(0x660),
- REG16(0x664),
- REG16(0x668),
- REG16(0x66c),
- REG16(0x670),
- REG16(0x674),
- REG16(0x678),
- REG16(0x67c),
- REG(0x068),
- REG(0x084),
- NOP(1),
-
- END(192)
-};
-
-#undef END
-#undef REG16
-#undef REG
-#undef LRI
-#undef NOP
-
-static const u8 *reg_offsets(const struct intel_engine_cs *engine)
-{
- /*
- * The gen12+ lists only have the registers we program in the basic
- * default state. We rely on the context image using relative
- * addressing to automatic fixup the register state between the
- * physical engines for virtual engine.
- */
- GEM_BUG_ON(INTEL_GEN(engine->i915) >= 12 &&
- !intel_engine_has_relative_mmio(engine));
-
- if (engine->class == RENDER_CLASS) {
- if (INTEL_GEN(engine->i915) >= 12)
- return gen12_rcs_offsets;
- else if (INTEL_GEN(engine->i915) >= 11)
- return gen11_rcs_offsets;
- else if (INTEL_GEN(engine->i915) >= 9)
- return gen9_rcs_offsets;
- else
- return gen8_rcs_offsets;
- } else {
- if (INTEL_GEN(engine->i915) >= 12)
- return gen12_xcs_offsets;
- else if (INTEL_GEN(engine->i915) >= 9)
- return gen9_xcs_offsets;
- else
- return gen8_xcs_offsets;
- }
-}
-
static struct i915_request *
__unwind_incomplete_requests(struct intel_engine_cs *engine)
{
@@ -1187,58 +496,6 @@ static void intel_engine_context_out(struct intel_engine_cs *engine)
write_sequnlock_irqrestore(&engine->stats.lock, flags);
}
-static void
-execlists_check_context(const struct intel_context *ce,
- const struct intel_engine_cs *engine,
- const char *when)
-{
- const struct intel_ring *ring = ce->ring;
- u32 *regs = ce->lrc_reg_state;
- bool valid = true;
- int x;
-
- if (regs[CTX_RING_START] != i915_ggtt_offset(ring->vma)) {
- pr_err("%s: context submitted with incorrect RING_START [%08x], expected %08x\n",
- engine->name,
- regs[CTX_RING_START],
- i915_ggtt_offset(ring->vma));
- regs[CTX_RING_START] = i915_ggtt_offset(ring->vma);
- valid = false;
- }
-
- if ((regs[CTX_RING_CTL] & ~(RING_WAIT | RING_WAIT_SEMAPHORE)) !=
- (RING_CTL_SIZE(ring->size) | RING_VALID)) {
- pr_err("%s: context submitted with incorrect RING_CTL [%08x], expected %08x\n",
- engine->name,
- regs[CTX_RING_CTL],
- (u32)(RING_CTL_SIZE(ring->size) | RING_VALID));
- regs[CTX_RING_CTL] = RING_CTL_SIZE(ring->size) | RING_VALID;
- valid = false;
- }
-
- x = lrc_ring_mi_mode(engine);
- if (x != -1 && regs[x + 1] & (regs[x + 1] >> 16) & STOP_RING) {
- pr_err("%s: context submitted with STOP_RING [%08x] in RING_MI_MODE\n",
- engine->name, regs[x + 1]);
- regs[x + 1] &= ~STOP_RING;
- regs[x + 1] |= STOP_RING << 16;
- valid = false;
- }
-
- WARN_ONCE(!valid, "Invalid lrc state found %s submission\n", when);
-}
-
-static void restore_default_state(struct intel_context *ce,
- struct intel_engine_cs *engine)
-{
- u32 *regs;
-
- regs = memset(ce->lrc_reg_state, 0, engine->context_size - PAGE_SIZE);
- execlists_init_reg_state(regs, ce, engine, ce->ring, true);
-
- ce->runtime.last = intel_context_get_runtime(ce);
-}
-
static void reset_active(struct i915_request *rq,
struct intel_engine_cs *engine)
{
@@ -1271,11 +528,10 @@ static void reset_active(struct i915_request *rq,
head = intel_ring_wrap(ce->ring, head);
/* Scrub the context image to prevent replaying the previous batch */
- restore_default_state(ce, engine);
- __execlists_update_reg_state(ce, engine, head);
+ lrc_restore_defaults(ce, engine);
/* We've switched away, so this should be a no-op, but intent matters */
- ce->lrc.desc |= CTX_DESC_FORCE_RESTORE;
+ ce->lrc.lrca = lrc_update_regs(ce, engine, head);
}
static void st_update_runtime_underflow(struct intel_context *ce, s32 dt)
@@ -1295,7 +551,7 @@ static void intel_context_update_runtime(struct intel_context *ce)
return;
old = ce->runtime.last;
- ce->runtime.last = intel_context_get_runtime(ce);
+ ce->runtime.last = lrc_get_runtime(ce);
dt = ce->runtime.last - old;
if (unlikely(dt < 0)) {
@@ -1321,7 +577,7 @@ __execlists_schedule_in(struct i915_request *rq)
reset_active(rq, engine);
if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
- execlists_check_context(ce, engine, "before");
+ lrc_check_regs(ce, engine, "before");
if (ce->tag) {
/* Use a fixed tag for OA and friends */
@@ -1393,7 +649,7 @@ __execlists_schedule_out(struct i915_request *rq,
*/
if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
- execlists_check_context(ce, engine, "after");
+ lrc_check_regs(ce, engine, "after");
/*
* If we have just completed this context, the engine may now be
@@ -1752,12 +1008,6 @@ static bool can_merge_rq(const struct i915_request *prev,
return true;
}
-static void virtual_update_register_offsets(u32 *regs,
- struct intel_engine_cs *engine)
-{
- set_offsets(regs, reg_offsets(engine), engine, false);
-}
-
static bool virtual_matches(const struct virtual_engine *ve,
const struct i915_request *rq,
const struct intel_engine_cs *engine)
@@ -1793,8 +1043,7 @@ static void virtual_xfer_context(struct virtual_engine *ve,
GEM_BUG_ON(READ_ONCE(ve->context.inflight));
if (!intel_engine_has_relative_mmio(engine))
- virtual_update_register_offsets(ve->context.lrc_reg_state,
- engine);
+ lrc_update_offsets(&ve->context, engine);
/*
* Move the bound engine to the top of the list for
@@ -3307,35 +2556,9 @@ static void execlists_context_destroy(struct kref *kref)
intel_context_free(ce);
}
-static void
-set_redzone(void *vaddr, const struct intel_engine_cs *engine)
-{
- if (!IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
- return;
-
- vaddr += engine->context_size;
-
- memset(vaddr, CONTEXT_REDZONE, I915_GTT_PAGE_SIZE);
-}
-
-static void
-check_redzone(const void *vaddr, const struct intel_engine_cs *engine)
-{
- if (!IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
- return;
-
- vaddr += engine->context_size;
-
- if (memchr_inv(vaddr, CONTEXT_REDZONE, I915_GTT_PAGE_SIZE))
- drm_err_once(&engine->i915->drm,
- "%s context redzone overwritten!\n",
- engine->name);
-}
-
static void execlists_context_unpin(struct intel_context *ce)
{
- check_redzone((void *)ce->lrc_reg_state - LRC_STATE_OFFSET,
- ce->engine);
+ lrc_check_redzone(ce);
}
static void execlists_context_post_unpin(struct intel_context *ce)
@@ -3343,163 +2566,6 @@ static void execlists_context_post_unpin(struct intel_context *ce)
i915_gem_object_unpin_map(ce->state->obj);
}
-static u32 *
-gen12_emit_timestamp_wa(const struct intel_context *ce, u32 *cs)
-{
- *cs++ = MI_LOAD_REGISTER_MEM_GEN8 |
- MI_SRM_LRM_GLOBAL_GTT |
- MI_LRI_LRM_CS_MMIO;
- *cs++ = i915_mmio_reg_offset(GEN8_RING_CS_GPR(0, 0));
- *cs++ = i915_ggtt_offset(ce->state) + LRC_STATE_OFFSET +
- CTX_TIMESTAMP * sizeof(u32);
- *cs++ = 0;
-
- *cs++ = MI_LOAD_REGISTER_REG |
- MI_LRR_SOURCE_CS_MMIO |
- MI_LRI_LRM_CS_MMIO;
- *cs++ = i915_mmio_reg_offset(GEN8_RING_CS_GPR(0, 0));
- *cs++ = i915_mmio_reg_offset(RING_CTX_TIMESTAMP(0));
-
- *cs++ = MI_LOAD_REGISTER_REG |
- MI_LRR_SOURCE_CS_MMIO |
- MI_LRI_LRM_CS_MMIO;
- *cs++ = i915_mmio_reg_offset(GEN8_RING_CS_GPR(0, 0));
- *cs++ = i915_mmio_reg_offset(RING_CTX_TIMESTAMP(0));
-
- return cs;
-}
-
-static u32 *
-gen12_emit_restore_scratch(const struct intel_context *ce, u32 *cs)
-{
- GEM_BUG_ON(lrc_ring_gpr0(ce->engine) == -1);
-
- *cs++ = MI_LOAD_REGISTER_MEM_GEN8 |
- MI_SRM_LRM_GLOBAL_GTT |
- MI_LRI_LRM_CS_MMIO;
- *cs++ = i915_mmio_reg_offset(GEN8_RING_CS_GPR(0, 0));
- *cs++ = i915_ggtt_offset(ce->state) + LRC_STATE_OFFSET +
- (lrc_ring_gpr0(ce->engine) + 1) * sizeof(u32);
- *cs++ = 0;
-
- return cs;
-}
-
-static u32 *
-gen12_emit_cmd_buf_wa(const struct intel_context *ce, u32 *cs)
-{
- GEM_BUG_ON(lrc_ring_cmd_buf_cctl(ce->engine) == -1);
-
- *cs++ = MI_LOAD_REGISTER_MEM_GEN8 |
- MI_SRM_LRM_GLOBAL_GTT |
- MI_LRI_LRM_CS_MMIO;
- *cs++ = i915_mmio_reg_offset(GEN8_RING_CS_GPR(0, 0));
- *cs++ = i915_ggtt_offset(ce->state) + LRC_STATE_OFFSET +
- (lrc_ring_cmd_buf_cctl(ce->engine) + 1) * sizeof(u32);
- *cs++ = 0;
-
- *cs++ = MI_LOAD_REGISTER_REG |
- MI_LRR_SOURCE_CS_MMIO |
- MI_LRI_LRM_CS_MMIO;
- *cs++ = i915_mmio_reg_offset(GEN8_RING_CS_GPR(0, 0));
- *cs++ = i915_mmio_reg_offset(RING_CMD_BUF_CCTL(0));
-
- return cs;
-}
-
-static u32 *
-gen12_emit_indirect_ctx_rcs(const struct intel_context *ce, u32 *cs)
-{
- cs = gen12_emit_timestamp_wa(ce, cs);
- cs = gen12_emit_cmd_buf_wa(ce, cs);
- cs = gen12_emit_restore_scratch(ce, cs);
-
- return cs;
-}
-
-static u32 *
-gen12_emit_indirect_ctx_xcs(const struct intel_context *ce, u32 *cs)
-{
- cs = gen12_emit_timestamp_wa(ce, cs);
- cs = gen12_emit_restore_scratch(ce, cs);
-
- return cs;
-}
-
-static inline u32 context_wa_bb_offset(const struct intel_context *ce)
-{
- return PAGE_SIZE * ce->wa_bb_page;
-}
-
-static u32 *context_indirect_bb(const struct intel_context *ce)
-{
- void *ptr;
-
- GEM_BUG_ON(!ce->wa_bb_page);
-
- ptr = ce->lrc_reg_state;
- ptr -= LRC_STATE_OFFSET; /* back to start of context image */
- ptr += context_wa_bb_offset(ce);
-
- return ptr;
-}
-
-static void
-setup_indirect_ctx_bb(const struct intel_context *ce,
- const struct intel_engine_cs *engine,
- u32 *(*emit)(const struct intel_context *, u32 *))
-{
- u32 * const start = context_indirect_bb(ce);
- u32 *cs;
-
- cs = emit(ce, start);
- GEM_BUG_ON(cs - start > I915_GTT_PAGE_SIZE / sizeof(*cs));
- while ((unsigned long)cs % CACHELINE_BYTES)
- *cs++ = MI_NOOP;
-
- lrc_ring_setup_indirect_ctx(ce->lrc_reg_state, engine,
- i915_ggtt_offset(ce->state) +
- context_wa_bb_offset(ce),
- (cs - start) * sizeof(*cs));
-}
-
-static void
-__execlists_update_reg_state(const struct intel_context *ce,
- const struct intel_engine_cs *engine,
- u32 head)
-{
- struct intel_ring *ring = ce->ring;
- u32 *regs = ce->lrc_reg_state;
-
- GEM_BUG_ON(!intel_ring_offset_valid(ring, head));
- GEM_BUG_ON(!intel_ring_offset_valid(ring, ring->tail));
-
- regs[CTX_RING_START] = i915_ggtt_offset(ring->vma);
- regs[CTX_RING_HEAD] = head;
- regs[CTX_RING_TAIL] = ring->tail;
- regs[CTX_RING_CTL] = RING_CTL_SIZE(ring->size) | RING_VALID;
-
- /* RPCS */
- if (engine->class == RENDER_CLASS) {
- regs[CTX_R_PWR_CLK_STATE] =
- intel_sseu_make_rpcs(engine->gt, &ce->sseu);
-
- i915_oa_init_reg_state(ce, engine);
- }
-
- if (ce->wa_bb_page) {
- u32 *(*fn)(const struct intel_context *ce, u32 *cs);
-
- fn = gen12_emit_indirect_ctx_xcs;
- if (ce->engine->class == RENDER_CLASS)
- fn = gen12_emit_indirect_ctx_rcs;
-
- /* Mutually exclusive wrt to global indirect bb */
- GEM_BUG_ON(engine->wa_ctx.indirect_ctx.size);
- setup_indirect_ctx_bb(ce, engine, fn);
- }
-}
-
static int
execlists_context_pre_pin(struct intel_context *ce,
struct i915_gem_ww_ctx *ww, void **vaddr)
@@ -3519,10 +2585,8 @@ __execlists_context_pin(struct intel_context *ce,
struct intel_engine_cs *engine,
void *vaddr)
{
- ce->lrc.lrca = lrc_descriptor(ce, engine) | CTX_DESC_FORCE_RESTORE;
ce->lrc_reg_state = vaddr + LRC_STATE_OFFSET;
- __execlists_update_reg_state(ce, engine, ce->ring->tail);
-
+ ce->lrc.lrca = lrc_update_regs(ce, engine, ce->ring->tail);
return 0;
}
@@ -3544,11 +2608,8 @@ static void execlists_context_reset(struct intel_context *ce)
intel_ring_reset(ce->ring, ce->ring->emit);
/* Scrub away the garbage */
- execlists_init_reg_state(ce->lrc_reg_state,
- ce, ce->engine, ce->ring, true);
- __execlists_update_reg_state(ce, ce->engine, ce->ring->tail);
-
- ce->lrc.desc |= CTX_DESC_FORCE_RESTORE;
+ lrc_init_regs(ce, ce->engine, ce->ring, true);
+ ce->lrc.lrca = lrc_update_regs(ce, ce->engine, ce->ring->tail);
}
static const struct intel_context_ops execlists_context_ops = {
@@ -3650,330 +2711,6 @@ static int execlists_request_alloc(struct i915_request *request)
return 0;
}
-/*
- * In this WA we need to set GEN8_L3SQCREG4[21:21] and reset it after
- * PIPE_CONTROL instruction. This is required for the flush to happen correctly
- * but there is a slight complication as this is applied in WA batch where the
- * values are only initialized once so we cannot take register value at the
- * beginning and reuse it further; hence we save its value to memory, upload a
- * constant value with bit21 set and then we restore it back with the saved value.
- * To simplify the WA, a constant value is formed by using the default value
- * of this register. This shouldn't be a problem because we are only modifying
- * it for a short period and this batch in non-premptible. We can ofcourse
- * use additional instructions that read the actual value of the register
- * at that time and set our bit of interest but it makes the WA complicated.
- *
- * This WA is also required for Gen9 so extracting as a function avoids
- * code duplication.
- */
-static u32 *
-gen8_emit_flush_coherentl3_wa(struct intel_engine_cs *engine, u32 *batch)
-{
- /* NB no one else is allowed to scribble over scratch + 256! */
- *batch++ = MI_STORE_REGISTER_MEM_GEN8 | MI_SRM_LRM_GLOBAL_GTT;
- *batch++ = i915_mmio_reg_offset(GEN8_L3SQCREG4);
- *batch++ = intel_gt_scratch_offset(engine->gt,
- INTEL_GT_SCRATCH_FIELD_COHERENTL3_WA);
- *batch++ = 0;
-
- *batch++ = MI_LOAD_REGISTER_IMM(1);
- *batch++ = i915_mmio_reg_offset(GEN8_L3SQCREG4);
- *batch++ = 0x40400000 | GEN8_LQSC_FLUSH_COHERENT_LINES;
-
- batch = gen8_emit_pipe_control(batch,
- PIPE_CONTROL_CS_STALL |
- PIPE_CONTROL_DC_FLUSH_ENABLE,
- 0);
-
- *batch++ = MI_LOAD_REGISTER_MEM_GEN8 | MI_SRM_LRM_GLOBAL_GTT;
- *batch++ = i915_mmio_reg_offset(GEN8_L3SQCREG4);
- *batch++ = intel_gt_scratch_offset(engine->gt,
- INTEL_GT_SCRATCH_FIELD_COHERENTL3_WA);
- *batch++ = 0;
-
- return batch;
-}
-
-/*
- * Typically we only have one indirect_ctx and per_ctx batch buffer which are
- * initialized at the beginning and shared across all contexts but this field
- * helps us to have multiple batches at different offsets and select them based
- * on a criteria. At the moment this batch always start at the beginning of the page
- * and at this point we don't have multiple wa_ctx batch buffers.
- *
- * The number of WA applied are not known at the beginning; we use this field
- * to return the no of DWORDS written.
- *
- * It is to be noted that this batch does not contain MI_BATCH_BUFFER_END
- * so it adds NOOPs as padding to make it cacheline aligned.
- * MI_BATCH_BUFFER_END will be added to perctx batch and both of them together
- * makes a complete batch buffer.
- */
-static u32 *gen8_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch)
-{
- /* WaDisableCtxRestoreArbitration:bdw,chv */
- *batch++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
-
- /* WaFlushCoherentL3CacheLinesAtContextSwitch:bdw */
- if (IS_BROADWELL(engine->i915))
- batch = gen8_emit_flush_coherentl3_wa(engine, batch);
-
- /* WaClearSlmSpaceAtContextSwitch:bdw,chv */
- /* Actual scratch location is at 128 bytes offset */
- batch = gen8_emit_pipe_control(batch,
- PIPE_CONTROL_FLUSH_L3 |
- PIPE_CONTROL_STORE_DATA_INDEX |
- PIPE_CONTROL_CS_STALL |
- PIPE_CONTROL_QW_WRITE,
- LRC_PPHWSP_SCRATCH_ADDR);
-
- *batch++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
-
- /* Pad to end of cacheline */
- while ((unsigned long)batch % CACHELINE_BYTES)
- *batch++ = MI_NOOP;
-
- /*
- * MI_BATCH_BUFFER_END is not required in Indirect ctx BB because
- * execution depends on the length specified in terms of cache lines
- * in the register CTX_RCS_INDIRECT_CTX
- */
-
- return batch;
-}
-
-struct lri {
- i915_reg_t reg;
- u32 value;
-};
-
-static u32 *emit_lri(u32 *batch, const struct lri *lri, unsigned int count)
-{
- GEM_BUG_ON(!count || count > 63);
-
- *batch++ = MI_LOAD_REGISTER_IMM(count);
- do {
- *batch++ = i915_mmio_reg_offset(lri->reg);
- *batch++ = lri->value;
- } while (lri++, --count);
- *batch++ = MI_NOOP;
-
- return batch;
-}
-
-static u32 *gen9_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch)
-{
- static const struct lri lri[] = {
- /* WaDisableGatherAtSetShaderCommonSlice:skl,bxt,kbl,glk */
- {
- COMMON_SLICE_CHICKEN2,
- __MASKED_FIELD(GEN9_DISABLE_GATHER_AT_SET_SHADER_COMMON_SLICE,
- 0),
- },
-
- /* BSpec: 11391 */
- {
- FF_SLICE_CHICKEN,
- __MASKED_FIELD(FF_SLICE_CHICKEN_CL_PROVOKING_VERTEX_FIX,
- FF_SLICE_CHICKEN_CL_PROVOKING_VERTEX_FIX),
- },
-
- /* BSpec: 11299 */
- {
- _3D_CHICKEN3,
- __MASKED_FIELD(_3D_CHICKEN_SF_PROVOKING_VERTEX_FIX,
- _3D_CHICKEN_SF_PROVOKING_VERTEX_FIX),
- }
- };
-
- *batch++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
-
- /* WaFlushCoherentL3CacheLinesAtContextSwitch:skl,bxt,glk */
- batch = gen8_emit_flush_coherentl3_wa(engine, batch);
-
- /* WaClearSlmSpaceAtContextSwitch:skl,bxt,kbl,glk,cfl */
- batch = gen8_emit_pipe_control(batch,
- PIPE_CONTROL_FLUSH_L3 |
- PIPE_CONTROL_STORE_DATA_INDEX |
- PIPE_CONTROL_CS_STALL |
- PIPE_CONTROL_QW_WRITE,
- LRC_PPHWSP_SCRATCH_ADDR);
-
- batch = emit_lri(batch, lri, ARRAY_SIZE(lri));
-
- /* WaMediaPoolStateCmdInWABB:bxt,glk */
- if (HAS_POOLED_EU(engine->i915)) {
- /*
- * EU pool configuration is setup along with golden context
- * during context initialization. This value depends on
- * device type (2x6 or 3x6) and needs to be updated based
- * on which subslice is disabled especially for 2x6
- * devices, however it is safe to load default
- * configuration of 3x6 device instead of masking off
- * corresponding bits because HW ignores bits of a disabled
- * subslice and drops down to appropriate config. Please
- * see render_state_setup() in i915_gem_render_state.c for
- * possible configurations, to avoid duplication they are
- * not shown here again.
- */
- *batch++ = GEN9_MEDIA_POOL_STATE;
- *batch++ = GEN9_MEDIA_POOL_ENABLE;
- *batch++ = 0x00777000;
- *batch++ = 0;
- *batch++ = 0;
- *batch++ = 0;
- }
-
- *batch++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
-
- /* Pad to end of cacheline */
- while ((unsigned long)batch % CACHELINE_BYTES)
- *batch++ = MI_NOOP;
-
- return batch;
-}
-
-static u32 *
-gen10_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch)
-{
- int i;
-
- /*
- * WaPipeControlBefore3DStateSamplePattern: cnl
- *
- * Ensure the engine is idle prior to programming a
- * 3DSTATE_SAMPLE_PATTERN during a context restore.
- */
- batch = gen8_emit_pipe_control(batch,
- PIPE_CONTROL_CS_STALL,
- 0);
- /*
- * WaPipeControlBefore3DStateSamplePattern says we need 4 dwords for
- * the PIPE_CONTROL followed by 12 dwords of 0x0, so 16 dwords in
- * total. However, a PIPE_CONTROL is 6 dwords long, not 4, which is
- * confusing. Since gen8_emit_pipe_control() already advances the
- * batch by 6 dwords, we advance the other 10 here, completing a
- * cacheline. It's not clear if the workaround requires this padding
- * before other commands, or if it's just the regular padding we would
- * already have for the workaround bb, so leave it here for now.
- */
- for (i = 0; i < 10; i++)
- *batch++ = MI_NOOP;
-
- /* Pad to end of cacheline */
- while ((unsigned long)batch % CACHELINE_BYTES)
- *batch++ = MI_NOOP;
-
- return batch;
-}
-
-#define CTX_WA_BB_OBJ_SIZE (PAGE_SIZE)
-
-static int lrc_setup_wa_ctx(struct intel_engine_cs *engine)
-{
- struct drm_i915_gem_object *obj;
- struct i915_vma *vma;
- int err;
-
- obj = i915_gem_object_create_shmem(engine->i915, CTX_WA_BB_OBJ_SIZE);
- if (IS_ERR(obj))
- return PTR_ERR(obj);
-
- vma = i915_vma_instance(obj, &engine->gt->ggtt->vm, NULL);
- if (IS_ERR(vma)) {
- err = PTR_ERR(vma);
- goto err;
- }
-
- err = i915_ggtt_pin(vma, NULL, 0, PIN_HIGH);
- if (err)
- goto err;
-
- engine->wa_ctx.vma = vma;
- return 0;
-
-err:
- i915_gem_object_put(obj);
- return err;
-}
-
-static void lrc_destroy_wa_ctx(struct intel_engine_cs *engine)
-{
- i915_vma_unpin_and_release(&engine->wa_ctx.vma, 0);
-}
-
-typedef u32 *(*wa_bb_func_t)(struct intel_engine_cs *engine, u32 *batch);
-
-static int intel_init_workaround_bb(struct intel_engine_cs *engine)
-{
- struct i915_ctx_workarounds *wa_ctx = &engine->wa_ctx;
- struct i915_wa_ctx_bb *wa_bb[2] = { &wa_ctx->indirect_ctx,
- &wa_ctx->per_ctx };
- wa_bb_func_t wa_bb_fn[2];
- void *batch, *batch_ptr;
- unsigned int i;
- int ret;
-
- if (engine->class != RENDER_CLASS)
- return 0;
-
- switch (INTEL_GEN(engine->i915)) {
- case 12:
- case 11:
- return 0;
- case 10:
- wa_bb_fn[0] = gen10_init_indirectctx_bb;
- wa_bb_fn[1] = NULL;
- break;
- case 9:
- wa_bb_fn[0] = gen9_init_indirectctx_bb;
- wa_bb_fn[1] = NULL;
- break;
- case 8:
- wa_bb_fn[0] = gen8_init_indirectctx_bb;
- wa_bb_fn[1] = NULL;
- break;
- default:
- MISSING_CASE(INTEL_GEN(engine->i915));
- return 0;
- }
-
- ret = lrc_setup_wa_ctx(engine);
- if (ret) {
- drm_dbg(&engine->i915->drm,
- "Failed to setup context WA page: %d\n", ret);
- return ret;
- }
-
- batch = i915_gem_object_pin_map(wa_ctx->vma->obj, I915_MAP_WB);
-
- /*
- * Emit the two workaround batch buffers, recording the offset from the
- * start of the workaround batch buffer object for each and their
- * respective sizes.
- */
- batch_ptr = batch;
- for (i = 0; i < ARRAY_SIZE(wa_bb_fn); i++) {
- wa_bb[i]->offset = batch_ptr - batch;
- if (GEM_DEBUG_WARN_ON(!IS_ALIGNED(wa_bb[i]->offset,
- CACHELINE_BYTES))) {
- ret = -EINVAL;
- break;
- }
- if (wa_bb_fn[i])
- batch_ptr = wa_bb_fn[i](engine, batch_ptr);
- wa_bb[i]->size = batch_ptr - (batch + wa_bb[i]->offset);
- }
- GEM_BUG_ON(batch_ptr - batch > CTX_WA_BB_OBJ_SIZE);
-
- __i915_gem_object_flush_map(wa_ctx->vma->obj, 0, batch_ptr - batch);
- __i915_gem_object_release_map(wa_ctx->vma->obj);
- if (ret)
- lrc_destroy_wa_ctx(engine);
-
- return ret;
-}
-
static void reset_csb_pointers(struct intel_engine_cs *engine)
{
struct intel_engine_execlists * const execlists = &engine->execlists;
@@ -4185,25 +2922,6 @@ static void execlists_reset_prepare(struct intel_engine_cs *engine)
engine->execlists.reset_ccid = active_ccid(engine);
}
-static void __reset_stop_ring(u32 *regs, const struct intel_engine_cs *engine)
-{
- int x;
-
- x = lrc_ring_mi_mode(engine);
- if (x != -1) {
- regs[x + 1] &= ~STOP_RING;
- regs[x + 1] |= STOP_RING << 16;
- }
-}
-
-static void __execlists_reset_reg_state(const struct intel_context *ce,
- const struct intel_engine_cs *engine)
-{
- u32 *regs = ce->lrc_reg_state;
-
- __reset_stop_ring(regs, engine);
-}
-
static void __execlists_reset(struct intel_engine_cs *engine, bool stalled)
{
struct intel_engine_execlists * const execlists = &engine->execlists;
@@ -4287,9 +3005,8 @@ static void __execlists_reset(struct intel_engine_cs *engine, bool stalled)
out_replay:
ENGINE_TRACE(engine, "replay {head:%04x, tail:%04x}\n",
head, ce->ring->tail);
- __execlists_reset_reg_state(ce, engine);
- __execlists_update_reg_state(ce, engine, head);
- ce->lrc.desc |= CTX_DESC_FORCE_RESTORE; /* paranoid: GPU was reset! */
+ lrc_reset_regs(ce, engine);
+ ce->lrc.lrca = lrc_update_regs(ce, engine, head);
unwind:
/* Push back any incomplete requests for replay after the reset. */
@@ -4487,7 +3204,7 @@ static void execlists_release(struct intel_engine_cs *engine)
execlists_shutdown(engine);
intel_engine_cleanup_common(engine);
- lrc_destroy_wa_ctx(engine);
+ lrc_fini_wa_ctx(engine);
}
static void
@@ -4581,7 +3298,7 @@ int intel_execlists_submission_setup(struct intel_engine_cs *engine)
if (engine->class == RENDER_CLASS)
rcs_submission_override(engine);
- if (intel_init_workaround_bb(engine))
+ if (lrc_init_wa_ctx(engine))
/*
* We continue even if we fail to initialize WA batch
* because we only expect rare glitches but nothing
@@ -4622,136 +3339,6 @@ int intel_execlists_submission_setup(struct intel_engine_cs *engine)
return 0;
}
-static void init_common_reg_state(u32 * const regs,
- const struct intel_engine_cs *engine,
- const struct intel_ring *ring,
- bool inhibit)
-{
- u32 ctl;
-
- ctl = _MASKED_BIT_ENABLE(CTX_CTRL_INHIBIT_SYN_CTX_SWITCH);
- ctl |= _MASKED_BIT_DISABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT);
- if (inhibit)
- ctl |= CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT;
- if (INTEL_GEN(engine->i915) < 11)
- ctl |= _MASKED_BIT_DISABLE(CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT |
- CTX_CTRL_RS_CTX_ENABLE);
- regs[CTX_CONTEXT_CONTROL] = ctl;
-
- regs[CTX_RING_CTL] = RING_CTL_SIZE(ring->size) | RING_VALID;
- regs[CTX_TIMESTAMP] = 0;
-}
-
-static void init_wa_bb_reg_state(u32 * const regs,
- const struct intel_engine_cs *engine)
-{
- const struct i915_ctx_workarounds * const wa_ctx = &engine->wa_ctx;
-
- if (wa_ctx->per_ctx.size) {
- const u32 ggtt_offset = i915_ggtt_offset(wa_ctx->vma);
-
- GEM_BUG_ON(lrc_ring_wa_bb_per_ctx(engine) == -1);
- regs[lrc_ring_wa_bb_per_ctx(engine) + 1] =
- (ggtt_offset + wa_ctx->per_ctx.offset) | 0x01;
- }
-
- if (wa_ctx->indirect_ctx.size) {
- lrc_ring_setup_indirect_ctx(regs, engine,
- i915_ggtt_offset(wa_ctx->vma) +
- wa_ctx->indirect_ctx.offset,
- wa_ctx->indirect_ctx.size);
- }
-}
-
-static void init_ppgtt_reg_state(u32 *regs, const struct i915_ppgtt *ppgtt)
-{
- if (i915_vm_is_4lvl(&ppgtt->vm)) {
- /* 64b PPGTT (48bit canonical)
- * PDP0_DESCRIPTOR contains the base address to PML4 and
- * other PDP Descriptors are ignored.
- */
- ASSIGN_CTX_PML4(ppgtt, regs);
- } else {
- ASSIGN_CTX_PDP(ppgtt, regs, 3);
- ASSIGN_CTX_PDP(ppgtt, regs, 2);
- ASSIGN_CTX_PDP(ppgtt, regs, 1);
- ASSIGN_CTX_PDP(ppgtt, regs, 0);
- }
-}
-
-static struct i915_ppgtt *vm_alias(struct i915_address_space *vm)
-{
- if (i915_is_ggtt(vm))
- return i915_vm_to_ggtt(vm)->alias;
- else
- return i915_vm_to_ppgtt(vm);
-}
-
-static void execlists_init_reg_state(u32 *regs,
- const struct intel_context *ce,
- const struct intel_engine_cs *engine,
- const struct intel_ring *ring,
- bool inhibit)
-{
- /*
- * A context is actually a big batch buffer with several
- * MI_LOAD_REGISTER_IMM commands followed by (reg, value) pairs. The
- * values we are setting here are only for the first context restore:
- * on a subsequent save, the GPU will recreate this batchbuffer with new
- * values (including all the missing MI_LOAD_REGISTER_IMM commands that
- * we are not initializing here).
- *
- * Must keep consistent with virtual_update_register_offsets().
- */
- set_offsets(regs, reg_offsets(engine), engine, inhibit);
-
- init_common_reg_state(regs, engine, ring, inhibit);
- init_ppgtt_reg_state(regs, vm_alias(ce->vm));
-
- init_wa_bb_reg_state(regs, engine);
-
- __reset_stop_ring(regs, engine);
-}
-
-static int
-populate_lr_context(struct intel_context *ce,
- struct drm_i915_gem_object *ctx_obj,
- struct intel_engine_cs *engine,
- struct intel_ring *ring)
-{
- bool inhibit = true;
- void *vaddr;
-
- vaddr = i915_gem_object_pin_map(ctx_obj, I915_MAP_WB);
- if (IS_ERR(vaddr)) {
- drm_dbg(&engine->i915->drm, "Could not map object pages!\n");
- return PTR_ERR(vaddr);
- }
-
- set_redzone(vaddr, engine);
-
- if (engine->default_state) {
- shmem_read(engine->default_state, 0,
- vaddr, engine->context_size);
- __set_bit(CONTEXT_VALID_BIT, &ce->flags);
- inhibit = false;
- }
-
- /* Clear the ppHWSP (inc. per-context counters) */
- memset(vaddr, 0, PAGE_SIZE);
-
- /*
- * The second page of the context object contains some registers which
- * must be set up prior to the first execution.
- */
- execlists_init_reg_state(vaddr + LRC_STATE_OFFSET,
- ce, engine, ring, inhibit);
-
- __i915_gem_object_flush_map(ctx_obj, 0, engine->context_size);
- i915_gem_object_unpin_map(ctx_obj);
- return 0;
-}
-
static struct intel_timeline *pinned_timeline(struct intel_context *ce)
{
struct intel_timeline *tl = fetch_and_zero(&ce->timeline);
@@ -4763,32 +3350,11 @@ static struct intel_timeline *pinned_timeline(struct intel_context *ce)
static int __execlists_context_alloc(struct intel_context *ce,
struct intel_engine_cs *engine)
{
- struct drm_i915_gem_object *ctx_obj;
- struct intel_ring *ring;
- struct i915_vma *vma;
- u32 context_size;
- int ret;
-
- GEM_BUG_ON(ce->state);
- context_size = round_up(engine->context_size, I915_GTT_PAGE_SIZE);
-
- if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
- context_size += I915_GTT_PAGE_SIZE; /* for redzone */
-
- if (INTEL_GEN(engine->i915) == 12) {
- ce->wa_bb_page = context_size / PAGE_SIZE;
- context_size += PAGE_SIZE;
- }
-
- ctx_obj = i915_gem_object_create_shmem(engine->i915, context_size);
- if (IS_ERR(ctx_obj))
- return PTR_ERR(ctx_obj);
+ int err;
- vma = i915_vma_instance(ctx_obj, &engine->gt->ggtt->vm, NULL);
- if (IS_ERR(vma)) {
- ret = PTR_ERR(vma);
- goto error_deref_obj;
- }
+ err = lrc_alloc(ce, engine);
+ if (err)
+ return err;
if (!page_mask_bits(ce->timeline)) {
struct intel_timeline *tl;
@@ -4801,37 +3367,13 @@ static int __execlists_context_alloc(struct intel_context *ce,
tl = pinned_timeline(ce);
else
tl = intel_timeline_create(engine->gt);
- if (IS_ERR(tl)) {
- ret = PTR_ERR(tl);
- goto error_deref_obj;
- }
+ if (IS_ERR(tl))
+ return PTR_ERR(tl);
ce->timeline = tl;
}
- ring = intel_engine_create_ring(engine, (unsigned long)ce->ring);
- if (IS_ERR(ring)) {
- ret = PTR_ERR(ring);
- goto error_deref_obj;
- }
-
- ret = populate_lr_context(ce, ctx_obj, engine, ring);
- if (ret) {
- drm_dbg(&engine->i915->drm,
- "Failed to populate LRC: %d\n", ret);
- goto error_ring_free;
- }
-
- ce->ring = ring;
- ce->state = vma;
-
return 0;
-
-error_ring_free:
- intel_ring_put(ring);
-error_deref_obj:
- i915_gem_object_put(ctx_obj);
- return ret;
}
static struct list_head *virtual_queue(struct virtual_engine *ve)
@@ -5470,28 +4012,6 @@ void intel_execlists_show_requests(struct intel_engine_cs *engine,
spin_unlock_irqrestore(&engine->active.lock, flags);
}
-void intel_lr_context_reset(struct intel_engine_cs *engine,
- struct intel_context *ce,
- u32 head,
- bool scrub)
-{
- GEM_BUG_ON(!intel_context_is_pinned(ce));
-
- /*
- * We want a simple context + ring to execute the breadcrumb update.
- * We cannot rely on the context being intact across the GPU hang,
- * so clear it and rebuild just what we need for the breadcrumb.
- * All pending requests for this context will be zapped, and any
- * future request will be after userspace has had the opportunity
- * to recreate its own state.
- */
- if (scrub)
- restore_default_state(ce, engine);
-
- /* Rerun the request; its payload has been neutered (if guilty). */
- __execlists_update_reg_state(ce, engine, head);
-}
-
bool
intel_engine_in_execlists_submission_mode(const struct intel_engine_cs *engine)
{
@@ -22,25 +22,8 @@ enum {
int intel_execlists_submission_setup(struct intel_engine_cs *engine);
-/* Logical Ring Contexts */
-/* At the start of the context image is its per-process HWS page */
-#define LRC_PPHWSP_PN (0)
-#define LRC_PPHWSP_SZ (1)
-/* After the PPHWSP we have the logical state for the context */
-#define LRC_STATE_PN (LRC_PPHWSP_PN + LRC_PPHWSP_SZ)
-#define LRC_STATE_OFFSET (LRC_STATE_PN * PAGE_SIZE)
-
-/* Space within PPHWSP reserved to be used as scratch */
-#define LRC_PPHWSP_SCRATCH 0x34
-#define LRC_PPHWSP_SCRATCH_ADDR (LRC_PPHWSP_SCRATCH * sizeof(u32))
-
void intel_execlists_set_default_submission(struct intel_engine_cs *engine);
-void intel_lr_context_reset(struct intel_engine_cs *engine,
- struct intel_context *ce,
- u32 head,
- bool scrub);
-
void intel_execlists_show_requests(struct intel_engine_cs *engine,
struct drm_printer *m,
void (*show_request)(struct drm_printer *m,
new file mode 100644
@@ -0,0 +1,1472 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2014 Intel Corporation
+ */
+
+#include "gen8_engine_cs.h"
+#include "i915_drv.h"
+#include "i915_perf.h"
+#include "intel_engine.h"
+#include "intel_gpu_commands.h"
+#include "intel_gt.h"
+#include "intel_lrc.h"
+#include "intel_lrc_reg.h"
+#include "intel_ring.h"
+#include "shmem_utils.h"
+
+static inline unsigned int dword_in_page(void *addr)
+{
+ return offset_in_page(addr) / sizeof(u32);
+}
+
+static void set_offsets(u32 *regs,
+ const u8 *data,
+ const struct intel_engine_cs *engine,
+ bool clear)
+#define NOP(x) (BIT(7) | (x))
+#define LRI(count, flags) ((flags) << 6 | (count) | BUILD_BUG_ON_ZERO(count >= BIT(6)))
+#define POSTED BIT(0)
+#define REG(x) (((x) >> 2) | BUILD_BUG_ON_ZERO(x >= 0x200))
+#define REG16(x) \
+ (((x) >> 9) | BIT(7) | BUILD_BUG_ON_ZERO(x >= 0x10000)), \
+ (((x) >> 2) & 0x7f)
+#define END(total_state_size) 0, (total_state_size)
+{
+ const u32 base = engine->mmio_base;
+
+ while (*data) {
+ u8 count, flags;
+
+ if (*data & BIT(7)) { /* skip */
+ count = *data++ & ~BIT(7);
+ if (clear)
+ memset32(regs, MI_NOOP, count);
+ regs += count;
+ continue;
+ }
+
+ count = *data & 0x3f;
+ flags = *data >> 6;
+ data++;
+
+ *regs = MI_LOAD_REGISTER_IMM(count);
+ if (flags & POSTED)
+ *regs |= MI_LRI_FORCE_POSTED;
+ if (INTEL_GEN(engine->i915) >= 11)
+ *regs |= MI_LRI_LRM_CS_MMIO;
+ regs++;
+
+ GEM_BUG_ON(!count);
+ do {
+ u32 offset = 0;
+ u8 v;
+
+ do {
+ v = *data++;
+ offset <<= 7;
+ offset |= v & ~BIT(7);
+ } while (v & BIT(7));
+
+ regs[0] = base + (offset << 2);
+ if (clear)
+ regs[1] = 0;
+ regs += 2;
+ } while (--count);
+ }
+
+ if (clear) {
+ u8 count = *++data;
+
+ /* Clear past the tail for HW access */
+ GEM_BUG_ON(dword_in_page(regs) > count);
+ memset32(regs, MI_NOOP, count - dword_in_page(regs));
+
+ /* Close the batch; used mainly by live_lrc_layout() */
+ *regs = MI_BATCH_BUFFER_END;
+ if (INTEL_GEN(engine->i915) >= 10)
+ *regs |= BIT(0);
+ }
+}
+
+static const u8 gen8_xcs_offsets[] = {
+ NOP(1),
+ LRI(11, 0),
+ REG16(0x244),
+ REG(0x034),
+ REG(0x030),
+ REG(0x038),
+ REG(0x03c),
+ REG(0x168),
+ REG(0x140),
+ REG(0x110),
+ REG(0x11c),
+ REG(0x114),
+ REG(0x118),
+
+ NOP(9),
+ LRI(9, 0),
+ REG16(0x3a8),
+ REG16(0x28c),
+ REG16(0x288),
+ REG16(0x284),
+ REG16(0x280),
+ REG16(0x27c),
+ REG16(0x278),
+ REG16(0x274),
+ REG16(0x270),
+
+ NOP(13),
+ LRI(2, 0),
+ REG16(0x200),
+ REG(0x028),
+
+ END(80)
+};
+
+static const u8 gen9_xcs_offsets[] = {
+ NOP(1),
+ LRI(14, POSTED),
+ REG16(0x244),
+ REG(0x034),
+ REG(0x030),
+ REG(0x038),
+ REG(0x03c),
+ REG(0x168),
+ REG(0x140),
+ REG(0x110),
+ REG(0x11c),
+ REG(0x114),
+ REG(0x118),
+ REG(0x1c0),
+ REG(0x1c4),
+ REG(0x1c8),
+
+ NOP(3),
+ LRI(9, POSTED),
+ REG16(0x3a8),
+ REG16(0x28c),
+ REG16(0x288),
+ REG16(0x284),
+ REG16(0x280),
+ REG16(0x27c),
+ REG16(0x278),
+ REG16(0x274),
+ REG16(0x270),
+
+ NOP(13),
+ LRI(1, POSTED),
+ REG16(0x200),
+
+ NOP(13),
+ LRI(44, POSTED),
+ REG(0x028),
+ REG(0x09c),
+ REG(0x0c0),
+ REG(0x178),
+ REG(0x17c),
+ REG16(0x358),
+ REG(0x170),
+ REG(0x150),
+ REG(0x154),
+ REG(0x158),
+ REG16(0x41c),
+ REG16(0x600),
+ REG16(0x604),
+ REG16(0x608),
+ REG16(0x60c),
+ REG16(0x610),
+ REG16(0x614),
+ REG16(0x618),
+ REG16(0x61c),
+ REG16(0x620),
+ REG16(0x624),
+ REG16(0x628),
+ REG16(0x62c),
+ REG16(0x630),
+ REG16(0x634),
+ REG16(0x638),
+ REG16(0x63c),
+ REG16(0x640),
+ REG16(0x644),
+ REG16(0x648),
+ REG16(0x64c),
+ REG16(0x650),
+ REG16(0x654),
+ REG16(0x658),
+ REG16(0x65c),
+ REG16(0x660),
+ REG16(0x664),
+ REG16(0x668),
+ REG16(0x66c),
+ REG16(0x670),
+ REG16(0x674),
+ REG16(0x678),
+ REG16(0x67c),
+ REG(0x068),
+
+ END(176)
+};
+
+static const u8 gen12_xcs_offsets[] = {
+ NOP(1),
+ LRI(13, POSTED),
+ REG16(0x244),
+ REG(0x034),
+ REG(0x030),
+ REG(0x038),
+ REG(0x03c),
+ REG(0x168),
+ REG(0x140),
+ REG(0x110),
+ REG(0x1c0),
+ REG(0x1c4),
+ REG(0x1c8),
+ REG(0x180),
+ REG16(0x2b4),
+
+ NOP(5),
+ LRI(9, POSTED),
+ REG16(0x3a8),
+ REG16(0x28c),
+ REG16(0x288),
+ REG16(0x284),
+ REG16(0x280),
+ REG16(0x27c),
+ REG16(0x278),
+ REG16(0x274),
+ REG16(0x270),
+
+ END(80)
+};
+
+static const u8 gen8_rcs_offsets[] = {
+ NOP(1),
+ LRI(14, POSTED),
+ REG16(0x244),
+ REG(0x034),
+ REG(0x030),
+ REG(0x038),
+ REG(0x03c),
+ REG(0x168),
+ REG(0x140),
+ REG(0x110),
+ REG(0x11c),
+ REG(0x114),
+ REG(0x118),
+ REG(0x1c0),
+ REG(0x1c4),
+ REG(0x1c8),
+
+ NOP(3),
+ LRI(9, POSTED),
+ REG16(0x3a8),
+ REG16(0x28c),
+ REG16(0x288),
+ REG16(0x284),
+ REG16(0x280),
+ REG16(0x27c),
+ REG16(0x278),
+ REG16(0x274),
+ REG16(0x270),
+
+ NOP(13),
+ LRI(1, 0),
+ REG(0x0c8),
+
+ END(80)
+};
+
+static const u8 gen9_rcs_offsets[] = {
+ NOP(1),
+ LRI(14, POSTED),
+ REG16(0x244),
+ REG(0x34),
+ REG(0x30),
+ REG(0x38),
+ REG(0x3c),
+ REG(0x168),
+ REG(0x140),
+ REG(0x110),
+ REG(0x11c),
+ REG(0x114),
+ REG(0x118),
+ REG(0x1c0),
+ REG(0x1c4),
+ REG(0x1c8),
+
+ NOP(3),
+ LRI(9, POSTED),
+ REG16(0x3a8),
+ REG16(0x28c),
+ REG16(0x288),
+ REG16(0x284),
+ REG16(0x280),
+ REG16(0x27c),
+ REG16(0x278),
+ REG16(0x274),
+ REG16(0x270),
+
+ NOP(13),
+ LRI(1, 0),
+ REG(0xc8),
+
+ NOP(13),
+ LRI(44, POSTED),
+ REG(0x28),
+ REG(0x9c),
+ REG(0xc0),
+ REG(0x178),
+ REG(0x17c),
+ REG16(0x358),
+ REG(0x170),
+ REG(0x150),
+ REG(0x154),
+ REG(0x158),
+ REG16(0x41c),
+ REG16(0x600),
+ REG16(0x604),
+ REG16(0x608),
+ REG16(0x60c),
+ REG16(0x610),
+ REG16(0x614),
+ REG16(0x618),
+ REG16(0x61c),
+ REG16(0x620),
+ REG16(0x624),
+ REG16(0x628),
+ REG16(0x62c),
+ REG16(0x630),
+ REG16(0x634),
+ REG16(0x638),
+ REG16(0x63c),
+ REG16(0x640),
+ REG16(0x644),
+ REG16(0x648),
+ REG16(0x64c),
+ REG16(0x650),
+ REG16(0x654),
+ REG16(0x658),
+ REG16(0x65c),
+ REG16(0x660),
+ REG16(0x664),
+ REG16(0x668),
+ REG16(0x66c),
+ REG16(0x670),
+ REG16(0x674),
+ REG16(0x678),
+ REG16(0x67c),
+ REG(0x68),
+
+ END(176)
+};
+
+static const u8 gen11_rcs_offsets[] = {
+ NOP(1),
+ LRI(15, POSTED),
+ REG16(0x244),
+ REG(0x034),
+ REG(0x030),
+ REG(0x038),
+ REG(0x03c),
+ REG(0x168),
+ REG(0x140),
+ REG(0x110),
+ REG(0x11c),
+ REG(0x114),
+ REG(0x118),
+ REG(0x1c0),
+ REG(0x1c4),
+ REG(0x1c8),
+ REG(0x180),
+
+ NOP(1),
+ LRI(9, POSTED),
+ REG16(0x3a8),
+ REG16(0x28c),
+ REG16(0x288),
+ REG16(0x284),
+ REG16(0x280),
+ REG16(0x27c),
+ REG16(0x278),
+ REG16(0x274),
+ REG16(0x270),
+
+ LRI(1, POSTED),
+ REG(0x1b0),
+
+ NOP(10),
+ LRI(1, 0),
+ REG(0x0c8),
+
+ END(80)
+};
+
+static const u8 gen12_rcs_offsets[] = {
+ NOP(1),
+ LRI(13, POSTED),
+ REG16(0x244),
+ REG(0x034),
+ REG(0x030),
+ REG(0x038),
+ REG(0x03c),
+ REG(0x168),
+ REG(0x140),
+ REG(0x110),
+ REG(0x1c0),
+ REG(0x1c4),
+ REG(0x1c8),
+ REG(0x180),
+ REG16(0x2b4),
+
+ NOP(5),
+ LRI(9, POSTED),
+ REG16(0x3a8),
+ REG16(0x28c),
+ REG16(0x288),
+ REG16(0x284),
+ REG16(0x280),
+ REG16(0x27c),
+ REG16(0x278),
+ REG16(0x274),
+ REG16(0x270),
+
+ LRI(3, POSTED),
+ REG(0x1b0),
+ REG16(0x5a8),
+ REG16(0x5ac),
+
+ NOP(6),
+ LRI(1, 0),
+ REG(0x0c8),
+ NOP(3 + 9 + 1),
+
+ LRI(51, POSTED),
+ REG16(0x588),
+ REG16(0x588),
+ REG16(0x588),
+ REG16(0x588),
+ REG16(0x588),
+ REG16(0x588),
+ REG(0x028),
+ REG(0x09c),
+ REG(0x0c0),
+ REG(0x178),
+ REG(0x17c),
+ REG16(0x358),
+ REG(0x170),
+ REG(0x150),
+ REG(0x154),
+ REG(0x158),
+ REG16(0x41c),
+ REG16(0x600),
+ REG16(0x604),
+ REG16(0x608),
+ REG16(0x60c),
+ REG16(0x610),
+ REG16(0x614),
+ REG16(0x618),
+ REG16(0x61c),
+ REG16(0x620),
+ REG16(0x624),
+ REG16(0x628),
+ REG16(0x62c),
+ REG16(0x630),
+ REG16(0x634),
+ REG16(0x638),
+ REG16(0x63c),
+ REG16(0x640),
+ REG16(0x644),
+ REG16(0x648),
+ REG16(0x64c),
+ REG16(0x650),
+ REG16(0x654),
+ REG16(0x658),
+ REG16(0x65c),
+ REG16(0x660),
+ REG16(0x664),
+ REG16(0x668),
+ REG16(0x66c),
+ REG16(0x670),
+ REG16(0x674),
+ REG16(0x678),
+ REG16(0x67c),
+ REG(0x068),
+ REG(0x084),
+ NOP(1),
+
+ END(192)
+};
+
+#undef END
+#undef REG16
+#undef REG
+#undef LRI
+#undef NOP
+
+static const u8 *reg_offsets(const struct intel_engine_cs *engine)
+{
+ /*
+ * The gen12+ lists only have the registers we program in the basic
+ * default state. We rely on the context image using relative
+ * addressing to automatic fixup the register state between the
+ * physical engines for virtual engine.
+ */
+ GEM_BUG_ON(INTEL_GEN(engine->i915) >= 12 &&
+ !intel_engine_has_relative_mmio(engine));
+
+ if (engine->class == RENDER_CLASS) {
+ if (INTEL_GEN(engine->i915) >= 12)
+ return gen12_rcs_offsets;
+ else if (INTEL_GEN(engine->i915) >= 11)
+ return gen11_rcs_offsets;
+ else if (INTEL_GEN(engine->i915) >= 9)
+ return gen9_rcs_offsets;
+ else
+ return gen8_rcs_offsets;
+ } else {
+ if (INTEL_GEN(engine->i915) >= 12)
+ return gen12_xcs_offsets;
+ else if (INTEL_GEN(engine->i915) >= 9)
+ return gen9_xcs_offsets;
+ else
+ return gen8_xcs_offsets;
+ }
+}
+
+static int lrc_ring_mi_mode(const struct intel_engine_cs *engine)
+{
+ if (INTEL_GEN(engine->i915) >= 12)
+ return 0x60;
+ else if (INTEL_GEN(engine->i915) >= 9)
+ return 0x54;
+ else if (engine->class == RENDER_CLASS)
+ return 0x58;
+ else
+ return -1;
+}
+
+static int lrc_ring_gpr0(const struct intel_engine_cs *engine)
+{
+ if (INTEL_GEN(engine->i915) >= 12)
+ return 0x74;
+ else if (INTEL_GEN(engine->i915) >= 9)
+ return 0x68;
+ else if (engine->class == RENDER_CLASS)
+ return 0xd8;
+ else
+ return -1;
+}
+
+static int lrc_ring_wa_bb_per_ctx(const struct intel_engine_cs *engine)
+{
+ if (INTEL_GEN(engine->i915) >= 12)
+ return 0x12;
+ else if (INTEL_GEN(engine->i915) >= 9 || engine->class == RENDER_CLASS)
+ return 0x18;
+ else
+ return -1;
+}
+
+static int lrc_ring_indirect_ptr(const struct intel_engine_cs *engine)
+{
+ int x;
+
+ x = lrc_ring_wa_bb_per_ctx(engine);
+ if (x < 0)
+ return x;
+
+ return x + 2;
+}
+
+static int lrc_ring_indirect_offset(const struct intel_engine_cs *engine)
+{
+ int x;
+
+ x = lrc_ring_indirect_ptr(engine);
+ if (x < 0)
+ return x;
+
+ return x + 2;
+}
+
+static int lrc_ring_cmd_buf_cctl(const struct intel_engine_cs *engine)
+{
+ if (engine->class != RENDER_CLASS)
+ return -1;
+
+ if (INTEL_GEN(engine->i915) >= 12)
+ return 0xb6;
+ else if (INTEL_GEN(engine->i915) >= 11)
+ return 0xaa;
+ else
+ return -1;
+}
+
+static u32
+lrc_ring_indirect_offset_default(const struct intel_engine_cs *engine)
+{
+ switch (INTEL_GEN(engine->i915)) {
+ default:
+ MISSING_CASE(INTEL_GEN(engine->i915));
+ fallthrough;
+ case 12:
+ return GEN12_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
+ case 11:
+ return GEN11_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
+ case 10:
+ return GEN10_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
+ case 9:
+ return GEN9_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
+ case 8:
+ return GEN8_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
+ }
+}
+
+static void
+lrc_setup_indirect_ctx(u32 *regs,
+ const struct intel_engine_cs *engine,
+ u32 ctx_bb_ggtt_addr,
+ u32 size)
+{
+ GEM_BUG_ON(!size);
+ GEM_BUG_ON(!IS_ALIGNED(size, CACHELINE_BYTES));
+ GEM_BUG_ON(lrc_ring_indirect_ptr(engine) == -1);
+ regs[lrc_ring_indirect_ptr(engine) + 1] =
+ ctx_bb_ggtt_addr | (size / CACHELINE_BYTES);
+
+ GEM_BUG_ON(lrc_ring_indirect_offset(engine) == -1);
+ regs[lrc_ring_indirect_offset(engine) + 1] =
+ lrc_ring_indirect_offset_default(engine) << 6;
+}
+
+static void init_common_regs(u32 * const regs,
+ const struct intel_engine_cs *engine,
+ const struct intel_ring *ring,
+ bool inhibit)
+{
+ u32 ctl;
+
+ ctl = _MASKED_BIT_ENABLE(CTX_CTRL_INHIBIT_SYN_CTX_SWITCH);
+ ctl |= _MASKED_BIT_DISABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT);
+ if (inhibit)
+ ctl |= CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT;
+ if (INTEL_GEN(engine->i915) < 11)
+ ctl |= _MASKED_BIT_DISABLE(CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT |
+ CTX_CTRL_RS_CTX_ENABLE);
+ regs[CTX_CONTEXT_CONTROL] = ctl;
+
+ regs[CTX_RING_CTL] = RING_CTL_SIZE(ring->size) | RING_VALID;
+ regs[CTX_TIMESTAMP] = 0;
+}
+
+static void init_wa_bb_regs(u32 * const regs,
+ const struct intel_engine_cs *engine)
+{
+ const struct i915_ctx_workarounds * const wa_ctx = &engine->wa_ctx;
+
+ if (wa_ctx->per_ctx.size) {
+ const u32 ggtt_offset = i915_ggtt_offset(wa_ctx->vma);
+
+ GEM_BUG_ON(lrc_ring_wa_bb_per_ctx(engine) == -1);
+ regs[lrc_ring_wa_bb_per_ctx(engine) + 1] =
+ (ggtt_offset + wa_ctx->per_ctx.offset) | 0x01;
+ }
+
+ if (wa_ctx->indirect_ctx.size) {
+ lrc_setup_indirect_ctx(regs, engine,
+ i915_ggtt_offset(wa_ctx->vma) +
+ wa_ctx->indirect_ctx.offset,
+ wa_ctx->indirect_ctx.size);
+ }
+}
+
+static void init_ppgtt_regs(u32 *regs, const struct i915_ppgtt *ppgtt)
+{
+ if (i915_vm_is_4lvl(&ppgtt->vm)) {
+ /* 64b PPGTT (48bit canonical)
+ * PDP0_DESCRIPTOR contains the base address to PML4 and
+ * other PDP Descriptors are ignored.
+ */
+ ASSIGN_CTX_PML4(ppgtt, regs);
+ } else {
+ ASSIGN_CTX_PDP(ppgtt, regs, 3);
+ ASSIGN_CTX_PDP(ppgtt, regs, 2);
+ ASSIGN_CTX_PDP(ppgtt, regs, 1);
+ ASSIGN_CTX_PDP(ppgtt, regs, 0);
+ }
+}
+
+static struct i915_ppgtt *vm_alias(struct i915_address_space *vm)
+{
+ if (i915_is_ggtt(vm))
+ return i915_vm_to_ggtt(vm)->alias;
+ else
+ return i915_vm_to_ppgtt(vm);
+}
+
+static void __reset_stop_ring(u32 *regs, const struct intel_engine_cs *engine)
+{
+ int x;
+
+ x = lrc_ring_mi_mode(engine);
+ if (x != -1) {
+ regs[x + 1] &= ~STOP_RING;
+ regs[x + 1] |= STOP_RING << 16;
+ }
+}
+
+static void __lrc_init_regs(u32 *regs,
+ const struct intel_context *ce,
+ const struct intel_engine_cs *engine,
+ const struct intel_ring *ring,
+ bool inhibit)
+{
+ /*
+ * A context is actually a big batch buffer with several
+ * MI_LOAD_REGISTER_IMM commands followed by (reg, value) pairs. The
+ * values we are setting here are only for the first context restore:
+ * on a subsequent save, the GPU will recreate this batchbuffer with new
+ * values (including all the missing MI_LOAD_REGISTER_IMM commands that
+ * we are not initializing here).
+ *
+ * Must keep consistent with virtual_update_register_offsets().
+ */
+ set_offsets(regs, reg_offsets(engine), engine, inhibit);
+
+ init_common_regs(regs, engine, ring, inhibit);
+ init_ppgtt_regs(regs, vm_alias(ce->vm));
+
+ init_wa_bb_regs(regs, engine);
+
+ __reset_stop_ring(regs, engine);
+}
+
+void lrc_init_regs(const struct intel_context *ce,
+ const struct intel_engine_cs *engine,
+ const struct intel_ring *ring,
+ bool inhibit)
+{
+ __lrc_init_regs(ce->lrc_reg_state, ce, engine, ring, inhibit);
+}
+
+void lrc_reset_regs(const struct intel_context *ce,
+ const struct intel_engine_cs *engine)
+{
+ __reset_stop_ring(ce->lrc_reg_state, engine);
+}
+
+void lrc_restore_defaults(struct intel_context *ce,
+ struct intel_engine_cs *engine)
+{
+ u32 *regs;
+
+ regs = memset(ce->lrc_reg_state, 0, engine->context_size - PAGE_SIZE);
+ __lrc_init_regs(regs, ce, engine, ce->ring, true);
+
+ ce->runtime.last = lrc_get_runtime(ce);
+}
+
+static void
+set_redzone(void *vaddr, const struct intel_engine_cs *engine)
+{
+ if (!IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
+ return;
+
+ vaddr += engine->context_size;
+
+ memset(vaddr, CONTEXT_REDZONE, I915_GTT_PAGE_SIZE);
+}
+
+static void
+check_redzone(const void *vaddr, const struct intel_engine_cs *engine)
+{
+ if (!IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
+ return;
+
+ vaddr += engine->context_size;
+
+ if (memchr_inv(vaddr, CONTEXT_REDZONE, I915_GTT_PAGE_SIZE))
+ drm_err_once(&engine->i915->drm,
+ "%s context redzone overwritten!\n",
+ engine->name);
+}
+
+void lrc_check_redzone(struct intel_context *ce)
+{
+ check_redzone((void *)ce->lrc_reg_state - LRC_STATE_OFFSET, ce->engine);
+}
+
+static int __lrc_setup(struct intel_context *ce,
+ struct drm_i915_gem_object *obj,
+ struct intel_engine_cs *engine,
+ struct intel_ring *ring)
+{
+ bool inhibit = true;
+ void *vaddr;
+
+ vaddr = i915_gem_object_pin_map(obj, I915_MAP_WB);
+ if (IS_ERR(vaddr)) {
+ drm_dbg(&engine->i915->drm, "Could not map object pages!\n");
+ return PTR_ERR(vaddr);
+ }
+
+ set_redzone(vaddr, engine);
+
+ if (engine->default_state) {
+ shmem_read(engine->default_state, 0,
+ vaddr, engine->context_size);
+ __set_bit(CONTEXT_VALID_BIT, &ce->flags);
+ inhibit = false;
+ }
+
+ /* Clear the ppHWSP (inc. per-context counters) */
+ memset(vaddr, 0, PAGE_SIZE);
+
+ /*
+ * The second page of the context object contains some registers which
+ * must be set up prior to the first execution.
+ */
+ __lrc_init_regs(vaddr + LRC_STATE_OFFSET, ce, engine, ring, inhibit);
+
+ __i915_gem_object_flush_map(obj, 0, engine->context_size);
+ i915_gem_object_unpin_map(obj);
+ return 0;
+}
+
+static struct i915_vma *
+__lrc_alloc_state(struct intel_context *ce, struct intel_engine_cs *engine)
+{
+ struct drm_i915_gem_object *obj;
+ struct i915_vma *vma;
+ u32 context_size;
+
+ context_size = round_up(engine->context_size, I915_GTT_PAGE_SIZE);
+
+ if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
+ context_size += I915_GTT_PAGE_SIZE; /* for redzone */
+
+ if (INTEL_GEN(engine->i915) == 12) {
+ ce->wa_bb_page = context_size / PAGE_SIZE;
+ context_size += PAGE_SIZE;
+ }
+
+ obj = i915_gem_object_create_shmem(engine->i915, context_size);
+ if (IS_ERR(obj))
+ return ERR_CAST(obj);
+
+ vma = i915_vma_instance(obj, &engine->gt->ggtt->vm, NULL);
+ if (IS_ERR(vma)) {
+ i915_gem_object_put(obj);
+ return vma;
+ }
+
+ return vma;
+}
+
+int lrc_alloc(struct intel_context *ce, struct intel_engine_cs *engine)
+{
+ struct intel_ring *ring;
+ struct i915_vma *vma;
+ int err;
+
+ GEM_BUG_ON(ce->state);
+
+ vma = __lrc_alloc_state(ce, engine);
+ if (IS_ERR(vma))
+ return PTR_ERR(vma);
+
+ ring = intel_engine_create_ring(engine, (unsigned long)ce->ring);
+ if (IS_ERR(ring)) {
+ err = PTR_ERR(ring);
+ goto err_vma;
+ }
+
+ err = __lrc_setup(ce, vma->obj, engine, ring);
+ if (err)
+ goto err_ring;
+
+ ce->ring = ring;
+ ce->state = vma;
+ return 0;
+
+err_ring:
+ intel_ring_put(ring);
+err_vma:
+ i915_vma_put(vma);
+ return err;
+}
+
+static u32 *
+gen12_emit_timestamp_wa(const struct intel_context *ce, u32 *cs)
+{
+ *cs++ = MI_LOAD_REGISTER_MEM_GEN8 |
+ MI_SRM_LRM_GLOBAL_GTT |
+ MI_LRI_LRM_CS_MMIO;
+ *cs++ = i915_mmio_reg_offset(GEN8_RING_CS_GPR(0, 0));
+ *cs++ = i915_ggtt_offset(ce->state) + LRC_STATE_OFFSET +
+ CTX_TIMESTAMP * sizeof(u32);
+ *cs++ = 0;
+
+ *cs++ = MI_LOAD_REGISTER_REG |
+ MI_LRR_SOURCE_CS_MMIO |
+ MI_LRI_LRM_CS_MMIO;
+ *cs++ = i915_mmio_reg_offset(GEN8_RING_CS_GPR(0, 0));
+ *cs++ = i915_mmio_reg_offset(RING_CTX_TIMESTAMP(0));
+
+ *cs++ = MI_LOAD_REGISTER_REG |
+ MI_LRR_SOURCE_CS_MMIO |
+ MI_LRI_LRM_CS_MMIO;
+ *cs++ = i915_mmio_reg_offset(GEN8_RING_CS_GPR(0, 0));
+ *cs++ = i915_mmio_reg_offset(RING_CTX_TIMESTAMP(0));
+
+ return cs;
+}
+
+static u32 *
+gen12_emit_restore_scratch(const struct intel_context *ce, u32 *cs)
+{
+ GEM_BUG_ON(lrc_ring_gpr0(ce->engine) == -1);
+
+ *cs++ = MI_LOAD_REGISTER_MEM_GEN8 |
+ MI_SRM_LRM_GLOBAL_GTT |
+ MI_LRI_LRM_CS_MMIO;
+ *cs++ = i915_mmio_reg_offset(GEN8_RING_CS_GPR(0, 0));
+ *cs++ = i915_ggtt_offset(ce->state) + LRC_STATE_OFFSET +
+ (lrc_ring_gpr0(ce->engine) + 1) * sizeof(u32);
+ *cs++ = 0;
+
+ return cs;
+}
+
+static u32 *
+gen12_emit_cmd_buf_wa(const struct intel_context *ce, u32 *cs)
+{
+ GEM_BUG_ON(lrc_ring_cmd_buf_cctl(ce->engine) == -1);
+
+ *cs++ = MI_LOAD_REGISTER_MEM_GEN8 |
+ MI_SRM_LRM_GLOBAL_GTT |
+ MI_LRI_LRM_CS_MMIO;
+ *cs++ = i915_mmio_reg_offset(GEN8_RING_CS_GPR(0, 0));
+ *cs++ = i915_ggtt_offset(ce->state) + LRC_STATE_OFFSET +
+ (lrc_ring_cmd_buf_cctl(ce->engine) + 1) * sizeof(u32);
+ *cs++ = 0;
+
+ *cs++ = MI_LOAD_REGISTER_REG |
+ MI_LRR_SOURCE_CS_MMIO |
+ MI_LRI_LRM_CS_MMIO;
+ *cs++ = i915_mmio_reg_offset(GEN8_RING_CS_GPR(0, 0));
+ *cs++ = i915_mmio_reg_offset(RING_CMD_BUF_CCTL(0));
+
+ return cs;
+}
+
+static u32 *
+gen12_emit_indirect_ctx_rcs(const struct intel_context *ce, u32 *cs)
+{
+ cs = gen12_emit_timestamp_wa(ce, cs);
+ cs = gen12_emit_cmd_buf_wa(ce, cs);
+ cs = gen12_emit_restore_scratch(ce, cs);
+
+ return cs;
+}
+
+static u32 *
+gen12_emit_indirect_ctx_xcs(const struct intel_context *ce, u32 *cs)
+{
+ cs = gen12_emit_timestamp_wa(ce, cs);
+ cs = gen12_emit_restore_scratch(ce, cs);
+
+ return cs;
+}
+
+static inline u32 context_wa_bb_offset(const struct intel_context *ce)
+{
+ return PAGE_SIZE * ce->wa_bb_page;
+}
+
+static u32 *context_indirect_bb(const struct intel_context *ce)
+{
+ void *ptr;
+
+ GEM_BUG_ON(!ce->wa_bb_page);
+
+ ptr = ce->lrc_reg_state;
+ ptr -= LRC_STATE_OFFSET; /* back to start of context image */
+ ptr += context_wa_bb_offset(ce);
+
+ return ptr;
+}
+
+static void
+setup_indirect_ctx_bb(const struct intel_context *ce,
+ const struct intel_engine_cs *engine,
+ u32 *(*emit)(const struct intel_context *, u32 *))
+{
+ u32 * const start = context_indirect_bb(ce);
+ u32 *cs;
+
+ cs = emit(ce, start);
+ GEM_BUG_ON(cs - start > I915_GTT_PAGE_SIZE / sizeof(*cs));
+ while ((unsigned long)cs % CACHELINE_BYTES)
+ *cs++ = MI_NOOP;
+
+ lrc_setup_indirect_ctx(ce->lrc_reg_state, engine,
+ i915_ggtt_offset(ce->state) +
+ context_wa_bb_offset(ce),
+ (cs - start) * sizeof(*cs));
+}
+
+/*
+ * The context descriptor encodes various attributes of a context,
+ * including its GTT address and some flags. Because it's fairly
+ * expensive to calculate, we'll just do it once and cache the result,
+ * which remains valid until the context is unpinned.
+ *
+ * This is what a descriptor looks like, from LSB to MSB::
+ *
+ * bits 0-11: flags, GEN8_CTX_* (cached in ctx->desc_template)
+ * bits 12-31: LRCA, GTT address of (the HWSP of) this context
+ * bits 32-52: ctx ID, a globally unique tag (highest bit used by GuC)
+ * bits 53-54: mbz, reserved for use by hardware
+ * bits 55-63: group ID, currently unused and set to 0
+ *
+ * Starting from Gen11, the upper dword of the descriptor has a new format:
+ *
+ * bits 32-36: reserved
+ * bits 37-47: SW context ID
+ * bits 48:53: engine instance
+ * bit 54: mbz, reserved for use by hardware
+ * bits 55-60: SW counter
+ * bits 61-63: engine class
+ *
+ * engine info, SW context ID and SW counter need to form a unique number
+ * (Context ID) per lrc.
+ */
+static inline u32 lrc_descriptor(const struct intel_context *ce)
+{
+ u32 desc;
+
+ desc = INTEL_LEGACY_32B_CONTEXT;
+ if (i915_vm_is_4lvl(ce->vm))
+ desc = INTEL_LEGACY_64B_CONTEXT;
+ desc <<= GEN8_CTX_ADDRESSING_MODE_SHIFT;
+
+ desc |= GEN8_CTX_VALID | GEN8_CTX_PRIVILEGE;
+ if (IS_GEN(ce->vm->i915, 8))
+ desc |= GEN8_CTX_L3LLC_COHERENT;
+
+ return i915_ggtt_offset(ce->state) | desc;
+}
+
+u32 lrc_update_regs(const struct intel_context *ce,
+ const struct intel_engine_cs *engine,
+ u32 head)
+{
+ struct intel_ring *ring = ce->ring;
+ u32 *regs = ce->lrc_reg_state;
+
+ GEM_BUG_ON(!intel_ring_offset_valid(ring, head));
+ GEM_BUG_ON(!intel_ring_offset_valid(ring, ring->tail));
+
+ regs[CTX_RING_START] = i915_ggtt_offset(ring->vma);
+ regs[CTX_RING_HEAD] = head;
+ regs[CTX_RING_TAIL] = ring->tail;
+ regs[CTX_RING_CTL] = RING_CTL_SIZE(ring->size) | RING_VALID;
+
+ /* RPCS */
+ if (engine->class == RENDER_CLASS) {
+ regs[CTX_R_PWR_CLK_STATE] =
+ intel_sseu_make_rpcs(engine->gt, &ce->sseu);
+
+ i915_oa_init_reg_state(ce, engine);
+ }
+
+ if (ce->wa_bb_page) {
+ u32 *(*fn)(const struct intel_context *ce, u32 *cs);
+
+ fn = gen12_emit_indirect_ctx_xcs;
+ if (ce->engine->class == RENDER_CLASS)
+ fn = gen12_emit_indirect_ctx_rcs;
+
+ /* Mutually exclusive wrt to global indirect bb */
+ GEM_BUG_ON(engine->wa_ctx.indirect_ctx.size);
+ setup_indirect_ctx_bb(ce, engine, fn);
+ }
+
+ return lrc_descriptor(ce) | CTX_DESC_FORCE_RESTORE;
+}
+
+void lrc_update_offsets(struct intel_context *ce,
+ struct intel_engine_cs *engine)
+{
+ set_offsets(ce->lrc_reg_state, reg_offsets(engine), engine, false);
+}
+
+void lrc_check_regs(const struct intel_context *ce,
+ const struct intel_engine_cs *engine,
+ const char *when)
+{
+ const struct intel_ring *ring = ce->ring;
+ u32 *regs = ce->lrc_reg_state;
+ bool valid = true;
+ int x;
+
+ if (regs[CTX_RING_START] != i915_ggtt_offset(ring->vma)) {
+ pr_err("%s: context submitted with incorrect RING_START [%08x], expected %08x\n",
+ engine->name,
+ regs[CTX_RING_START],
+ i915_ggtt_offset(ring->vma));
+ regs[CTX_RING_START] = i915_ggtt_offset(ring->vma);
+ valid = false;
+ }
+
+ if ((regs[CTX_RING_CTL] & ~(RING_WAIT | RING_WAIT_SEMAPHORE)) !=
+ (RING_CTL_SIZE(ring->size) | RING_VALID)) {
+ pr_err("%s: context submitted with incorrect RING_CTL [%08x], expected %08x\n",
+ engine->name,
+ regs[CTX_RING_CTL],
+ (u32)(RING_CTL_SIZE(ring->size) | RING_VALID));
+ regs[CTX_RING_CTL] = RING_CTL_SIZE(ring->size) | RING_VALID;
+ valid = false;
+ }
+
+ x = lrc_ring_mi_mode(engine);
+ if (x != -1 && regs[x + 1] & (regs[x + 1] >> 16) & STOP_RING) {
+ pr_err("%s: context submitted with STOP_RING [%08x] in RING_MI_MODE\n",
+ engine->name, regs[x + 1]);
+ regs[x + 1] &= ~STOP_RING;
+ regs[x + 1] |= STOP_RING << 16;
+ valid = false;
+ }
+
+ WARN_ONCE(!valid, "Invalid lrc state found %s submission\n", when);
+}
+
+/*
+ * In this WA we need to set GEN8_L3SQCREG4[21:21] and reset it after
+ * PIPE_CONTROL instruction. This is required for the flush to happen correctly
+ * but there is a slight complication as this is applied in WA batch where the
+ * values are only initialized once so we cannot take register value at the
+ * beginning and reuse it further; hence we save its value to memory, upload a
+ * constant value with bit21 set and then we restore it back with the saved value.
+ * To simplify the WA, a constant value is formed by using the default value
+ * of this register. This shouldn't be a problem because we are only modifying
+ * it for a short period and this batch in non-premptible. We can ofcourse
+ * use additional instructions that read the actual value of the register
+ * at that time and set our bit of interest but it makes the WA complicated.
+ *
+ * This WA is also required for Gen9 so extracting as a function avoids
+ * code duplication.
+ */
+static u32 *
+gen8_emit_flush_coherentl3_wa(struct intel_engine_cs *engine, u32 *batch)
+{
+ /* NB no one else is allowed to scribble over scratch + 256! */
+ *batch++ = MI_STORE_REGISTER_MEM_GEN8 | MI_SRM_LRM_GLOBAL_GTT;
+ *batch++ = i915_mmio_reg_offset(GEN8_L3SQCREG4);
+ *batch++ = intel_gt_scratch_offset(engine->gt,
+ INTEL_GT_SCRATCH_FIELD_COHERENTL3_WA);
+ *batch++ = 0;
+
+ *batch++ = MI_LOAD_REGISTER_IMM(1);
+ *batch++ = i915_mmio_reg_offset(GEN8_L3SQCREG4);
+ *batch++ = 0x40400000 | GEN8_LQSC_FLUSH_COHERENT_LINES;
+
+ batch = gen8_emit_pipe_control(batch,
+ PIPE_CONTROL_CS_STALL |
+ PIPE_CONTROL_DC_FLUSH_ENABLE,
+ 0);
+
+ *batch++ = MI_LOAD_REGISTER_MEM_GEN8 | MI_SRM_LRM_GLOBAL_GTT;
+ *batch++ = i915_mmio_reg_offset(GEN8_L3SQCREG4);
+ *batch++ = intel_gt_scratch_offset(engine->gt,
+ INTEL_GT_SCRATCH_FIELD_COHERENTL3_WA);
+ *batch++ = 0;
+
+ return batch;
+}
+
+/*
+ * Typically we only have one indirect_ctx and per_ctx batch buffer which are
+ * initialized at the beginning and shared across all contexts but this field
+ * helps us to have multiple batches at different offsets and select them based
+ * on a criteria. At the moment this batch always start at the beginning of the page
+ * and at this point we don't have multiple wa_ctx batch buffers.
+ *
+ * The number of WA applied are not known at the beginning; we use this field
+ * to return the no of DWORDS written.
+ *
+ * It is to be noted that this batch does not contain MI_BATCH_BUFFER_END
+ * so it adds NOOPs as padding to make it cacheline aligned.
+ * MI_BATCH_BUFFER_END will be added to perctx batch and both of them together
+ * makes a complete batch buffer.
+ */
+static u32 *gen8_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch)
+{
+ /* WaDisableCtxRestoreArbitration:bdw,chv */
+ *batch++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
+
+ /* WaFlushCoherentL3CacheLinesAtContextSwitch:bdw */
+ if (IS_BROADWELL(engine->i915))
+ batch = gen8_emit_flush_coherentl3_wa(engine, batch);
+
+ /* WaClearSlmSpaceAtContextSwitch:bdw,chv */
+ /* Actual scratch location is at 128 bytes offset */
+ batch = gen8_emit_pipe_control(batch,
+ PIPE_CONTROL_FLUSH_L3 |
+ PIPE_CONTROL_STORE_DATA_INDEX |
+ PIPE_CONTROL_CS_STALL |
+ PIPE_CONTROL_QW_WRITE,
+ LRC_PPHWSP_SCRATCH_ADDR);
+
+ *batch++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
+
+ /* Pad to end of cacheline */
+ while ((unsigned long)batch % CACHELINE_BYTES)
+ *batch++ = MI_NOOP;
+
+ /*
+ * MI_BATCH_BUFFER_END is not required in Indirect ctx BB because
+ * execution depends on the length specified in terms of cache lines
+ * in the register CTX_RCS_INDIRECT_CTX
+ */
+
+ return batch;
+}
+
+struct lri {
+ i915_reg_t reg;
+ u32 value;
+};
+
+static u32 *emit_lri(u32 *batch, const struct lri *lri, unsigned int count)
+{
+ GEM_BUG_ON(!count || count > 63);
+
+ *batch++ = MI_LOAD_REGISTER_IMM(count);
+ do {
+ *batch++ = i915_mmio_reg_offset(lri->reg);
+ *batch++ = lri->value;
+ } while (lri++, --count);
+ *batch++ = MI_NOOP;
+
+ return batch;
+}
+
+static u32 *gen9_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch)
+{
+ static const struct lri lri[] = {
+ /* WaDisableGatherAtSetShaderCommonSlice:skl,bxt,kbl,glk */
+ {
+ COMMON_SLICE_CHICKEN2,
+ __MASKED_FIELD(GEN9_DISABLE_GATHER_AT_SET_SHADER_COMMON_SLICE,
+ 0),
+ },
+
+ /* BSpec: 11391 */
+ {
+ FF_SLICE_CHICKEN,
+ __MASKED_FIELD(FF_SLICE_CHICKEN_CL_PROVOKING_VERTEX_FIX,
+ FF_SLICE_CHICKEN_CL_PROVOKING_VERTEX_FIX),
+ },
+
+ /* BSpec: 11299 */
+ {
+ _3D_CHICKEN3,
+ __MASKED_FIELD(_3D_CHICKEN_SF_PROVOKING_VERTEX_FIX,
+ _3D_CHICKEN_SF_PROVOKING_VERTEX_FIX),
+ }
+ };
+
+ *batch++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
+
+ /* WaFlushCoherentL3CacheLinesAtContextSwitch:skl,bxt,glk */
+ batch = gen8_emit_flush_coherentl3_wa(engine, batch);
+
+ /* WaClearSlmSpaceAtContextSwitch:skl,bxt,kbl,glk,cfl */
+ batch = gen8_emit_pipe_control(batch,
+ PIPE_CONTROL_FLUSH_L3 |
+ PIPE_CONTROL_STORE_DATA_INDEX |
+ PIPE_CONTROL_CS_STALL |
+ PIPE_CONTROL_QW_WRITE,
+ LRC_PPHWSP_SCRATCH_ADDR);
+
+ batch = emit_lri(batch, lri, ARRAY_SIZE(lri));
+
+ /* WaMediaPoolStateCmdInWABB:bxt,glk */
+ if (HAS_POOLED_EU(engine->i915)) {
+ /*
+ * EU pool configuration is setup along with golden context
+ * during context initialization. This value depends on
+ * device type (2x6 or 3x6) and needs to be updated based
+ * on which subslice is disabled especially for 2x6
+ * devices, however it is safe to load default
+ * configuration of 3x6 device instead of masking off
+ * corresponding bits because HW ignores bits of a disabled
+ * subslice and drops down to appropriate config. Please
+ * see render_state_setup() in i915_gem_render_state.c for
+ * possible configurations, to avoid duplication they are
+ * not shown here again.
+ */
+ *batch++ = GEN9_MEDIA_POOL_STATE;
+ *batch++ = GEN9_MEDIA_POOL_ENABLE;
+ *batch++ = 0x00777000;
+ *batch++ = 0;
+ *batch++ = 0;
+ *batch++ = 0;
+ }
+
+ *batch++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
+
+ /* Pad to end of cacheline */
+ while ((unsigned long)batch % CACHELINE_BYTES)
+ *batch++ = MI_NOOP;
+
+ return batch;
+}
+
+static u32 *
+gen10_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch)
+{
+ int i;
+
+ /*
+ * WaPipeControlBefore3DStateSamplePattern: cnl
+ *
+ * Ensure the engine is idle prior to programming a
+ * 3DSTATE_SAMPLE_PATTERN during a context restore.
+ */
+ batch = gen8_emit_pipe_control(batch,
+ PIPE_CONTROL_CS_STALL,
+ 0);
+ /*
+ * WaPipeControlBefore3DStateSamplePattern says we need 4 dwords for
+ * the PIPE_CONTROL followed by 12 dwords of 0x0, so 16 dwords in
+ * total. However, a PIPE_CONTROL is 6 dwords long, not 4, which is
+ * confusing. Since gen8_emit_pipe_control() already advances the
+ * batch by 6 dwords, we advance the other 10 here, completing a
+ * cacheline. It's not clear if the workaround requires this padding
+ * before other commands, or if it's just the regular padding we would
+ * already have for the workaround bb, so leave it here for now.
+ */
+ for (i = 0; i < 10; i++)
+ *batch++ = MI_NOOP;
+
+ /* Pad to end of cacheline */
+ while ((unsigned long)batch % CACHELINE_BYTES)
+ *batch++ = MI_NOOP;
+
+ return batch;
+}
+
+#define CTX_WA_BB_SIZE (PAGE_SIZE)
+
+static int lrc_setup_wa_ctx(struct intel_engine_cs *engine)
+{
+ struct drm_i915_gem_object *obj;
+ struct i915_vma *vma;
+ int err;
+
+ obj = i915_gem_object_create_shmem(engine->i915, CTX_WA_BB_SIZE);
+ if (IS_ERR(obj))
+ return PTR_ERR(obj);
+
+ vma = i915_vma_instance(obj, &engine->gt->ggtt->vm, NULL);
+ if (IS_ERR(vma)) {
+ err = PTR_ERR(vma);
+ goto err;
+ }
+
+ err = i915_ggtt_pin(vma, NULL, 0, PIN_HIGH);
+ if (err)
+ goto err;
+
+ engine->wa_ctx.vma = vma;
+ return 0;
+
+err:
+ i915_gem_object_put(obj);
+ return err;
+}
+
+void lrc_fini_wa_ctx(struct intel_engine_cs *engine)
+{
+ i915_vma_unpin_and_release(&engine->wa_ctx.vma, 0);
+}
+
+typedef u32 *(*wa_bb_func_t)(struct intel_engine_cs *engine, u32 *batch);
+
+int lrc_init_wa_ctx(struct intel_engine_cs *engine)
+{
+ struct i915_ctx_workarounds *wa_ctx = &engine->wa_ctx;
+ struct i915_wa_ctx_bb *wa_bb[] = {
+ &wa_ctx->indirect_ctx, &wa_ctx->per_ctx
+ };
+ wa_bb_func_t wa_bb_fn[ARRAY_SIZE(wa_bb)];
+ void *batch, *batch_ptr;
+ unsigned int i;
+ int ret;
+
+ if (engine->class != RENDER_CLASS)
+ return 0;
+
+ switch (INTEL_GEN(engine->i915)) {
+ case 12:
+ case 11:
+ return 0;
+ case 10:
+ wa_bb_fn[0] = gen10_init_indirectctx_bb;
+ wa_bb_fn[1] = NULL;
+ break;
+ case 9:
+ wa_bb_fn[0] = gen9_init_indirectctx_bb;
+ wa_bb_fn[1] = NULL;
+ break;
+ case 8:
+ wa_bb_fn[0] = gen8_init_indirectctx_bb;
+ wa_bb_fn[1] = NULL;
+ break;
+ default:
+ MISSING_CASE(INTEL_GEN(engine->i915));
+ return 0;
+ }
+
+ ret = lrc_setup_wa_ctx(engine);
+ if (ret) {
+ drm_dbg(&engine->i915->drm,
+ "Failed to setup context WA page: %d\n", ret);
+ return ret;
+ }
+
+ batch = i915_gem_object_pin_map(wa_ctx->vma->obj, I915_MAP_WB);
+
+ /*
+ * Emit the two workaround batch buffers, recording the offset from the
+ * start of the workaround batch buffer object for each and their
+ * respective sizes.
+ */
+ batch_ptr = batch;
+ for (i = 0; i < ARRAY_SIZE(wa_bb_fn); i++) {
+ wa_bb[i]->offset = batch_ptr - batch;
+ if (GEM_DEBUG_WARN_ON(!IS_ALIGNED(wa_bb[i]->offset,
+ CACHELINE_BYTES))) {
+ ret = -EINVAL;
+ break;
+ }
+ if (wa_bb_fn[i])
+ batch_ptr = wa_bb_fn[i](engine, batch_ptr);
+ wa_bb[i]->size = batch_ptr - (batch + wa_bb[i]->offset);
+ }
+ GEM_BUG_ON(batch_ptr - batch > CTX_WA_BB_SIZE);
+
+ __i915_gem_object_flush_map(wa_ctx->vma->obj, 0, batch_ptr - batch);
+ __i915_gem_object_release_map(wa_ctx->vma->obj);
+ if (ret)
+ lrc_fini_wa_ctx(engine);
+
+ return ret;
+}
+
+#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
+#include "selftest_lrc.c"
+#endif
new file mode 100644
@@ -0,0 +1,67 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2014 Intel Corporation
+ */
+
+#ifndef __INTEL_LRC_H__
+#define __INTEL_LRC_H__
+
+#include <linux/types.h>
+
+#include "intel_context.h"
+#include "intel_lrc_reg.h"
+
+struct drm_i915_gem_object;
+struct intel_engine_cs;
+struct intel_ring;
+
+/* At the start of the context image is its per-process HWS page */
+#define LRC_PPHWSP_PN (0)
+#define LRC_PPHWSP_SZ (1)
+/* After the PPHWSP we have the logical state for the context */
+#define LRC_STATE_PN (LRC_PPHWSP_PN + LRC_PPHWSP_SZ)
+#define LRC_STATE_OFFSET (LRC_STATE_PN * PAGE_SIZE)
+
+/* Space within PPHWSP reserved to be used as scratch */
+#define LRC_PPHWSP_SCRATCH 0x34
+#define LRC_PPHWSP_SCRATCH_ADDR (LRC_PPHWSP_SCRATCH * sizeof(u32))
+
+int lrc_init_wa_ctx(struct intel_engine_cs *engine);
+void lrc_fini_wa_ctx(struct intel_engine_cs *engine);
+
+int lrc_alloc(struct intel_context *ce,
+ struct intel_engine_cs *engine);
+
+void lrc_restore_defaults(struct intel_context *ce,
+ struct intel_engine_cs *engine);
+
+void lrc_init_regs(const struct intel_context *ce,
+ const struct intel_engine_cs *engine,
+ const struct intel_ring *ring,
+ bool close);
+void lrc_reset_regs(const struct intel_context *ce,
+ const struct intel_engine_cs *engine);
+
+u32 lrc_update_regs(const struct intel_context *ce,
+ const struct intel_engine_cs *engine,
+ u32 head);
+void lrc_update_offsets(struct intel_context *ce,
+ struct intel_engine_cs *engine);
+
+void lrc_check_regs(const struct intel_context *ce,
+ const struct intel_engine_cs *engine,
+ const char *when);
+void lrc_check_redzone(struct intel_context *ce);
+
+static inline u32 lrc_get_runtime(const struct intel_context *ce)
+{
+ /*
+ * We can use either ppHWSP[16] which is recorded before the context
+ * switch (and so excludes the cost of context switches) or use the
+ * value from the context image itself, which is saved/restored earlier
+ * and so includes the cost of the save.
+ */
+ return READ_ONCE(ce->lrc_reg_state[CTX_TIMESTAMP]);
+}
+
+#endif /* __INTEL_LRC_H__ */
@@ -9,6 +9,8 @@
#include <linux/types.h>
+#define CTX_DESC_FORCE_RESTORE BIT_ULL(2)
+
/* GEN8 to GEN12 Reg State Context */
#define CTX_CONTEXT_CONTROL (0x02 + 1)
#define CTX_RING_HEAD (0x04 + 1)
@@ -249,7 +249,7 @@ static int live_unlite_restore(struct intel_gt *gt, int prio)
}
GEM_BUG_ON(!ce[1]->ring->size);
intel_ring_reset(ce[1]->ring, ce[1]->ring->size / 2);
- __execlists_update_reg_state(ce[1], engine, ce[1]->ring->head);
+ lrc_update_regs(ce[1], engine, ce[1]->ring->head);
rq[0] = igt_spinner_create_request(&spin, ce[0], MI_ARB_CHECK);
if (IS_ERR(rq[0])) {
@@ -4705,1777 +4705,3 @@ int intel_execlists_live_selftests(struct drm_i915_private *i915)
return intel_gt_live_subtests(tests, &i915->gt);
}
-
-static int emit_semaphore_signal(struct intel_context *ce, void *slot)
-{
- const u32 offset =
- i915_ggtt_offset(ce->engine->status_page.vma) +
- offset_in_page(slot);
- struct i915_request *rq;
- u32 *cs;
-
- rq = intel_context_create_request(ce);
- if (IS_ERR(rq))
- return PTR_ERR(rq);
-
- cs = intel_ring_begin(rq, 4);
- if (IS_ERR(cs)) {
- i915_request_add(rq);
- return PTR_ERR(cs);
- }
-
- *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
- *cs++ = offset;
- *cs++ = 0;
- *cs++ = 1;
-
- intel_ring_advance(rq, cs);
-
- rq->sched.attr.priority = I915_PRIORITY_BARRIER;
- i915_request_add(rq);
- return 0;
-}
-
-static int context_flush(struct intel_context *ce, long timeout)
-{
- struct i915_request *rq;
- struct dma_fence *fence;
- int err = 0;
-
- rq = intel_engine_create_kernel_request(ce->engine);
- if (IS_ERR(rq))
- return PTR_ERR(rq);
-
- fence = i915_active_fence_get(&ce->timeline->last_request);
- if (fence) {
- i915_request_await_dma_fence(rq, fence);
- dma_fence_put(fence);
- }
-
- rq = i915_request_get(rq);
- i915_request_add(rq);
- if (i915_request_wait(rq, 0, timeout) < 0)
- err = -ETIME;
- i915_request_put(rq);
-
- rmb(); /* We know the request is written, make sure all state is too! */
- return err;
-}
-
-static int live_lrc_layout(void *arg)
-{
- struct intel_gt *gt = arg;
- struct intel_engine_cs *engine;
- enum intel_engine_id id;
- u32 *lrc;
- int err;
-
- /*
- * Check the registers offsets we use to create the initial reg state
- * match the layout saved by HW.
- */
-
- lrc = kmalloc(PAGE_SIZE, GFP_KERNEL);
- if (!lrc)
- return -ENOMEM;
-
- err = 0;
- for_each_engine(engine, gt, id) {
- u32 *hw;
- int dw;
-
- if (!engine->default_state)
- continue;
-
- hw = shmem_pin_map(engine->default_state);
- if (IS_ERR(hw)) {
- err = PTR_ERR(hw);
- break;
- }
- hw += LRC_STATE_OFFSET / sizeof(*hw);
-
- execlists_init_reg_state(memset(lrc, POISON_INUSE, PAGE_SIZE),
- engine->kernel_context,
- engine,
- engine->kernel_context->ring,
- true);
-
- dw = 0;
- do {
- u32 lri = hw[dw];
-
- if (lri == 0) {
- dw++;
- continue;
- }
-
- if (lrc[dw] == 0) {
- pr_debug("%s: skipped instruction %x at dword %d\n",
- engine->name, lri, dw);
- dw++;
- continue;
- }
-
- if ((lri & GENMASK(31, 23)) != MI_INSTR(0x22, 0)) {
- pr_err("%s: Expected LRI command at dword %d, found %08x\n",
- engine->name, dw, lri);
- err = -EINVAL;
- break;
- }
-
- if (lrc[dw] != lri) {
- pr_err("%s: LRI command mismatch at dword %d, expected %08x found %08x\n",
- engine->name, dw, lri, lrc[dw]);
- err = -EINVAL;
- break;
- }
-
- lri &= 0x7f;
- lri++;
- dw++;
-
- while (lri) {
- if (hw[dw] != lrc[dw]) {
- pr_err("%s: Different registers found at dword %d, expected %x, found %x\n",
- engine->name, dw, hw[dw], lrc[dw]);
- err = -EINVAL;
- break;
- }
-
- /*
- * Skip over the actual register value as we
- * expect that to differ.
- */
- dw += 2;
- lri -= 2;
- }
- } while ((lrc[dw] & ~BIT(0)) != MI_BATCH_BUFFER_END);
-
- if (err) {
- pr_info("%s: HW register image:\n", engine->name);
- igt_hexdump(hw, PAGE_SIZE);
-
- pr_info("%s: SW register image:\n", engine->name);
- igt_hexdump(lrc, PAGE_SIZE);
- }
-
- shmem_unpin_map(engine->default_state, hw);
- if (err)
- break;
- }
-
- kfree(lrc);
- return err;
-}
-
-static int find_offset(const u32 *lri, u32 offset)
-{
- int i;
-
- for (i = 0; i < PAGE_SIZE / sizeof(u32); i++)
- if (lri[i] == offset)
- return i;
-
- return -1;
-}
-
-static int live_lrc_fixed(void *arg)
-{
- struct intel_gt *gt = arg;
- struct intel_engine_cs *engine;
- enum intel_engine_id id;
- int err = 0;
-
- /*
- * Check the assumed register offsets match the actual locations in
- * the context image.
- */
-
- for_each_engine(engine, gt, id) {
- const struct {
- u32 reg;
- u32 offset;
- const char *name;
- } tbl[] = {
- {
- i915_mmio_reg_offset(RING_START(engine->mmio_base)),
- CTX_RING_START - 1,
- "RING_START"
- },
- {
- i915_mmio_reg_offset(RING_CTL(engine->mmio_base)),
- CTX_RING_CTL - 1,
- "RING_CTL"
- },
- {
- i915_mmio_reg_offset(RING_HEAD(engine->mmio_base)),
- CTX_RING_HEAD - 1,
- "RING_HEAD"
- },
- {
- i915_mmio_reg_offset(RING_TAIL(engine->mmio_base)),
- CTX_RING_TAIL - 1,
- "RING_TAIL"
- },
- {
- i915_mmio_reg_offset(RING_MI_MODE(engine->mmio_base)),
- lrc_ring_mi_mode(engine),
- "RING_MI_MODE"
- },
- {
- i915_mmio_reg_offset(RING_BBSTATE(engine->mmio_base)),
- CTX_BB_STATE - 1,
- "BB_STATE"
- },
- {
- i915_mmio_reg_offset(RING_BB_PER_CTX_PTR(engine->mmio_base)),
- lrc_ring_wa_bb_per_ctx(engine),
- "RING_BB_PER_CTX_PTR"
- },
- {
- i915_mmio_reg_offset(RING_INDIRECT_CTX(engine->mmio_base)),
- lrc_ring_indirect_ptr(engine),
- "RING_INDIRECT_CTX_PTR"
- },
- {
- i915_mmio_reg_offset(RING_INDIRECT_CTX_OFFSET(engine->mmio_base)),
- lrc_ring_indirect_offset(engine),
- "RING_INDIRECT_CTX_OFFSET"
- },
- {
- i915_mmio_reg_offset(RING_CTX_TIMESTAMP(engine->mmio_base)),
- CTX_TIMESTAMP - 1,
- "RING_CTX_TIMESTAMP"
- },
- {
- i915_mmio_reg_offset(GEN8_RING_CS_GPR(engine->mmio_base, 0)),
- lrc_ring_gpr0(engine),
- "RING_CS_GPR0"
- },
- {
- i915_mmio_reg_offset(RING_CMD_BUF_CCTL(engine->mmio_base)),
- lrc_ring_cmd_buf_cctl(engine),
- "RING_CMD_BUF_CCTL"
- },
- { },
- }, *t;
- u32 *hw;
-
- if (!engine->default_state)
- continue;
-
- hw = shmem_pin_map(engine->default_state);
- if (IS_ERR(hw)) {
- err = PTR_ERR(hw);
- break;
- }
- hw += LRC_STATE_OFFSET / sizeof(*hw);
-
- for (t = tbl; t->name; t++) {
- int dw = find_offset(hw, t->reg);
-
- if (dw != t->offset) {
- pr_err("%s: Offset for %s [0x%x] mismatch, found %x, expected %x\n",
- engine->name,
- t->name,
- t->reg,
- dw,
- t->offset);
- err = -EINVAL;
- }
- }
-
- shmem_unpin_map(engine->default_state, hw);
- }
-
- return err;
-}
-
-static int __live_lrc_state(struct intel_engine_cs *engine,
- struct i915_vma *scratch)
-{
- struct intel_context *ce;
- struct i915_request *rq;
- struct i915_gem_ww_ctx ww;
- enum {
- RING_START_IDX = 0,
- RING_TAIL_IDX,
- MAX_IDX
- };
- u32 expected[MAX_IDX];
- u32 *cs;
- int err;
- int n;
-
- ce = intel_context_create(engine);
- if (IS_ERR(ce))
- return PTR_ERR(ce);
-
- i915_gem_ww_ctx_init(&ww, false);
-retry:
- err = i915_gem_object_lock(scratch->obj, &ww);
- if (!err)
- err = intel_context_pin_ww(ce, &ww);
- if (err)
- goto err_put;
-
- rq = i915_request_create(ce);
- if (IS_ERR(rq)) {
- err = PTR_ERR(rq);
- goto err_unpin;
- }
-
- cs = intel_ring_begin(rq, 4 * MAX_IDX);
- if (IS_ERR(cs)) {
- err = PTR_ERR(cs);
- i915_request_add(rq);
- goto err_unpin;
- }
-
- *cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
- *cs++ = i915_mmio_reg_offset(RING_START(engine->mmio_base));
- *cs++ = i915_ggtt_offset(scratch) + RING_START_IDX * sizeof(u32);
- *cs++ = 0;
-
- expected[RING_START_IDX] = i915_ggtt_offset(ce->ring->vma);
-
- *cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
- *cs++ = i915_mmio_reg_offset(RING_TAIL(engine->mmio_base));
- *cs++ = i915_ggtt_offset(scratch) + RING_TAIL_IDX * sizeof(u32);
- *cs++ = 0;
-
- err = i915_request_await_object(rq, scratch->obj, true);
- if (!err)
- err = i915_vma_move_to_active(scratch, rq, EXEC_OBJECT_WRITE);
-
- i915_request_get(rq);
- i915_request_add(rq);
- if (err)
- goto err_rq;
-
- intel_engine_flush_submission(engine);
- expected[RING_TAIL_IDX] = ce->ring->tail;
-
- if (i915_request_wait(rq, 0, HZ / 5) < 0) {
- err = -ETIME;
- goto err_rq;
- }
-
- cs = i915_gem_object_pin_map(scratch->obj, I915_MAP_WB);
- if (IS_ERR(cs)) {
- err = PTR_ERR(cs);
- goto err_rq;
- }
-
- for (n = 0; n < MAX_IDX; n++) {
- if (cs[n] != expected[n]) {
- pr_err("%s: Stored register[%d] value[0x%x] did not match expected[0x%x]\n",
- engine->name, n, cs[n], expected[n]);
- err = -EINVAL;
- break;
- }
- }
-
- i915_gem_object_unpin_map(scratch->obj);
-
-err_rq:
- i915_request_put(rq);
-err_unpin:
- intel_context_unpin(ce);
-err_put:
- if (err == -EDEADLK) {
- err = i915_gem_ww_ctx_backoff(&ww);
- if (!err)
- goto retry;
- }
- i915_gem_ww_ctx_fini(&ww);
- intel_context_put(ce);
- return err;
-}
-
-static int live_lrc_state(void *arg)
-{
- struct intel_gt *gt = arg;
- struct intel_engine_cs *engine;
- struct i915_vma *scratch;
- enum intel_engine_id id;
- int err = 0;
-
- /*
- * Check the live register state matches what we expect for this
- * intel_context.
- */
-
- scratch = create_scratch(gt);
- if (IS_ERR(scratch))
- return PTR_ERR(scratch);
-
- for_each_engine(engine, gt, id) {
- err = __live_lrc_state(engine, scratch);
- if (err)
- break;
- }
-
- if (igt_flush_test(gt->i915))
- err = -EIO;
-
- i915_vma_unpin_and_release(&scratch, 0);
- return err;
-}
-
-static int gpr_make_dirty(struct intel_context *ce)
-{
- struct i915_request *rq;
- u32 *cs;
- int n;
-
- rq = intel_context_create_request(ce);
- if (IS_ERR(rq))
- return PTR_ERR(rq);
-
- cs = intel_ring_begin(rq, 2 * NUM_GPR_DW + 2);
- if (IS_ERR(cs)) {
- i915_request_add(rq);
- return PTR_ERR(cs);
- }
-
- *cs++ = MI_LOAD_REGISTER_IMM(NUM_GPR_DW);
- for (n = 0; n < NUM_GPR_DW; n++) {
- *cs++ = CS_GPR(ce->engine, n);
- *cs++ = STACK_MAGIC;
- }
- *cs++ = MI_NOOP;
-
- intel_ring_advance(rq, cs);
-
- rq->sched.attr.priority = I915_PRIORITY_BARRIER;
- i915_request_add(rq);
-
- return 0;
-}
-
-static struct i915_request *
-__gpr_read(struct intel_context *ce, struct i915_vma *scratch, u32 *slot)
-{
- const u32 offset =
- i915_ggtt_offset(ce->engine->status_page.vma) +
- offset_in_page(slot);
- struct i915_request *rq;
- u32 *cs;
- int err;
- int n;
-
- rq = intel_context_create_request(ce);
- if (IS_ERR(rq))
- return rq;
-
- cs = intel_ring_begin(rq, 6 + 4 * NUM_GPR_DW);
- if (IS_ERR(cs)) {
- i915_request_add(rq);
- return ERR_CAST(cs);
- }
-
- *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
- *cs++ = MI_NOOP;
-
- *cs++ = MI_SEMAPHORE_WAIT |
- MI_SEMAPHORE_GLOBAL_GTT |
- MI_SEMAPHORE_POLL |
- MI_SEMAPHORE_SAD_NEQ_SDD;
- *cs++ = 0;
- *cs++ = offset;
- *cs++ = 0;
-
- for (n = 0; n < NUM_GPR_DW; n++) {
- *cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
- *cs++ = CS_GPR(ce->engine, n);
- *cs++ = i915_ggtt_offset(scratch) + n * sizeof(u32);
- *cs++ = 0;
- }
-
- i915_vma_lock(scratch);
- err = i915_request_await_object(rq, scratch->obj, true);
- if (!err)
- err = i915_vma_move_to_active(scratch, rq, EXEC_OBJECT_WRITE);
- i915_vma_unlock(scratch);
-
- i915_request_get(rq);
- i915_request_add(rq);
- if (err) {
- i915_request_put(rq);
- rq = ERR_PTR(err);
- }
-
- return rq;
-}
-
-static int __live_lrc_gpr(struct intel_engine_cs *engine,
- struct i915_vma *scratch,
- bool preempt)
-{
- u32 *slot = memset32(engine->status_page.addr + 1000, 0, 4);
- struct intel_context *ce;
- struct i915_request *rq;
- u32 *cs;
- int err;
- int n;
-
- if (INTEL_GEN(engine->i915) < 9 && engine->class != RENDER_CLASS)
- return 0; /* GPR only on rcs0 for gen8 */
-
- err = gpr_make_dirty(engine->kernel_context);
- if (err)
- return err;
-
- ce = intel_context_create(engine);
- if (IS_ERR(ce))
- return PTR_ERR(ce);
-
- rq = __gpr_read(ce, scratch, slot);
- if (IS_ERR(rq)) {
- err = PTR_ERR(rq);
- goto err_put;
- }
-
- err = wait_for_submit(engine, rq, HZ / 2);
- if (err)
- goto err_rq;
-
- if (preempt) {
- err = gpr_make_dirty(engine->kernel_context);
- if (err)
- goto err_rq;
-
- err = emit_semaphore_signal(engine->kernel_context, slot);
- if (err)
- goto err_rq;
- } else {
- slot[0] = 1;
- wmb();
- }
-
- if (i915_request_wait(rq, 0, HZ / 5) < 0) {
- err = -ETIME;
- goto err_rq;
- }
-
- cs = i915_gem_object_pin_map(scratch->obj, I915_MAP_WB);
- if (IS_ERR(cs)) {
- err = PTR_ERR(cs);
- goto err_rq;
- }
-
- for (n = 0; n < NUM_GPR_DW; n++) {
- if (cs[n]) {
- pr_err("%s: GPR[%d].%s was not zero, found 0x%08x!\n",
- engine->name,
- n / 2, n & 1 ? "udw" : "ldw",
- cs[n]);
- err = -EINVAL;
- break;
- }
- }
-
- i915_gem_object_unpin_map(scratch->obj);
-
-err_rq:
- memset32(&slot[0], -1, 4);
- wmb();
- i915_request_put(rq);
-err_put:
- intel_context_put(ce);
- return err;
-}
-
-static int live_lrc_gpr(void *arg)
-{
- struct intel_gt *gt = arg;
- struct intel_engine_cs *engine;
- struct i915_vma *scratch;
- enum intel_engine_id id;
- int err = 0;
-
- /*
- * Check that GPR registers are cleared in new contexts as we need
- * to avoid leaking any information from previous contexts.
- */
-
- scratch = create_scratch(gt);
- if (IS_ERR(scratch))
- return PTR_ERR(scratch);
-
- for_each_engine(engine, gt, id) {
- st_engine_heartbeat_disable(engine);
-
- err = __live_lrc_gpr(engine, scratch, false);
- if (err)
- goto err;
-
- err = __live_lrc_gpr(engine, scratch, true);
- if (err)
- goto err;
-
-err:
- st_engine_heartbeat_enable(engine);
- if (igt_flush_test(gt->i915))
- err = -EIO;
- if (err)
- break;
- }
-
- i915_vma_unpin_and_release(&scratch, 0);
- return err;
-}
-
-static struct i915_request *
-create_timestamp(struct intel_context *ce, void *slot, int idx)
-{
- const u32 offset =
- i915_ggtt_offset(ce->engine->status_page.vma) +
- offset_in_page(slot);
- struct i915_request *rq;
- u32 *cs;
- int err;
-
- rq = intel_context_create_request(ce);
- if (IS_ERR(rq))
- return rq;
-
- cs = intel_ring_begin(rq, 10);
- if (IS_ERR(cs)) {
- err = PTR_ERR(cs);
- goto err;
- }
-
- *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
- *cs++ = MI_NOOP;
-
- *cs++ = MI_SEMAPHORE_WAIT |
- MI_SEMAPHORE_GLOBAL_GTT |
- MI_SEMAPHORE_POLL |
- MI_SEMAPHORE_SAD_NEQ_SDD;
- *cs++ = 0;
- *cs++ = offset;
- *cs++ = 0;
-
- *cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
- *cs++ = i915_mmio_reg_offset(RING_CTX_TIMESTAMP(rq->engine->mmio_base));
- *cs++ = offset + idx * sizeof(u32);
- *cs++ = 0;
-
- intel_ring_advance(rq, cs);
-
- rq->sched.attr.priority = I915_PRIORITY_MASK;
- err = 0;
-err:
- i915_request_get(rq);
- i915_request_add(rq);
- if (err) {
- i915_request_put(rq);
- return ERR_PTR(err);
- }
-
- return rq;
-}
-
-struct lrc_timestamp {
- struct intel_engine_cs *engine;
- struct intel_context *ce[2];
- u32 poison;
-};
-
-static bool timestamp_advanced(u32 start, u32 end)
-{
- return (s32)(end - start) > 0;
-}
-
-static int __lrc_timestamp(const struct lrc_timestamp *arg, bool preempt)
-{
- u32 *slot = memset32(arg->engine->status_page.addr + 1000, 0, 4);
- struct i915_request *rq;
- u32 timestamp;
- int err = 0;
-
- arg->ce[0]->lrc_reg_state[CTX_TIMESTAMP] = arg->poison;
- rq = create_timestamp(arg->ce[0], slot, 1);
- if (IS_ERR(rq))
- return PTR_ERR(rq);
-
- err = wait_for_submit(rq->engine, rq, HZ / 2);
- if (err)
- goto err;
-
- if (preempt) {
- arg->ce[1]->lrc_reg_state[CTX_TIMESTAMP] = 0xdeadbeef;
- err = emit_semaphore_signal(arg->ce[1], slot);
- if (err)
- goto err;
- } else {
- slot[0] = 1;
- wmb();
- }
-
- /* And wait for switch to kernel (to save our context to memory) */
- err = context_flush(arg->ce[0], HZ / 2);
- if (err)
- goto err;
-
- if (!timestamp_advanced(arg->poison, slot[1])) {
- pr_err("%s(%s): invalid timestamp on restore, context:%x, request:%x\n",
- arg->engine->name, preempt ? "preempt" : "simple",
- arg->poison, slot[1]);
- err = -EINVAL;
- }
-
- timestamp = READ_ONCE(arg->ce[0]->lrc_reg_state[CTX_TIMESTAMP]);
- if (!timestamp_advanced(slot[1], timestamp)) {
- pr_err("%s(%s): invalid timestamp on save, request:%x, context:%x\n",
- arg->engine->name, preempt ? "preempt" : "simple",
- slot[1], timestamp);
- err = -EINVAL;
- }
-
-err:
- memset32(slot, -1, 4);
- i915_request_put(rq);
- return err;
-}
-
-static int live_lrc_timestamp(void *arg)
-{
- struct lrc_timestamp data = {};
- struct intel_gt *gt = arg;
- enum intel_engine_id id;
- const u32 poison[] = {
- 0,
- S32_MAX,
- (u32)S32_MAX + 1,
- U32_MAX,
- };
-
- /*
- * We want to verify that the timestamp is saved and restore across
- * context switches and is monotonic.
- *
- * So we do this with a little bit of LRC poisoning to check various
- * boundary conditions, and see what happens if we preempt the context
- * with a second request (carrying more poison into the timestamp).
- */
-
- for_each_engine(data.engine, gt, id) {
- int i, err = 0;
-
- st_engine_heartbeat_disable(data.engine);
-
- for (i = 0; i < ARRAY_SIZE(data.ce); i++) {
- struct intel_context *tmp;
-
- tmp = intel_context_create(data.engine);
- if (IS_ERR(tmp)) {
- err = PTR_ERR(tmp);
- goto err;
- }
-
- err = intel_context_pin(tmp);
- if (err) {
- intel_context_put(tmp);
- goto err;
- }
-
- data.ce[i] = tmp;
- }
-
- for (i = 0; i < ARRAY_SIZE(poison); i++) {
- data.poison = poison[i];
-
- err = __lrc_timestamp(&data, false);
- if (err)
- break;
-
- err = __lrc_timestamp(&data, true);
- if (err)
- break;
- }
-
-err:
- st_engine_heartbeat_enable(data.engine);
- for (i = 0; i < ARRAY_SIZE(data.ce); i++) {
- if (!data.ce[i])
- break;
-
- intel_context_unpin(data.ce[i]);
- intel_context_put(data.ce[i]);
- }
-
- if (igt_flush_test(gt->i915))
- err = -EIO;
- if (err)
- return err;
- }
-
- return 0;
-}
-
-static struct i915_vma *
-create_user_vma(struct i915_address_space *vm, unsigned long size)
-{
- struct drm_i915_gem_object *obj;
- struct i915_vma *vma;
- int err;
-
- obj = i915_gem_object_create_internal(vm->i915, size);
- if (IS_ERR(obj))
- return ERR_CAST(obj);
-
- vma = i915_vma_instance(obj, vm, NULL);
- if (IS_ERR(vma)) {
- i915_gem_object_put(obj);
- return vma;
- }
-
- err = i915_vma_pin(vma, 0, 0, PIN_USER);
- if (err) {
- i915_gem_object_put(obj);
- return ERR_PTR(err);
- }
-
- return vma;
-}
-
-static struct i915_vma *
-store_context(struct intel_context *ce, struct i915_vma *scratch)
-{
- struct i915_vma *batch;
- u32 dw, x, *cs, *hw;
- u32 *defaults;
-
- batch = create_user_vma(ce->vm, SZ_64K);
- if (IS_ERR(batch))
- return batch;
-
- cs = i915_gem_object_pin_map(batch->obj, I915_MAP_WC);
- if (IS_ERR(cs)) {
- i915_vma_put(batch);
- return ERR_CAST(cs);
- }
-
- defaults = shmem_pin_map(ce->engine->default_state);
- if (!defaults) {
- i915_gem_object_unpin_map(batch->obj);
- i915_vma_put(batch);
- return ERR_PTR(-ENOMEM);
- }
-
- x = 0;
- dw = 0;
- hw = defaults;
- hw += LRC_STATE_OFFSET / sizeof(*hw);
- do {
- u32 len = hw[dw] & 0x7f;
-
- if (hw[dw] == 0) {
- dw++;
- continue;
- }
-
- if ((hw[dw] & GENMASK(31, 23)) != MI_INSTR(0x22, 0)) {
- dw += len + 2;
- continue;
- }
-
- dw++;
- len = (len + 1) / 2;
- while (len--) {
- *cs++ = MI_STORE_REGISTER_MEM_GEN8;
- *cs++ = hw[dw];
- *cs++ = lower_32_bits(scratch->node.start + x);
- *cs++ = upper_32_bits(scratch->node.start + x);
-
- dw += 2;
- x += 4;
- }
- } while (dw < PAGE_SIZE / sizeof(u32) &&
- (hw[dw] & ~BIT(0)) != MI_BATCH_BUFFER_END);
-
- *cs++ = MI_BATCH_BUFFER_END;
-
- shmem_unpin_map(ce->engine->default_state, defaults);
-
- i915_gem_object_flush_map(batch->obj);
- i915_gem_object_unpin_map(batch->obj);
-
- return batch;
-}
-
-static int move_to_active(struct i915_request *rq,
- struct i915_vma *vma,
- unsigned int flags)
-{
- int err;
-
- i915_vma_lock(vma);
- err = i915_request_await_object(rq, vma->obj, flags);
- if (!err)
- err = i915_vma_move_to_active(vma, rq, flags);
- i915_vma_unlock(vma);
-
- return err;
-}
-
-static struct i915_request *
-record_registers(struct intel_context *ce,
- struct i915_vma *before,
- struct i915_vma *after,
- u32 *sema)
-{
- struct i915_vma *b_before, *b_after;
- struct i915_request *rq;
- u32 *cs;
- int err;
-
- b_before = store_context(ce, before);
- if (IS_ERR(b_before))
- return ERR_CAST(b_before);
-
- b_after = store_context(ce, after);
- if (IS_ERR(b_after)) {
- rq = ERR_CAST(b_after);
- goto err_before;
- }
-
- rq = intel_context_create_request(ce);
- if (IS_ERR(rq))
- goto err_after;
-
- err = move_to_active(rq, before, EXEC_OBJECT_WRITE);
- if (err)
- goto err_rq;
-
- err = move_to_active(rq, b_before, 0);
- if (err)
- goto err_rq;
-
- err = move_to_active(rq, after, EXEC_OBJECT_WRITE);
- if (err)
- goto err_rq;
-
- err = move_to_active(rq, b_after, 0);
- if (err)
- goto err_rq;
-
- cs = intel_ring_begin(rq, 14);
- if (IS_ERR(cs)) {
- err = PTR_ERR(cs);
- goto err_rq;
- }
-
- *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
- *cs++ = MI_BATCH_BUFFER_START_GEN8 | BIT(8);
- *cs++ = lower_32_bits(b_before->node.start);
- *cs++ = upper_32_bits(b_before->node.start);
-
- *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
- *cs++ = MI_SEMAPHORE_WAIT |
- MI_SEMAPHORE_GLOBAL_GTT |
- MI_SEMAPHORE_POLL |
- MI_SEMAPHORE_SAD_NEQ_SDD;
- *cs++ = 0;
- *cs++ = i915_ggtt_offset(ce->engine->status_page.vma) +
- offset_in_page(sema);
- *cs++ = 0;
- *cs++ = MI_NOOP;
-
- *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
- *cs++ = MI_BATCH_BUFFER_START_GEN8 | BIT(8);
- *cs++ = lower_32_bits(b_after->node.start);
- *cs++ = upper_32_bits(b_after->node.start);
-
- intel_ring_advance(rq, cs);
-
- WRITE_ONCE(*sema, 0);
- i915_request_get(rq);
- i915_request_add(rq);
-err_after:
- i915_vma_put(b_after);
-err_before:
- i915_vma_put(b_before);
- return rq;
-
-err_rq:
- i915_request_add(rq);
- rq = ERR_PTR(err);
- goto err_after;
-}
-
-static struct i915_vma *load_context(struct intel_context *ce, u32 poison)
-{
- struct i915_vma *batch;
- u32 dw, *cs, *hw;
- u32 *defaults;
-
- batch = create_user_vma(ce->vm, SZ_64K);
- if (IS_ERR(batch))
- return batch;
-
- cs = i915_gem_object_pin_map(batch->obj, I915_MAP_WC);
- if (IS_ERR(cs)) {
- i915_vma_put(batch);
- return ERR_CAST(cs);
- }
-
- defaults = shmem_pin_map(ce->engine->default_state);
- if (!defaults) {
- i915_gem_object_unpin_map(batch->obj);
- i915_vma_put(batch);
- return ERR_PTR(-ENOMEM);
- }
-
- dw = 0;
- hw = defaults;
- hw += LRC_STATE_OFFSET / sizeof(*hw);
- do {
- u32 len = hw[dw] & 0x7f;
-
- if (hw[dw] == 0) {
- dw++;
- continue;
- }
-
- if ((hw[dw] & GENMASK(31, 23)) != MI_INSTR(0x22, 0)) {
- dw += len + 2;
- continue;
- }
-
- dw++;
- len = (len + 1) / 2;
- *cs++ = MI_LOAD_REGISTER_IMM(len);
- while (len--) {
- *cs++ = hw[dw];
- *cs++ = poison;
- dw += 2;
- }
- } while (dw < PAGE_SIZE / sizeof(u32) &&
- (hw[dw] & ~BIT(0)) != MI_BATCH_BUFFER_END);
-
- *cs++ = MI_BATCH_BUFFER_END;
-
- shmem_unpin_map(ce->engine->default_state, defaults);
-
- i915_gem_object_flush_map(batch->obj);
- i915_gem_object_unpin_map(batch->obj);
-
- return batch;
-}
-
-static int poison_registers(struct intel_context *ce, u32 poison, u32 *sema)
-{
- struct i915_request *rq;
- struct i915_vma *batch;
- u32 *cs;
- int err;
-
- batch = load_context(ce, poison);
- if (IS_ERR(batch))
- return PTR_ERR(batch);
-
- rq = intel_context_create_request(ce);
- if (IS_ERR(rq)) {
- err = PTR_ERR(rq);
- goto err_batch;
- }
-
- err = move_to_active(rq, batch, 0);
- if (err)
- goto err_rq;
-
- cs = intel_ring_begin(rq, 8);
- if (IS_ERR(cs)) {
- err = PTR_ERR(cs);
- goto err_rq;
- }
-
- *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
- *cs++ = MI_BATCH_BUFFER_START_GEN8 | BIT(8);
- *cs++ = lower_32_bits(batch->node.start);
- *cs++ = upper_32_bits(batch->node.start);
-
- *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
- *cs++ = i915_ggtt_offset(ce->engine->status_page.vma) +
- offset_in_page(sema);
- *cs++ = 0;
- *cs++ = 1;
-
- intel_ring_advance(rq, cs);
-
- rq->sched.attr.priority = I915_PRIORITY_BARRIER;
-err_rq:
- i915_request_add(rq);
-err_batch:
- i915_vma_put(batch);
- return err;
-}
-
-static bool is_moving(u32 a, u32 b)
-{
- return a != b;
-}
-
-static int compare_isolation(struct intel_engine_cs *engine,
- struct i915_vma *ref[2],
- struct i915_vma *result[2],
- struct intel_context *ce,
- u32 poison)
-{
- u32 x, dw, *hw, *lrc;
- u32 *A[2], *B[2];
- u32 *defaults;
- int err = 0;
-
- A[0] = i915_gem_object_pin_map(ref[0]->obj, I915_MAP_WC);
- if (IS_ERR(A[0]))
- return PTR_ERR(A[0]);
-
- A[1] = i915_gem_object_pin_map(ref[1]->obj, I915_MAP_WC);
- if (IS_ERR(A[1])) {
- err = PTR_ERR(A[1]);
- goto err_A0;
- }
-
- B[0] = i915_gem_object_pin_map(result[0]->obj, I915_MAP_WC);
- if (IS_ERR(B[0])) {
- err = PTR_ERR(B[0]);
- goto err_A1;
- }
-
- B[1] = i915_gem_object_pin_map(result[1]->obj, I915_MAP_WC);
- if (IS_ERR(B[1])) {
- err = PTR_ERR(B[1]);
- goto err_B0;
- }
-
- lrc = i915_gem_object_pin_map(ce->state->obj,
- i915_coherent_map_type(engine->i915));
- if (IS_ERR(lrc)) {
- err = PTR_ERR(lrc);
- goto err_B1;
- }
- lrc += LRC_STATE_OFFSET / sizeof(*hw);
-
- defaults = shmem_pin_map(ce->engine->default_state);
- if (!defaults) {
- err = -ENOMEM;
- goto err_lrc;
- }
-
- x = 0;
- dw = 0;
- hw = defaults;
- hw += LRC_STATE_OFFSET / sizeof(*hw);
- do {
- u32 len = hw[dw] & 0x7f;
-
- if (hw[dw] == 0) {
- dw++;
- continue;
- }
-
- if ((hw[dw] & GENMASK(31, 23)) != MI_INSTR(0x22, 0)) {
- dw += len + 2;
- continue;
- }
-
- dw++;
- len = (len + 1) / 2;
- while (len--) {
- if (!is_moving(A[0][x], A[1][x]) &&
- (A[0][x] != B[0][x] || A[1][x] != B[1][x])) {
- switch (hw[dw] & 4095) {
- case 0x30: /* RING_HEAD */
- case 0x34: /* RING_TAIL */
- break;
-
- default:
- pr_err("%s[%d]: Mismatch for register %4x, default %08x, reference %08x, result (%08x, %08x), poison %08x, context %08x\n",
- engine->name, dw,
- hw[dw], hw[dw + 1],
- A[0][x], B[0][x], B[1][x],
- poison, lrc[dw + 1]);
- err = -EINVAL;
- }
- }
- dw += 2;
- x++;
- }
- } while (dw < PAGE_SIZE / sizeof(u32) &&
- (hw[dw] & ~BIT(0)) != MI_BATCH_BUFFER_END);
-
- shmem_unpin_map(ce->engine->default_state, defaults);
-err_lrc:
- i915_gem_object_unpin_map(ce->state->obj);
-err_B1:
- i915_gem_object_unpin_map(result[1]->obj);
-err_B0:
- i915_gem_object_unpin_map(result[0]->obj);
-err_A1:
- i915_gem_object_unpin_map(ref[1]->obj);
-err_A0:
- i915_gem_object_unpin_map(ref[0]->obj);
- return err;
-}
-
-static int __lrc_isolation(struct intel_engine_cs *engine, u32 poison)
-{
- u32 *sema = memset32(engine->status_page.addr + 1000, 0, 1);
- struct i915_vma *ref[2], *result[2];
- struct intel_context *A, *B;
- struct i915_request *rq;
- int err;
-
- A = intel_context_create(engine);
- if (IS_ERR(A))
- return PTR_ERR(A);
-
- B = intel_context_create(engine);
- if (IS_ERR(B)) {
- err = PTR_ERR(B);
- goto err_A;
- }
-
- ref[0] = create_user_vma(A->vm, SZ_64K);
- if (IS_ERR(ref[0])) {
- err = PTR_ERR(ref[0]);
- goto err_B;
- }
-
- ref[1] = create_user_vma(A->vm, SZ_64K);
- if (IS_ERR(ref[1])) {
- err = PTR_ERR(ref[1]);
- goto err_ref0;
- }
-
- rq = record_registers(A, ref[0], ref[1], sema);
- if (IS_ERR(rq)) {
- err = PTR_ERR(rq);
- goto err_ref1;
- }
-
- WRITE_ONCE(*sema, 1);
- wmb();
-
- if (i915_request_wait(rq, 0, HZ / 2) < 0) {
- i915_request_put(rq);
- err = -ETIME;
- goto err_ref1;
- }
- i915_request_put(rq);
-
- result[0] = create_user_vma(A->vm, SZ_64K);
- if (IS_ERR(result[0])) {
- err = PTR_ERR(result[0]);
- goto err_ref1;
- }
-
- result[1] = create_user_vma(A->vm, SZ_64K);
- if (IS_ERR(result[1])) {
- err = PTR_ERR(result[1]);
- goto err_result0;
- }
-
- rq = record_registers(A, result[0], result[1], sema);
- if (IS_ERR(rq)) {
- err = PTR_ERR(rq);
- goto err_result1;
- }
-
- err = poison_registers(B, poison, sema);
- if (err) {
- WRITE_ONCE(*sema, -1);
- i915_request_put(rq);
- goto err_result1;
- }
-
- if (i915_request_wait(rq, 0, HZ / 2) < 0) {
- i915_request_put(rq);
- err = -ETIME;
- goto err_result1;
- }
- i915_request_put(rq);
-
- err = compare_isolation(engine, ref, result, A, poison);
-
-err_result1:
- i915_vma_put(result[1]);
-err_result0:
- i915_vma_put(result[0]);
-err_ref1:
- i915_vma_put(ref[1]);
-err_ref0:
- i915_vma_put(ref[0]);
-err_B:
- intel_context_put(B);
-err_A:
- intel_context_put(A);
- return err;
-}
-
-static bool skip_isolation(const struct intel_engine_cs *engine)
-{
- if (engine->class == COPY_ENGINE_CLASS && INTEL_GEN(engine->i915) == 9)
- return true;
-
- if (engine->class == RENDER_CLASS && INTEL_GEN(engine->i915) == 11)
- return true;
-
- return false;
-}
-
-static int live_lrc_isolation(void *arg)
-{
- struct intel_gt *gt = arg;
- struct intel_engine_cs *engine;
- enum intel_engine_id id;
- const u32 poison[] = {
- STACK_MAGIC,
- 0x3a3a3a3a,
- 0x5c5c5c5c,
- 0xffffffff,
- 0xffff0000,
- };
- int err = 0;
-
- /*
- * Our goal is try and verify that per-context state cannot be
- * tampered with by another non-privileged client.
- *
- * We take the list of context registers from the LRI in the default
- * context image and attempt to modify that list from a remote context.
- */
-
- for_each_engine(engine, gt, id) {
- int i;
-
- /* Just don't even ask */
- if (!IS_ENABLED(CONFIG_DRM_I915_SELFTEST_BROKEN) &&
- skip_isolation(engine))
- continue;
-
- intel_engine_pm_get(engine);
- for (i = 0; i < ARRAY_SIZE(poison); i++) {
- int result;
-
- result = __lrc_isolation(engine, poison[i]);
- if (result && !err)
- err = result;
-
- result = __lrc_isolation(engine, ~poison[i]);
- if (result && !err)
- err = result;
- }
- intel_engine_pm_put(engine);
- if (igt_flush_test(gt->i915)) {
- err = -EIO;
- break;
- }
- }
-
- return err;
-}
-
-static int indirect_ctx_submit_req(struct intel_context *ce)
-{
- struct i915_request *rq;
- int err = 0;
-
- rq = intel_context_create_request(ce);
- if (IS_ERR(rq))
- return PTR_ERR(rq);
-
- i915_request_get(rq);
- i915_request_add(rq);
-
- if (i915_request_wait(rq, 0, HZ / 5) < 0)
- err = -ETIME;
-
- i915_request_put(rq);
-
- return err;
-}
-
-#define CTX_BB_CANARY_OFFSET (3 * 1024)
-#define CTX_BB_CANARY_INDEX (CTX_BB_CANARY_OFFSET / sizeof(u32))
-
-static u32 *
-emit_indirect_ctx_bb_canary(const struct intel_context *ce, u32 *cs)
-{
- *cs++ = MI_STORE_REGISTER_MEM_GEN8 |
- MI_SRM_LRM_GLOBAL_GTT |
- MI_LRI_LRM_CS_MMIO;
- *cs++ = i915_mmio_reg_offset(RING_START(0));
- *cs++ = i915_ggtt_offset(ce->state) +
- context_wa_bb_offset(ce) +
- CTX_BB_CANARY_OFFSET;
- *cs++ = 0;
-
- return cs;
-}
-
-static void
-indirect_ctx_bb_setup(struct intel_context *ce)
-{
- u32 *cs = context_indirect_bb(ce);
-
- cs[CTX_BB_CANARY_INDEX] = 0xdeadf00d;
-
- setup_indirect_ctx_bb(ce, ce->engine, emit_indirect_ctx_bb_canary);
-}
-
-static bool check_ring_start(struct intel_context *ce)
-{
- const u32 * const ctx_bb = (void *)(ce->lrc_reg_state) -
- LRC_STATE_OFFSET + context_wa_bb_offset(ce);
-
- if (ctx_bb[CTX_BB_CANARY_INDEX] == ce->lrc_reg_state[CTX_RING_START])
- return true;
-
- pr_err("ring start mismatch: canary 0x%08x vs state 0x%08x\n",
- ctx_bb[CTX_BB_CANARY_INDEX],
- ce->lrc_reg_state[CTX_RING_START]);
-
- return false;
-}
-
-static int indirect_ctx_bb_check(struct intel_context *ce)
-{
- int err;
-
- err = indirect_ctx_submit_req(ce);
- if (err)
- return err;
-
- if (!check_ring_start(ce))
- return -EINVAL;
-
- return 0;
-}
-
-static int __live_lrc_indirect_ctx_bb(struct intel_engine_cs *engine)
-{
- struct intel_context *a, *b;
- int err;
-
- a = intel_context_create(engine);
- if (IS_ERR(a))
- return PTR_ERR(a);
- err = intel_context_pin(a);
- if (err)
- goto put_a;
-
- b = intel_context_create(engine);
- if (IS_ERR(b)) {
- err = PTR_ERR(b);
- goto unpin_a;
- }
- err = intel_context_pin(b);
- if (err)
- goto put_b;
-
- /* We use the already reserved extra page in context state */
- if (!a->wa_bb_page) {
- GEM_BUG_ON(b->wa_bb_page);
- GEM_BUG_ON(INTEL_GEN(engine->i915) == 12);
- goto unpin_b;
- }
-
- /*
- * In order to test that our per context bb is truly per context,
- * and executes at the intended spot on context restoring process,
- * make the batch store the ring start value to memory.
- * As ring start is restored apriori of starting the indirect ctx bb and
- * as it will be different for each context, it fits to this purpose.
- */
- indirect_ctx_bb_setup(a);
- indirect_ctx_bb_setup(b);
-
- err = indirect_ctx_bb_check(a);
- if (err)
- goto unpin_b;
-
- err = indirect_ctx_bb_check(b);
-
-unpin_b:
- intel_context_unpin(b);
-put_b:
- intel_context_put(b);
-unpin_a:
- intel_context_unpin(a);
-put_a:
- intel_context_put(a);
-
- return err;
-}
-
-static int live_lrc_indirect_ctx_bb(void *arg)
-{
- struct intel_gt *gt = arg;
- struct intel_engine_cs *engine;
- enum intel_engine_id id;
- int err = 0;
-
- for_each_engine(engine, gt, id) {
- intel_engine_pm_get(engine);
- err = __live_lrc_indirect_ctx_bb(engine);
- intel_engine_pm_put(engine);
-
- if (igt_flush_test(gt->i915))
- err = -EIO;
-
- if (err)
- break;
- }
-
- return err;
-}
-
-static void garbage_reset(struct intel_engine_cs *engine,
- struct i915_request *rq)
-{
- const unsigned int bit = I915_RESET_ENGINE + engine->id;
- unsigned long *lock = &engine->gt->reset.flags;
-
- if (test_and_set_bit(bit, lock))
- return;
-
- tasklet_disable(&engine->execlists.tasklet);
-
- if (!rq->fence.error)
- intel_engine_reset(engine, NULL);
-
- tasklet_enable(&engine->execlists.tasklet);
- clear_and_wake_up_bit(bit, lock);
-}
-
-static struct i915_request *garbage(struct intel_context *ce,
- struct rnd_state *prng)
-{
- struct i915_request *rq;
- int err;
-
- err = intel_context_pin(ce);
- if (err)
- return ERR_PTR(err);
-
- prandom_bytes_state(prng,
- ce->lrc_reg_state,
- ce->engine->context_size -
- LRC_STATE_OFFSET);
-
- rq = intel_context_create_request(ce);
- if (IS_ERR(rq)) {
- err = PTR_ERR(rq);
- goto err_unpin;
- }
-
- i915_request_get(rq);
- i915_request_add(rq);
- return rq;
-
-err_unpin:
- intel_context_unpin(ce);
- return ERR_PTR(err);
-}
-
-static int __lrc_garbage(struct intel_engine_cs *engine, struct rnd_state *prng)
-{
- struct intel_context *ce;
- struct i915_request *hang;
- int err = 0;
-
- ce = intel_context_create(engine);
- if (IS_ERR(ce))
- return PTR_ERR(ce);
-
- hang = garbage(ce, prng);
- if (IS_ERR(hang)) {
- err = PTR_ERR(hang);
- goto err_ce;
- }
-
- if (wait_for_submit(engine, hang, HZ / 2)) {
- i915_request_put(hang);
- err = -ETIME;
- goto err_ce;
- }
-
- intel_context_set_banned(ce);
- garbage_reset(engine, hang);
-
- intel_engine_flush_submission(engine);
- if (!hang->fence.error) {
- i915_request_put(hang);
- pr_err("%s: corrupted context was not reset\n",
- engine->name);
- err = -EINVAL;
- goto err_ce;
- }
-
- if (i915_request_wait(hang, 0, HZ / 2) < 0) {
- pr_err("%s: corrupted context did not recover\n",
- engine->name);
- i915_request_put(hang);
- err = -EIO;
- goto err_ce;
- }
- i915_request_put(hang);
-
-err_ce:
- intel_context_put(ce);
- return err;
-}
-
-static int live_lrc_garbage(void *arg)
-{
- struct intel_gt *gt = arg;
- struct intel_engine_cs *engine;
- enum intel_engine_id id;
-
- /*
- * Verify that we can recover if one context state is completely
- * corrupted.
- */
-
- if (!IS_ENABLED(CONFIG_DRM_I915_SELFTEST_BROKEN))
- return 0;
-
- for_each_engine(engine, gt, id) {
- I915_RND_STATE(prng);
- int err = 0, i;
-
- if (!intel_has_reset_engine(engine->gt))
- continue;
-
- intel_engine_pm_get(engine);
- for (i = 0; i < 3; i++) {
- err = __lrc_garbage(engine, &prng);
- if (err)
- break;
- }
- intel_engine_pm_put(engine);
-
- if (igt_flush_test(gt->i915))
- err = -EIO;
- if (err)
- return err;
- }
-
- return 0;
-}
-
-static int __live_pphwsp_runtime(struct intel_engine_cs *engine)
-{
- struct intel_context *ce;
- struct i915_request *rq;
- IGT_TIMEOUT(end_time);
- int err;
-
- ce = intel_context_create(engine);
- if (IS_ERR(ce))
- return PTR_ERR(ce);
-
- ce->runtime.num_underflow = 0;
- ce->runtime.max_underflow = 0;
-
- do {
- unsigned int loop = 1024;
-
- while (loop) {
- rq = intel_context_create_request(ce);
- if (IS_ERR(rq)) {
- err = PTR_ERR(rq);
- goto err_rq;
- }
-
- if (--loop == 0)
- i915_request_get(rq);
-
- i915_request_add(rq);
- }
-
- if (__igt_timeout(end_time, NULL))
- break;
-
- i915_request_put(rq);
- } while (1);
-
- err = i915_request_wait(rq, 0, HZ / 5);
- if (err < 0) {
- pr_err("%s: request not completed!\n", engine->name);
- goto err_wait;
- }
-
- igt_flush_test(engine->i915);
-
- pr_info("%s: pphwsp runtime %lluns, average %lluns\n",
- engine->name,
- intel_context_get_total_runtime_ns(ce),
- intel_context_get_avg_runtime_ns(ce));
-
- err = 0;
- if (ce->runtime.num_underflow) {
- pr_err("%s: pphwsp underflow %u time(s), max %u cycles!\n",
- engine->name,
- ce->runtime.num_underflow,
- ce->runtime.max_underflow);
- GEM_TRACE_DUMP();
- err = -EOVERFLOW;
- }
-
-err_wait:
- i915_request_put(rq);
-err_rq:
- intel_context_put(ce);
- return err;
-}
-
-static int live_pphwsp_runtime(void *arg)
-{
- struct intel_gt *gt = arg;
- struct intel_engine_cs *engine;
- enum intel_engine_id id;
- int err = 0;
-
- /*
- * Check that cumulative context runtime as stored in the pphwsp[16]
- * is monotonic.
- */
-
- for_each_engine(engine, gt, id) {
- err = __live_pphwsp_runtime(engine);
- if (err)
- break;
- }
-
- if (igt_flush_test(gt->i915))
- err = -EIO;
-
- return err;
-}
-
-int intel_lrc_live_selftests(struct drm_i915_private *i915)
-{
- static const struct i915_subtest tests[] = {
- SUBTEST(live_lrc_layout),
- SUBTEST(live_lrc_fixed),
- SUBTEST(live_lrc_state),
- SUBTEST(live_lrc_gpr),
- SUBTEST(live_lrc_isolation),
- SUBTEST(live_lrc_timestamp),
- SUBTEST(live_lrc_garbage),
- SUBTEST(live_pphwsp_runtime),
- SUBTEST(live_lrc_indirect_ctx_bb),
- };
-
- if (!HAS_LOGICAL_RING_CONTEXTS(i915))
- return 0;
-
- return intel_gt_live_subtests(tests, &i915->gt);
-}
new file mode 100644
@@ -0,0 +1,1864 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2018 Intel Corporation
+ */
+
+#include <linux/prime_numbers.h>
+
+#include "i915_selftest.h"
+#include "intel_engine_heartbeat.h"
+#include "intel_engine_pm.h"
+#include "intel_reset.h"
+#include "intel_ring.h"
+#include "selftest_engine_heartbeat.h"
+#include "selftests/i915_random.h"
+#include "selftests/igt_flush_test.h"
+#include "selftests/igt_live_test.h"
+#include "selftests/igt_spinner.h"
+#include "selftests/lib_sw_fence.h"
+#include "shmem_utils.h"
+
+#include "gem/selftests/igt_gem_utils.h"
+#include "gem/selftests/mock_context.h"
+
+#define CS_GPR(engine, n) ((engine)->mmio_base + 0x600 + (n) * 4)
+#define NUM_GPR 16
+#define NUM_GPR_DW (NUM_GPR * 2) /* each GPR is 2 dwords */
+
+static struct i915_vma *create_scratch(struct intel_gt *gt)
+{
+ struct drm_i915_gem_object *obj;
+ struct i915_vma *vma;
+ int err;
+
+ obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE);
+ if (IS_ERR(obj))
+ return ERR_CAST(obj);
+
+ i915_gem_object_set_cache_coherency(obj, I915_CACHING_CACHED);
+
+ vma = i915_vma_instance(obj, >->ggtt->vm, NULL);
+ if (IS_ERR(vma)) {
+ i915_gem_object_put(obj);
+ return vma;
+ }
+
+ err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL);
+ if (err) {
+ i915_gem_object_put(obj);
+ return ERR_PTR(err);
+ }
+
+ return vma;
+}
+
+static bool is_active(struct i915_request *rq)
+{
+ if (i915_request_is_active(rq))
+ return true;
+
+ if (i915_request_on_hold(rq))
+ return true;
+
+ if (i915_request_has_initial_breadcrumb(rq) && i915_request_started(rq))
+ return true;
+
+ return false;
+}
+
+static int wait_for_submit(struct intel_engine_cs *engine,
+ struct i915_request *rq,
+ unsigned long timeout)
+{
+ timeout += jiffies;
+ do {
+ bool done = time_after(jiffies, timeout);
+
+ if (i915_request_completed(rq)) /* that was quick! */
+ return 0;
+
+ /* Wait until the HW has acknowleged the submission (or err) */
+ intel_engine_flush_submission(engine);
+ if (!READ_ONCE(engine->execlists.pending[0]) && is_active(rq))
+ return 0;
+
+ if (done)
+ return -ETIME;
+
+ cond_resched();
+ } while (1);
+}
+
+static int emit_semaphore_signal(struct intel_context *ce, void *slot)
+{
+ const u32 offset =
+ i915_ggtt_offset(ce->engine->status_page.vma) +
+ offset_in_page(slot);
+ struct i915_request *rq;
+ u32 *cs;
+
+ rq = intel_context_create_request(ce);
+ if (IS_ERR(rq))
+ return PTR_ERR(rq);
+
+ cs = intel_ring_begin(rq, 4);
+ if (IS_ERR(cs)) {
+ i915_request_add(rq);
+ return PTR_ERR(cs);
+ }
+
+ *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
+ *cs++ = offset;
+ *cs++ = 0;
+ *cs++ = 1;
+
+ intel_ring_advance(rq, cs);
+
+ rq->sched.attr.priority = I915_PRIORITY_BARRIER;
+ i915_request_add(rq);
+ return 0;
+}
+
+static int context_flush(struct intel_context *ce, long timeout)
+{
+ struct i915_request *rq;
+ struct dma_fence *fence;
+ int err = 0;
+
+ rq = intel_engine_create_kernel_request(ce->engine);
+ if (IS_ERR(rq))
+ return PTR_ERR(rq);
+
+ fence = i915_active_fence_get(&ce->timeline->last_request);
+ if (fence) {
+ i915_request_await_dma_fence(rq, fence);
+ dma_fence_put(fence);
+ }
+
+ rq = i915_request_get(rq);
+ i915_request_add(rq);
+ if (i915_request_wait(rq, 0, timeout) < 0)
+ err = -ETIME;
+ i915_request_put(rq);
+
+ rmb(); /* We know the request is written, make sure all state is too! */
+ return err;
+}
+
+static int live_lrc_layout(void *arg)
+{
+ struct intel_gt *gt = arg;
+ struct intel_engine_cs *engine;
+ enum intel_engine_id id;
+ u32 *lrc;
+ int err;
+
+ /*
+ * Check the registers offsets we use to create the initial reg state
+ * match the layout saved by HW.
+ */
+
+ lrc = kmalloc(PAGE_SIZE, GFP_KERNEL);
+ if (!lrc)
+ return -ENOMEM;
+
+ err = 0;
+ for_each_engine(engine, gt, id) {
+ u32 *hw;
+ int dw;
+
+ if (!engine->default_state)
+ continue;
+
+ hw = shmem_pin_map(engine->default_state);
+ if (IS_ERR(hw)) {
+ err = PTR_ERR(hw);
+ break;
+ }
+ hw += LRC_STATE_OFFSET / sizeof(*hw);
+
+ __lrc_init_regs(memset(lrc, POISON_INUSE, PAGE_SIZE),
+ engine->kernel_context,
+ engine,
+ engine->kernel_context->ring,
+ true);
+
+ dw = 0;
+ do {
+ u32 lri = hw[dw];
+
+ if (lri == 0) {
+ dw++;
+ continue;
+ }
+
+ if (lrc[dw] == 0) {
+ pr_debug("%s: skipped instruction %x at dword %d\n",
+ engine->name, lri, dw);
+ dw++;
+ continue;
+ }
+
+ if ((lri & GENMASK(31, 23)) != MI_INSTR(0x22, 0)) {
+ pr_err("%s: Expected LRI command at dword %d, found %08x\n",
+ engine->name, dw, lri);
+ err = -EINVAL;
+ break;
+ }
+
+ if (lrc[dw] != lri) {
+ pr_err("%s: LRI command mismatch at dword %d, expected %08x found %08x\n",
+ engine->name, dw, lri, lrc[dw]);
+ err = -EINVAL;
+ break;
+ }
+
+ lri &= 0x7f;
+ lri++;
+ dw++;
+
+ while (lri) {
+ if (hw[dw] != lrc[dw]) {
+ pr_err("%s: Different registers found at dword %d, expected %x, found %x\n",
+ engine->name, dw, hw[dw], lrc[dw]);
+ err = -EINVAL;
+ break;
+ }
+
+ /*
+ * Skip over the actual register value as we
+ * expect that to differ.
+ */
+ dw += 2;
+ lri -= 2;
+ }
+ } while ((lrc[dw] & ~BIT(0)) != MI_BATCH_BUFFER_END);
+
+ if (err) {
+ pr_info("%s: HW register image:\n", engine->name);
+ igt_hexdump(hw, PAGE_SIZE);
+
+ pr_info("%s: SW register image:\n", engine->name);
+ igt_hexdump(lrc, PAGE_SIZE);
+ }
+
+ shmem_unpin_map(engine->default_state, hw);
+ if (err)
+ break;
+ }
+
+ kfree(lrc);
+ return err;
+}
+
+static int find_offset(const u32 *lri, u32 offset)
+{
+ int i;
+
+ for (i = 0; i < PAGE_SIZE / sizeof(u32); i++)
+ if (lri[i] == offset)
+ return i;
+
+ return -1;
+}
+
+static int live_lrc_fixed(void *arg)
+{
+ struct intel_gt *gt = arg;
+ struct intel_engine_cs *engine;
+ enum intel_engine_id id;
+ int err = 0;
+
+ /*
+ * Check the assumed register offsets match the actual locations in
+ * the context image.
+ */
+
+ for_each_engine(engine, gt, id) {
+ const struct {
+ u32 reg;
+ u32 offset;
+ const char *name;
+ } tbl[] = {
+ {
+ i915_mmio_reg_offset(RING_START(engine->mmio_base)),
+ CTX_RING_START - 1,
+ "RING_START"
+ },
+ {
+ i915_mmio_reg_offset(RING_CTL(engine->mmio_base)),
+ CTX_RING_CTL - 1,
+ "RING_CTL"
+ },
+ {
+ i915_mmio_reg_offset(RING_HEAD(engine->mmio_base)),
+ CTX_RING_HEAD - 1,
+ "RING_HEAD"
+ },
+ {
+ i915_mmio_reg_offset(RING_TAIL(engine->mmio_base)),
+ CTX_RING_TAIL - 1,
+ "RING_TAIL"
+ },
+ {
+ i915_mmio_reg_offset(RING_MI_MODE(engine->mmio_base)),
+ lrc_ring_mi_mode(engine),
+ "RING_MI_MODE"
+ },
+ {
+ i915_mmio_reg_offset(RING_BBSTATE(engine->mmio_base)),
+ CTX_BB_STATE - 1,
+ "BB_STATE"
+ },
+ {
+ i915_mmio_reg_offset(RING_BB_PER_CTX_PTR(engine->mmio_base)),
+ lrc_ring_wa_bb_per_ctx(engine),
+ "RING_BB_PER_CTX_PTR"
+ },
+ {
+ i915_mmio_reg_offset(RING_INDIRECT_CTX(engine->mmio_base)),
+ lrc_ring_indirect_ptr(engine),
+ "RING_INDIRECT_CTX_PTR"
+ },
+ {
+ i915_mmio_reg_offset(RING_INDIRECT_CTX_OFFSET(engine->mmio_base)),
+ lrc_ring_indirect_offset(engine),
+ "RING_INDIRECT_CTX_OFFSET"
+ },
+ {
+ i915_mmio_reg_offset(RING_CTX_TIMESTAMP(engine->mmio_base)),
+ CTX_TIMESTAMP - 1,
+ "RING_CTX_TIMESTAMP"
+ },
+ {
+ i915_mmio_reg_offset(GEN8_RING_CS_GPR(engine->mmio_base, 0)),
+ lrc_ring_gpr0(engine),
+ "RING_CS_GPR0"
+ },
+ {
+ i915_mmio_reg_offset(RING_CMD_BUF_CCTL(engine->mmio_base)),
+ lrc_ring_cmd_buf_cctl(engine),
+ "RING_CMD_BUF_CCTL"
+ },
+ { },
+ }, *t;
+ u32 *hw;
+
+ if (!engine->default_state)
+ continue;
+
+ hw = shmem_pin_map(engine->default_state);
+ if (IS_ERR(hw)) {
+ err = PTR_ERR(hw);
+ break;
+ }
+ hw += LRC_STATE_OFFSET / sizeof(*hw);
+
+ for (t = tbl; t->name; t++) {
+ int dw = find_offset(hw, t->reg);
+
+ if (dw != t->offset) {
+ pr_err("%s: Offset for %s [0x%x] mismatch, found %x, expected %x\n",
+ engine->name,
+ t->name,
+ t->reg,
+ dw,
+ t->offset);
+ err = -EINVAL;
+ }
+ }
+
+ shmem_unpin_map(engine->default_state, hw);
+ }
+
+ return err;
+}
+
+static int __live_lrc_state(struct intel_engine_cs *engine,
+ struct i915_vma *scratch)
+{
+ struct intel_context *ce;
+ struct i915_request *rq;
+ struct i915_gem_ww_ctx ww;
+ enum {
+ RING_START_IDX = 0,
+ RING_TAIL_IDX,
+ MAX_IDX
+ };
+ u32 expected[MAX_IDX];
+ u32 *cs;
+ int err;
+ int n;
+
+ ce = intel_context_create(engine);
+ if (IS_ERR(ce))
+ return PTR_ERR(ce);
+
+ i915_gem_ww_ctx_init(&ww, false);
+retry:
+ err = i915_gem_object_lock(scratch->obj, &ww);
+ if (!err)
+ err = intel_context_pin_ww(ce, &ww);
+ if (err)
+ goto err_put;
+
+ rq = i915_request_create(ce);
+ if (IS_ERR(rq)) {
+ err = PTR_ERR(rq);
+ goto err_unpin;
+ }
+
+ cs = intel_ring_begin(rq, 4 * MAX_IDX);
+ if (IS_ERR(cs)) {
+ err = PTR_ERR(cs);
+ i915_request_add(rq);
+ goto err_unpin;
+ }
+
+ *cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
+ *cs++ = i915_mmio_reg_offset(RING_START(engine->mmio_base));
+ *cs++ = i915_ggtt_offset(scratch) + RING_START_IDX * sizeof(u32);
+ *cs++ = 0;
+
+ expected[RING_START_IDX] = i915_ggtt_offset(ce->ring->vma);
+
+ *cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
+ *cs++ = i915_mmio_reg_offset(RING_TAIL(engine->mmio_base));
+ *cs++ = i915_ggtt_offset(scratch) + RING_TAIL_IDX * sizeof(u32);
+ *cs++ = 0;
+
+ err = i915_request_await_object(rq, scratch->obj, true);
+ if (!err)
+ err = i915_vma_move_to_active(scratch, rq, EXEC_OBJECT_WRITE);
+
+ i915_request_get(rq);
+ i915_request_add(rq);
+ if (err)
+ goto err_rq;
+
+ intel_engine_flush_submission(engine);
+ expected[RING_TAIL_IDX] = ce->ring->tail;
+
+ if (i915_request_wait(rq, 0, HZ / 5) < 0) {
+ err = -ETIME;
+ goto err_rq;
+ }
+
+ cs = i915_gem_object_pin_map(scratch->obj, I915_MAP_WB);
+ if (IS_ERR(cs)) {
+ err = PTR_ERR(cs);
+ goto err_rq;
+ }
+
+ for (n = 0; n < MAX_IDX; n++) {
+ if (cs[n] != expected[n]) {
+ pr_err("%s: Stored register[%d] value[0x%x] did not match expected[0x%x]\n",
+ engine->name, n, cs[n], expected[n]);
+ err = -EINVAL;
+ break;
+ }
+ }
+
+ i915_gem_object_unpin_map(scratch->obj);
+
+err_rq:
+ i915_request_put(rq);
+err_unpin:
+ intel_context_unpin(ce);
+err_put:
+ if (err == -EDEADLK) {
+ err = i915_gem_ww_ctx_backoff(&ww);
+ if (!err)
+ goto retry;
+ }
+ i915_gem_ww_ctx_fini(&ww);
+ intel_context_put(ce);
+ return err;
+}
+
+static int live_lrc_state(void *arg)
+{
+ struct intel_gt *gt = arg;
+ struct intel_engine_cs *engine;
+ struct i915_vma *scratch;
+ enum intel_engine_id id;
+ int err = 0;
+
+ /*
+ * Check the live register state matches what we expect for this
+ * intel_context.
+ */
+
+ scratch = create_scratch(gt);
+ if (IS_ERR(scratch))
+ return PTR_ERR(scratch);
+
+ for_each_engine(engine, gt, id) {
+ err = __live_lrc_state(engine, scratch);
+ if (err)
+ break;
+ }
+
+ if (igt_flush_test(gt->i915))
+ err = -EIO;
+
+ i915_vma_unpin_and_release(&scratch, 0);
+ return err;
+}
+
+static int gpr_make_dirty(struct intel_context *ce)
+{
+ struct i915_request *rq;
+ u32 *cs;
+ int n;
+
+ rq = intel_context_create_request(ce);
+ if (IS_ERR(rq))
+ return PTR_ERR(rq);
+
+ cs = intel_ring_begin(rq, 2 * NUM_GPR_DW + 2);
+ if (IS_ERR(cs)) {
+ i915_request_add(rq);
+ return PTR_ERR(cs);
+ }
+
+ *cs++ = MI_LOAD_REGISTER_IMM(NUM_GPR_DW);
+ for (n = 0; n < NUM_GPR_DW; n++) {
+ *cs++ = CS_GPR(ce->engine, n);
+ *cs++ = STACK_MAGIC;
+ }
+ *cs++ = MI_NOOP;
+
+ intel_ring_advance(rq, cs);
+
+ rq->sched.attr.priority = I915_PRIORITY_BARRIER;
+ i915_request_add(rq);
+
+ return 0;
+}
+
+static struct i915_request *
+__gpr_read(struct intel_context *ce, struct i915_vma *scratch, u32 *slot)
+{
+ const u32 offset =
+ i915_ggtt_offset(ce->engine->status_page.vma) +
+ offset_in_page(slot);
+ struct i915_request *rq;
+ u32 *cs;
+ int err;
+ int n;
+
+ rq = intel_context_create_request(ce);
+ if (IS_ERR(rq))
+ return rq;
+
+ cs = intel_ring_begin(rq, 6 + 4 * NUM_GPR_DW);
+ if (IS_ERR(cs)) {
+ i915_request_add(rq);
+ return ERR_CAST(cs);
+ }
+
+ *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
+ *cs++ = MI_NOOP;
+
+ *cs++ = MI_SEMAPHORE_WAIT |
+ MI_SEMAPHORE_GLOBAL_GTT |
+ MI_SEMAPHORE_POLL |
+ MI_SEMAPHORE_SAD_NEQ_SDD;
+ *cs++ = 0;
+ *cs++ = offset;
+ *cs++ = 0;
+
+ for (n = 0; n < NUM_GPR_DW; n++) {
+ *cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
+ *cs++ = CS_GPR(ce->engine, n);
+ *cs++ = i915_ggtt_offset(scratch) + n * sizeof(u32);
+ *cs++ = 0;
+ }
+
+ i915_vma_lock(scratch);
+ err = i915_request_await_object(rq, scratch->obj, true);
+ if (!err)
+ err = i915_vma_move_to_active(scratch, rq, EXEC_OBJECT_WRITE);
+ i915_vma_unlock(scratch);
+
+ i915_request_get(rq);
+ i915_request_add(rq);
+ if (err) {
+ i915_request_put(rq);
+ rq = ERR_PTR(err);
+ }
+
+ return rq;
+}
+
+static int __live_lrc_gpr(struct intel_engine_cs *engine,
+ struct i915_vma *scratch,
+ bool preempt)
+{
+ u32 *slot = memset32(engine->status_page.addr + 1000, 0, 4);
+ struct intel_context *ce;
+ struct i915_request *rq;
+ u32 *cs;
+ int err;
+ int n;
+
+ if (INTEL_GEN(engine->i915) < 9 && engine->class != RENDER_CLASS)
+ return 0; /* GPR only on rcs0 for gen8 */
+
+ err = gpr_make_dirty(engine->kernel_context);
+ if (err)
+ return err;
+
+ ce = intel_context_create(engine);
+ if (IS_ERR(ce))
+ return PTR_ERR(ce);
+
+ rq = __gpr_read(ce, scratch, slot);
+ if (IS_ERR(rq)) {
+ err = PTR_ERR(rq);
+ goto err_put;
+ }
+
+ err = wait_for_submit(engine, rq, HZ / 2);
+ if (err)
+ goto err_rq;
+
+ if (preempt) {
+ err = gpr_make_dirty(engine->kernel_context);
+ if (err)
+ goto err_rq;
+
+ err = emit_semaphore_signal(engine->kernel_context, slot);
+ if (err)
+ goto err_rq;
+ } else {
+ slot[0] = 1;
+ wmb();
+ }
+
+ if (i915_request_wait(rq, 0, HZ / 5) < 0) {
+ err = -ETIME;
+ goto err_rq;
+ }
+
+ cs = i915_gem_object_pin_map(scratch->obj, I915_MAP_WB);
+ if (IS_ERR(cs)) {
+ err = PTR_ERR(cs);
+ goto err_rq;
+ }
+
+ for (n = 0; n < NUM_GPR_DW; n++) {
+ if (cs[n]) {
+ pr_err("%s: GPR[%d].%s was not zero, found 0x%08x!\n",
+ engine->name,
+ n / 2, n & 1 ? "udw" : "ldw",
+ cs[n]);
+ err = -EINVAL;
+ break;
+ }
+ }
+
+ i915_gem_object_unpin_map(scratch->obj);
+
+err_rq:
+ memset32(&slot[0], -1, 4);
+ wmb();
+ i915_request_put(rq);
+err_put:
+ intel_context_put(ce);
+ return err;
+}
+
+static int live_lrc_gpr(void *arg)
+{
+ struct intel_gt *gt = arg;
+ struct intel_engine_cs *engine;
+ struct i915_vma *scratch;
+ enum intel_engine_id id;
+ int err = 0;
+
+ /*
+ * Check that GPR registers are cleared in new contexts as we need
+ * to avoid leaking any information from previous contexts.
+ */
+
+ scratch = create_scratch(gt);
+ if (IS_ERR(scratch))
+ return PTR_ERR(scratch);
+
+ for_each_engine(engine, gt, id) {
+ st_engine_heartbeat_disable(engine);
+
+ err = __live_lrc_gpr(engine, scratch, false);
+ if (err)
+ goto err;
+
+ err = __live_lrc_gpr(engine, scratch, true);
+ if (err)
+ goto err;
+
+err:
+ st_engine_heartbeat_enable(engine);
+ if (igt_flush_test(gt->i915))
+ err = -EIO;
+ if (err)
+ break;
+ }
+
+ i915_vma_unpin_and_release(&scratch, 0);
+ return err;
+}
+
+static struct i915_request *
+create_timestamp(struct intel_context *ce, void *slot, int idx)
+{
+ const u32 offset =
+ i915_ggtt_offset(ce->engine->status_page.vma) +
+ offset_in_page(slot);
+ struct i915_request *rq;
+ u32 *cs;
+ int err;
+
+ rq = intel_context_create_request(ce);
+ if (IS_ERR(rq))
+ return rq;
+
+ cs = intel_ring_begin(rq, 10);
+ if (IS_ERR(cs)) {
+ err = PTR_ERR(cs);
+ goto err;
+ }
+
+ *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
+ *cs++ = MI_NOOP;
+
+ *cs++ = MI_SEMAPHORE_WAIT |
+ MI_SEMAPHORE_GLOBAL_GTT |
+ MI_SEMAPHORE_POLL |
+ MI_SEMAPHORE_SAD_NEQ_SDD;
+ *cs++ = 0;
+ *cs++ = offset;
+ *cs++ = 0;
+
+ *cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
+ *cs++ = i915_mmio_reg_offset(RING_CTX_TIMESTAMP(rq->engine->mmio_base));
+ *cs++ = offset + idx * sizeof(u32);
+ *cs++ = 0;
+
+ intel_ring_advance(rq, cs);
+
+ rq->sched.attr.priority = I915_PRIORITY_MASK;
+ err = 0;
+err:
+ i915_request_get(rq);
+ i915_request_add(rq);
+ if (err) {
+ i915_request_put(rq);
+ return ERR_PTR(err);
+ }
+
+ return rq;
+}
+
+struct lrc_timestamp {
+ struct intel_engine_cs *engine;
+ struct intel_context *ce[2];
+ u32 poison;
+};
+
+static bool timestamp_advanced(u32 start, u32 end)
+{
+ return (s32)(end - start) > 0;
+}
+
+static int __lrc_timestamp(const struct lrc_timestamp *arg, bool preempt)
+{
+ u32 *slot = memset32(arg->engine->status_page.addr + 1000, 0, 4);
+ struct i915_request *rq;
+ u32 timestamp;
+ int err = 0;
+
+ arg->ce[0]->lrc_reg_state[CTX_TIMESTAMP] = arg->poison;
+ rq = create_timestamp(arg->ce[0], slot, 1);
+ if (IS_ERR(rq))
+ return PTR_ERR(rq);
+
+ err = wait_for_submit(rq->engine, rq, HZ / 2);
+ if (err)
+ goto err;
+
+ if (preempt) {
+ arg->ce[1]->lrc_reg_state[CTX_TIMESTAMP] = 0xdeadbeef;
+ err = emit_semaphore_signal(arg->ce[1], slot);
+ if (err)
+ goto err;
+ } else {
+ slot[0] = 1;
+ wmb();
+ }
+
+ /* And wait for switch to kernel (to save our context to memory) */
+ err = context_flush(arg->ce[0], HZ / 2);
+ if (err)
+ goto err;
+
+ if (!timestamp_advanced(arg->poison, slot[1])) {
+ pr_err("%s(%s): invalid timestamp on restore, context:%x, request:%x\n",
+ arg->engine->name, preempt ? "preempt" : "simple",
+ arg->poison, slot[1]);
+ err = -EINVAL;
+ }
+
+ timestamp = READ_ONCE(arg->ce[0]->lrc_reg_state[CTX_TIMESTAMP]);
+ if (!timestamp_advanced(slot[1], timestamp)) {
+ pr_err("%s(%s): invalid timestamp on save, request:%x, context:%x\n",
+ arg->engine->name, preempt ? "preempt" : "simple",
+ slot[1], timestamp);
+ err = -EINVAL;
+ }
+
+err:
+ memset32(slot, -1, 4);
+ i915_request_put(rq);
+ return err;
+}
+
+static int live_lrc_timestamp(void *arg)
+{
+ struct lrc_timestamp data = {};
+ struct intel_gt *gt = arg;
+ enum intel_engine_id id;
+ const u32 poison[] = {
+ 0,
+ S32_MAX,
+ (u32)S32_MAX + 1,
+ U32_MAX,
+ };
+
+ /*
+ * We want to verify that the timestamp is saved and restore across
+ * context switches and is monotonic.
+ *
+ * So we do this with a little bit of LRC poisoning to check various
+ * boundary conditions, and see what happens if we preempt the context
+ * with a second request (carrying more poison into the timestamp).
+ */
+
+ for_each_engine(data.engine, gt, id) {
+ int i, err = 0;
+
+ st_engine_heartbeat_disable(data.engine);
+
+ for (i = 0; i < ARRAY_SIZE(data.ce); i++) {
+ struct intel_context *tmp;
+
+ tmp = intel_context_create(data.engine);
+ if (IS_ERR(tmp)) {
+ err = PTR_ERR(tmp);
+ goto err;
+ }
+
+ err = intel_context_pin(tmp);
+ if (err) {
+ intel_context_put(tmp);
+ goto err;
+ }
+
+ data.ce[i] = tmp;
+ }
+
+ for (i = 0; i < ARRAY_SIZE(poison); i++) {
+ data.poison = poison[i];
+
+ err = __lrc_timestamp(&data, false);
+ if (err)
+ break;
+
+ err = __lrc_timestamp(&data, true);
+ if (err)
+ break;
+ }
+
+err:
+ st_engine_heartbeat_enable(data.engine);
+ for (i = 0; i < ARRAY_SIZE(data.ce); i++) {
+ if (!data.ce[i])
+ break;
+
+ intel_context_unpin(data.ce[i]);
+ intel_context_put(data.ce[i]);
+ }
+
+ if (igt_flush_test(gt->i915))
+ err = -EIO;
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
+
+static struct i915_vma *
+create_user_vma(struct i915_address_space *vm, unsigned long size)
+{
+ struct drm_i915_gem_object *obj;
+ struct i915_vma *vma;
+ int err;
+
+ obj = i915_gem_object_create_internal(vm->i915, size);
+ if (IS_ERR(obj))
+ return ERR_CAST(obj);
+
+ vma = i915_vma_instance(obj, vm, NULL);
+ if (IS_ERR(vma)) {
+ i915_gem_object_put(obj);
+ return vma;
+ }
+
+ err = i915_vma_pin(vma, 0, 0, PIN_USER);
+ if (err) {
+ i915_gem_object_put(obj);
+ return ERR_PTR(err);
+ }
+
+ return vma;
+}
+
+static struct i915_vma *
+store_context(struct intel_context *ce, struct i915_vma *scratch)
+{
+ struct i915_vma *batch;
+ u32 dw, x, *cs, *hw;
+ u32 *defaults;
+
+ batch = create_user_vma(ce->vm, SZ_64K);
+ if (IS_ERR(batch))
+ return batch;
+
+ cs = i915_gem_object_pin_map(batch->obj, I915_MAP_WC);
+ if (IS_ERR(cs)) {
+ i915_vma_put(batch);
+ return ERR_CAST(cs);
+ }
+
+ defaults = shmem_pin_map(ce->engine->default_state);
+ if (!defaults) {
+ i915_gem_object_unpin_map(batch->obj);
+ i915_vma_put(batch);
+ return ERR_PTR(-ENOMEM);
+ }
+
+ x = 0;
+ dw = 0;
+ hw = defaults;
+ hw += LRC_STATE_OFFSET / sizeof(*hw);
+ do {
+ u32 len = hw[dw] & 0x7f;
+
+ if (hw[dw] == 0) {
+ dw++;
+ continue;
+ }
+
+ if ((hw[dw] & GENMASK(31, 23)) != MI_INSTR(0x22, 0)) {
+ dw += len + 2;
+ continue;
+ }
+
+ dw++;
+ len = (len + 1) / 2;
+ while (len--) {
+ *cs++ = MI_STORE_REGISTER_MEM_GEN8;
+ *cs++ = hw[dw];
+ *cs++ = lower_32_bits(scratch->node.start + x);
+ *cs++ = upper_32_bits(scratch->node.start + x);
+
+ dw += 2;
+ x += 4;
+ }
+ } while (dw < PAGE_SIZE / sizeof(u32) &&
+ (hw[dw] & ~BIT(0)) != MI_BATCH_BUFFER_END);
+
+ *cs++ = MI_BATCH_BUFFER_END;
+
+ shmem_unpin_map(ce->engine->default_state, defaults);
+
+ i915_gem_object_flush_map(batch->obj);
+ i915_gem_object_unpin_map(batch->obj);
+
+ return batch;
+}
+
+static int move_to_active(struct i915_request *rq,
+ struct i915_vma *vma,
+ unsigned int flags)
+{
+ int err;
+
+ i915_vma_lock(vma);
+ err = i915_request_await_object(rq, vma->obj, flags);
+ if (!err)
+ err = i915_vma_move_to_active(vma, rq, flags);
+ i915_vma_unlock(vma);
+
+ return err;
+}
+
+static struct i915_request *
+record_registers(struct intel_context *ce,
+ struct i915_vma *before,
+ struct i915_vma *after,
+ u32 *sema)
+{
+ struct i915_vma *b_before, *b_after;
+ struct i915_request *rq;
+ u32 *cs;
+ int err;
+
+ b_before = store_context(ce, before);
+ if (IS_ERR(b_before))
+ return ERR_CAST(b_before);
+
+ b_after = store_context(ce, after);
+ if (IS_ERR(b_after)) {
+ rq = ERR_CAST(b_after);
+ goto err_before;
+ }
+
+ rq = intel_context_create_request(ce);
+ if (IS_ERR(rq))
+ goto err_after;
+
+ err = move_to_active(rq, before, EXEC_OBJECT_WRITE);
+ if (err)
+ goto err_rq;
+
+ err = move_to_active(rq, b_before, 0);
+ if (err)
+ goto err_rq;
+
+ err = move_to_active(rq, after, EXEC_OBJECT_WRITE);
+ if (err)
+ goto err_rq;
+
+ err = move_to_active(rq, b_after, 0);
+ if (err)
+ goto err_rq;
+
+ cs = intel_ring_begin(rq, 14);
+ if (IS_ERR(cs)) {
+ err = PTR_ERR(cs);
+ goto err_rq;
+ }
+
+ *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
+ *cs++ = MI_BATCH_BUFFER_START_GEN8 | BIT(8);
+ *cs++ = lower_32_bits(b_before->node.start);
+ *cs++ = upper_32_bits(b_before->node.start);
+
+ *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
+ *cs++ = MI_SEMAPHORE_WAIT |
+ MI_SEMAPHORE_GLOBAL_GTT |
+ MI_SEMAPHORE_POLL |
+ MI_SEMAPHORE_SAD_NEQ_SDD;
+ *cs++ = 0;
+ *cs++ = i915_ggtt_offset(ce->engine->status_page.vma) +
+ offset_in_page(sema);
+ *cs++ = 0;
+ *cs++ = MI_NOOP;
+
+ *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
+ *cs++ = MI_BATCH_BUFFER_START_GEN8 | BIT(8);
+ *cs++ = lower_32_bits(b_after->node.start);
+ *cs++ = upper_32_bits(b_after->node.start);
+
+ intel_ring_advance(rq, cs);
+
+ WRITE_ONCE(*sema, 0);
+ i915_request_get(rq);
+ i915_request_add(rq);
+err_after:
+ i915_vma_put(b_after);
+err_before:
+ i915_vma_put(b_before);
+ return rq;
+
+err_rq:
+ i915_request_add(rq);
+ rq = ERR_PTR(err);
+ goto err_after;
+}
+
+static struct i915_vma *load_context(struct intel_context *ce, u32 poison)
+{
+ struct i915_vma *batch;
+ u32 dw, *cs, *hw;
+ u32 *defaults;
+
+ batch = create_user_vma(ce->vm, SZ_64K);
+ if (IS_ERR(batch))
+ return batch;
+
+ cs = i915_gem_object_pin_map(batch->obj, I915_MAP_WC);
+ if (IS_ERR(cs)) {
+ i915_vma_put(batch);
+ return ERR_CAST(cs);
+ }
+
+ defaults = shmem_pin_map(ce->engine->default_state);
+ if (!defaults) {
+ i915_gem_object_unpin_map(batch->obj);
+ i915_vma_put(batch);
+ return ERR_PTR(-ENOMEM);
+ }
+
+ dw = 0;
+ hw = defaults;
+ hw += LRC_STATE_OFFSET / sizeof(*hw);
+ do {
+ u32 len = hw[dw] & 0x7f;
+
+ if (hw[dw] == 0) {
+ dw++;
+ continue;
+ }
+
+ if ((hw[dw] & GENMASK(31, 23)) != MI_INSTR(0x22, 0)) {
+ dw += len + 2;
+ continue;
+ }
+
+ dw++;
+ len = (len + 1) / 2;
+ *cs++ = MI_LOAD_REGISTER_IMM(len);
+ while (len--) {
+ *cs++ = hw[dw];
+ *cs++ = poison;
+ dw += 2;
+ }
+ } while (dw < PAGE_SIZE / sizeof(u32) &&
+ (hw[dw] & ~BIT(0)) != MI_BATCH_BUFFER_END);
+
+ *cs++ = MI_BATCH_BUFFER_END;
+
+ shmem_unpin_map(ce->engine->default_state, defaults);
+
+ i915_gem_object_flush_map(batch->obj);
+ i915_gem_object_unpin_map(batch->obj);
+
+ return batch;
+}
+
+static int poison_registers(struct intel_context *ce, u32 poison, u32 *sema)
+{
+ struct i915_request *rq;
+ struct i915_vma *batch;
+ u32 *cs;
+ int err;
+
+ batch = load_context(ce, poison);
+ if (IS_ERR(batch))
+ return PTR_ERR(batch);
+
+ rq = intel_context_create_request(ce);
+ if (IS_ERR(rq)) {
+ err = PTR_ERR(rq);
+ goto err_batch;
+ }
+
+ err = move_to_active(rq, batch, 0);
+ if (err)
+ goto err_rq;
+
+ cs = intel_ring_begin(rq, 8);
+ if (IS_ERR(cs)) {
+ err = PTR_ERR(cs);
+ goto err_rq;
+ }
+
+ *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
+ *cs++ = MI_BATCH_BUFFER_START_GEN8 | BIT(8);
+ *cs++ = lower_32_bits(batch->node.start);
+ *cs++ = upper_32_bits(batch->node.start);
+
+ *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
+ *cs++ = i915_ggtt_offset(ce->engine->status_page.vma) +
+ offset_in_page(sema);
+ *cs++ = 0;
+ *cs++ = 1;
+
+ intel_ring_advance(rq, cs);
+
+ rq->sched.attr.priority = I915_PRIORITY_BARRIER;
+err_rq:
+ i915_request_add(rq);
+err_batch:
+ i915_vma_put(batch);
+ return err;
+}
+
+static bool is_moving(u32 a, u32 b)
+{
+ return a != b;
+}
+
+static int compare_isolation(struct intel_engine_cs *engine,
+ struct i915_vma *ref[2],
+ struct i915_vma *result[2],
+ struct intel_context *ce,
+ u32 poison)
+{
+ u32 x, dw, *hw, *lrc;
+ u32 *A[2], *B[2];
+ u32 *defaults;
+ int err = 0;
+
+ A[0] = i915_gem_object_pin_map(ref[0]->obj, I915_MAP_WC);
+ if (IS_ERR(A[0]))
+ return PTR_ERR(A[0]);
+
+ A[1] = i915_gem_object_pin_map(ref[1]->obj, I915_MAP_WC);
+ if (IS_ERR(A[1])) {
+ err = PTR_ERR(A[1]);
+ goto err_A0;
+ }
+
+ B[0] = i915_gem_object_pin_map(result[0]->obj, I915_MAP_WC);
+ if (IS_ERR(B[0])) {
+ err = PTR_ERR(B[0]);
+ goto err_A1;
+ }
+
+ B[1] = i915_gem_object_pin_map(result[1]->obj, I915_MAP_WC);
+ if (IS_ERR(B[1])) {
+ err = PTR_ERR(B[1]);
+ goto err_B0;
+ }
+
+ lrc = i915_gem_object_pin_map(ce->state->obj,
+ i915_coherent_map_type(engine->i915));
+ if (IS_ERR(lrc)) {
+ err = PTR_ERR(lrc);
+ goto err_B1;
+ }
+ lrc += LRC_STATE_OFFSET / sizeof(*hw);
+
+ defaults = shmem_pin_map(ce->engine->default_state);
+ if (!defaults) {
+ err = -ENOMEM;
+ goto err_lrc;
+ }
+
+ x = 0;
+ dw = 0;
+ hw = defaults;
+ hw += LRC_STATE_OFFSET / sizeof(*hw);
+ do {
+ u32 len = hw[dw] & 0x7f;
+
+ if (hw[dw] == 0) {
+ dw++;
+ continue;
+ }
+
+ if ((hw[dw] & GENMASK(31, 23)) != MI_INSTR(0x22, 0)) {
+ dw += len + 2;
+ continue;
+ }
+
+ dw++;
+ len = (len + 1) / 2;
+ while (len--) {
+ if (!is_moving(A[0][x], A[1][x]) &&
+ (A[0][x] != B[0][x] || A[1][x] != B[1][x])) {
+ switch (hw[dw] & 4095) {
+ case 0x30: /* RING_HEAD */
+ case 0x34: /* RING_TAIL */
+ break;
+
+ default:
+ pr_err("%s[%d]: Mismatch for register %4x, default %08x, reference %08x, result (%08x, %08x), poison %08x, context %08x\n",
+ engine->name, dw,
+ hw[dw], hw[dw + 1],
+ A[0][x], B[0][x], B[1][x],
+ poison, lrc[dw + 1]);
+ err = -EINVAL;
+ }
+ }
+ dw += 2;
+ x++;
+ }
+ } while (dw < PAGE_SIZE / sizeof(u32) &&
+ (hw[dw] & ~BIT(0)) != MI_BATCH_BUFFER_END);
+
+ shmem_unpin_map(ce->engine->default_state, defaults);
+err_lrc:
+ i915_gem_object_unpin_map(ce->state->obj);
+err_B1:
+ i915_gem_object_unpin_map(result[1]->obj);
+err_B0:
+ i915_gem_object_unpin_map(result[0]->obj);
+err_A1:
+ i915_gem_object_unpin_map(ref[1]->obj);
+err_A0:
+ i915_gem_object_unpin_map(ref[0]->obj);
+ return err;
+}
+
+static int __lrc_isolation(struct intel_engine_cs *engine, u32 poison)
+{
+ u32 *sema = memset32(engine->status_page.addr + 1000, 0, 1);
+ struct i915_vma *ref[2], *result[2];
+ struct intel_context *A, *B;
+ struct i915_request *rq;
+ int err;
+
+ A = intel_context_create(engine);
+ if (IS_ERR(A))
+ return PTR_ERR(A);
+
+ B = intel_context_create(engine);
+ if (IS_ERR(B)) {
+ err = PTR_ERR(B);
+ goto err_A;
+ }
+
+ ref[0] = create_user_vma(A->vm, SZ_64K);
+ if (IS_ERR(ref[0])) {
+ err = PTR_ERR(ref[0]);
+ goto err_B;
+ }
+
+ ref[1] = create_user_vma(A->vm, SZ_64K);
+ if (IS_ERR(ref[1])) {
+ err = PTR_ERR(ref[1]);
+ goto err_ref0;
+ }
+
+ rq = record_registers(A, ref[0], ref[1], sema);
+ if (IS_ERR(rq)) {
+ err = PTR_ERR(rq);
+ goto err_ref1;
+ }
+
+ WRITE_ONCE(*sema, 1);
+ wmb();
+
+ if (i915_request_wait(rq, 0, HZ / 2) < 0) {
+ i915_request_put(rq);
+ err = -ETIME;
+ goto err_ref1;
+ }
+ i915_request_put(rq);
+
+ result[0] = create_user_vma(A->vm, SZ_64K);
+ if (IS_ERR(result[0])) {
+ err = PTR_ERR(result[0]);
+ goto err_ref1;
+ }
+
+ result[1] = create_user_vma(A->vm, SZ_64K);
+ if (IS_ERR(result[1])) {
+ err = PTR_ERR(result[1]);
+ goto err_result0;
+ }
+
+ rq = record_registers(A, result[0], result[1], sema);
+ if (IS_ERR(rq)) {
+ err = PTR_ERR(rq);
+ goto err_result1;
+ }
+
+ err = poison_registers(B, poison, sema);
+ if (err) {
+ WRITE_ONCE(*sema, -1);
+ i915_request_put(rq);
+ goto err_result1;
+ }
+
+ if (i915_request_wait(rq, 0, HZ / 2) < 0) {
+ i915_request_put(rq);
+ err = -ETIME;
+ goto err_result1;
+ }
+ i915_request_put(rq);
+
+ err = compare_isolation(engine, ref, result, A, poison);
+
+err_result1:
+ i915_vma_put(result[1]);
+err_result0:
+ i915_vma_put(result[0]);
+err_ref1:
+ i915_vma_put(ref[1]);
+err_ref0:
+ i915_vma_put(ref[0]);
+err_B:
+ intel_context_put(B);
+err_A:
+ intel_context_put(A);
+ return err;
+}
+
+static bool skip_isolation(const struct intel_engine_cs *engine)
+{
+ if (engine->class == COPY_ENGINE_CLASS && INTEL_GEN(engine->i915) == 9)
+ return true;
+
+ if (engine->class == RENDER_CLASS && INTEL_GEN(engine->i915) == 11)
+ return true;
+
+ return false;
+}
+
+static int live_lrc_isolation(void *arg)
+{
+ struct intel_gt *gt = arg;
+ struct intel_engine_cs *engine;
+ enum intel_engine_id id;
+ const u32 poison[] = {
+ STACK_MAGIC,
+ 0x3a3a3a3a,
+ 0x5c5c5c5c,
+ 0xffffffff,
+ 0xffff0000,
+ };
+ int err = 0;
+
+ /*
+ * Our goal is try and verify that per-context state cannot be
+ * tampered with by another non-privileged client.
+ *
+ * We take the list of context registers from the LRI in the default
+ * context image and attempt to modify that list from a remote context.
+ */
+
+ for_each_engine(engine, gt, id) {
+ int i;
+
+ /* Just don't even ask */
+ if (!IS_ENABLED(CONFIG_DRM_I915_SELFTEST_BROKEN) &&
+ skip_isolation(engine))
+ continue;
+
+ intel_engine_pm_get(engine);
+ for (i = 0; i < ARRAY_SIZE(poison); i++) {
+ int result;
+
+ result = __lrc_isolation(engine, poison[i]);
+ if (result && !err)
+ err = result;
+
+ result = __lrc_isolation(engine, ~poison[i]);
+ if (result && !err)
+ err = result;
+ }
+ intel_engine_pm_put(engine);
+ if (igt_flush_test(gt->i915)) {
+ err = -EIO;
+ break;
+ }
+ }
+
+ return err;
+}
+
+static int indirect_ctx_submit_req(struct intel_context *ce)
+{
+ struct i915_request *rq;
+ int err = 0;
+
+ rq = intel_context_create_request(ce);
+ if (IS_ERR(rq))
+ return PTR_ERR(rq);
+
+ i915_request_get(rq);
+ i915_request_add(rq);
+
+ if (i915_request_wait(rq, 0, HZ / 5) < 0)
+ err = -ETIME;
+
+ i915_request_put(rq);
+
+ return err;
+}
+
+#define CTX_BB_CANARY_OFFSET (3 * 1024)
+#define CTX_BB_CANARY_INDEX (CTX_BB_CANARY_OFFSET / sizeof(u32))
+
+static u32 *
+emit_indirect_ctx_bb_canary(const struct intel_context *ce, u32 *cs)
+{
+ *cs++ = MI_STORE_REGISTER_MEM_GEN8 |
+ MI_SRM_LRM_GLOBAL_GTT |
+ MI_LRI_LRM_CS_MMIO;
+ *cs++ = i915_mmio_reg_offset(RING_START(0));
+ *cs++ = i915_ggtt_offset(ce->state) +
+ context_wa_bb_offset(ce) +
+ CTX_BB_CANARY_OFFSET;
+ *cs++ = 0;
+
+ return cs;
+}
+
+static void
+indirect_ctx_bb_setup(struct intel_context *ce)
+{
+ u32 *cs = context_indirect_bb(ce);
+
+ cs[CTX_BB_CANARY_INDEX] = 0xdeadf00d;
+
+ setup_indirect_ctx_bb(ce, ce->engine, emit_indirect_ctx_bb_canary);
+}
+
+static bool check_ring_start(struct intel_context *ce)
+{
+ const u32 * const ctx_bb = (void *)(ce->lrc_reg_state) -
+ LRC_STATE_OFFSET + context_wa_bb_offset(ce);
+
+ if (ctx_bb[CTX_BB_CANARY_INDEX] == ce->lrc_reg_state[CTX_RING_START])
+ return true;
+
+ pr_err("ring start mismatch: canary 0x%08x vs state 0x%08x\n",
+ ctx_bb[CTX_BB_CANARY_INDEX],
+ ce->lrc_reg_state[CTX_RING_START]);
+
+ return false;
+}
+
+static int indirect_ctx_bb_check(struct intel_context *ce)
+{
+ int err;
+
+ err = indirect_ctx_submit_req(ce);
+ if (err)
+ return err;
+
+ if (!check_ring_start(ce))
+ return -EINVAL;
+
+ return 0;
+}
+
+static int __live_lrc_indirect_ctx_bb(struct intel_engine_cs *engine)
+{
+ struct intel_context *a, *b;
+ int err;
+
+ a = intel_context_create(engine);
+ if (IS_ERR(a))
+ return PTR_ERR(a);
+ err = intel_context_pin(a);
+ if (err)
+ goto put_a;
+
+ b = intel_context_create(engine);
+ if (IS_ERR(b)) {
+ err = PTR_ERR(b);
+ goto unpin_a;
+ }
+ err = intel_context_pin(b);
+ if (err)
+ goto put_b;
+
+ /* We use the already reserved extra page in context state */
+ if (!a->wa_bb_page) {
+ GEM_BUG_ON(b->wa_bb_page);
+ GEM_BUG_ON(INTEL_GEN(engine->i915) == 12);
+ goto unpin_b;
+ }
+
+ /*
+ * In order to test that our per context bb is truly per context,
+ * and executes at the intended spot on context restoring process,
+ * make the batch store the ring start value to memory.
+ * As ring start is restored apriori of starting the indirect ctx bb and
+ * as it will be different for each context, it fits to this purpose.
+ */
+ indirect_ctx_bb_setup(a);
+ indirect_ctx_bb_setup(b);
+
+ err = indirect_ctx_bb_check(a);
+ if (err)
+ goto unpin_b;
+
+ err = indirect_ctx_bb_check(b);
+
+unpin_b:
+ intel_context_unpin(b);
+put_b:
+ intel_context_put(b);
+unpin_a:
+ intel_context_unpin(a);
+put_a:
+ intel_context_put(a);
+
+ return err;
+}
+
+static int live_lrc_indirect_ctx_bb(void *arg)
+{
+ struct intel_gt *gt = arg;
+ struct intel_engine_cs *engine;
+ enum intel_engine_id id;
+ int err = 0;
+
+ for_each_engine(engine, gt, id) {
+ intel_engine_pm_get(engine);
+ err = __live_lrc_indirect_ctx_bb(engine);
+ intel_engine_pm_put(engine);
+
+ if (igt_flush_test(gt->i915))
+ err = -EIO;
+
+ if (err)
+ break;
+ }
+
+ return err;
+}
+
+static void garbage_reset(struct intel_engine_cs *engine,
+ struct i915_request *rq)
+{
+ const unsigned int bit = I915_RESET_ENGINE + engine->id;
+ unsigned long *lock = &engine->gt->reset.flags;
+
+ if (test_and_set_bit(bit, lock))
+ return;
+
+ tasklet_disable(&engine->execlists.tasklet);
+
+ if (!rq->fence.error)
+ intel_engine_reset(engine, NULL);
+
+ tasklet_enable(&engine->execlists.tasklet);
+ clear_and_wake_up_bit(bit, lock);
+}
+
+static struct i915_request *garbage(struct intel_context *ce,
+ struct rnd_state *prng)
+{
+ struct i915_request *rq;
+ int err;
+
+ err = intel_context_pin(ce);
+ if (err)
+ return ERR_PTR(err);
+
+ prandom_bytes_state(prng,
+ ce->lrc_reg_state,
+ ce->engine->context_size -
+ LRC_STATE_OFFSET);
+
+ rq = intel_context_create_request(ce);
+ if (IS_ERR(rq)) {
+ err = PTR_ERR(rq);
+ goto err_unpin;
+ }
+
+ i915_request_get(rq);
+ i915_request_add(rq);
+ return rq;
+
+err_unpin:
+ intel_context_unpin(ce);
+ return ERR_PTR(err);
+}
+
+static int __lrc_garbage(struct intel_engine_cs *engine, struct rnd_state *prng)
+{
+ struct intel_context *ce;
+ struct i915_request *hang;
+ int err = 0;
+
+ ce = intel_context_create(engine);
+ if (IS_ERR(ce))
+ return PTR_ERR(ce);
+
+ hang = garbage(ce, prng);
+ if (IS_ERR(hang)) {
+ err = PTR_ERR(hang);
+ goto err_ce;
+ }
+
+ if (wait_for_submit(engine, hang, HZ / 2)) {
+ i915_request_put(hang);
+ err = -ETIME;
+ goto err_ce;
+ }
+
+ intel_context_set_banned(ce);
+ garbage_reset(engine, hang);
+
+ intel_engine_flush_submission(engine);
+ if (!hang->fence.error) {
+ i915_request_put(hang);
+ pr_err("%s: corrupted context was not reset\n",
+ engine->name);
+ err = -EINVAL;
+ goto err_ce;
+ }
+
+ if (i915_request_wait(hang, 0, HZ / 2) < 0) {
+ pr_err("%s: corrupted context did not recover\n",
+ engine->name);
+ i915_request_put(hang);
+ err = -EIO;
+ goto err_ce;
+ }
+ i915_request_put(hang);
+
+err_ce:
+ intel_context_put(ce);
+ return err;
+}
+
+static int live_lrc_garbage(void *arg)
+{
+ struct intel_gt *gt = arg;
+ struct intel_engine_cs *engine;
+ enum intel_engine_id id;
+
+ /*
+ * Verify that we can recover if one context state is completely
+ * corrupted.
+ */
+
+ if (!IS_ENABLED(CONFIG_DRM_I915_SELFTEST_BROKEN))
+ return 0;
+
+ for_each_engine(engine, gt, id) {
+ I915_RND_STATE(prng);
+ int err = 0, i;
+
+ if (!intel_has_reset_engine(engine->gt))
+ continue;
+
+ intel_engine_pm_get(engine);
+ for (i = 0; i < 3; i++) {
+ err = __lrc_garbage(engine, &prng);
+ if (err)
+ break;
+ }
+ intel_engine_pm_put(engine);
+
+ if (igt_flush_test(gt->i915))
+ err = -EIO;
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
+
+static int __live_pphwsp_runtime(struct intel_engine_cs *engine)
+{
+ struct intel_context *ce;
+ struct i915_request *rq;
+ IGT_TIMEOUT(end_time);
+ int err;
+
+ ce = intel_context_create(engine);
+ if (IS_ERR(ce))
+ return PTR_ERR(ce);
+
+ ce->runtime.num_underflow = 0;
+ ce->runtime.max_underflow = 0;
+
+ do {
+ unsigned int loop = 1024;
+
+ while (loop) {
+ rq = intel_context_create_request(ce);
+ if (IS_ERR(rq)) {
+ err = PTR_ERR(rq);
+ goto err_rq;
+ }
+
+ if (--loop == 0)
+ i915_request_get(rq);
+
+ i915_request_add(rq);
+ }
+
+ if (__igt_timeout(end_time, NULL))
+ break;
+
+ i915_request_put(rq);
+ } while (1);
+
+ err = i915_request_wait(rq, 0, HZ / 5);
+ if (err < 0) {
+ pr_err("%s: request not completed!\n", engine->name);
+ goto err_wait;
+ }
+
+ igt_flush_test(engine->i915);
+
+ pr_info("%s: pphwsp runtime %lluns, average %lluns\n",
+ engine->name,
+ intel_context_get_total_runtime_ns(ce),
+ intel_context_get_avg_runtime_ns(ce));
+
+ err = 0;
+ if (ce->runtime.num_underflow) {
+ pr_err("%s: pphwsp underflow %u time(s), max %u cycles!\n",
+ engine->name,
+ ce->runtime.num_underflow,
+ ce->runtime.max_underflow);
+ GEM_TRACE_DUMP();
+ err = -EOVERFLOW;
+ }
+
+err_wait:
+ i915_request_put(rq);
+err_rq:
+ intel_context_put(ce);
+ return err;
+}
+
+static int live_pphwsp_runtime(void *arg)
+{
+ struct intel_gt *gt = arg;
+ struct intel_engine_cs *engine;
+ enum intel_engine_id id;
+ int err = 0;
+
+ /*
+ * Check that cumulative context runtime as stored in the pphwsp[16]
+ * is monotonic.
+ */
+
+ for_each_engine(engine, gt, id) {
+ err = __live_pphwsp_runtime(engine);
+ if (err)
+ break;
+ }
+
+ if (igt_flush_test(gt->i915))
+ err = -EIO;
+
+ return err;
+}
+
+int intel_lrc_live_selftests(struct drm_i915_private *i915)
+{
+ static const struct i915_subtest tests[] = {
+ SUBTEST(live_lrc_layout),
+ SUBTEST(live_lrc_fixed),
+ SUBTEST(live_lrc_state),
+ SUBTEST(live_lrc_gpr),
+ SUBTEST(live_lrc_isolation),
+ SUBTEST(live_lrc_timestamp),
+ SUBTEST(live_lrc_garbage),
+ SUBTEST(live_pphwsp_runtime),
+ SUBTEST(live_lrc_indirect_ctx_bb),
+ };
+
+ if (!HAS_LOGICAL_RING_CONTEXTS(i915))
+ return 0;
+
+ return intel_gt_live_subtests(tests, &i915->gt);
+}
@@ -3,8 +3,8 @@
* Copyright © 2014-2019 Intel Corporation
*/
-#include "gt/intel_execlists_submission.h" /* lrc layout */
#include "gt/intel_gt.h"
+#include "gt/intel_lrc.h"
#include "intel_guc_ads.h"
#include "intel_uc.h"
#include "i915_drv.h"
@@ -11,7 +11,7 @@
#include "gt/intel_execlists_submission.h" /* XXX */
#include "gt/intel_gt.h"
#include "gt/intel_gt_pm.h"
-#include "gt/intel_lrc_reg.h"
+#include "gt/intel_lrc.h"
#include "gt/intel_ring.h"
#include "intel_guc_submission.h"
@@ -402,6 +402,28 @@ cancel_port_requests(struct intel_engine_execlists * const execlists)
memset(execlists->inflight, 0, sizeof(execlists->inflight));
}
+static void lrc_reset(struct intel_context *ce,
+ struct intel_engine_cs *engine,
+ u32 head,
+ bool scrub)
+{
+ GEM_BUG_ON(!intel_context_is_pinned(ce));
+
+ /*
+ * We want a simple context + ring to execute the breadcrumb update.
+ * We cannot rely on the context being intact across the GPU hang,
+ * so clear it and rebuild just what we need for the breadcrumb.
+ * All pending requests for this context will be zapped, and any
+ * future request will be after userspace has had the opportunity
+ * to recreate its own state.
+ */
+ if (scrub)
+ lrc_restore_defaults(ce, engine);
+
+ /* Rerun the request; its payload has been neutered (if guilty). */
+ lrc_update_regs(ce, engine, head);
+}
+
static void guc_reset_rewind(struct intel_engine_cs *engine, bool stalled)
{
struct intel_engine_execlists * const execlists = &engine->execlists;
@@ -421,7 +443,7 @@ static void guc_reset_rewind(struct intel_engine_cs *engine, bool stalled)
stalled = false;
__i915_request_reset(rq, stalled);
- intel_lr_context_reset(engine, rq->context, rq->head, stalled);
+ lrc_reset(rq->context, engine, rq->head, stalled);
out_unlock:
spin_unlock_irqrestore(&engine->active.lock, flags);
@@ -38,6 +38,7 @@
#include "gem/i915_gem_pm.h"
#include "gt/intel_context.h"
#include "gt/intel_execlists_submission.h"
+#include "gt/intel_lrc.h"
#include "gt/intel_ring.h"
#include "i915_drv.h"
@@ -201,7 +201,7 @@
#include "gt/intel_execlists_submission.h"
#include "gt/intel_gpu_commands.h"
#include "gt/intel_gt.h"
-#include "gt/intel_lrc_reg.h"
+#include "gt/intel_lrc.h"
#include "gt/intel_ring.h"
#include "i915_drv.h"