diff mbox series

[RFC] drm/i915/tgl: Gen12 csb support

Message ID 20190627203225.3427-1-daniele.ceraolospurio@intel.com (mailing list archive)
State New, archived
Headers show
Series [RFC] drm/i915/tgl: Gen12 csb support | expand

Commit Message

Daniele Ceraolo Spurio June 27, 2019, 8:32 p.m. UTC
The CSB format has been reworked for Gen12 to include information on
both the context we're switching away from and the context we're
switching to. After the change, some of the events don't have their
own bit anymore and need to be inferred from other values in the csb.
One of the context IDs (0x7FF) has also been reserved to indicate
the invalid ctx, i.e. engine idle.
To keep the logic simple, we can convert the new gen12 format to the
gen11 format and re-use the legacy csb handling logic. This result
in a sliglthly slower handling for gen12 but allows us to get running
with a relatively small change and avoids code duplication.

RFC: base TGL support [1] is not yet in the tree, but I wanted to get
some early comments on this because I'm not totally convinced that the
conversion is the best way of doing this. This way if another approach
is proposed I can do the work while the base support gets reviewed. If
we stick with the conversion we can probably optimize a bit.
Toughts?

[1]: https://patchwork.freedesktop.org/series/62726/

Bspec: 45555, 46144
Signed-off-by: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
Cc: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
---
 drivers/gpu/drm/i915/gt/intel_lrc.c | 93 ++++++++++++++++++++++++++++-
 1 file changed, 91 insertions(+), 2 deletions(-)

Comments

Chris Wilson June 27, 2019, 9:06 p.m. UTC | #1
Quoting Daniele Ceraolo Spurio (2019-06-27 21:32:25)
> The CSB format has been reworked for Gen12 to include information on
> both the context we're switching away from and the context we're
> switching to. After the change, some of the events don't have their
> own bit anymore and need to be inferred from other values in the csb.
> One of the context IDs (0x7FF) has also been reserved to indicate
> the invalid ctx, i.e. engine idle.
> To keep the logic simple, we can convert the new gen12 format to the
> gen11 format and re-use the legacy csb handling logic. This result
> in a sliglthly slower handling for gen12 but allows us to get running
> with a relatively small change and avoids code duplication.
> 
> RFC: base TGL support [1] is not yet in the tree, but I wanted to get
> some early comments on this because I'm not totally convinced that the
> conversion is the best way of doing this. This way if another approach
> is proposed I can do the work while the base support gets reviewed. If
> we stick with the conversion we can probably optimize a bit.
> Toughts?

We only require a few states and so I wonder if we should try to map to
the current internal states

switch (csb_parse_fn[gen](buf + i)) {
	case PREEMPT:
		...
		/* fallthrough */
	case PROMOTE:
		break;

	case COMPLETE:
		break;

	default: /* boring non-event */
		break;
}

I hope function pointers like that are not severely penalized nowadays.

Basically instead of compiling the mask, we have a sequence of check
and return. If we keep on logging the full CSB[] that is at least the
information we need to verify the events against our state tracking,
except that we push the effort out to the debugger to parse the event
details.
-Chris
diff mbox series

Patch

diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index 471e134de186..61cf016b8e15 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -164,6 +164,15 @@ 
 
 #define CTX_DESC_FORCE_RESTORE BIT_ULL(2)
 
+#define GEN12_CTX_STATUS_SWITCHED_TO_NEW_QUEUE	(0x1) /* lower csb dword */
+#define GEN12_CTX_SWITCH_DETAIL(csb_dw)	(csb_dw & 0xF) /* upper csb dword */
+#define  GEN12_CTX_COMPLETE		(0x0)
+#define  GEN12_CTX_PREEMPTED		(0x5)
+#define GEN12_CSB_CTX(csb_dw)		(((csb_dw) & GENMASK_ULL(31, 15)) >> 15)
+#define GEN12_CTX_SW_ID(ctx)		((ctx) & GENMASK(10, 0))
+#define GEN12_CTX_SW_COUNTER(ctx)	(((ctx) & GENMASK(16, 11)) >> 11)
+#define GEN12_CTX_INVALID_ID		0x7FF
+
 /* Typical size of the average request (2 pipecontrols and a MI_BB) */
 #define EXECLISTS_REQUEST_SIZE 64 /* bytes */
 #define WA_TAIL_DWORDS 2
@@ -1279,6 +1288,80 @@  reset_in_progress(const struct intel_engine_execlists *execlists)
 	return unlikely(!__tasklet_is_enabled(&execlists->tasklet));
 }
 
+/*
+ * Starting with Gen12, the csb has a new format:
+ *
+ * lower_dw:
+ *   bit  0:     switched to new queue
+ *   bit  1:     reserved
+ *   bits 3-5:   engine class
+ *   bits 6-11:  engine instance
+ *   bits 12-14: reserved
+ *   bits 15-25: sw context id of the lrc we're switching to
+ *   bits 26-31: sw counter of the lrc we're switching to
+ *
+ * upper_dw:
+ *   bits 0-3:   context switch detail
+ *                 0: ctx complete
+ *                 1: wait on sync flip
+ *                 2: wait on vblank
+ *                 3: wait on scanline
+ *                 4: wait on semaphore
+ *                 5: ctx preempted (not on SEMAPHORE_WAIT or WAIT_FOR_EVENT)
+ *   bit  4:     reserved
+ *   bits 5-11:  wait detail (for switch detail 1 to 4)
+ *   bits 12-14: reserved
+ *   bits 15-25: sw context id of the lrc we're switching away from
+ *   bits 26-31: sw counter of the lrc we're switching away from
+ *
+ * To keep the logic simple, we convert the new gen12 format to the gen11
+ * format and re-use the legacy csb handling logic. The upper 32 bits of the
+ * legacy status should be set to the upper 32 bits of the descriptor of the
+ * ctx we're switching away from, but since we don't use that value in our
+ * logic at the moment we skip that setting.
+ */
+static u32
+gen12_convert_csb(struct intel_engine_cs *engine, u32 lower_dw, u32 upper_dw)
+{
+	u32 legacy_status = 0;
+	u32 ctx_to = GEN12_CSB_CTX(lower_dw);
+	u32 ctx_away = GEN12_CSB_CTX(upper_dw);
+	u8 switch_detail = GEN12_CTX_SWITCH_DETAIL(upper_dw);
+	bool new_queue = lower_dw & GEN12_CTX_STATUS_SWITCHED_TO_NEW_QUEUE;
+	bool ctx_to_valid = GEN12_CTX_SW_ID(ctx_to) != GEN12_CTX_INVALID_ID;
+	bool ctx_away_valid = GEN12_CTX_SW_ID(ctx_away) != GEN12_CTX_INVALID_ID;
+
+	if (!ctx_away_valid && ctx_to_valid)
+		legacy_status |= GEN8_CTX_STATUS_IDLE_ACTIVE;
+
+	/*
+	 * ctx_away_valid && !ctx_to_valid can indicate either a ctx completion
+	 * or a preempt-to-idle of a running ctx. The 2 cases can be
+	 * differentiated by the value of new_queue, but since the
+	 * GEN8_CTX_STATUS_ACTIVE_IDLE is set in both situations we don't
+	 * differentiate between them here.
+	 */
+	if (ctx_away_valid && !ctx_to_valid)
+		legacy_status |= GEN8_CTX_STATUS_ACTIVE_IDLE;
+
+	if ((ctx_away == ctx_to) && ctx_to_valid)
+		legacy_status |= GEN8_CTX_STATUS_LITE_RESTORE;
+
+	if ((ctx_away != ctx_to) && ctx_away_valid && ctx_to_valid && !new_queue)
+		legacy_status |= GEN8_CTX_STATUS_ELEMENT_SWITCH;
+
+	if ((switch_detail == GEN12_CTX_PREEMPTED) || (new_queue && ctx_away_valid))
+		legacy_status |= GEN8_CTX_STATUS_PREEMPTED;
+
+	if ((switch_detail == GEN12_CTX_COMPLETE) && ctx_away_valid)
+		legacy_status |= GEN8_CTX_STATUS_COMPLETE;
+
+	/* add other events as required (e.g. semaphore wait) */
+	GEM_BUG_ON(!legacy_status);
+
+	return legacy_status;
+}
+
 static void process_csb(struct intel_engine_cs *engine)
 {
 	struct intel_engine_execlists * const execlists = &engine->execlists;
@@ -1316,7 +1399,7 @@  static void process_csb(struct intel_engine_cs *engine)
 	rmb();
 
 	do {
-		unsigned int status;
+		u32 status;
 
 		if (++head == num_entries)
 			head = 0;
@@ -1343,7 +1426,13 @@  static void process_csb(struct intel_engine_cs *engine)
 			  engine->name, head,
 			  buf[2 * head + 0], buf[2 * head + 1]);
 
-		status = buf[2 * head];
+		if (INTEL_GEN(engine->i915) >= 12)
+			status = gen12_convert_csb(engine, buf[2 * head], buf[2 * head + 1]);
+		else
+			status = buf[2 * head];
+
+		/* don't read from buf[] after this point */
+
 		if (status & GEN8_CTX_STATUS_IDLE_ACTIVE) {
 			GEM_BUG_ON(*execlists->active);
 promote: