diff mbox series

[v2,2/2] drm/i915/tgl: Gen12 csb support

Message ID 20190805231723.21060-2-daniele.ceraolospurio@intel.com (mailing list archive)
State New, archived
Headers show
Series [v2,1/2] drm/i915/tgl: add GEN12_MAX_CONTEXT_HW_ID | expand

Commit Message

Daniele Ceraolo Spurio Aug. 5, 2019, 11:17 p.m. UTC
The CSB format has been reworked for Gen12 to include information on
both the context we're switching away from and the context we're
switching to. After the change, some of the events don't have their
own bit anymore and need to be inferred from other values in the csb.
One of the context IDs (0x7FF) has also been reserved to indicate
the invalid ctx, i.e. engine idle.

Note that the full context ID includes the SW counter as well, but since
we currently only care if the context is valid or not we can ignore that
part.

v2: fix mask size, fix and expand comments (Tvrtko),
    use if-ladder (Chris)

Bspec: 45555, 46144
Signed-off-by: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
Cc: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
---
 drivers/gpu/drm/i915/gt/intel_lrc.c | 79 ++++++++++++++++++++++++++++-
 1 file changed, 78 insertions(+), 1 deletion(-)

Comments

Mika Kuoppala Aug. 6, 2019, 2:06 p.m. UTC | #1
Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com> writes:

> The CSB format has been reworked for Gen12 to include information on
> both the context we're switching away from and the context we're
> switching to. After the change, some of the events don't have their
> own bit anymore and need to be inferred from other values in the csb.
> One of the context IDs (0x7FF) has also been reserved to indicate
> the invalid ctx, i.e. engine idle.
>
> Note that the full context ID includes the SW counter as well, but since
> we currently only care if the context is valid or not we can ignore that
> part.
>
> v2: fix mask size, fix and expand comments (Tvrtko),
>     use if-ladder (Chris)
>
> Bspec: 45555, 46144
> Signed-off-by: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
> Cc: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
> Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
> ---
>  drivers/gpu/drm/i915/gt/intel_lrc.c | 79 ++++++++++++++++++++++++++++-
>  1 file changed, 78 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
> index 48b7114094f1..6cd756b15098 100644
> --- a/drivers/gpu/drm/i915/gt/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
> @@ -163,6 +163,13 @@
>  
>  #define CTX_DESC_FORCE_RESTORE BIT_ULL(2)
>  
> +#define GEN12_CTX_STATUS_SWITCHED_TO_NEW_QUEUE	(0x1) /* lower csb dword */
> +#define GEN12_CTX_SWITCH_DETAIL(csb_dw)	((csb_dw) & 0xF) /* upper csb dword */
> +#define GEN12_CSB_SW_CTX_ID_MASK		GENMASK(25, 15)
> +#define GEN12_IDLE_CTX_ID		0x7FF

Without looking at the bspec first, I thought FIELD_GET(GEN12_CSB_SW_CTX_ID_MASK, ~0)
would emphasize the mask size. But since the bspec itself uses 0x7FF, the
literal is well suited here too.

> +#define GEN12_CSB_CTX_VALID(csb_dw) \
> +	(FIELD_GET(GEN12_CSB_SW_CTX_ID_MASK, csb_dw) != GEN12_IDLE_CTX_ID)
> +
>  /* Typical size of the average request (2 pipecontrols and a MI_BB) */
>  #define EXECLISTS_REQUEST_SIZE 64 /* bytes */
>  #define WA_TAIL_DWORDS 2
> @@ -1326,6 +1333,69 @@ enum csb_step {
>  	CSB_COMPLETE,
>  };
>  
> +/*
> + * Starting with Gen12, the status has a new format:
> + *
> + *     bit  0:     switched to new queue
> + *     bit  1:     reserved
> + *     bit  2:     semaphore wait mode (poll or signal), only valid when
> + *                 switch detail is set to "wait on semaphore"
> + *     bits 3-5:   engine class
> + *     bits 6-11:  engine instance
> + *     bits 12-14: reserved
> + *     bits 15-25: sw context id of the lrc the GT switched to
> + *     bits 26-31: sw counter of the lrc the GT switched to
> + *     bits 32-35: context switch detail
> + *                  - 0: ctx complete
> + *                  - 1: wait on sync flip
> + *                  - 2: wait on vblank
> + *                  - 3: wait on scanline
> + *                  - 4: wait on semaphore
> + *                  - 5: context preempted (not on SEMAPHORE_WAIT or
> + *                       WAIT_FOR_EVENT)
> + *     bit  36:    reserved
> + *     bits 37-43: wait detail (for switch detail 1 to 4)
> + *     bits 44-46: reserved
> + *     bits 47-57: sw context id of the lrc the GT switched away from
> + *     bits 58-63: sw counter of the lrc the GT switched away from
> + */
> +static inline enum csb_step
> +gen12_csb_parse(const struct intel_engine_execlists *execlists, const u32 *csb)
> +{
> +	u32 lower_dw = csb[0];
> +	u32 upper_dw = csb[1];
> +	bool ctx_to_valid = GEN12_CSB_CTX_VALID(lower_dw);
> +	bool ctx_away_valid = GEN12_CSB_CTX_VALID(upper_dw);
> +	bool new_queue = lower_dw & GEN12_CTX_STATUS_SWITCHED_TO_NEW_QUEUE;
> +

For a longer, more complex function, the above could be consts to
relieve the reader's burden. Here, terse is just as good.

> +	if (!ctx_away_valid && ctx_to_valid)
> +		return CSB_PROMOTE;
> +
> +	/*
> +	 * The context switch detail is not guaranteed to be 5 when a preemption
> +	 * occurs, so we can't just check for that. The check below works for
> +	 * all the cases we care about, including preemptions of WAIT
> +	 * instructions and lite-restore. Preempt-to-idle via the CTRL register
> +	 * would require some extra handling, but we don't support that.
> +	 */
> +	if (new_queue && ctx_away_valid)
> +		return CSB_PREEMPT;
> +
> +	/*
> +	 * switch detail = 5 is covered by the case above and we do not expect a
> +	 * context switch on an unsuccessful wait instruction since we always
> +	 * use polling mode.
> +	 */
> +	GEM_BUG_ON(GEN12_CTX_SWITCH_DETAIL(upper_dw));
> +
> +	if (*execlists->active) {
> +		GEM_BUG_ON(!ctx_away_valid);
> +		return CSB_COMPLETE;
> +	}
> +
> +	return CSB_NOP;

I don't have a tgl to play with to try it out, but the comments are golden.

> +}
> +
>  static inline enum csb_step
>  csb_parse(const struct intel_engine_execlists *execlists, const u32 *csb)
>  {
> @@ -1380,6 +1450,8 @@ static void process_csb(struct intel_engine_cs *engine)
>  	rmb();
>  
>  	do {
> +		enum csb_step csb_step;
> +
>  		if (++head == num_entries)
>  			head = 0;
>  
> @@ -1405,7 +1477,12 @@ static void process_csb(struct intel_engine_cs *engine)
>  			  engine->name, head,
>  			  buf[2 * head + 0], buf[2 * head + 1]);
>  
> -		switch (csb_parse(execlists, buf + 2 * head)) {
> +		if (INTEL_GEN(engine->i915) >= 12)
> +			csb_step = gen12_csb_parse(execlists, buf + 2 * head);
> +		else
> +			csb_step = csb_parse(execlists, buf + 2 * head);

This could have been s/csb_parse/gen8_csb_parse to emphasize the
gen range, but I am not insisting.

I remember doing a patch where we read full 64bit at a time,
as we use upper part in tracing as well on gen < 12.
Could be that Chris didn't like it. But now with doing a csb
parsing function pointer for each gen range, it could fly.

Reviewed-by: Mika Kuoppala <mika.kuoppala@linux.intel.com>

> +
> +		switch (csb_step) {
>  		case CSB_PREEMPT: /* cancel old inflight, prepare for switch */
>  			trace_ports(execlists, "preempted", execlists->active);
>  
> -- 
> 2.22.0
Chris Wilson Aug. 6, 2019, 2:17 p.m. UTC | #2
Quoting Mika Kuoppala (2019-08-06 15:06:09)
> I remember doing a patch where we read full 64bit at a time,
> as we use upper part in tracing as well on gen < 12.
> Could be that Chris didn't like it. But now with doing a csb
> parsing function pointer for each gen range, it could fly.

Tracing doesn't exist in production, but if you can show me gcc prefers
one way or another, I'm easily convinced.

* reminder to self, get retpolines and see what happens
-Chris
diff mbox series

Patch

diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index 48b7114094f1..6cd756b15098 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -163,6 +163,13 @@ 
 
 #define CTX_DESC_FORCE_RESTORE BIT_ULL(2)
 
+#define GEN12_CTX_STATUS_SWITCHED_TO_NEW_QUEUE	(0x1) /* lower csb dword */
+#define GEN12_CTX_SWITCH_DETAIL(csb_dw)	((csb_dw) & 0xF) /* upper csb dword */
+#define GEN12_CSB_SW_CTX_ID_MASK		GENMASK(25, 15)
+#define GEN12_IDLE_CTX_ID		0x7FF
+#define GEN12_CSB_CTX_VALID(csb_dw) \
+	(FIELD_GET(GEN12_CSB_SW_CTX_ID_MASK, csb_dw) != GEN12_IDLE_CTX_ID)
+
 /* Typical size of the average request (2 pipecontrols and a MI_BB) */
 #define EXECLISTS_REQUEST_SIZE 64 /* bytes */
 #define WA_TAIL_DWORDS 2
@@ -1326,6 +1333,69 @@  enum csb_step {
 	CSB_COMPLETE,
 };
 
+/*
+ * Starting with Gen12, the status has a new format:
+ *
+ *     bit  0:     switched to new queue
+ *     bit  1:     reserved
+ *     bit  2:     semaphore wait mode (poll or signal), only valid when
+ *                 switch detail is set to "wait on semaphore"
+ *     bits 3-5:   engine class
+ *     bits 6-11:  engine instance
+ *     bits 12-14: reserved
+ *     bits 15-25: sw context id of the lrc the GT switched to
+ *     bits 26-31: sw counter of the lrc the GT switched to
+ *     bits 32-35: context switch detail
+ *                  - 0: ctx complete
+ *                  - 1: wait on sync flip
+ *                  - 2: wait on vblank
+ *                  - 3: wait on scanline
+ *                  - 4: wait on semaphore
+ *                  - 5: context preempted (not on SEMAPHORE_WAIT or
+ *                       WAIT_FOR_EVENT)
+ *     bit  36:    reserved
+ *     bits 37-43: wait detail (for switch detail 1 to 4)
+ *     bits 44-46: reserved
+ *     bits 47-57: sw context id of the lrc the GT switched away from
+ *     bits 58-63: sw counter of the lrc the GT switched away from
+ */
+static inline enum csb_step
+gen12_csb_parse(const struct intel_engine_execlists *execlists, const u32 *csb)
+{
+	u32 lower_dw = csb[0];
+	u32 upper_dw = csb[1];
+	bool ctx_to_valid = GEN12_CSB_CTX_VALID(lower_dw);
+	bool ctx_away_valid = GEN12_CSB_CTX_VALID(upper_dw);
+	bool new_queue = lower_dw & GEN12_CTX_STATUS_SWITCHED_TO_NEW_QUEUE;
+
+	if (!ctx_away_valid && ctx_to_valid)
+		return CSB_PROMOTE;
+
+	/*
+	 * The context switch detail is not guaranteed to be 5 when a preemption
+	 * occurs, so we can't just check for that. The check below works for
+	 * all the cases we care about, including preemptions of WAIT
+	 * instructions and lite-restore. Preempt-to-idle via the CTRL register
+	 * would require some extra handling, but we don't support that.
+	 */
+	if (new_queue && ctx_away_valid)
+		return CSB_PREEMPT;
+
+	/*
+	 * switch detail = 5 is covered by the case above and we do not expect a
+	 * context switch on an unsuccessful wait instruction since we always
+	 * use polling mode.
+	 */
+	GEM_BUG_ON(GEN12_CTX_SWITCH_DETAIL(upper_dw));
+
+	if (*execlists->active) {
+		GEM_BUG_ON(!ctx_away_valid);
+		return CSB_COMPLETE;
+	}
+
+	return CSB_NOP;
+}
+
 static inline enum csb_step
 csb_parse(const struct intel_engine_execlists *execlists, const u32 *csb)
 {
@@ -1380,6 +1450,8 @@  static void process_csb(struct intel_engine_cs *engine)
 	rmb();
 
 	do {
+		enum csb_step csb_step;
+
 		if (++head == num_entries)
 			head = 0;
 
@@ -1405,7 +1477,12 @@  static void process_csb(struct intel_engine_cs *engine)
 			  engine->name, head,
 			  buf[2 * head + 0], buf[2 * head + 1]);
 
-		switch (csb_parse(execlists, buf + 2 * head)) {
+		if (INTEL_GEN(engine->i915) >= 12)
+			csb_step = gen12_csb_parse(execlists, buf + 2 * head);
+		else
+			csb_step = csb_parse(execlists, buf + 2 * head);
+
+		switch (csb_step) {
 		case CSB_PREEMPT: /* cancel old inflight, prepare for switch */
 			trace_ports(execlists, "preempted", execlists->active);