[3/4] drm/i915/tgl: Gen12 csb support
diff mbox series

Message ID 20190731004902.34672-4-daniele.ceraolospurio@intel.com
State New
Headers show
Series
  • Initial TGL submission changes
Related show

Commit Message

Daniele Ceraolo Spurio July 31, 2019, 12:49 a.m. UTC
The CSB format has been reworked for Gen12 to include information on
both the context we're switching away from and the context we're
switching to. After the change, some of the events don't have their
own bit anymore and need to be inferred from other values in the csb.
One of the context IDs (0x7FF) has also been reserved to indicate
the invalid ctx, i.e. engine idle.

Note that the full context ID includes the SW counter as well, but since
we currently only care if the context is valid or not we can ignore that
part.

Bspec: 45555, 46144
Signed-off-by: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
Cc: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
---
 drivers/gpu/drm/i915/gt/intel_engine_types.h | 13 ++++
 drivers/gpu/drm/i915/gt/intel_lrc.c          | 73 +++++++++++++++++---
 2 files changed, 78 insertions(+), 8 deletions(-)

Comments

Tvrtko Ursulin July 31, 2019, 6:29 a.m. UTC | #1
On 31/07/2019 01:49, Daniele Ceraolo Spurio wrote:
> The CSB format has been reworked for Gen12 to include information on
> both the context we're switching away from and the context we're
> switching to. After the change, some of the events don't have their
> own bit anymore and need to be inferred from other values in the csb.
> One of the context IDs (0x7FF) has also been reserved to indicate
> the invalid ctx, i.e. engine idle.
> 
> Note that the full context ID includes the SW counter as well, but since
> we currently only care if the context is valid or not we can ignore that
> part.
> 
> Bspec: 45555, 46144
> Signed-off-by: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
> Cc: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
> Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
> ---
>   drivers/gpu/drm/i915/gt/intel_engine_types.h | 13 ++++
>   drivers/gpu/drm/i915/gt/intel_lrc.c          | 73 +++++++++++++++++---
>   2 files changed, 78 insertions(+), 8 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h
> index da61dd329210..98adc764d4f8 100644
> --- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
> +++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
> @@ -139,6 +139,13 @@ struct st_preempt_hang {
>   	bool inject_hang;
>   };
>   
> +enum intel_csb_step {
> +	CSB_NOP,
> +	CSB_PROMOTE,
> +	CSB_PREEMPT,
> +	CSB_COMPLETE,
> +};
> +
>   /**
>    * struct intel_engine_execlists - execlist submission queue and port state
>    *
> @@ -251,6 +258,12 @@ struct intel_engine_execlists {
>   	 */
>   	u8 csb_head;
>   
> +	/**
> +	 * @csb_parse: platform-specific function to parse the status buffer
> +	 */
> +	enum intel_csb_step
> +	(*csb_parse)(const struct intel_engine_execlists *, const u32 *csb);

Nitpick - inconsistent naming and not-naming function parameters.

> +
>   	I915_SELFTEST_DECLARE(struct st_preempt_hang preempt_hang;)
>   };
>   
> diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
> index c379184ac987..00afdcd71bd4 100644
> --- a/drivers/gpu/drm/i915/gt/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
> @@ -163,6 +163,13 @@
>   
>   #define CTX_DESC_FORCE_RESTORE BIT_ULL(2)
>   
> +#define GEN12_CTX_STATUS_SWITCHED_TO_NEW_QUEUE	(0x1) /* lower csb dword */
> +#define GEN12_CTX_SWITCH_DETAIL(csb_dw)	((csb_dw) & 0xF) /* upper csb dword */
> +#define GEN12_CSB_SW_CTX_ID_MASK		GENMASK_ULL(25, 15)

Does this need to be ULL? Seems to only been used on u32 types. Maybe it 
makes no practical difference...

> +#define GEN12_IDLE_CTX_ID		0x7FF
> +#define GEN12_CSB_CTX_VALID(csb_dw)	\
> +	(FIELD_GET(GEN12_CSB_SW_CTX_ID_MASK, csb_dw) != GEN12_IDLE_CTX_ID)
> +
>   /* Typical size of the average request (2 pipecontrols and a MI_BB) */
>   #define EXECLISTS_REQUEST_SIZE 64 /* bytes */
>   #define WA_TAIL_DWORDS 2
> @@ -1315,14 +1322,59 @@ reset_in_progress(const struct intel_engine_execlists *execlists)
>   	return unlikely(!__tasklet_is_enabled(&execlists->tasklet));
>   }
>   
> -enum csb_step {
> -	CSB_NOP,
> -	CSB_PROMOTE,
> -	CSB_PREEMPT,
> -	CSB_COMPLETE,
> -};
> +/*
> + * Starting with Gen12, the status has a new format:
> + *
> + *     bit  0:     switched to new queue
> + *     bit  1:     reserved
> + *     bit  2:     semaphore wait mode (poll or signal), Only valid when

Nitpick - random capitalized word in a sea of non-sentences.

> + *                 switch detail is set to "wait on semaphore"
> + *     bits 3-5:   engine class
> + *     bits 6-11:  engine instance
> + *     bits 12-14: reserved
> + *     bits 15-25: sw context id of the lrc we're switching to
> + *     bits 26-31: sw counter of the lrc we're switching to

I'd perhaps drop the "we are" language from here since it is not we 
(driver) but represents what GPU thinks is happening. "sw context id of 
the lrc GPU switched to"?

> + *     bits 32-35: context switch detail
> + *                  - 0: ctx complete
> + *                  - 1: wait on sync flip
> + *                  - 2: wait on vblank
> + *                  - 3: wait on scanline
> + *                  - 4: wait on semaphore
> + *                  - 5: context preempted (not on SEMAPHORE_WAIT or
> + *                       WAIT_FOR_EVENT)
> + *     bit  36:    reserved
> + *     bits 37-43: wait detail (for switch detail 1 to 4)
> + *     bits 44-46: reserved
> + *     bits 47-57: sw context id of the lrc we're switching away from
> + *     bits 58-63: sw counter of the lrc we're switching away from
> + */
> +static enum intel_csb_step
> +gen12_csb_parse(const struct intel_engine_execlists *execlists, const u32 *csb)
> +{
> +	u32 lower_dw = csb[0];
> +	u32 upper_dw = csb[1];
> +	bool ctx_to_valid = GEN12_CSB_CTX_VALID(lower_dw);
> +	bool ctx_away_valid = GEN12_CSB_CTX_VALID(upper_dw);
> +	bool new_queue = lower_dw & GEN12_CTX_STATUS_SWITCHED_TO_NEW_QUEUE;
> +
> +	if (!ctx_away_valid && ctx_to_valid)
> +		return CSB_PROMOTE;
> +
> +	if (new_queue && ctx_away_valid)
> +		return CSB_PREEMPT;
>   
> -static inline enum csb_step
> +	/* we do not expect a ctx switch on unsuccessful wait */

What is a wait in this context? Oh the wait ctx switch detail values. 
What about 5 = preempted? Guaranteed to be handled already with the 
"new_queue & ctx_away_valid" check? Should you add 
GEM_BUG_ON(GEN12_CTX_SWITCH_DETAIL(udw) != 5) on that branch? And expand 
the comment against this assert to explain the above?

> +	GEM_BUG_ON(GEN12_CTX_SWITCH_DETAIL(upper_dw));
> +
> +	if (*execlists->active) {
> +		GEM_BUG_ON(!ctx_away_valid);
> +		return CSB_COMPLETE;
> +	}
> +
> +	return CSB_NOP;
> +}
> +
> +static enum intel_csb_step
>   csb_parse(const struct intel_engine_execlists *execlists, const u32 *csb)
>   {
>   	unsigned int status = *csb;
> @@ -1401,7 +1453,7 @@ static void process_csb(struct intel_engine_cs *engine)
>   			  engine->name, head,
>   			  buf[2 * head + 0], buf[2 * head + 1]);
>   
> -		switch (csb_parse(execlists, buf + 2 * head)) {
> +		switch (execlists->csb_parse(execlists, buf + 2 * head)) {
>   		case CSB_PREEMPT: /* cancel old inflight, prepare for switch */
>   			trace_ports(execlists, "preempted", execlists->active);
>   
> @@ -2878,6 +2930,11 @@ int intel_execlists_submission_init(struct intel_engine_cs *engine)
>   	else
>   		execlists->csb_size = GEN11_CSB_ENTRIES;
>   
> +	if (INTEL_GEN(i915) >= 12)
> +		execlists->csb_parse = gen12_csb_parse;
> +	else
> +		execlists->csb_parse = csb_parse;
> +
>   	reset_csb_pointers(engine);
>   
>   	return 0;
> 

Looks simple. Do you have a reviewer who already knows how Gen12 CSB 
works or someone will need to read up on it to give a proper r-b?

Regards,

Tvrtko
Chris Wilson July 31, 2019, 7:33 a.m. UTC | #2
Quoting Daniele Ceraolo Spurio (2019-07-31 01:49:01)
> @@ -1401,7 +1453,7 @@ static void process_csb(struct intel_engine_cs *engine)
>                           engine->name, head,
>                           buf[2 * head + 0], buf[2 * head + 1]);
>  
> -               switch (csb_parse(execlists, buf + 2 * head)) {
> +               switch (execlists->csb_parse(execlists, buf + 2 * head)) {

So I worry about the cost of a retpoline here (tucked away inside an
irqs-off loop), and whether a local func avoids the retpoline or if we
just have to use an if-ladder.
-Chris
Daniele Ceraolo Spurio July 31, 2019, 5:33 p.m. UTC | #3
On 7/30/19 11:29 PM, Tvrtko Ursulin wrote:
> 
> On 31/07/2019 01:49, Daniele Ceraolo Spurio wrote:
>> The CSB format has been reworked for Gen12 to include information on
>> both the context we're switching away from and the context we're
>> switching to. After the change, some of the events don't have their
>> own bit anymore and need to be inferred from other values in the csb.
>> One of the context IDs (0x7FF) has also been reserved to indicate
>> the invalid ctx, i.e. engine idle.
>>
>> Note that the full context ID includes the SW counter as well, but since
>> we currently only care if the context is valid or not we can ignore that
>> part.
>>
>> Bspec: 45555, 46144
>> Signed-off-by: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
>> Cc: Chris Wilson <chris@chris-wilson.co.uk>
>> Cc: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
>> Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
>> ---
>>   drivers/gpu/drm/i915/gt/intel_engine_types.h | 13 ++++
>>   drivers/gpu/drm/i915/gt/intel_lrc.c          | 73 +++++++++++++++++---
>>   2 files changed, 78 insertions(+), 8 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h 
>> b/drivers/gpu/drm/i915/gt/intel_engine_types.h
>> index da61dd329210..98adc764d4f8 100644
>> --- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
>> +++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
>> @@ -139,6 +139,13 @@ struct st_preempt_hang {
>>       bool inject_hang;
>>   };
>> +enum intel_csb_step {
>> +    CSB_NOP,
>> +    CSB_PROMOTE,
>> +    CSB_PREEMPT,
>> +    CSB_COMPLETE,
>> +};
>> +
>>   /**
>>    * struct intel_engine_execlists - execlist submission queue and 
>> port state
>>    *
>> @@ -251,6 +258,12 @@ struct intel_engine_execlists {
>>        */
>>       u8 csb_head;
>> +    /**
>> +     * @csb_parse: platform-specific function to parse the status buffer
>> +     */
>> +    enum intel_csb_step
>> +    (*csb_parse)(const struct intel_engine_execlists *, const u32 *csb);
> 
> Nitpick - inconsistent naming and not-naming function parameters.
> 
>> +
>>       I915_SELFTEST_DECLARE(struct st_preempt_hang preempt_hang;)
>>   };
>> diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c 
>> b/drivers/gpu/drm/i915/gt/intel_lrc.c
>> index c379184ac987..00afdcd71bd4 100644
>> --- a/drivers/gpu/drm/i915/gt/intel_lrc.c
>> +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
>> @@ -163,6 +163,13 @@
>>   #define CTX_DESC_FORCE_RESTORE BIT_ULL(2)
>> +#define GEN12_CTX_STATUS_SWITCHED_TO_NEW_QUEUE    (0x1) /* lower csb 
>> dword */
>> +#define GEN12_CTX_SWITCH_DETAIL(csb_dw)    ((csb_dw) & 0xF) /* upper 
>> csb dword */
>> +#define GEN12_CSB_SW_CTX_ID_MASK        GENMASK_ULL(25, 15)
> 
> Does this need to be ULL? Seems to only been used on u32 types. Maybe it 
> makes no practical difference...

Nope, the RFC worked on the 64b status and so needed ULL, I forgot to 
scale it down when moving to working on the separate dwords.

> 
>> +#define GEN12_IDLE_CTX_ID        0x7FF
>> +#define GEN12_CSB_CTX_VALID(csb_dw)    \
>> +    (FIELD_GET(GEN12_CSB_SW_CTX_ID_MASK, csb_dw) != GEN12_IDLE_CTX_ID)
>> +
>>   /* Typical size of the average request (2 pipecontrols and a MI_BB) */
>>   #define EXECLISTS_REQUEST_SIZE 64 /* bytes */
>>   #define WA_TAIL_DWORDS 2
>> @@ -1315,14 +1322,59 @@ reset_in_progress(const struct 
>> intel_engine_execlists *execlists)
>>       return unlikely(!__tasklet_is_enabled(&execlists->tasklet));
>>   }
>> -enum csb_step {
>> -    CSB_NOP,
>> -    CSB_PROMOTE,
>> -    CSB_PREEMPT,
>> -    CSB_COMPLETE,
>> -};
>> +/*
>> + * Starting with Gen12, the status has a new format:
>> + *
>> + *     bit  0:     switched to new queue
>> + *     bit  1:     reserved
>> + *     bit  2:     semaphore wait mode (poll or signal), Only valid when
> 
> Nitpick - random capitalized word in a sea of non-sentences.
> 
>> + *                 switch detail is set to "wait on semaphore"
>> + *     bits 3-5:   engine class
>> + *     bits 6-11:  engine instance
>> + *     bits 12-14: reserved
>> + *     bits 15-25: sw context id of the lrc we're switching to
>> + *     bits 26-31: sw counter of the lrc we're switching to
> 
> I'd perhaps drop the "we are" language from here since it is not we 
> (driver) but represents what GPU thinks is happening. "sw context id of 
> the lrc GPU switched to"?
> 

ack

>> + *     bits 32-35: context switch detail
>> + *                  - 0: ctx complete
>> + *                  - 1: wait on sync flip
>> + *                  - 2: wait on vblank
>> + *                  - 3: wait on scanline
>> + *                  - 4: wait on semaphore
>> + *                  - 5: context preempted (not on SEMAPHORE_WAIT or
>> + *                       WAIT_FOR_EVENT)
>> + *     bit  36:    reserved
>> + *     bits 37-43: wait detail (for switch detail 1 to 4)
>> + *     bits 44-46: reserved
>> + *     bits 47-57: sw context id of the lrc we're switching away from
>> + *     bits 58-63: sw counter of the lrc we're switching away from
>> + */
>> +static enum intel_csb_step
>> +gen12_csb_parse(const struct intel_engine_execlists *execlists, const 
>> u32 *csb)
>> +{
>> +    u32 lower_dw = csb[0];
>> +    u32 upper_dw = csb[1];
>> +    bool ctx_to_valid = GEN12_CSB_CTX_VALID(lower_dw);
>> +    bool ctx_away_valid = GEN12_CSB_CTX_VALID(upper_dw);
>> +    bool new_queue = lower_dw & GEN12_CTX_STATUS_SWITCHED_TO_NEW_QUEUE;
>> +
>> +    if (!ctx_away_valid && ctx_to_valid)
>> +        return CSB_PROMOTE;
>> +
>> +    if (new_queue && ctx_away_valid)
>> +        return CSB_PREEMPT;
>> -static inline enum csb_step
>> +    /* we do not expect a ctx switch on unsuccessful wait */
> 
> What is a wait in this context? Oh the wait ctx switch detail values. 

This is the case where the context switches out on a WAIT_FOR_EVENT or 
SEMAPHORE_WAIT because the instruction has been set to switch out of the 
condition is not satisfied (i.e. not polling mode)

> What about 5 = preempted? Guaranteed to be handled already with the 
> "new_queue & ctx_away_valid" check? Should you add 
> GEM_BUG_ON(GEN12_CTX_SWITCH_DETAIL(udw) != 5) on that branch? And expand 
> the comment against this assert to explain the above?
> 

It should be guaranteed by the case above, but we can't add a BUG_ON 
above because when preempting a polling semaphore we get switch_detail = 
4 and on a lite restore the switch_detail is undefined.

>> +    GEM_BUG_ON(GEN12_CTX_SWITCH_DETAIL(upper_dw));
>> +
>> +    if (*execlists->active) {
>> +        GEM_BUG_ON(!ctx_away_valid);
>> +        return CSB_COMPLETE;
>> +    }
>> +
>> +    return CSB_NOP;
>> +}
>> +
>> +static enum intel_csb_step
>>   csb_parse(const struct intel_engine_execlists *execlists, const u32 
>> *csb)
>>   {
>>       unsigned int status = *csb;
>> @@ -1401,7 +1453,7 @@ static void process_csb(struct intel_engine_cs 
>> *engine)
>>                 engine->name, head,
>>                 buf[2 * head + 0], buf[2 * head + 1]);
>> -        switch (csb_parse(execlists, buf + 2 * head)) {
>> +        switch (execlists->csb_parse(execlists, buf + 2 * head)) {
>>           case CSB_PREEMPT: /* cancel old inflight, prepare for switch */
>>               trace_ports(execlists, "preempted", execlists->active);
>> @@ -2878,6 +2930,11 @@ int intel_execlists_submission_init(struct 
>> intel_engine_cs *engine)
>>       else
>>           execlists->csb_size = GEN11_CSB_ENTRIES;
>> +    if (INTEL_GEN(i915) >= 12)
>> +        execlists->csb_parse = gen12_csb_parse;
>> +    else
>> +        execlists->csb_parse = csb_parse;
>> +
>>       reset_csb_pointers(engine);
>>       return 0;
>>
> 
> Looks simple. Do you have a reviewer who already knows how Gen12 CSB 
> works or someone will need to read up on it to give a proper r-b?

Unfortunately I need a new reviewer (Michel was the one with the 
knowledge). Are you volunteering? :)

Daniele

> 
> Regards,
> 
> Tvrtko
Daniele Ceraolo Spurio July 31, 2019, 8:18 p.m. UTC | #4
On 7/31/19 12:33 AM, Chris Wilson wrote:
> Quoting Daniele Ceraolo Spurio (2019-07-31 01:49:01)
>> @@ -1401,7 +1453,7 @@ static void process_csb(struct intel_engine_cs *engine)
>>                            engine->name, head,
>>                            buf[2 * head + 0], buf[2 * head + 1]);
>>   
>> -               switch (csb_parse(execlists, buf + 2 * head)) {
>> +               switch (execlists->csb_parse(execlists, buf + 2 * head)) {
> 
> So I worry about the cost of a retpoline here (tucked away inside an
> irqs-off loop), and whether a local func avoids the retpoline or if we
> just have to use an if-ladder.
> -Chris
> 

I've tried with:

static enum intel_csb_step
(*csb_parse[])(const struct intel_engine_execlists *, const u32 *) = {
	[CSB_GEN8] = gen8_csb_parse,
	[CSB_GEN12] = gen12_csb_parse,
};

switch (csb_parse[execlists->csb_format](..))

But AFAICS from the objdump the assembly code generated with GCC 8.3 and 
CONFIG_RETPOLINE=y is more or less the same, there is just 2 extra mov 
instructions when using the array of functions. I can't spot the 
retpoline in neither case thought, so not sure if I'm missing something. 
Should I just go with an if-else?

Daniele

Patch
diff mbox series

diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h
index da61dd329210..98adc764d4f8 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
@@ -139,6 +139,13 @@  struct st_preempt_hang {
 	bool inject_hang;
 };
 
+enum intel_csb_step {
+	CSB_NOP,
+	CSB_PROMOTE,
+	CSB_PREEMPT,
+	CSB_COMPLETE,
+};
+
 /**
  * struct intel_engine_execlists - execlist submission queue and port state
  *
@@ -251,6 +258,12 @@  struct intel_engine_execlists {
 	 */
 	u8 csb_head;
 
+	/**
+	 * @csb_parse: platform-specific function to parse the status buffer
+	 */
+	enum intel_csb_step
+	(*csb_parse)(const struct intel_engine_execlists *, const u32 *csb);
+
 	I915_SELFTEST_DECLARE(struct st_preempt_hang preempt_hang;)
 };
 
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index c379184ac987..00afdcd71bd4 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -163,6 +163,13 @@ 
 
 #define CTX_DESC_FORCE_RESTORE BIT_ULL(2)
 
+#define GEN12_CTX_STATUS_SWITCHED_TO_NEW_QUEUE	(0x1) /* lower csb dword */
+#define GEN12_CTX_SWITCH_DETAIL(csb_dw)	((csb_dw) & 0xF) /* upper csb dword */
+#define GEN12_CSB_SW_CTX_ID_MASK		GENMASK_ULL(25, 15)
+#define GEN12_IDLE_CTX_ID		0x7FF
+#define GEN12_CSB_CTX_VALID(csb_dw)	\
+	(FIELD_GET(GEN12_CSB_SW_CTX_ID_MASK, csb_dw) != GEN12_IDLE_CTX_ID)
+
 /* Typical size of the average request (2 pipecontrols and a MI_BB) */
 #define EXECLISTS_REQUEST_SIZE 64 /* bytes */
 #define WA_TAIL_DWORDS 2
@@ -1315,14 +1322,59 @@  reset_in_progress(const struct intel_engine_execlists *execlists)
 	return unlikely(!__tasklet_is_enabled(&execlists->tasklet));
 }
 
-enum csb_step {
-	CSB_NOP,
-	CSB_PROMOTE,
-	CSB_PREEMPT,
-	CSB_COMPLETE,
-};
+/*
+ * Starting with Gen12, the status has a new format:
+ *
+ *     bit  0:     switched to new queue
+ *     bit  1:     reserved
+ *     bit  2:     semaphore wait mode (poll or signal), Only valid when
+ *                 switch detail is set to "wait on semaphore"
+ *     bits 3-5:   engine class
+ *     bits 6-11:  engine instance
+ *     bits 12-14: reserved
+ *     bits 15-25: sw context id of the lrc we're switching to
+ *     bits 26-31: sw counter of the lrc we're switching to
+ *     bits 32-35: context switch detail
+ *                  - 0: ctx complete
+ *                  - 1: wait on sync flip
+ *                  - 2: wait on vblank
+ *                  - 3: wait on scanline
+ *                  - 4: wait on semaphore
+ *                  - 5: context preempted (not on SEMAPHORE_WAIT or
+ *                       WAIT_FOR_EVENT)
+ *     bit  36:    reserved
+ *     bits 37-43: wait detail (for switch detail 1 to 4)
+ *     bits 44-46: reserved
+ *     bits 47-57: sw context id of the lrc we're switching away from
+ *     bits 58-63: sw counter of the lrc we're switching away from
+ */
+static enum intel_csb_step
+gen12_csb_parse(const struct intel_engine_execlists *execlists, const u32 *csb)
+{
+	u32 lower_dw = csb[0];
+	u32 upper_dw = csb[1];
+	bool ctx_to_valid = GEN12_CSB_CTX_VALID(lower_dw);
+	bool ctx_away_valid = GEN12_CSB_CTX_VALID(upper_dw);
+	bool new_queue = lower_dw & GEN12_CTX_STATUS_SWITCHED_TO_NEW_QUEUE;
+
+	if (!ctx_away_valid && ctx_to_valid)
+		return CSB_PROMOTE;
+
+	if (new_queue && ctx_away_valid)
+		return CSB_PREEMPT;
 
-static inline enum csb_step
+	/* we do not expect a ctx switch on unsuccessful wait */
+	GEM_BUG_ON(GEN12_CTX_SWITCH_DETAIL(upper_dw));
+
+	if (*execlists->active) {
+		GEM_BUG_ON(!ctx_away_valid);
+		return CSB_COMPLETE;
+	}
+
+	return CSB_NOP;
+}
+
+static enum intel_csb_step
 csb_parse(const struct intel_engine_execlists *execlists, const u32 *csb)
 {
 	unsigned int status = *csb;
@@ -1401,7 +1453,7 @@  static void process_csb(struct intel_engine_cs *engine)
 			  engine->name, head,
 			  buf[2 * head + 0], buf[2 * head + 1]);
 
-		switch (csb_parse(execlists, buf + 2 * head)) {
+		switch (execlists->csb_parse(execlists, buf + 2 * head)) {
 		case CSB_PREEMPT: /* cancel old inflight, prepare for switch */
 			trace_ports(execlists, "preempted", execlists->active);
 
@@ -2878,6 +2930,11 @@  int intel_execlists_submission_init(struct intel_engine_cs *engine)
 	else
 		execlists->csb_size = GEN11_CSB_ENTRIES;
 
+	if (INTEL_GEN(i915) >= 12)
+		execlists->csb_parse = gen12_csb_parse;
+	else
+		execlists->csb_parse = csb_parse;
+
 	reset_csb_pointers(engine);
 
 	return 0;