diff mbox series

[04/12] drm/i915/execlists: Refactor CSB state machine

Message ID 20190701100502.15639-4-chris@chris-wilson.co.uk (mailing list archive)
State New, archived
Headers show
Series [01/12] drm/i915/guc: Avoid reclaim locks during reset | expand

Commit Message

Chris Wilson July 1, 2019, 10:04 a.m. UTC
Daniele pointed out that the CSB status information will change with
Tigerlake and suggested that we could rearrange our state machine to
hide the differences in generation. gcc also prefers the explicit state
machine, so make it so:

process_csb                                 1980    1967     -13

Suggested-by: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
Cc: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
---
 drivers/gpu/drm/i915/gt/intel_lrc.c | 64 ++++++++++++++++++++---------
 1 file changed, 44 insertions(+), 20 deletions(-)

Comments

Mika Kuoppala July 1, 2019, 11:49 a.m. UTC | #1
Chris Wilson <chris@chris-wilson.co.uk> writes:

> Daniele pointed out that the CSB status information will change with
> Tigerlake and suggested that we could rearrange our state machine to
> hide the differences in generation. gcc also prefers the explicit state
> machine, so make it so:
>
> process_csb                                 1980    1967     -13
>
> Suggested-by: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
> Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
> ---
>  drivers/gpu/drm/i915/gt/intel_lrc.c | 64 ++++++++++++++++++++---------
>  1 file changed, 44 insertions(+), 20 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
> index 471e134de186..953b3938a85f 100644
> --- a/drivers/gpu/drm/i915/gt/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
> @@ -1279,6 +1279,30 @@ reset_in_progress(const struct intel_engine_execlists *execlists)
>  	return unlikely(!__tasklet_is_enabled(&execlists->tasklet));
>  }
>  
> +enum csb_step {
> +	CSB_NOP,
> +	CSB_PROMOTE,
> +	CSB_PREEMPT,
> +	CSB_COMPLETE,
> +};
> +
> +static inline enum csb_step
> +csb_parse(const struct intel_engine_execlists *execlists, const u32 *csb)
> +{
> +	unsigned int status = *csb;

Could be const u32 as well (stylistic).

It just makes me wonder why you want to read the csb in here
and not at the call site.

Reviewed-by: Mika Kuoppala <mika.kuoppala@linux.intel.com>

> +
> +	if (status & GEN8_CTX_STATUS_IDLE_ACTIVE)
> +		return CSB_PROMOTE;
> +
> +	if (status & GEN8_CTX_STATUS_PREEMPTED)
> +		return CSB_PREEMPT;
> +
> +	if (*execlists->active)
> +		return CSB_COMPLETE;
> +
> +	return CSB_NOP;
> +}
> +
>  static void process_csb(struct intel_engine_cs *engine)
>  {
>  	struct intel_engine_execlists * const execlists = &engine->execlists;
> @@ -1316,8 +1340,6 @@ static void process_csb(struct intel_engine_cs *engine)
>  	rmb();
>  
>  	do {
> -		unsigned int status;
> -
>  		if (++head == num_entries)
>  			head = 0;
>  
> @@ -1343,10 +1365,16 @@ static void process_csb(struct intel_engine_cs *engine)
>  			  engine->name, head,
>  			  buf[2 * head + 0], buf[2 * head + 1]);
>  
> -		status = buf[2 * head];
> -		if (status & GEN8_CTX_STATUS_IDLE_ACTIVE) {
> +		switch (csb_parse(execlists, buf + 2 * head)) {
> +		case CSB_PREEMPT: /* cancel old inflight, prepare for switch */
> +			trace_ports(execlists, "preempted", execlists->active);
> +
> +			while (*execlists->active)
> +				execlists_schedule_out(*execlists->active++);
> +
> +			/* fallthrough */
> +		case CSB_PROMOTE: /* switch pending to inflight */
>  			GEM_BUG_ON(*execlists->active);
> -promote:
>  			GEM_BUG_ON(!assert_pending_valid(execlists, "promote"));
>  			execlists->active =
>  				memcpy(execlists->inflight,
> @@ -1355,25 +1383,17 @@ static void process_csb(struct intel_engine_cs *engine)
>  				       sizeof(*execlists->pending));
>  			execlists->pending[0] = NULL;
>  
> +			trace_ports(execlists, "promoted", execlists->active);
> +
>  			if (enable_timeslice(engine))
>  				mod_timer(&execlists->timer, jiffies + 1);
>  
>  			if (!inject_preempt_hang(execlists))
>  				ring_set_paused(engine, 0);
> -		} else if (status & GEN8_CTX_STATUS_PREEMPTED) {
> -			struct i915_request * const *port = execlists->active;
> -
> -			trace_ports(execlists, "preempted", execlists->active);
> -
> -			while (*port)
> -				execlists_schedule_out(*port++);
> -
> -			goto promote;
> -		} else if (*execlists->active) {
> -			struct i915_request *rq = *execlists->active++;
> +			break;
>  
> -			trace_ports(execlists, "completed",
> -				    execlists->active - 1);
> +		case CSB_COMPLETE: /* port0 completed, advanced to port1 */
> +			trace_ports(execlists, "completed", execlists->active);
>  
>  			/*
>  			 * We rely on the hardware being strongly
> @@ -1381,11 +1401,15 @@ static void process_csb(struct intel_engine_cs *engine)
>  			 * coherent (visible from the CPU) before the
>  			 * user interrupt and CSB is processed.
>  			 */
> -			GEM_BUG_ON(!i915_request_completed(rq));
> -			execlists_schedule_out(rq);
> +			GEM_BUG_ON(!i915_request_completed(*execlists->active));
> +			execlists_schedule_out(*execlists->active++);
>  
>  			GEM_BUG_ON(execlists->active - execlists->inflight >
>  				   execlists_num_ports(execlists));
> +			break;
> +
> +		case CSB_NOP:
> +			break;
>  		}
>  	} while (head != tail);
>  
> -- 
> 2.20.1
Chris Wilson July 1, 2019, 1:50 p.m. UTC | #2
Quoting Mika Kuoppala (2019-07-01 12:49:48)
> Chris Wilson <chris@chris-wilson.co.uk> writes:
> 
> > Daniele pointed out that the CSB status information will change with
> > Tigerlake and suggested that we could rearrange our state machine to
> > hide the differences in generation. gcc also prefers the explicit state
> > machine, so make it so:
> >
> > process_csb                                 1980    1967     -13
> >
> > Suggested-by: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > Cc: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
> > Cc: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
> > Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
> > ---
> >  drivers/gpu/drm/i915/gt/intel_lrc.c | 64 ++++++++++++++++++++---------
> >  1 file changed, 44 insertions(+), 20 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
> > index 471e134de186..953b3938a85f 100644
> > --- a/drivers/gpu/drm/i915/gt/intel_lrc.c
> > +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
> > @@ -1279,6 +1279,30 @@ reset_in_progress(const struct intel_engine_execlists *execlists)
> >       return unlikely(!__tasklet_is_enabled(&execlists->tasklet));
> >  }
> >  
> > +enum csb_step {
> > +     CSB_NOP,
> > +     CSB_PROMOTE,
> > +     CSB_PREEMPT,
> > +     CSB_COMPLETE,
> > +};
> > +
> > +static inline enum csb_step
> > +csb_parse(const struct intel_engine_execlists *execlists, const u32 *csb)
> > +{
> > +     unsigned int status = *csb;
> 
> Could be const u32 as well (stylistic).

No need to specify here, local register is fine, so left it as natural.

> Just makes me ponder why you want to read csb in here
> and not in the callsite.

Whatever gcc prefers when there are multiple csb_parsers. :)
-Chris
Daniele Ceraolo Spurio July 1, 2019, 6:28 p.m. UTC | #3
On 7/1/19 3:04 AM, Chris Wilson wrote:
> Daniele pointed out that the CSB status information will change with
> Tigerlake and suggested that we could rearrange our state machine to
> hide the differences in generation. gcc also prefers the explicit state
> machine, so make it so:
> 
> process_csb                                 1980    1967     -13
> 
> Suggested-by: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>

To be fair the suggestion came from you...

> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
> Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
> ---
>   drivers/gpu/drm/i915/gt/intel_lrc.c | 64 ++++++++++++++++++++---------
>   1 file changed, 44 insertions(+), 20 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
> index 471e134de186..953b3938a85f 100644
> --- a/drivers/gpu/drm/i915/gt/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
> @@ -1279,6 +1279,30 @@ reset_in_progress(const struct intel_engine_execlists *execlists)
>   	return unlikely(!__tasklet_is_enabled(&execlists->tasklet));
>   }
>   
> +enum csb_step {
> +	CSB_NOP,
> +	CSB_PROMOTE,
> +	CSB_PREEMPT,
> +	CSB_COMPLETE,
> +};
> +
> +static inline enum csb_step
> +csb_parse(const struct intel_engine_execlists *execlists, const u32 *csb)
> +{
> +	unsigned int status = *csb;
> +
> +	if (status & GEN8_CTX_STATUS_IDLE_ACTIVE)
> +		return CSB_PROMOTE;
> +
> +	if (status & GEN8_CTX_STATUS_PREEMPTED)
> +		return CSB_PREEMPT;
> +
> +	if (*execlists->active)
> +		return CSB_COMPLETE;

I think the CSB_COMPLETE case is going to be the same across the various 
csb parsers since we don't even look at the complete bit in the CSB, but 
I'm undecided if it'd indeed be cleaner to have it outside or not, e.g.:

	switch (csb_parse(...)) {
	case CSB_PREEMPT:
		[...]
	case CSB_PROMOTE:
		[...]
	default:
		if (!*execlists->active)
			break;
		[...]

we can reconsider when the TGL parser is added.

Reviewed-by: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>

> +
> +	return CSB_NOP;
> +}
> +
>   static void process_csb(struct intel_engine_cs *engine)
>   {
>   	struct intel_engine_execlists * const execlists = &engine->execlists;
> @@ -1316,8 +1340,6 @@ static void process_csb(struct intel_engine_cs *engine)
>   	rmb();
>   
>   	do {
> -		unsigned int status;
> -
>   		if (++head == num_entries)
>   			head = 0;
>   
> @@ -1343,10 +1365,16 @@ static void process_csb(struct intel_engine_cs *engine)
>   			  engine->name, head,
>   			  buf[2 * head + 0], buf[2 * head + 1]);
>   
> -		status = buf[2 * head];
> -		if (status & GEN8_CTX_STATUS_IDLE_ACTIVE) {
> +		switch (csb_parse(execlists, buf + 2 * head)) {
> +		case CSB_PREEMPT: /* cancel old inflight, prepare for switch */
> +			trace_ports(execlists, "preempted", execlists->active);
> +
> +			while (*execlists->active)
> +				execlists_schedule_out(*execlists->active++);
> +
> +			/* fallthrough */
> +		case CSB_PROMOTE: /* switch pending to inflight */
>   			GEM_BUG_ON(*execlists->active);
> -promote:
>   			GEM_BUG_ON(!assert_pending_valid(execlists, "promote"));
>   			execlists->active =
>   				memcpy(execlists->inflight,
> @@ -1355,25 +1383,17 @@ static void process_csb(struct intel_engine_cs *engine)
>   				       sizeof(*execlists->pending));
>   			execlists->pending[0] = NULL;
>   
> +			trace_ports(execlists, "promoted", execlists->active);
> +
>   			if (enable_timeslice(engine))
>   				mod_timer(&execlists->timer, jiffies + 1);
>   
>   			if (!inject_preempt_hang(execlists))
>   				ring_set_paused(engine, 0);
> -		} else if (status & GEN8_CTX_STATUS_PREEMPTED) {
> -			struct i915_request * const *port = execlists->active;
> -
> -			trace_ports(execlists, "preempted", execlists->active);
> -
> -			while (*port)
> -				execlists_schedule_out(*port++);
> -
> -			goto promote;
> -		} else if (*execlists->active) {
> -			struct i915_request *rq = *execlists->active++;
> +			break;
>   
> -			trace_ports(execlists, "completed",
> -				    execlists->active - 1);
> +		case CSB_COMPLETE: /* port0 completed, advanced to port1 */
> +			trace_ports(execlists, "completed", execlists->active);
>   
>   			/*
>   			 * We rely on the hardware being strongly
> @@ -1381,11 +1401,15 @@ static void process_csb(struct intel_engine_cs *engine)
>   			 * coherent (visible from the CPU) before the
>   			 * user interrupt and CSB is processed.
>   			 */
> -			GEM_BUG_ON(!i915_request_completed(rq));
> -			execlists_schedule_out(rq);
> +			GEM_BUG_ON(!i915_request_completed(*execlists->active));
> +			execlists_schedule_out(*execlists->active++);
>   
>   			GEM_BUG_ON(execlists->active - execlists->inflight >
>   				   execlists_num_ports(execlists));
> +			break;
> +
> +		case CSB_NOP:
> +			break;
>   		}
>   	} while (head != tail);
>   
>
Mika Kuoppala July 2, 2019, 8:36 a.m. UTC | #4
Chris Wilson <chris@chris-wilson.co.uk> writes:

> Quoting Mika Kuoppala (2019-07-01 12:49:48)
>> Chris Wilson <chris@chris-wilson.co.uk> writes:
>> 
>> > Daniele pointed out that the CSB status information will change with
>> > Tigerlake and suggested that we could rearrange our state machine to
>> > hide the differences in generation. gcc also prefers the explicit state
>> > machine, so make it so:
>> >
>> > process_csb                                 1980    1967     -13
>> >
>> > Suggested-by: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
>> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
>> > Cc: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
>> > Cc: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
>> > Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
>> > ---
>> >  drivers/gpu/drm/i915/gt/intel_lrc.c | 64 ++++++++++++++++++++---------
>> >  1 file changed, 44 insertions(+), 20 deletions(-)
>> >
>> > diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
>> > index 471e134de186..953b3938a85f 100644
>> > --- a/drivers/gpu/drm/i915/gt/intel_lrc.c
>> > +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
>> > @@ -1279,6 +1279,30 @@ reset_in_progress(const struct intel_engine_execlists *execlists)
>> >       return unlikely(!__tasklet_is_enabled(&execlists->tasklet));
>> >  }
>> >  
>> > +enum csb_step {
>> > +     CSB_NOP,
>> > +     CSB_PROMOTE,
>> > +     CSB_PREEMPT,
>> > +     CSB_COMPLETE,
>> > +};
>> > +
>> > +static inline enum csb_step
>> > +csb_parse(const struct intel_engine_execlists *execlists, const u32 *csb)
>> > +{
>> > +     unsigned int status = *csb;
>> 
>> Could be const u32 as well (stylistic).
>
> No need to specify here, local register is fine, so left it as natural.
>

In this case the function is small and obvious, so that is
why it is only stylistic.

But for a more complex one, it takes away the reviewer's
burden, as you can read something as const and
then the complexity tree you need to manage between
your ears shrinks when you read further down.

I can also remember at least a few cases where
it has prevented an unwanted accidental write
from propagating past the compiler.

>> Just makes me ponder why you want to read csb in here
>> and not in the callsite.
>
> Whatever gcc prefers when there are multiple csb_parsers. :)

It results in better generated code? Surely reason
enough.

-Mika
diff mbox series

Patch

diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index 471e134de186..953b3938a85f 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -1279,6 +1279,30 @@  reset_in_progress(const struct intel_engine_execlists *execlists)
 	return unlikely(!__tasklet_is_enabled(&execlists->tasklet));
 }
 
+enum csb_step {
+	CSB_NOP,
+	CSB_PROMOTE,
+	CSB_PREEMPT,
+	CSB_COMPLETE,
+};
+
+static inline enum csb_step
+csb_parse(const struct intel_engine_execlists *execlists, const u32 *csb)
+{
+	unsigned int status = *csb;
+
+	if (status & GEN8_CTX_STATUS_IDLE_ACTIVE)
+		return CSB_PROMOTE;
+
+	if (status & GEN8_CTX_STATUS_PREEMPTED)
+		return CSB_PREEMPT;
+
+	if (*execlists->active)
+		return CSB_COMPLETE;
+
+	return CSB_NOP;
+}
+
 static void process_csb(struct intel_engine_cs *engine)
 {
 	struct intel_engine_execlists * const execlists = &engine->execlists;
@@ -1316,8 +1340,6 @@  static void process_csb(struct intel_engine_cs *engine)
 	rmb();
 
 	do {
-		unsigned int status;
-
 		if (++head == num_entries)
 			head = 0;
 
@@ -1343,10 +1365,16 @@  static void process_csb(struct intel_engine_cs *engine)
 			  engine->name, head,
 			  buf[2 * head + 0], buf[2 * head + 1]);
 
-		status = buf[2 * head];
-		if (status & GEN8_CTX_STATUS_IDLE_ACTIVE) {
+		switch (csb_parse(execlists, buf + 2 * head)) {
+		case CSB_PREEMPT: /* cancel old inflight, prepare for switch */
+			trace_ports(execlists, "preempted", execlists->active);
+
+			while (*execlists->active)
+				execlists_schedule_out(*execlists->active++);
+
+			/* fallthrough */
+		case CSB_PROMOTE: /* switch pending to inflight */
 			GEM_BUG_ON(*execlists->active);
-promote:
 			GEM_BUG_ON(!assert_pending_valid(execlists, "promote"));
 			execlists->active =
 				memcpy(execlists->inflight,
@@ -1355,25 +1383,17 @@  static void process_csb(struct intel_engine_cs *engine)
 				       sizeof(*execlists->pending));
 			execlists->pending[0] = NULL;
 
+			trace_ports(execlists, "promoted", execlists->active);
+
 			if (enable_timeslice(engine))
 				mod_timer(&execlists->timer, jiffies + 1);
 
 			if (!inject_preempt_hang(execlists))
 				ring_set_paused(engine, 0);
-		} else if (status & GEN8_CTX_STATUS_PREEMPTED) {
-			struct i915_request * const *port = execlists->active;
-
-			trace_ports(execlists, "preempted", execlists->active);
-
-			while (*port)
-				execlists_schedule_out(*port++);
-
-			goto promote;
-		} else if (*execlists->active) {
-			struct i915_request *rq = *execlists->active++;
+			break;
 
-			trace_ports(execlists, "completed",
-				    execlists->active - 1);
+		case CSB_COMPLETE: /* port0 completed, advanced to port1 */
+			trace_ports(execlists, "completed", execlists->active);
 
 			/*
 			 * We rely on the hardware being strongly
@@ -1381,11 +1401,15 @@  static void process_csb(struct intel_engine_cs *engine)
 			 * coherent (visible from the CPU) before the
 			 * user interrupt and CSB is processed.
 			 */
-			GEM_BUG_ON(!i915_request_completed(rq));
-			execlists_schedule_out(rq);
+			GEM_BUG_ON(!i915_request_completed(*execlists->active));
+			execlists_schedule_out(*execlists->active++);
 
 			GEM_BUG_ON(execlists->active - execlists->inflight >
 				   execlists_num_ports(execlists));
+			break;
+
+		case CSB_NOP:
+			break;
 		}
 	} while (head != tail);