[01/11] drm/i915/execlists: Avoid kicking the submission too early for rescheduling
diff mbox

Message ID 20180326115044.2505-2-chris@chris-wilson.co.uk
State New
Headers show

Commit Message

Chris Wilson March 26, 2018, 11:50 a.m. UTC
If the request is still waiting on external fences, it has not yet been
submitted to the HW queue and so we can forgo kicking the submission
tasklet when re-evaluating its priority.

This should have no impact other than reducing the number of tasklet
wakeups under signal heavy workloads (e.g. switching between engines).

v2: Use prebaked container_of()

References: f6322eddaff7 ("drm/i915/preemption: Allow preemption between submission ports")
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Michał Winiarski <michal.winiarski@intel.com>
Cc: Michel Thierry <michel.thierry@intel.com>
Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/intel_lrc.c | 15 +++++++++++----
 1 file changed, 11 insertions(+), 4 deletions(-)

Comments

Mika Kuoppala March 27, 2018, 11:34 a.m. UTC | #1
Chris Wilson <chris@chris-wilson.co.uk> writes:

> If the request is still waiting on external fences, it has not yet been
> submitted to the HW queue and so we can forgo kicking the submission
> tasklet when re-evaluating its priority.
>
> This should have no impact other than reducing the number of tasklet
> wakeups under signal heavy workloads (e.g. switching between engines).
>
> v2: Use prebaked container_of()
>
> References: f6322eddaff7 ("drm/i915/preemption: Allow preemption between submission ports")
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Michał Winiarski <michal.winiarski@intel.com>
> Cc: Michel Thierry <michel.thierry@intel.com>
> Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> ---
>  drivers/gpu/drm/i915/intel_lrc.c | 15 +++++++++++----
>  1 file changed, 11 insertions(+), 4 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index b4ab06b05e58..104b69e0494f 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -1051,12 +1051,16 @@ static void queue_request(struct intel_engine_cs *engine,
>  	list_add_tail(&pt->link, &lookup_priolist(engine, pt, prio)->requests);
>  }
>  
> -static void submit_queue(struct intel_engine_cs *engine, int prio)
> +static void __submit_queue(struct intel_engine_cs *engine, int prio)
>  {
> -	if (prio > engine->execlists.queue_priority) {
>  		engine->execlists.queue_priority = prio;
>  		tasklet_hi_schedule(&engine->execlists.tasklet);
> -	}
> +}
> +
> +static void submit_queue(struct intel_engine_cs *engine, int prio)
> +{
> +	if (prio > engine->execlists.queue_priority)
> +		__submit_queue(engine, prio);

You did this...

>  }
>  
>  static void execlists_submit_request(struct i915_request *request)
> @@ -1189,7 +1193,10 @@ static void execlists_schedule(struct i915_request *request, int prio)
>  			__list_del_entry(&pt->link);
>  			queue_request(engine, pt, prio);
>  		}
> -		submit_queue(engine, prio);
> +
> +		if (prio > engine->execlists.queue_priority &&
> +		    i915_sw_fence_done(&pt_to_request(pt)->submit))
> +			__submit_queue(engine, prio);

..to have explicit priority comparison on submit callsite I gather.
Or is there some other reason?

-Mika

>  	}
>  
>  	spin_unlock_irq(&engine->timeline->lock);
> -- 
> 2.16.3
Mika Kuoppala March 27, 2018, 11:42 a.m. UTC | #2
Chris Wilson <chris@chris-wilson.co.uk> writes:

> If the request is still waiting on external fences, it has not yet been
> submitted to the HW queue and so we can forgo kicking the submission
> tasklet when re-evaluating its priority.
>
> This should have no impact other than reducing the number of tasklet
> wakeups under signal heavy workloads (e.g. switching between engines).
>
> v2: Use prebaked container_of()
>
> References: f6322eddaff7 ("drm/i915/preemption: Allow preemption between submission ports")
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Michał Winiarski <michal.winiarski@intel.com>
> Cc: Michel Thierry <michel.thierry@intel.com>
> Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> ---
>  drivers/gpu/drm/i915/intel_lrc.c | 15 +++++++++++----
>  1 file changed, 11 insertions(+), 4 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index b4ab06b05e58..104b69e0494f 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -1051,12 +1051,16 @@ static void queue_request(struct intel_engine_cs *engine,
>  	list_add_tail(&pt->link, &lookup_priolist(engine, pt, prio)->requests);
>  }
>  
> -static void submit_queue(struct intel_engine_cs *engine, int prio)
> +static void __submit_queue(struct intel_engine_cs *engine, int prio)
>  {
> -	if (prio > engine->execlists.queue_priority) {
>  		engine->execlists.queue_priority = prio;
>  		tasklet_hi_schedule(&engine->execlists.tasklet);

You need to fix indent in here.
-Mika


> -	}
> +}
> +
> +static void submit_queue(struct intel_engine_cs *engine, int prio)
> +{
> +	if (prio > engine->execlists.queue_priority)
> +		__submit_queue(engine, prio);
>  }
>  
>  static void execlists_submit_request(struct i915_request *request)
> @@ -1189,7 +1193,10 @@ static void execlists_schedule(struct i915_request *request, int prio)
>  			__list_del_entry(&pt->link);
>  			queue_request(engine, pt, prio);
>  		}
> -		submit_queue(engine, prio);
> +
> +		if (prio > engine->execlists.queue_priority &&
> +		    i915_sw_fence_done(&pt_to_request(pt)->submit))
> +			__submit_queue(engine, prio);
>  	}
>  
>  	spin_unlock_irq(&engine->timeline->lock);
> -- 
> 2.16.3
Chris Wilson March 27, 2018, 11:47 a.m. UTC | #3
Quoting Mika Kuoppala (2018-03-27 12:34:31)
> Chris Wilson <chris@chris-wilson.co.uk> writes:
> 
> > If the request is still waiting on external fences, it has not yet been
> > submitted to the HW queue and so we can forgo kicking the submission
> > tasklet when re-evaluating its priority.
> >
> > This should have no impact other than reducing the number of tasklet
> > wakeups under signal heavy workloads (e.g. switching between engines).
> >
> > v2: Use prebaked container_of()
> >
> > References: f6322eddaff7 ("drm/i915/preemption: Allow preemption between submission ports")
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > Cc: Michał Winiarski <michal.winiarski@intel.com>
> > Cc: Michel Thierry <michel.thierry@intel.com>
> > Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
> > Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> > ---
> >  drivers/gpu/drm/i915/intel_lrc.c | 15 +++++++++++----
> >  1 file changed, 11 insertions(+), 4 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> > index b4ab06b05e58..104b69e0494f 100644
> > --- a/drivers/gpu/drm/i915/intel_lrc.c
> > +++ b/drivers/gpu/drm/i915/intel_lrc.c
> > @@ -1051,12 +1051,16 @@ static void queue_request(struct intel_engine_cs *engine,
> >       list_add_tail(&pt->link, &lookup_priolist(engine, pt, prio)->requests);
> >  }
> >  
> > -static void submit_queue(struct intel_engine_cs *engine, int prio)
> > +static void __submit_queue(struct intel_engine_cs *engine, int prio)
> >  {
> > -     if (prio > engine->execlists.queue_priority) {
> >               engine->execlists.queue_priority = prio;
> >               tasklet_hi_schedule(&engine->execlists.tasklet);
> > -     }
> > +}
> > +
> > +static void submit_queue(struct intel_engine_cs *engine, int prio)
> > +{
> > +     if (prio > engine->execlists.queue_priority)
> > +             __submit_queue(engine, prio);
> 
> You did this...
> 
> >  }
> >  
> >  static void execlists_submit_request(struct i915_request *request)
> > @@ -1189,7 +1193,10 @@ static void execlists_schedule(struct i915_request *request, int prio)
> >                       __list_del_entry(&pt->link);
> >                       queue_request(engine, pt, prio);
> >               }
> > -             submit_queue(engine, prio);
> > +
> > +             if (prio > engine->execlists.queue_priority &&
> > +                 i915_sw_fence_done(&pt_to_request(pt)->submit))
> > +                     __submit_queue(engine, prio);
> 
> ..to have explicit priority comparison on submit callsite I gather.
> Or is there some other reason?

No, just because I wanted both checks in this case. On the other path
i915_sw_fence_done() isn't technically true as we are in process of
performing the i915_sw_fence callback, so just i915_sw_fence_signaled().
But we don't want to use i915_sw_fence_signaled() here as I don't want
to think about the race. :)

So since prio > engine.queue_priority should be cheaper than loading the
cacheline for the request->submit.flags, I wanted that tested first as
it will only fire, at most, once per engine.
-Chris
Mika Kuoppala March 27, 2018, 12:18 p.m. UTC | #4
Chris Wilson <chris@chris-wilson.co.uk> writes:

> Quoting Mika Kuoppala (2018-03-27 12:34:31)
>> Chris Wilson <chris@chris-wilson.co.uk> writes:
>> 
>> > If the request is still waiting on external fences, it has not yet been
>> > submitted to the HW queue and so we can forgo kicking the submission
>> > tasklet when re-evaluating its priority.
>> >
>> > This should have no impact other than reducing the number of tasklet
>> > wakeups under signal heavy workloads (e.g. switching between engines).
>> >
>> > v2: Use prebaked container_of()
>> >
>> > References: f6322eddaff7 ("drm/i915/preemption: Allow preemption between submission ports")
>> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
>> > Cc: Michał Winiarski <michal.winiarski@intel.com>
>> > Cc: Michel Thierry <michel.thierry@intel.com>
>> > Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
>> > Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>> > ---
>> >  drivers/gpu/drm/i915/intel_lrc.c | 15 +++++++++++----
>> >  1 file changed, 11 insertions(+), 4 deletions(-)
>> >
>> > diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
>> > index b4ab06b05e58..104b69e0494f 100644
>> > --- a/drivers/gpu/drm/i915/intel_lrc.c
>> > +++ b/drivers/gpu/drm/i915/intel_lrc.c
>> > @@ -1051,12 +1051,16 @@ static void queue_request(struct intel_engine_cs *engine,
>> >       list_add_tail(&pt->link, &lookup_priolist(engine, pt, prio)->requests);
>> >  }
>> >  
>> > -static void submit_queue(struct intel_engine_cs *engine, int prio)
>> > +static void __submit_queue(struct intel_engine_cs *engine, int prio)
>> >  {
>> > -     if (prio > engine->execlists.queue_priority) {
>> >               engine->execlists.queue_priority = prio;
>> >               tasklet_hi_schedule(&engine->execlists.tasklet);
>> > -     }
>> > +}
>> > +
>> > +static void submit_queue(struct intel_engine_cs *engine, int prio)
>> > +{
>> > +     if (prio > engine->execlists.queue_priority)
>> > +             __submit_queue(engine, prio);
>> 
>> You did this...
>> 
>> >  }
>> >  
>> >  static void execlists_submit_request(struct i915_request *request)
>> > @@ -1189,7 +1193,10 @@ static void execlists_schedule(struct i915_request *request, int prio)
>> >                       __list_del_entry(&pt->link);
>> >                       queue_request(engine, pt, prio);
>> >               }
>> > -             submit_queue(engine, prio);
>> > +
>> > +             if (prio > engine->execlists.queue_priority &&
>> > +                 i915_sw_fence_done(&pt_to_request(pt)->submit))
>> > +                     __submit_queue(engine, prio);
>> 
>> ..to have explicit priority comparison on submit callsite I gather.
>> Or is there some other reason?
>
> No, just because I wanted both checks in this case. On the other path
> i915_sw_fence_done() isn't technically true as we are in process of
> performing the i915_sw_fence callback, so just i915_sw_fence_signaled().
> But we don't want to use i915_sw_fence_signaled() here as I don't want
> to think about the race. :)
>
> So since prio > engine.queue_priority should be cheaper than loading the
> cacheline for the request->submit.flags, I wanted that tested first as
> it will only fire, at most, once per engine.

Ok, didn't see the perf angle of it but makes sense.

Reviewed-by: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Chris Wilson March 27, 2018, 1:34 p.m. UTC | #5
Quoting Mika Kuoppala (2018-03-27 13:18:06)
> Chris Wilson <chris@chris-wilson.co.uk> writes:
> 
> > Quoting Mika Kuoppala (2018-03-27 12:34:31)
> >> Chris Wilson <chris@chris-wilson.co.uk> writes:
> >> 
> >> > If the request is still waiting on external fences, it has not yet been
> >> > submitted to the HW queue and so we can forgo kicking the submission
> >> > tasklet when re-evaluating its priority.
> >> >
> >> > This should have no impact other than reducing the number of tasklet
> >> > wakeups under signal heavy workloads (e.g. switching between engines).
> >> >
> >> > v2: Use prebaked container_of()
> >> >
> >> > References: f6322eddaff7 ("drm/i915/preemption: Allow preemption between submission ports")
> >> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> >> > Cc: Michał Winiarski <michal.winiarski@intel.com>
> >> > Cc: Michel Thierry <michel.thierry@intel.com>
> >> > Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
> >> > Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> >> > ---
> >> >  drivers/gpu/drm/i915/intel_lrc.c | 15 +++++++++++----
> >> >  1 file changed, 11 insertions(+), 4 deletions(-)
> >> >
> >> > diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> >> > index b4ab06b05e58..104b69e0494f 100644
> >> > --- a/drivers/gpu/drm/i915/intel_lrc.c
> >> > +++ b/drivers/gpu/drm/i915/intel_lrc.c
> >> > @@ -1051,12 +1051,16 @@ static void queue_request(struct intel_engine_cs *engine,
> >> >       list_add_tail(&pt->link, &lookup_priolist(engine, pt, prio)->requests);
> >> >  }
> >> >  
> >> > -static void submit_queue(struct intel_engine_cs *engine, int prio)
> >> > +static void __submit_queue(struct intel_engine_cs *engine, int prio)
> >> >  {
> >> > -     if (prio > engine->execlists.queue_priority) {
> >> >               engine->execlists.queue_priority = prio;
> >> >               tasklet_hi_schedule(&engine->execlists.tasklet);
> >> > -     }
> >> > +}
> >> > +
> >> > +static void submit_queue(struct intel_engine_cs *engine, int prio)
> >> > +{
> >> > +     if (prio > engine->execlists.queue_priority)
> >> > +             __submit_queue(engine, prio);
> >> 
> >> You did this...
> >> 
> >> >  }
> >> >  
> >> >  static void execlists_submit_request(struct i915_request *request)
> >> > @@ -1189,7 +1193,10 @@ static void execlists_schedule(struct i915_request *request, int prio)
> >> >                       __list_del_entry(&pt->link);
> >> >                       queue_request(engine, pt, prio);
> >> >               }
> >> > -             submit_queue(engine, prio);
> >> > +
> >> > +             if (prio > engine->execlists.queue_priority &&
> >> > +                 i915_sw_fence_done(&pt_to_request(pt)->submit))
> >> > +                     __submit_queue(engine, prio);
> >> 
> >> ..to have explicit priority comparison on submit callsite I gather.
> >> Or is there some other reason?
> >
> > No, just because I wanted both checks in this case. On the other path
> > i915_sw_fence_done() isn't technically true as we are in process of
> > performing the i915_sw_fence callback, so just i915_sw_fence_signaled().
> > But we don't want to use i915_sw_fence_signaled() here as I don't want
> > to think about the race. :)
> >
> > So since prio > engine.queue_priority should be cheaper than loading the
> > cacheline for the request->submit.flags, I wanted that tested first as
> > it will only fire, at most, once per engine.
> 
> Ok, didn't see the perf angle of it but makes sense.
> 
> Reviewed-by: Mika Kuoppala <mika.kuoppala@linux.intel.com>

Fixed up the double indent and pushed. Thanks for the review, and
digging into i915_sw_fence :)
-Chris

Patch
diff mbox

diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index b4ab06b05e58..104b69e0494f 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -1051,12 +1051,16 @@  static void queue_request(struct intel_engine_cs *engine,
 	list_add_tail(&pt->link, &lookup_priolist(engine, pt, prio)->requests);
 }
 
-static void submit_queue(struct intel_engine_cs *engine, int prio)
+static void __submit_queue(struct intel_engine_cs *engine, int prio)
 {
-	if (prio > engine->execlists.queue_priority) {
 		engine->execlists.queue_priority = prio;
 		tasklet_hi_schedule(&engine->execlists.tasklet);
-	}
+}
+
+static void submit_queue(struct intel_engine_cs *engine, int prio)
+{
+	if (prio > engine->execlists.queue_priority)
+		__submit_queue(engine, prio);
 }
 
 static void execlists_submit_request(struct i915_request *request)
@@ -1189,7 +1193,10 @@  static void execlists_schedule(struct i915_request *request, int prio)
 			__list_del_entry(&pt->link);
 			queue_request(engine, pt, prio);
 		}
-		submit_queue(engine, prio);
+
+		if (prio > engine->execlists.queue_priority &&
+		    i915_sw_fence_done(&pt_to_request(pt)->submit))
+			__submit_queue(engine, prio);
 	}
 
 	spin_unlock_irq(&engine->timeline->lock);