diff mbox series

[15/40] drm/i915: Priority boost for new clients

Message ID 20180919195544.1511-15-chris@chris-wilson.co.uk (mailing list archive)
State New, archived
Headers show
Series [01/40] drm: Use default dma_fence hooks where possible for null syncobj | expand

Commit Message

Chris Wilson Sept. 19, 2018, 7:55 p.m. UTC
Taken from an idea used for FQ_CODEL, we give the first request of a
new request flows a small priority boost. These flows are likely to
correspond with short, interactive tasks and so be more latency sensitive
than the longer free running queues. As soon as the client has more than
one request in the queue, further requests are not boosted and it settles
down into ordinary steady state behaviour.  Such small kicks dramatically
help combat the starvation issue, by allowing each client the opportunity
to run even when the system is under heavy throughput load (within the
constraints of the user selected priority).

v2: Mark the preempted request as the start of a new flow, to prevent a
single client being continually gazumped by its peers.

Testcase: igt/benchmarks/rrul
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
---
 drivers/gpu/drm/i915/i915_request.c   | 16 ++++++++++++++--
 drivers/gpu/drm/i915/i915_scheduler.h |  4 +++-
 drivers/gpu/drm/i915/intel_lrc.c      | 25 +++++++++++++++++++------
 3 files changed, 36 insertions(+), 9 deletions(-)

Comments

Tvrtko Ursulin Sept. 24, 2018, 10:29 a.m. UTC | #1
On 19/09/2018 20:55, Chris Wilson wrote:
> Taken from an idea used for FQ_CODEL, we give the first request of a
> new request flows a small priority boost. These flows are likely to
> correspond with short, interactive tasks and so be more latency sensitive
> than the longer free running queues. As soon as the client has more than
> one request in the queue, further requests are not boosted and it settles
> down into ordinary steady state behaviour.  Such small kicks dramatically
> help combat the starvation issue, by allowing each client the opportunity
> to run even when the system is under heavy throughput load (within the
> constraints of the user selected priority).
> 
> v2: Mark the preempted request as the start of a new flow, to prevent a
> single client being continually gazumped by its peers.
> 
> Testcase: igt/benchmarks/rrul
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
> ---
>   drivers/gpu/drm/i915/i915_request.c   | 16 ++++++++++++++--
>   drivers/gpu/drm/i915/i915_scheduler.h |  4 +++-
>   drivers/gpu/drm/i915/intel_lrc.c      | 25 +++++++++++++++++++------
>   3 files changed, 36 insertions(+), 9 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
> index a492385b2089..56140ca054e8 100644
> --- a/drivers/gpu/drm/i915/i915_request.c
> +++ b/drivers/gpu/drm/i915/i915_request.c
> @@ -1127,8 +1127,20 @@ void i915_request_add(struct i915_request *request)
>   	 */
>   	local_bh_disable();
>   	rcu_read_lock(); /* RCU serialisation for set-wedged protection */
> -	if (engine->schedule)
> -		engine->schedule(request, &request->gem_context->sched);
> +	if (engine->schedule) {
> +		struct i915_sched_attr attr = request->gem_context->sched;
> +
> +		/*
> +		 * Boost priorities to new clients (new request flows).
> +		 *
> +		 * Allow interactive/synchronous clients to jump ahead of
> +		 * the bulk clients. (FQ_CODEL)
> +		 */
> +		if (!prev || i915_request_completed(prev))
> +			attr.priority |= I915_PRIORITY_NEWCLIENT;
> +
> +		engine->schedule(request, &attr);
> +	}
>   	rcu_read_unlock();
>   	i915_sw_fence_commit(&request->submit);
>   	local_bh_enable(); /* Kick the execlists tasklet if just scheduled */
> diff --git a/drivers/gpu/drm/i915/i915_scheduler.h b/drivers/gpu/drm/i915/i915_scheduler.h
> index 7edfad0abfd7..93e43e263d8c 100644
> --- a/drivers/gpu/drm/i915/i915_scheduler.h
> +++ b/drivers/gpu/drm/i915/i915_scheduler.h
> @@ -19,12 +19,14 @@ enum {
>   	I915_PRIORITY_INVALID = INT_MIN
>   };
>   
> -#define I915_USER_PRIORITY_SHIFT 0
> +#define I915_USER_PRIORITY_SHIFT 1
>   #define I915_USER_PRIORITY(x) ((x) << I915_USER_PRIORITY_SHIFT)
>   
>   #define I915_PRIORITY_COUNT BIT(I915_USER_PRIORITY_SHIFT)
>   #define I915_PRIORITY_MASK (-I915_PRIORITY_COUNT)
>   
> +#define I915_PRIORITY_NEWCLIENT	((u8)BIT(0))

Is the cast important and why?

> +
>   struct i915_sched_attr {
>   	/**
>   	 * @priority: execution and service priority
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index aeae82b5223c..ee9a656e549c 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -363,9 +363,9 @@ static void unwind_wa_tail(struct i915_request *rq)
>   
>   static void __unwind_incomplete_requests(struct intel_engine_cs *engine)
>   {
> -	struct i915_request *rq, *rn;
> +	struct i915_request *rq, *rn, *active = NULL;
>   	struct list_head *uninitialized_var(pl);
> -	int last_prio = I915_PRIORITY_INVALID;
> +	int prio = I915_PRIORITY_INVALID | I915_PRIORITY_NEWCLIENT;
>   
>   	lockdep_assert_held(&engine->timeline.lock);
>   
> @@ -373,19 +373,32 @@ static void __unwind_incomplete_requests(struct intel_engine_cs *engine)
>   					 &engine->timeline.requests,
>   					 link) {
>   		if (i915_request_completed(rq))
> -			return;
> +			break;
>   
>   		__i915_request_unsubmit(rq);
>   		unwind_wa_tail(rq);
>   
>   		GEM_BUG_ON(rq_prio(rq) == I915_PRIORITY_INVALID);
> -		if (rq_prio(rq) != last_prio) {
> -			last_prio = rq_prio(rq);
> -			pl = lookup_priolist(engine, last_prio);
> +		if (rq_prio(rq) != prio) {
> +			prio = rq_prio(rq);
> +			pl = lookup_priolist(engine, prio);
>   		}
>   		GEM_BUG_ON(RB_EMPTY_ROOT(&engine->execlists.queue.rb_root));
>   
>   		list_add(&rq->sched.link, pl);
> +
> +		active = rq;
> +	}
> +
> +	/*
> +	 * The active request is now effectively the start of a new client
> +	 * stream, so give it the equivalent small priority bump to prevent
> +	 * it being gazumped a second time by another peer.
> +	 */
> +	if (!(prio & I915_PRIORITY_NEWCLIENT)) {
> +		prio |= I915_PRIORITY_NEWCLIENT;
> +		list_move_tail(&active->sched.link,
> +			       lookup_priolist(engine, prio));
>   	}
>   }
>   
> 

Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Regards,

Tvrtko
Chris Wilson Sept. 25, 2018, 8:01 a.m. UTC | #2
Quoting Tvrtko Ursulin (2018-09-24 11:29:52)
> 
> On 19/09/2018 20:55, Chris Wilson wrote:
> > Taken from an idea used for FQ_CODEL, we give the first request of a
> > new request flows a small priority boost. These flows are likely to
> > correspond with short, interactive tasks and so be more latency sensitive
> > than the longer free running queues. As soon as the client has more than
> > one request in the queue, further requests are not boosted and it settles
> > down into ordinary steady state behaviour.  Such small kicks dramatically
> > help combat the starvation issue, by allowing each client the opportunity
> > to run even when the system is under heavy throughput load (within the
> > constraints of the user selected priority).
> > 
> > v2: Mark the preempted request as the start of a new flow, to prevent a
> > single client being continually gazumped by its peers.
> > 
> > Testcase: igt/benchmarks/rrul
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> > Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
> > ---
> >   drivers/gpu/drm/i915/i915_request.c   | 16 ++++++++++++++--
> >   drivers/gpu/drm/i915/i915_scheduler.h |  4 +++-
> >   drivers/gpu/drm/i915/intel_lrc.c      | 25 +++++++++++++++++++------
> >   3 files changed, 36 insertions(+), 9 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
> > index a492385b2089..56140ca054e8 100644
> > --- a/drivers/gpu/drm/i915/i915_request.c
> > +++ b/drivers/gpu/drm/i915/i915_request.c
> > @@ -1127,8 +1127,20 @@ void i915_request_add(struct i915_request *request)
> >        */
> >       local_bh_disable();
> >       rcu_read_lock(); /* RCU serialisation for set-wedged protection */
> > -     if (engine->schedule)
> > -             engine->schedule(request, &request->gem_context->sched);
> > +     if (engine->schedule) {
> > +             struct i915_sched_attr attr = request->gem_context->sched;
> > +
> > +             /*
> > +              * Boost priorities to new clients (new request flows).
> > +              *
> > +              * Allow interactive/synchronous clients to jump ahead of
> > +              * the bulk clients. (FQ_CODEL)
> > +              */
> > +             if (!prev || i915_request_completed(prev))
> > +                     attr.priority |= I915_PRIORITY_NEWCLIENT;
> > +
> > +             engine->schedule(request, &attr);
> > +     }
> >       rcu_read_unlock();
> >       i915_sw_fence_commit(&request->submit);
> >       local_bh_enable(); /* Kick the execlists tasklet if just scheduled */
> > diff --git a/drivers/gpu/drm/i915/i915_scheduler.h b/drivers/gpu/drm/i915/i915_scheduler.h
> > index 7edfad0abfd7..93e43e263d8c 100644
> > --- a/drivers/gpu/drm/i915/i915_scheduler.h
> > +++ b/drivers/gpu/drm/i915/i915_scheduler.h
> > @@ -19,12 +19,14 @@ enum {
> >       I915_PRIORITY_INVALID = INT_MIN
> >   };
> >   
> > -#define I915_USER_PRIORITY_SHIFT 0
> > +#define I915_USER_PRIORITY_SHIFT 1
> >   #define I915_USER_PRIORITY(x) ((x) << I915_USER_PRIORITY_SHIFT)
> >   
> >   #define I915_PRIORITY_COUNT BIT(I915_USER_PRIORITY_SHIFT)
> >   #define I915_PRIORITY_MASK (-I915_PRIORITY_COUNT)
> >   
> > +#define I915_PRIORITY_NEWCLIENT      ((u8)BIT(0))
> 
> Is the cast important and why?

Unreliable memory says there was something iffy about the code generation
at one point.
-Chris
Chris Wilson Sept. 25, 2018, 8:26 a.m. UTC | #3
Quoting Chris Wilson (2018-09-25 09:01:06)
> Quoting Tvrtko Ursulin (2018-09-24 11:29:52)
> > 
> > On 19/09/2018 20:55, Chris Wilson wrote:
> > > diff --git a/drivers/gpu/drm/i915/i915_scheduler.h b/drivers/gpu/drm/i915/i915_scheduler.h
> > > index 7edfad0abfd7..93e43e263d8c 100644
> > > --- a/drivers/gpu/drm/i915/i915_scheduler.h
> > > +++ b/drivers/gpu/drm/i915/i915_scheduler.h
> > > @@ -19,12 +19,14 @@ enum {
> > >       I915_PRIORITY_INVALID = INT_MIN
> > >   };
> > >   
> > > -#define I915_USER_PRIORITY_SHIFT 0
> > > +#define I915_USER_PRIORITY_SHIFT 1
> > >   #define I915_USER_PRIORITY(x) ((x) << I915_USER_PRIORITY_SHIFT)
> > >   
> > >   #define I915_PRIORITY_COUNT BIT(I915_USER_PRIORITY_SHIFT)
> > >   #define I915_PRIORITY_MASK (-I915_PRIORITY_COUNT)
> > >   
> > > +#define I915_PRIORITY_NEWCLIENT      ((u8)BIT(0))
> > 
> > Is the cast important and why?
> 
> Unreliable memory says there was something iffy about the code generation
> at one point.

drivers/gpu/drm/i915/intel_lrc.c: In function ‘__unwind_incomplete_requests’:
drivers/gpu/drm/i915/intel_lrc.c:272:13: error: overflow in conversion from ‘long unsigned int’ to ‘int’ changes value from ‘18446744071562067969’ to ‘-2147483647’ [-Werror=overflow]
  int prio = I915_PRIORITY_INVALID | I915_PRIORITY_NEWCLIENT;

-Chris
Tvrtko Ursulin Sept. 25, 2018, 8:57 a.m. UTC | #4
On 25/09/2018 09:26, Chris Wilson wrote:
> Quoting Chris Wilson (2018-09-25 09:01:06)
>> Quoting Tvrtko Ursulin (2018-09-24 11:29:52)
>>>
>>> On 19/09/2018 20:55, Chris Wilson wrote:
>>>> diff --git a/drivers/gpu/drm/i915/i915_scheduler.h b/drivers/gpu/drm/i915/i915_scheduler.h
>>>> index 7edfad0abfd7..93e43e263d8c 100644
>>>> --- a/drivers/gpu/drm/i915/i915_scheduler.h
>>>> +++ b/drivers/gpu/drm/i915/i915_scheduler.h
>>>> @@ -19,12 +19,14 @@ enum {
>>>>        I915_PRIORITY_INVALID = INT_MIN
>>>>    };
>>>>    
>>>> -#define I915_USER_PRIORITY_SHIFT 0
>>>> +#define I915_USER_PRIORITY_SHIFT 1
>>>>    #define I915_USER_PRIORITY(x) ((x) << I915_USER_PRIORITY_SHIFT)
>>>>    
>>>>    #define I915_PRIORITY_COUNT BIT(I915_USER_PRIORITY_SHIFT)
>>>>    #define I915_PRIORITY_MASK (-I915_PRIORITY_COUNT)
>>>>    
>>>> +#define I915_PRIORITY_NEWCLIENT      ((u8)BIT(0))
>>>
>>> Is the cast important and why?
>>
>> Unreliable memory says there was something iffy about the code generation
>> at one point.
> 
> drivers/gpu/drm/i915/intel_lrc.c: In function ‘__unwind_incomplete_requests’:
> drivers/gpu/drm/i915/intel_lrc.c:272:13: error: overflow in conversion from ‘long unsigned int’ to ‘int’ changes value from ‘18446744071562067969’ to ‘-2147483647’ [-Werror=overflow]
>    int prio = I915_PRIORITY_INVALID | I915_PRIORITY_NEWCLIENT;

So correct cast would be (int)BIT(..), or maybe not use BIT for less 
confusion?

Regards,

Tvrtko
Chris Wilson Sept. 25, 2018, 9:06 a.m. UTC | #5
Quoting Tvrtko Ursulin (2018-09-25 09:57:11)
> 
> On 25/09/2018 09:26, Chris Wilson wrote:
> > Quoting Chris Wilson (2018-09-25 09:01:06)
> >> Quoting Tvrtko Ursulin (2018-09-24 11:29:52)
> >>>
> >>> On 19/09/2018 20:55, Chris Wilson wrote:
> >>>> diff --git a/drivers/gpu/drm/i915/i915_scheduler.h b/drivers/gpu/drm/i915/i915_scheduler.h
> >>>> index 7edfad0abfd7..93e43e263d8c 100644
> >>>> --- a/drivers/gpu/drm/i915/i915_scheduler.h
> >>>> +++ b/drivers/gpu/drm/i915/i915_scheduler.h
> >>>> @@ -19,12 +19,14 @@ enum {
> >>>>        I915_PRIORITY_INVALID = INT_MIN
> >>>>    };
> >>>>    
> >>>> -#define I915_USER_PRIORITY_SHIFT 0
> >>>> +#define I915_USER_PRIORITY_SHIFT 1
> >>>>    #define I915_USER_PRIORITY(x) ((x) << I915_USER_PRIORITY_SHIFT)
> >>>>    
> >>>>    #define I915_PRIORITY_COUNT BIT(I915_USER_PRIORITY_SHIFT)
> >>>>    #define I915_PRIORITY_MASK (-I915_PRIORITY_COUNT)
> >>>>    
> >>>> +#define I915_PRIORITY_NEWCLIENT      ((u8)BIT(0))
> >>>
> >>> Is the cast important and why?
> >>
> >> Unreliable memory says there was something iffy about the code generation
> >> at one point.
> > 
> > drivers/gpu/drm/i915/intel_lrc.c: In function ‘__unwind_incomplete_requests’:
> > drivers/gpu/drm/i915/intel_lrc.c:272:13: error: overflow in conversion from ‘long unsigned int’ to ‘int’ changes value from ‘18446744071562067969’ to ‘-2147483647’ [-Werror=overflow]
> >    int prio = I915_PRIORITY_INVALID | I915_PRIORITY_NEWCLIENT;
> 
> So correct cast would be (int)BIT(..), or maybe not use BIT for less 
> confusion?

It's a bit, I like them unsigned to avoid sign extension confusion most
of the time. (What am I saying, sign extension is already confusing and
no matter what you do, you always want the opposite.)
-Chris
Tvrtko Ursulin Sept. 25, 2018, 9:08 a.m. UTC | #6
On 25/09/2018 10:06, Chris Wilson wrote:
> Quoting Tvrtko Ursulin (2018-09-25 09:57:11)
>>
>> On 25/09/2018 09:26, Chris Wilson wrote:
>>> Quoting Chris Wilson (2018-09-25 09:01:06)
>>>> Quoting Tvrtko Ursulin (2018-09-24 11:29:52)
>>>>>
>>>>> On 19/09/2018 20:55, Chris Wilson wrote:
>>>>>> diff --git a/drivers/gpu/drm/i915/i915_scheduler.h b/drivers/gpu/drm/i915/i915_scheduler.h
>>>>>> index 7edfad0abfd7..93e43e263d8c 100644
>>>>>> --- a/drivers/gpu/drm/i915/i915_scheduler.h
>>>>>> +++ b/drivers/gpu/drm/i915/i915_scheduler.h
>>>>>> @@ -19,12 +19,14 @@ enum {
>>>>>>         I915_PRIORITY_INVALID = INT_MIN
>>>>>>     };
>>>>>>     
>>>>>> -#define I915_USER_PRIORITY_SHIFT 0
>>>>>> +#define I915_USER_PRIORITY_SHIFT 1
>>>>>>     #define I915_USER_PRIORITY(x) ((x) << I915_USER_PRIORITY_SHIFT)
>>>>>>     
>>>>>>     #define I915_PRIORITY_COUNT BIT(I915_USER_PRIORITY_SHIFT)
>>>>>>     #define I915_PRIORITY_MASK (-I915_PRIORITY_COUNT)
>>>>>>     
>>>>>> +#define I915_PRIORITY_NEWCLIENT      ((u8)BIT(0))
>>>>>
>>>>> Is the cast important and why?
>>>>
>>>> Unreliable memory says there was something iffy about the code generation
>>>> at one point.
>>>
>>> drivers/gpu/drm/i915/intel_lrc.c: In function ‘__unwind_incomplete_requests’:
>>> drivers/gpu/drm/i915/intel_lrc.c:272:13: error: overflow in conversion from ‘long unsigned int’ to ‘int’ changes value from ‘18446744071562067969’ to ‘-2147483647’ [-Werror=overflow]
>>>     int prio = I915_PRIORITY_INVALID | I915_PRIORITY_NEWCLIENT;
>>
>> So correct cast would be (int)BIT(..), or maybe not use BIT for less
>> confusion?
> 
> It's a bit, I like them unsigned to avoid sign extension confusion most
> of the time. (What am I saying, sign extension is already confusing and
> no matter what you do, you always want the opposite.)

Okay, it's internal so no big deal either way.

Regards,

Tvrtko
Michal Wajdeczko Sept. 25, 2018, 11:20 a.m. UTC | #7
On Tue, 25 Sep 2018 10:26:57 +0200, Chris Wilson  
<chris@chris-wilson.co.uk> wrote:

> Quoting Chris Wilson (2018-09-25 09:01:06)
>> Quoting Tvrtko Ursulin (2018-09-24 11:29:52)
>> >
>> > On 19/09/2018 20:55, Chris Wilson wrote:
>> > > diff --git a/drivers/gpu/drm/i915/i915_scheduler.h  
>> b/drivers/gpu/drm/i915/i915_scheduler.h
>> > > index 7edfad0abfd7..93e43e263d8c 100644
>> > > --- a/drivers/gpu/drm/i915/i915_scheduler.h
>> > > +++ b/drivers/gpu/drm/i915/i915_scheduler.h
>> > > @@ -19,12 +19,14 @@ enum {
>> > >       I915_PRIORITY_INVALID = INT_MIN
>> > >   };
>> > >
>> > > -#define I915_USER_PRIORITY_SHIFT 0
>> > > +#define I915_USER_PRIORITY_SHIFT 1
>> > >   #define I915_USER_PRIORITY(x) ((x) << I915_USER_PRIORITY_SHIFT)
>> > >
>> > >   #define I915_PRIORITY_COUNT BIT(I915_USER_PRIORITY_SHIFT)
>> > >   #define I915_PRIORITY_MASK (-I915_PRIORITY_COUNT)
>> > >
>> > > +#define I915_PRIORITY_NEWCLIENT      ((u8)BIT(0))
>> >
>> > Is the cast important and why?
>>
>> Unreliable memory says there was something iffy about the code  
>> generation
>> at one point.
>
> drivers/gpu/drm/i915/intel_lrc.c: In function  
> ‘__unwind_incomplete_requests’:
> drivers/gpu/drm/i915/intel_lrc.c:272:13: error: overflow in conversion  
> from ‘long unsigned int’ to ‘int’ changes value from  
> ‘18446744071562067969’ to ‘-2147483647’ [-Werror=overflow]
>   int prio = I915_PRIORITY_INVALID | I915_PRIORITY_NEWCLIENT;
>

If you plan to use I915_PRIORITY_NEWCLIENT in 'int' vars then
you should not use BIT macro that returns 'unsigned int long'

As I915_USER_PRIORITY is already using explicit shift, maybe the
same can be done for I915_PRIORITY_NEWCLIENT:

	#define I915_PRIORITY_NEWCLIENT      (1 << 0)

Michal
diff mbox series

Patch

diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index a492385b2089..56140ca054e8 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -1127,8 +1127,20 @@  void i915_request_add(struct i915_request *request)
 	 */
 	local_bh_disable();
 	rcu_read_lock(); /* RCU serialisation for set-wedged protection */
-	if (engine->schedule)
-		engine->schedule(request, &request->gem_context->sched);
+	if (engine->schedule) {
+		struct i915_sched_attr attr = request->gem_context->sched;
+
+		/*
+		 * Boost priorities to new clients (new request flows).
+		 *
+		 * Allow interactive/synchronous clients to jump ahead of
+		 * the bulk clients. (FQ_CODEL)
+		 */
+		if (!prev || i915_request_completed(prev))
+			attr.priority |= I915_PRIORITY_NEWCLIENT;
+
+		engine->schedule(request, &attr);
+	}
 	rcu_read_unlock();
 	i915_sw_fence_commit(&request->submit);
 	local_bh_enable(); /* Kick the execlists tasklet if just scheduled */
diff --git a/drivers/gpu/drm/i915/i915_scheduler.h b/drivers/gpu/drm/i915/i915_scheduler.h
index 7edfad0abfd7..93e43e263d8c 100644
--- a/drivers/gpu/drm/i915/i915_scheduler.h
+++ b/drivers/gpu/drm/i915/i915_scheduler.h
@@ -19,12 +19,14 @@  enum {
 	I915_PRIORITY_INVALID = INT_MIN
 };
 
-#define I915_USER_PRIORITY_SHIFT 0
+#define I915_USER_PRIORITY_SHIFT 1
 #define I915_USER_PRIORITY(x) ((x) << I915_USER_PRIORITY_SHIFT)
 
 #define I915_PRIORITY_COUNT BIT(I915_USER_PRIORITY_SHIFT)
 #define I915_PRIORITY_MASK (-I915_PRIORITY_COUNT)
 
+#define I915_PRIORITY_NEWCLIENT	((u8)BIT(0))
+
 struct i915_sched_attr {
 	/**
 	 * @priority: execution and service priority
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index aeae82b5223c..ee9a656e549c 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -363,9 +363,9 @@  static void unwind_wa_tail(struct i915_request *rq)
 
 static void __unwind_incomplete_requests(struct intel_engine_cs *engine)
 {
-	struct i915_request *rq, *rn;
+	struct i915_request *rq, *rn, *active = NULL;
 	struct list_head *uninitialized_var(pl);
-	int last_prio = I915_PRIORITY_INVALID;
+	int prio = I915_PRIORITY_INVALID | I915_PRIORITY_NEWCLIENT;
 
 	lockdep_assert_held(&engine->timeline.lock);
 
@@ -373,19 +373,32 @@  static void __unwind_incomplete_requests(struct intel_engine_cs *engine)
 					 &engine->timeline.requests,
 					 link) {
 		if (i915_request_completed(rq))
-			return;
+			break;
 
 		__i915_request_unsubmit(rq);
 		unwind_wa_tail(rq);
 
 		GEM_BUG_ON(rq_prio(rq) == I915_PRIORITY_INVALID);
-		if (rq_prio(rq) != last_prio) {
-			last_prio = rq_prio(rq);
-			pl = lookup_priolist(engine, last_prio);
+		if (rq_prio(rq) != prio) {
+			prio = rq_prio(rq);
+			pl = lookup_priolist(engine, prio);
 		}
 		GEM_BUG_ON(RB_EMPTY_ROOT(&engine->execlists.queue.rb_root));
 
 		list_add(&rq->sched.link, pl);
+
+		active = rq;
+	}
+
+	/*
+	 * The active request is now effectively the start of a new client
+	 * stream, so give it the equivalent small priority bump to prevent
+	 * it being gazumped a second time by another peer.
+	 */
+	if (!(prio & I915_PRIORITY_NEWCLIENT)) {
+		prio |= I915_PRIORITY_NEWCLIENT;
+		list_move_tail(&active->sched.link,
+			       lookup_priolist(engine, prio));
 	}
 }