diff mbox series

[08/14] drm/i915: Only reschedule the submission tasklet if preemption is possible

Message ID 20190501114541.10077-8-chris@chris-wilson.co.uk (mailing list archive)
State New, archived
Headers show
Series [01/14] drm/i915/hangcheck: Track context changes | expand

Commit Message

Chris Wilson May 1, 2019, 11:45 a.m. UTC
If we couple the scheduler more tightly with the execlists policy, we
can apply the preemption policy to the question of whether we need to
kick the tasklet at all for this priority bump.

v2: Rephrase it as a core i915 policy and not an execlists foible.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/gt/intel_engine.h      | 18 ------------------
 drivers/gpu/drm/i915/gt/intel_lrc.c         |  4 ++--
 drivers/gpu/drm/i915/gt/selftest_lrc.c      |  7 ++++++-
 drivers/gpu/drm/i915/i915_request.c         |  2 --
 drivers/gpu/drm/i915/i915_scheduler.c       | 18 +++++++++++-------
 drivers/gpu/drm/i915/i915_scheduler.h       | 18 ++++++++++++++++++
 drivers/gpu/drm/i915/intel_guc_submission.c |  3 ++-
 7 files changed, 39 insertions(+), 31 deletions(-)

Comments

Tvrtko Ursulin May 3, 2019, 10:53 a.m. UTC | #1
On 01/05/2019 12:45, Chris Wilson wrote:
> If we couple the scheduler more tightly with the execlists policy, we
> can apply the preemption policy to the question of whether we need to
> kick the tasklet at all for this priority bump.
> 
> v2: Rephrase it as a core i915 policy and not an execlists foible.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> ---
>   drivers/gpu/drm/i915/gt/intel_engine.h      | 18 ------------------
>   drivers/gpu/drm/i915/gt/intel_lrc.c         |  4 ++--
>   drivers/gpu/drm/i915/gt/selftest_lrc.c      |  7 ++++++-
>   drivers/gpu/drm/i915/i915_request.c         |  2 --
>   drivers/gpu/drm/i915/i915_scheduler.c       | 18 +++++++++++-------
>   drivers/gpu/drm/i915/i915_scheduler.h       | 18 ++++++++++++++++++
>   drivers/gpu/drm/i915/intel_guc_submission.c |  3 ++-
>   7 files changed, 39 insertions(+), 31 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/gt/intel_engine.h b/drivers/gpu/drm/i915/gt/intel_engine.h
> index f5b0f27cecb6..06d785533502 100644
> --- a/drivers/gpu/drm/i915/gt/intel_engine.h
> +++ b/drivers/gpu/drm/i915/gt/intel_engine.h
> @@ -106,24 +106,6 @@ hangcheck_action_to_str(const enum intel_engine_hangcheck_action a)
>   
>   void intel_engines_set_scheduler_caps(struct drm_i915_private *i915);
>   
> -static inline bool __execlists_need_preempt(int prio, int last)
> -{
> -	/*
> -	 * Allow preemption of low -> normal -> high, but we do
> -	 * not allow low priority tasks to preempt other low priority
> -	 * tasks under the impression that latency for low priority
> -	 * tasks does not matter (as much as background throughput),
> -	 * so kiss.
> -	 *
> -	 * More naturally we would write
> -	 *	prio >= max(0, last);
> -	 * except that we wish to prevent triggering preemption at the same
> -	 * priority level: the task that is running should remain running
> -	 * to preserve FIFO ordering of dependencies.
> -	 */
> -	return prio > max(I915_PRIORITY_NORMAL - 1, last);
> -}
> -
>   static inline void
>   execlists_set_active(struct intel_engine_execlists *execlists,
>   		     unsigned int bit)
> diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
> index 7be54b868d8e..35aae7b5c6b9 100644
> --- a/drivers/gpu/drm/i915/gt/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
> @@ -251,8 +251,8 @@ static inline bool need_preempt(const struct intel_engine_cs *engine,
>   	 * ourselves, ignore the request.
>   	 */
>   	last_prio = effective_prio(rq);
> -	if (!__execlists_need_preempt(engine->execlists.queue_priority_hint,
> -				      last_prio))
> +	if (!i915_scheduler_need_preempt(engine->execlists.queue_priority_hint,
> +					 last_prio))
>   		return false;
>   
>   	/*
> diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c
> index 84538f69185b..4b042893dc0e 100644
> --- a/drivers/gpu/drm/i915/gt/selftest_lrc.c
> +++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c
> @@ -638,14 +638,19 @@ static struct i915_request *dummy_request(struct intel_engine_cs *engine)
>   	GEM_BUG_ON(i915_request_completed(rq));
>   
>   	i915_sw_fence_init(&rq->submit, dummy_notify);
> -	i915_sw_fence_commit(&rq->submit);
> +	set_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags);
>   
>   	return rq;
>   }
>   
>   static void dummy_request_free(struct i915_request *dummy)
>   {
> +	/* We have to fake the CS interrupt to kick the next request */
> +	i915_sw_fence_commit(&dummy->submit);
> +
>   	i915_request_mark_complete(dummy);
> +	dma_fence_signal(&dummy->fence);
> +
>   	i915_sched_node_fini(&dummy->sched);
>   	i915_sw_fence_fini(&dummy->submit);
>   
> diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
> index af8c9fa5e066..2e22da66a56c 100644
> --- a/drivers/gpu/drm/i915/i915_request.c
> +++ b/drivers/gpu/drm/i915/i915_request.c
> @@ -1358,9 +1358,7 @@ long i915_request_wait(struct i915_request *rq,
>   	if (flags & I915_WAIT_PRIORITY) {
>   		if (!i915_request_started(rq) && INTEL_GEN(rq->i915) >= 6)
>   			gen6_rps_boost(rq);
> -		local_bh_disable(); /* suspend tasklets for reprioritisation */
>   		i915_schedule_bump_priority(rq, I915_PRIORITY_WAIT);
> -		local_bh_enable(); /* kick tasklets en masse */
>   	}
>   
>   	wait.tsk = current;
> diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c
> index 39bc4f54e272..88d18600f5db 100644
> --- a/drivers/gpu/drm/i915/i915_scheduler.c
> +++ b/drivers/gpu/drm/i915/i915_scheduler.c
> @@ -261,16 +261,20 @@ sched_lock_engine(const struct i915_sched_node *node,
>   	return engine;
>   }
>   
> -static bool inflight(const struct i915_request *rq,
> -		     const struct intel_engine_cs *engine)
> +static inline int rq_prio(const struct i915_request *rq)
>   {
> -	const struct i915_request *active;
> +	return rq->sched.attr.priority | __NO_PREEMPTION;
> +}
> +
> +static bool kick_tasklet(const struct intel_engine_cs *engine, int prio)
> +{
> +	const struct i915_request *inflight =
> +		port_request(engine->execlists.port);
>   
> -	if (!i915_request_is_active(rq))
> +	if (!inflight)
>   		return false;
>   
> -	active = port_request(engine->execlists.port);
> -	return active->hw_context == rq->hw_context;
> +	return i915_scheduler_need_preempt(prio, rq_prio(inflight));
>   }
>   
>   static void __i915_schedule(struct i915_request *rq,
> @@ -400,7 +404,7 @@ static void __i915_schedule(struct i915_request *rq,
>   		 * If we are already the currently executing context, don't
>   		 * bother evaluating if we should preempt ourselves.
>   		 */
> -		if (inflight(node_to_request(node), engine))
> +		if (!kick_tasklet(engine, prio))
>   			continue;

I don't see other callers for kick_tasklet in the series so I am 
thinking why not make the function called kick_tasklet actually kick the 
tasklet? ;)

Could call it maybe_kick_tasklet and just end the loop with it.

We could even abstract to the engine like engine->signal_preemption() or 
something, to hide the tasklet from the scheduler. But probably not 
worth it right now.

Regards,

Tvrtko

>   
>   		/* Defer (tasklet) submission until after all of our updates. */
> diff --git a/drivers/gpu/drm/i915/i915_scheduler.h b/drivers/gpu/drm/i915/i915_scheduler.h
> index 07d243acf553..7eefccff39bf 100644
> --- a/drivers/gpu/drm/i915/i915_scheduler.h
> +++ b/drivers/gpu/drm/i915/i915_scheduler.h
> @@ -52,4 +52,22 @@ static inline void i915_priolist_free(struct i915_priolist *p)
>   		__i915_priolist_free(p);
>   }
>   
> +static inline bool i915_scheduler_need_preempt(int prio, int active)
> +{
> +	/*
> +	 * Allow preemption of low -> normal -> high, but we do
> +	 * not allow low priority tasks to preempt other low priority
> +	 * tasks under the impression that latency for low priority
> +	 * tasks does not matter (as much as background throughput),
> +	 * so kiss.
> +	 *
> +	 * More naturally we would write
> +	 *	prio >= max(0, last);
> +	 * except that we wish to prevent triggering preemption at the same
> +	 * priority level: the task that is running should remain running
> +	 * to preserve FIFO ordering of dependencies.
> +	 */
> +	return prio > max(I915_PRIORITY_NORMAL - 1, active);
> +}
> +
>   #endif /* _I915_SCHEDULER_H_ */
> diff --git a/drivers/gpu/drm/i915/intel_guc_submission.c b/drivers/gpu/drm/i915/intel_guc_submission.c
> index ed94001028f2..cb9964ae229d 100644
> --- a/drivers/gpu/drm/i915/intel_guc_submission.c
> +++ b/drivers/gpu/drm/i915/intel_guc_submission.c
> @@ -746,7 +746,8 @@ static bool __guc_dequeue(struct intel_engine_cs *engine)
>   				&engine->i915->guc.preempt_work[engine->id];
>   			int prio = execlists->queue_priority_hint;
>   
> -			if (__execlists_need_preempt(prio, port_prio(port))) {
> +			if (i915_scheduler_need_preempt(prio,
> +							port_prio(port))) {
>   				execlists_set_active(execlists,
>   						     EXECLISTS_ACTIVE_PREEMPT);
>   				queue_work(engine->i915->guc.preempt_wq,
>
Chris Wilson May 3, 2019, 10:58 a.m. UTC | #2
Quoting Tvrtko Ursulin (2019-05-03 11:53:31)
> 
> On 01/05/2019 12:45, Chris Wilson wrote:
> > If we couple the scheduler more tightly with the execlists policy, we
> > can apply the preemption policy to the question of whether we need to
> > kick the tasklet at all for this priority bump.
> > 
> > v2: Rephrase it as a core i915 policy and not an execlists foible.
> > 
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> > ---
> >   drivers/gpu/drm/i915/gt/intel_engine.h      | 18 ------------------
> >   drivers/gpu/drm/i915/gt/intel_lrc.c         |  4 ++--
> >   drivers/gpu/drm/i915/gt/selftest_lrc.c      |  7 ++++++-
> >   drivers/gpu/drm/i915/i915_request.c         |  2 --
> >   drivers/gpu/drm/i915/i915_scheduler.c       | 18 +++++++++++-------
> >   drivers/gpu/drm/i915/i915_scheduler.h       | 18 ++++++++++++++++++
> >   drivers/gpu/drm/i915/intel_guc_submission.c |  3 ++-
> >   7 files changed, 39 insertions(+), 31 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/i915/gt/intel_engine.h b/drivers/gpu/drm/i915/gt/intel_engine.h
> > index f5b0f27cecb6..06d785533502 100644
> > --- a/drivers/gpu/drm/i915/gt/intel_engine.h
> > +++ b/drivers/gpu/drm/i915/gt/intel_engine.h
> > @@ -106,24 +106,6 @@ hangcheck_action_to_str(const enum intel_engine_hangcheck_action a)
> >   
> >   void intel_engines_set_scheduler_caps(struct drm_i915_private *i915);
> >   
> > -static inline bool __execlists_need_preempt(int prio, int last)
> > -{
> > -     /*
> > -      * Allow preemption of low -> normal -> high, but we do
> > -      * not allow low priority tasks to preempt other low priority
> > -      * tasks under the impression that latency for low priority
> > -      * tasks does not matter (as much as background throughput),
> > -      * so kiss.
> > -      *
> > -      * More naturally we would write
> > -      *      prio >= max(0, last);
> > -      * except that we wish to prevent triggering preemption at the same
> > -      * priority level: the task that is running should remain running
> > -      * to preserve FIFO ordering of dependencies.
> > -      */
> > -     return prio > max(I915_PRIORITY_NORMAL - 1, last);
> > -}
> > -
> >   static inline void
> >   execlists_set_active(struct intel_engine_execlists *execlists,
> >                    unsigned int bit)
> > diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
> > index 7be54b868d8e..35aae7b5c6b9 100644
> > --- a/drivers/gpu/drm/i915/gt/intel_lrc.c
> > +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
> > @@ -251,8 +251,8 @@ static inline bool need_preempt(const struct intel_engine_cs *engine,
> >        * ourselves, ignore the request.
> >        */
> >       last_prio = effective_prio(rq);
> > -     if (!__execlists_need_preempt(engine->execlists.queue_priority_hint,
> > -                                   last_prio))
> > +     if (!i915_scheduler_need_preempt(engine->execlists.queue_priority_hint,
> > +                                      last_prio))
> >               return false;
> >   
> >       /*
> > diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c
> > index 84538f69185b..4b042893dc0e 100644
> > --- a/drivers/gpu/drm/i915/gt/selftest_lrc.c
> > +++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c
> > @@ -638,14 +638,19 @@ static struct i915_request *dummy_request(struct intel_engine_cs *engine)
> >       GEM_BUG_ON(i915_request_completed(rq));
> >   
> >       i915_sw_fence_init(&rq->submit, dummy_notify);
> > -     i915_sw_fence_commit(&rq->submit);
> > +     set_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags);
> >   
> >       return rq;
> >   }
> >   
> >   static void dummy_request_free(struct i915_request *dummy)
> >   {
> > +     /* We have to fake the CS interrupt to kick the next request */
> > +     i915_sw_fence_commit(&dummy->submit);
> > +
> >       i915_request_mark_complete(dummy);
> > +     dma_fence_signal(&dummy->fence);
> > +
> >       i915_sched_node_fini(&dummy->sched);
> >       i915_sw_fence_fini(&dummy->submit);
> >   
> > diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
> > index af8c9fa5e066..2e22da66a56c 100644
> > --- a/drivers/gpu/drm/i915/i915_request.c
> > +++ b/drivers/gpu/drm/i915/i915_request.c
> > @@ -1358,9 +1358,7 @@ long i915_request_wait(struct i915_request *rq,
> >       if (flags & I915_WAIT_PRIORITY) {
> >               if (!i915_request_started(rq) && INTEL_GEN(rq->i915) >= 6)
> >                       gen6_rps_boost(rq);
> > -             local_bh_disable(); /* suspend tasklets for reprioritisation */
> >               i915_schedule_bump_priority(rq, I915_PRIORITY_WAIT);
> > -             local_bh_enable(); /* kick tasklets en masse */
> >       }
> >   
> >       wait.tsk = current;
> > diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c
> > index 39bc4f54e272..88d18600f5db 100644
> > --- a/drivers/gpu/drm/i915/i915_scheduler.c
> > +++ b/drivers/gpu/drm/i915/i915_scheduler.c
> > @@ -261,16 +261,20 @@ sched_lock_engine(const struct i915_sched_node *node,
> >       return engine;
> >   }
> >   
> > -static bool inflight(const struct i915_request *rq,
> > -                  const struct intel_engine_cs *engine)
> > +static inline int rq_prio(const struct i915_request *rq)
> >   {
> > -     const struct i915_request *active;
> > +     return rq->sched.attr.priority | __NO_PREEMPTION;
> > +}
> > +
> > +static bool kick_tasklet(const struct intel_engine_cs *engine, int prio)
> > +{
> > +     const struct i915_request *inflight =
> > +             port_request(engine->execlists.port);
> >   
> > -     if (!i915_request_is_active(rq))
> > +     if (!inflight)
> >               return false;
> >   
> > -     active = port_request(engine->execlists.port);
> > -     return active->hw_context == rq->hw_context;
> > +     return i915_scheduler_need_preempt(prio, rq_prio(inflight));
> >   }
> >   
> >   static void __i915_schedule(struct i915_request *rq,
> > @@ -400,7 +404,7 @@ static void __i915_schedule(struct i915_request *rq,
> >                * If we are already the currently executing context, don't
> >                * bother evaluating if we should preempt ourselves.
> >                */
> > -             if (inflight(node_to_request(node), engine))
> > +             if (!kick_tasklet(engine, prio))
> >                       continue;
> 
> I don't see other callers for kick_tasklet in the series so I am 
> thinking why not make the function called kick_tasklet actually kick the 
> tasklet? ;)

You don't see the ghostly presence of set_preemption_timeout() here? Ok,
that could be pushed into the function as well.

> Could call it maybe_kick_tasklet and just end the loop with it.
> 
> We could even abstract to the engine like engine->signal_preemption() or 
> something, to hide the tasklet from the scheduler. But probably not 
> worth it right now.

Haven't quite figured out the best approach. I've done both so far for
preemption-timeout, at the moment I've pulled the hrtimer into
i915_scheduler.c (frankly because it was easier to on the old rebasing);
but the dust is nowhere close to settled.
-Chris
diff mbox series

Patch

diff --git a/drivers/gpu/drm/i915/gt/intel_engine.h b/drivers/gpu/drm/i915/gt/intel_engine.h
index f5b0f27cecb6..06d785533502 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine.h
@@ -106,24 +106,6 @@  hangcheck_action_to_str(const enum intel_engine_hangcheck_action a)
 
 void intel_engines_set_scheduler_caps(struct drm_i915_private *i915);
 
-static inline bool __execlists_need_preempt(int prio, int last)
-{
-	/*
-	 * Allow preemption of low -> normal -> high, but we do
-	 * not allow low priority tasks to preempt other low priority
-	 * tasks under the impression that latency for low priority
-	 * tasks does not matter (as much as background throughput),
-	 * so kiss.
-	 *
-	 * More naturally we would write
-	 *	prio >= max(0, last);
-	 * except that we wish to prevent triggering preemption at the same
-	 * priority level: the task that is running should remain running
-	 * to preserve FIFO ordering of dependencies.
-	 */
-	return prio > max(I915_PRIORITY_NORMAL - 1, last);
-}
-
 static inline void
 execlists_set_active(struct intel_engine_execlists *execlists,
 		     unsigned int bit)
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index 7be54b868d8e..35aae7b5c6b9 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -251,8 +251,8 @@  static inline bool need_preempt(const struct intel_engine_cs *engine,
 	 * ourselves, ignore the request.
 	 */
 	last_prio = effective_prio(rq);
-	if (!__execlists_need_preempt(engine->execlists.queue_priority_hint,
-				      last_prio))
+	if (!i915_scheduler_need_preempt(engine->execlists.queue_priority_hint,
+					 last_prio))
 		return false;
 
 	/*
diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c
index 84538f69185b..4b042893dc0e 100644
--- a/drivers/gpu/drm/i915/gt/selftest_lrc.c
+++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c
@@ -638,14 +638,19 @@  static struct i915_request *dummy_request(struct intel_engine_cs *engine)
 	GEM_BUG_ON(i915_request_completed(rq));
 
 	i915_sw_fence_init(&rq->submit, dummy_notify);
-	i915_sw_fence_commit(&rq->submit);
+	set_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags);
 
 	return rq;
 }
 
 static void dummy_request_free(struct i915_request *dummy)
 {
+	/* We have to fake the CS interrupt to kick the next request */
+	i915_sw_fence_commit(&dummy->submit);
+
 	i915_request_mark_complete(dummy);
+	dma_fence_signal(&dummy->fence);
+
 	i915_sched_node_fini(&dummy->sched);
 	i915_sw_fence_fini(&dummy->submit);
 
diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index af8c9fa5e066..2e22da66a56c 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -1358,9 +1358,7 @@  long i915_request_wait(struct i915_request *rq,
 	if (flags & I915_WAIT_PRIORITY) {
 		if (!i915_request_started(rq) && INTEL_GEN(rq->i915) >= 6)
 			gen6_rps_boost(rq);
-		local_bh_disable(); /* suspend tasklets for reprioritisation */
 		i915_schedule_bump_priority(rq, I915_PRIORITY_WAIT);
-		local_bh_enable(); /* kick tasklets en masse */
 	}
 
 	wait.tsk = current;
diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c
index 39bc4f54e272..88d18600f5db 100644
--- a/drivers/gpu/drm/i915/i915_scheduler.c
+++ b/drivers/gpu/drm/i915/i915_scheduler.c
@@ -261,16 +261,20 @@  sched_lock_engine(const struct i915_sched_node *node,
 	return engine;
 }
 
-static bool inflight(const struct i915_request *rq,
-		     const struct intel_engine_cs *engine)
+static inline int rq_prio(const struct i915_request *rq)
 {
-	const struct i915_request *active;
+	return rq->sched.attr.priority | __NO_PREEMPTION;
+}
+
+static bool kick_tasklet(const struct intel_engine_cs *engine, int prio)
+{
+	const struct i915_request *inflight =
+		port_request(engine->execlists.port);
 
-	if (!i915_request_is_active(rq))
+	if (!inflight)
 		return false;
 
-	active = port_request(engine->execlists.port);
-	return active->hw_context == rq->hw_context;
+	return i915_scheduler_need_preempt(prio, rq_prio(inflight));
 }
 
 static void __i915_schedule(struct i915_request *rq,
@@ -400,7 +404,7 @@  static void __i915_schedule(struct i915_request *rq,
 		 * If we are already the currently executing context, don't
 		 * bother evaluating if we should preempt ourselves.
 		 */
-		if (inflight(node_to_request(node), engine))
+		if (!kick_tasklet(engine, prio))
 			continue;
 
 		/* Defer (tasklet) submission until after all of our updates. */
diff --git a/drivers/gpu/drm/i915/i915_scheduler.h b/drivers/gpu/drm/i915/i915_scheduler.h
index 07d243acf553..7eefccff39bf 100644
--- a/drivers/gpu/drm/i915/i915_scheduler.h
+++ b/drivers/gpu/drm/i915/i915_scheduler.h
@@ -52,4 +52,22 @@  static inline void i915_priolist_free(struct i915_priolist *p)
 		__i915_priolist_free(p);
 }
 
+static inline bool i915_scheduler_need_preempt(int prio, int active)
+{
+	/*
+	 * Allow preemption of low -> normal -> high, but we do
+	 * not allow low priority tasks to preempt other low priority
+	 * tasks under the impression that latency for low priority
+	 * tasks does not matter (as much as background throughput),
+	 * so kiss.
+	 *
+	 * More naturally we would write
+	 *	prio >= max(0, last);
+	 * except that we wish to prevent triggering preemption at the same
+	 * priority level: the task that is running should remain running
+	 * to preserve FIFO ordering of dependencies.
+	 */
+	return prio > max(I915_PRIORITY_NORMAL - 1, active);
+}
+
 #endif /* _I915_SCHEDULER_H_ */
diff --git a/drivers/gpu/drm/i915/intel_guc_submission.c b/drivers/gpu/drm/i915/intel_guc_submission.c
index ed94001028f2..cb9964ae229d 100644
--- a/drivers/gpu/drm/i915/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/intel_guc_submission.c
@@ -746,7 +746,8 @@  static bool __guc_dequeue(struct intel_engine_cs *engine)
 				&engine->i915->guc.preempt_work[engine->id];
 			int prio = execlists->queue_priority_hint;
 
-			if (__execlists_need_preempt(prio, port_prio(port))) {
+			if (i915_scheduler_need_preempt(prio,
+							port_prio(port))) {
 				execlists_set_active(execlists,
 						     EXECLISTS_ACTIVE_PREEMPT);
 				queue_work(engine->i915->guc.preempt_wq,