
[2/7] drm/i915/execlists: Avoid kicking priority on the current context

Message ID 20180925083205.2229-2-chris@chris-wilson.co.uk (mailing list archive)
State New, archived
Series [1/7] drm/i915/selftests: Smoketest preemption

Commit Message

Chris Wilson Sept. 25, 2018, 8:31 a.m. UTC
If the request is currently on the HW (in port 0), then we do not need
to kick the submission tasklet to evaluate whether we should preempt
ourselves in order to execute it again.

In the case that was annoying me:

   execlists_schedule: rq(18:211173).prio=0 -> 2
   need_preempt: last(18:211174).prio=0, queue.prio=2

We are bumping the priority of the first of a pair of requests running
in the current context. Then, when evaluating preemption, we would see
that our request's priority is higher than that of the last executing
request in ELSP0 and so trigger preemption, not realising that our
intended request was already executing.
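
Condensed, the guard added in execlists_schedule() (see the intel_lrc.c
hunk below) boils down to skipping the tasklet kick when the boosted
request has already been submitted to the HW:

	/* Already executing (or further along than) the request in ELSP0? */
	if (sched_to_request(node)->global_seqno &&
	    i915_seqno_passed(port_request(engine->execlists.port)->global_seqno,
			      sched_to_request(node)->global_seqno))
		continue; /* no point preempting ourselves to run it again */

	/* Otherwise, defer (tasklet) submission until after all of our updates. */
	__update_queue(engine, prio);
	tasklet_hi_schedule(&engine->execlists.tasklet);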

v2: As we assume state of execlists->port[] that is only valid while
we hold the timeline lock, we have to repeat some of the earlier tests
on the validity of the node.
v3: Wrap guc submission under the timeline.lock as is now the way of all
things.
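
As a sketch of the v3 change (condensed from the intel_guc_submission.c
hunks below), the irq-save/lock pair moves out of guc_dequeue() and into
guc_submission_tasklet(), so both completion processing and the dequeue
now run under engine->timeline.lock:

	spin_lock_irqsave(&engine->timeline.lock, flags);

	/* complete the requests in the ports, as before */

	if (!execlists_is_active(execlists, EXECLISTS_ACTIVE_PREEMPT))
		guc_dequeue(engine); /* __guc_dequeue() now relies on the caller holding the lock */

	spin_unlock_irqrestore(&engine->timeline.lock, flags);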

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/intel_guc_submission.c | 18 +++------
 drivers/gpu/drm/i915/intel_lrc.c            | 41 +++++++++++++++------
 2 files changed, 36 insertions(+), 23 deletions(-)

Comments

Tvrtko Ursulin Sept. 25, 2018, 10:14 a.m. UTC | #1
On 25/09/2018 09:31, Chris Wilson wrote:
> If the request is currently on the HW (in port 0), then we do not need
> to kick the submission tasklet to evaluate whether we should preempt
> ourselves in order to execute it again.
> 
> In the case that was annoying me:
> 
>     execlists_schedule: rq(18:211173).prio=0 -> 2
>     need_preempt: last(18:211174).prio=0, queue.prio=2
> 
> We are bumping the priority of the first of a pair of requests running
> in the current context. Then, when evaluating preemption, we would see
> that our request's priority is higher than that of the last executing
> request in ELSP0 and so trigger preemption, not realising that our
> intended request was already executing.
> 
> v2: As we assume state of execlists->port[] that is only valid while
> we hold the timeline lock, we have to repeat some of the earlier tests
> on the validity of the node.
> v3: Wrap guc submission under the timeline.lock as is now the way of all
> things.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> ---
>   drivers/gpu/drm/i915/intel_guc_submission.c | 18 +++------
>   drivers/gpu/drm/i915/intel_lrc.c            | 41 +++++++++++++++------
>   2 files changed, 36 insertions(+), 23 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/intel_guc_submission.c b/drivers/gpu/drm/i915/intel_guc_submission.c
> index a81f04d46e87..4874a212754c 100644
> --- a/drivers/gpu/drm/i915/intel_guc_submission.c
> +++ b/drivers/gpu/drm/i915/intel_guc_submission.c
> @@ -791,19 +791,8 @@ static bool __guc_dequeue(struct intel_engine_cs *engine)
>   
>   static void guc_dequeue(struct intel_engine_cs *engine)
>   {
> -	unsigned long flags;
> -	bool submit;
> -
> -	local_irq_save(flags);
> -
> -	spin_lock(&engine->timeline.lock);
> -	submit = __guc_dequeue(engine);
> -	spin_unlock(&engine->timeline.lock);
> -
> -	if (submit)
> +	if (__guc_dequeue(engine))
>   		guc_submit(engine);
> -
> -	local_irq_restore(flags);
>   }
>   
>   static void guc_submission_tasklet(unsigned long data)
> @@ -812,6 +801,9 @@ static void guc_submission_tasklet(unsigned long data)
>   	struct intel_engine_execlists * const execlists = &engine->execlists;
>   	struct execlist_port *port = execlists->port;
>   	struct i915_request *rq;
> +	unsigned long flags;
> +
> +	spin_lock_irqsave(&engine->timeline.lock, flags);
>   
>   	rq = port_request(port);
>   	while (rq && i915_request_completed(rq)) {
> @@ -835,6 +827,8 @@ static void guc_submission_tasklet(unsigned long data)
>   
>   	if (!execlists_is_active(execlists, EXECLISTS_ACTIVE_PREEMPT))
>   		guc_dequeue(engine);
> +
> +	spin_unlock_irqrestore(&engine->timeline.lock, flags);
>   }
>   
>   static struct i915_request *
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index 5b58c10bc600..e8de250c3413 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -356,13 +356,8 @@ execlists_unwind_incomplete_requests(struct intel_engine_execlists *execlists)
>   {
>   	struct intel_engine_cs *engine =
>   		container_of(execlists, typeof(*engine), execlists);
> -	unsigned long flags;
> -
> -	spin_lock_irqsave(&engine->timeline.lock, flags);
>   
>   	__unwind_incomplete_requests(engine);
> -
> -	spin_unlock_irqrestore(&engine->timeline.lock, flags);
>   }

Could move to header as static inline since it is a trivial wrapper now.
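
For illustration, the wrapper could then read something like the sketch
below in a header (the exact header placement is an assumption here, and
__unwind_incomplete_requests() would need to be made visible outside
intel_lrc.c):

	static inline void
	execlists_unwind_incomplete_requests(struct intel_engine_execlists *execlists)
	{
		struct intel_engine_cs *engine =
			container_of(execlists, typeof(*engine), execlists);

		__unwind_incomplete_requests(engine);
	}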

>   
>   static inline void
> @@ -1234,9 +1229,13 @@ static void execlists_schedule(struct i915_request *request,
>   
>   		engine = sched_lock_engine(node, engine);
>   
> +		/* Recheck after acquiring the engine->timeline.lock */
>   		if (prio <= node->attr.priority)
>   			continue;
>   
> +		if (i915_sched_node_signaled(node))
> +			continue;
> +
>   		node->attr.priority = prio;
>   		if (!list_empty(&node->link)) {
>   			if (last != engine) {
> @@ -1245,14 +1244,34 @@ static void execlists_schedule(struct i915_request *request,
>   			}
>   			GEM_BUG_ON(pl->priority != prio);
>   			list_move_tail(&node->link, &pl->requests);
> +		} else {
> +			/*
> +			 * If the request is not in the priolist queue because
> +			 * it is not yet runnable, then it doesn't contribute
> +			 * to our preemption decisions. On the other hand,
> +			 * if the request is on the HW, it too is not in the
> +			 * queue; but in that case we may still need to reorder
> +			 * the inflight requests.
> +			 */
> +			if (!i915_sw_fence_done(&sched_to_request(node)->submit))
> +				continue;
>   		}
>   
> -		if (prio > engine->execlists.queue_priority &&
> -		    i915_sw_fence_done(&sched_to_request(node)->submit)) {
> -			/* defer submission until after all of our updates */
> -			__update_queue(engine, prio);
> -			tasklet_hi_schedule(&engine->execlists.tasklet);
> -		}
> +		if (prio <= engine->execlists.queue_priority)
> +			continue;
> +
> +		/*
> +		 * If we are already the currently executing context, don't
> +		 * bother evaluating if we should preempt ourselves.
> +		 */
> +		if (sched_to_request(node)->global_seqno &&
> +		    i915_seqno_passed(port_request(engine->execlists.port)->global_seqno,
> +				      sched_to_request(node)->global_seqno))
> +			continue;
> +
> +		/* Defer (tasklet) submission until after all of our updates. */
> +		__update_queue(engine, prio);
> +		tasklet_hi_schedule(&engine->execlists.tasklet);
>   	}
>   
>   	spin_unlock_irq(&engine->timeline.lock);
> 

Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Regards,

Tvrtko

Patch

diff --git a/drivers/gpu/drm/i915/intel_guc_submission.c b/drivers/gpu/drm/i915/intel_guc_submission.c
index a81f04d46e87..4874a212754c 100644
--- a/drivers/gpu/drm/i915/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/intel_guc_submission.c
@@ -791,19 +791,8 @@  static bool __guc_dequeue(struct intel_engine_cs *engine)
 
 static void guc_dequeue(struct intel_engine_cs *engine)
 {
-	unsigned long flags;
-	bool submit;
-
-	local_irq_save(flags);
-
-	spin_lock(&engine->timeline.lock);
-	submit = __guc_dequeue(engine);
-	spin_unlock(&engine->timeline.lock);
-
-	if (submit)
+	if (__guc_dequeue(engine))
 		guc_submit(engine);
-
-	local_irq_restore(flags);
 }
 
 static void guc_submission_tasklet(unsigned long data)
@@ -812,6 +801,9 @@  static void guc_submission_tasklet(unsigned long data)
 	struct intel_engine_execlists * const execlists = &engine->execlists;
 	struct execlist_port *port = execlists->port;
 	struct i915_request *rq;
+	unsigned long flags;
+
+	spin_lock_irqsave(&engine->timeline.lock, flags);
 
 	rq = port_request(port);
 	while (rq && i915_request_completed(rq)) {
@@ -835,6 +827,8 @@  static void guc_submission_tasklet(unsigned long data)
 
 	if (!execlists_is_active(execlists, EXECLISTS_ACTIVE_PREEMPT))
 		guc_dequeue(engine);
+
+	spin_unlock_irqrestore(&engine->timeline.lock, flags);
 }
 
 static struct i915_request *
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 5b58c10bc600..e8de250c3413 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -356,13 +356,8 @@  execlists_unwind_incomplete_requests(struct intel_engine_execlists *execlists)
 {
 	struct intel_engine_cs *engine =
 		container_of(execlists, typeof(*engine), execlists);
-	unsigned long flags;
-
-	spin_lock_irqsave(&engine->timeline.lock, flags);
 
 	__unwind_incomplete_requests(engine);
-
-	spin_unlock_irqrestore(&engine->timeline.lock, flags);
 }
 
 static inline void
@@ -1234,9 +1229,13 @@  static void execlists_schedule(struct i915_request *request,
 
 		engine = sched_lock_engine(node, engine);
 
+		/* Recheck after acquiring the engine->timeline.lock */
 		if (prio <= node->attr.priority)
 			continue;
 
+		if (i915_sched_node_signaled(node))
+			continue;
+
 		node->attr.priority = prio;
 		if (!list_empty(&node->link)) {
 			if (last != engine) {
@@ -1245,14 +1244,34 @@  static void execlists_schedule(struct i915_request *request,
 			}
 			GEM_BUG_ON(pl->priority != prio);
 			list_move_tail(&node->link, &pl->requests);
+		} else {
+			/*
+			 * If the request is not in the priolist queue because
+			 * it is not yet runnable, then it doesn't contribute
+			 * to our preemption decisions. On the other hand,
+			 * if the request is on the HW, it too is not in the
+			 * queue; but in that case we may still need to reorder
+			 * the inflight requests.
+			 */
+			if (!i915_sw_fence_done(&sched_to_request(node)->submit))
+				continue;
 		}
 
-		if (prio > engine->execlists.queue_priority &&
-		    i915_sw_fence_done(&sched_to_request(node)->submit)) {
-			/* defer submission until after all of our updates */
-			__update_queue(engine, prio);
-			tasklet_hi_schedule(&engine->execlists.tasklet);
-		}
+		if (prio <= engine->execlists.queue_priority)
+			continue;
+
+		/*
+		 * If we are already the currently executing context, don't
+		 * bother evaluating if we should preempt ourselves.
+		 */
+		if (sched_to_request(node)->global_seqno &&
+		    i915_seqno_passed(port_request(engine->execlists.port)->global_seqno,
+				      sched_to_request(node)->global_seqno))
+			continue;
+
+		/* Defer (tasklet) submission until after all of our updates. */
+		__update_queue(engine, prio);
+		tasklet_hi_schedule(&engine->execlists.tasklet);
 	}
 
 	spin_unlock_irq(&engine->timeline.lock);