
[11/25] drm/i915: Spin after waking up for an interrupt

Message ID 1466849588-17558-12-git-send-email-chris@chris-wilson.co.uk (mailing list archive)
State New, archived

Commit Message

Chris Wilson June 25, 2016, 10:12 a.m. UTC
When waiting for an interrupt (waiting for the GPU to complete some
work), we know we are the single waiter for the GPU. We also know when
the GPU has nearly completed our request (or at least started processing
it), so once we have been woken and detect that the GPU is actively working
on our request, allow the bottom-half (the first waiter who wakes up to
handle checking the seqno after the interrupt) to spin for a very short
while to reduce client latencies.

The impact is minimal, there was an improvement to the realtime-vs-many
clients case, but exporting the function proves useful later.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_debugfs.c  |  2 +-
 drivers/gpu/drm/i915/i915_drv.h      | 26 +++++++++++++++--------
 drivers/gpu/drm/i915/i915_gem.c      | 40 +++++++++++++++++++++---------------
 drivers/gpu/drm/i915/intel_display.c |  2 +-
 drivers/gpu/drm/i915/intel_pm.c      |  4 ++--
 5 files changed, 45 insertions(+), 29 deletions(-)
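
For orientation before the review comments, here is a rough standalone illustration of the waiting strategy this patch implements: spin briefly, sleep until the interrupt, then spin again briefly after each wakeup. This is userspace C with invented names (hw_seqno, wait_for_seqno, a condition variable standing in for the user interrupt), not the driver code; compile with gcc -pthread.

	#include <pthread.h>
	#include <stdatomic.h>
	#include <stdbool.h>
	#include <stdio.h>
	#include <time.h>
	#include <unistd.h>

	static atomic_uint hw_seqno;	/* stands in for engine->get_seqno() */
	static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
	static pthread_cond_t user_irq = PTHREAD_COND_INITIALIZER;

	static unsigned long now_us(void)
	{
		struct timespec ts;

		clock_gettime(CLOCK_MONOTONIC, &ts);
		return ts.tv_sec * 1000000UL + ts.tv_nsec / 1000;
	}

	/* Bounded busy-wait: the role __i915_spin_request() plays in the patch. */
	static bool spin_for_seqno(unsigned int target, unsigned long timeout_us)
	{
		unsigned long end = now_us() + timeout_us;

		do {
			if (atomic_load(&hw_seqno) >= target)
				return true;
		} while (now_us() < end);

		return false;
	}

	static void wait_for_seqno(unsigned int target)
	{
		/* Optimistic spin before arming the "interrupt" (the patch uses ~5us). */
		if (spin_for_seqno(target, 5))
			return;

		pthread_mutex_lock(&lock);
		while (atomic_load(&hw_seqno) < target) {
			pthread_cond_wait(&user_irq, &lock);	/* sleep until woken */

			/* The new step from this patch: having been woken, spin a
			 * little (~2us) in case completion is imminent, instead of
			 * re-checking once and going straight back to sleep. */
			if (spin_for_seqno(target, 2))
				break;
		}
		pthread_mutex_unlock(&lock);
	}

	static void *fake_gpu(void *arg)
	{
		usleep(1000);			/* pretend to execute the request */
		atomic_fetch_add(&hw_seqno, 1);

		pthread_mutex_lock(&lock);	/* deliver the "user interrupt" */
		pthread_cond_broadcast(&user_irq);
		pthread_mutex_unlock(&lock);
		return arg;
	}

	int main(void)
	{
		pthread_t gpu;

		pthread_create(&gpu, NULL, fake_gpu, NULL);
		wait_for_seqno(1);
		printf("request complete, seqno=%u\n", atomic_load(&hw_seqno));
		pthread_join(gpu, NULL);
		return 0;
	}

The real driver additionally aborts the spin on pending signals and when the thread migrates off its CPU (busywait_stop()), which this sketch omits.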

Comments

Tvrtko Ursulin June 27, 2016, 10:32 a.m. UTC | #1
On 25/06/16 11:12, Chris Wilson wrote:
> When waiting for an interrupt (waiting for the GPU to complete some
> work), we know we are the single waiter for the GPU. We also know when

You mean we know we were the first waiter, if we have been woken up? Or 
perhaps the single waiter for this request? (more or less, maybe close 
enough) Or also s/", we know we are/, and we know we are" ?

Regards,

Tvrtko

Chris Wilson June 28, 2016, 8:55 a.m. UTC | #2
On Mon, Jun 27, 2016 at 11:32:10AM +0100, Tvrtko Ursulin wrote:
> 
> On 25/06/16 11:12, Chris Wilson wrote:
> >When waiting for an interrupt (waiting for the GPU to complete some
> >work), we know we are the single waiter for the GPU. We also know when
> 
> You mean we know we were the first waiter, if we have been woken up?
> Or perhaps the single waiter for this request? (more or less, maybe
> close enough) Or also s/", we know we are/, and we know we are" ?

Single waiter for the engine. s/and//; it follows from and is not
conditional. To be here we know we are the only piece of code performing
this check. There may be threads entering i915_wait_request that also
get to spin before adding themselves to the waitqueue, but of all waiters,
we are the only one allowed to spin on this engine.
-Chris
Chris Wilson June 28, 2016, 9:17 a.m. UTC | #3
On Tue, Jun 28, 2016 at 09:55:17AM +0100, Chris Wilson wrote:
> On Mon, Jun 27, 2016 at 11:32:10AM +0100, Tvrtko Ursulin wrote:
> > 
> > On 25/06/16 11:12, Chris Wilson wrote:
> > >When waiting for an interrupt (waiting for the GPU to complete some
> > >work), we know we are the single waiter for the GPU. We also know when
> > 
> > You mean we know we were the first waiter, if we have been woken up?
> > Or perhaps the single waiter for this request? (more or less, maybe
> > close enough) Or also s/", we know we are/, and we know we are" ?
> 
> Single waiter for the engine. s/and//; it follows from and is not
> conditional. To be here we know we are the only piece of code performing
> this check. There may be threads entering i915_wait_request that also
> get to spin before adding themselves to the waitqueue, but of all waiters,
> we are the only one allowed to spin on this engine.

Changed to:

When waiting for an interrupt (waiting for the engine to complete some
work), we know we are the only waiter to be woken on this engine. We also 
know when the GPU has nearly completed our request (or at least started
processing it), so once we have been woken and detect that the GPU is
active and working on our request, allow us, the bottom-half (the first
waiter who wakes up to handle checking the seqno after the interrupt), to
spin for a very short while to reduce client latencies.

The impact is minimal, there was an improvement to the realtime-vs-many
clients case, but exporting the function proves useful later. However,
it is tempting to adjust irq_seqno_barrier to include the spin. The
problem is first ensuring that the "start-of-request" seqno is coherent
as we use that as our basis for judging when it is ok to spin. If we
could, spinning there could dramatically shorten some sleeps, and allow
us to make the barriers more conservative to handle missed seqno writes
on more platforms (all gen7+ are known to have the occasional issue, at 
least).
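
As a purely hypothetical sketch of that direction (not part of this series), folding the spin into the barrier path could look roughly like the following, reusing only the helpers this patch already adds; the combined helper name is invented:

	static bool seqno_barrier_and_spin(struct drm_i915_gem_request *req,
					   int state)
	{
		struct intel_engine_cs *engine = req->engine;

		/* Existing behaviour: flush so the belated seqno write from the
		 * GPU becomes visible to our read. */
		if (engine->irq_seqno_barrier)
			engine->irq_seqno_barrier(engine);

		/* Hypothetical addition: if the GPU has provably begun this
		 * request, spin a couple of microseconds for the final seqno
		 * write instead of sleeping and paying for another wakeup. */
		return i915_gem_request_completed(req) ||
		       i915_spin_request(req, state, 2);
	}

The prerequisite Chris notes is that the gate inside i915_spin_request(), i.e. i915_gem_request_started() comparing the engine seqno against req->previous_seqno, must itself be reading a coherent seqno before it can be trusted to decide when spinning is worthwhile.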
Tvrtko Ursulin June 28, 2016, 9:25 a.m. UTC | #4
On 28/06/16 10:17, Chris Wilson wrote:
> On Tue, Jun 28, 2016 at 09:55:17AM +0100, Chris Wilson wrote:
>> On Mon, Jun 27, 2016 at 11:32:10AM +0100, Tvrtko Ursulin wrote:
>>>
>>> On 25/06/16 11:12, Chris Wilson wrote:
>>>> When waiting for an interrupt (waiting for the GPU to complete some
>>>> work), we know we are the single waiter for the GPU. We also know when
>>>
>>> You mean we know we were the first waiter, if we have been woken up?
>>> Or perhaps the single waiter for this request? (more or less, maybe
>>> close enough) Or also s/", we know we are/, and we know we are" ?
>>
>> Single waiter for the engine. s/and//; it follows from and is not
>> conditional. To be here we know we are the only piece of code performing
>> this check. There may be threads entering i915_wait_request that also
>> get to spin before adding themselves to the waitqueue, but of all waiters,
>> we are the only one allowed to spin on this engine.
>
> Changed to:
>
> When waiting for an interrupt (waiting for the engine to complete some
> work), we know we are the only waiter to be woken on this engine. We also
> know when the GPU has nearly completed our request (or at least started
> processing it), so once we have been woken and detect that the GPU is
> active and working on our request, allow us, the bottom-half (the first
> waiter who wakes up to handle checking the seqno after the interrupt), to
> spin for a very short while to reduce client latencies.
>
> The impact is minimal, there was an improvement to the realtime-vs-many
> clients case, but exporting the function proves useful later. However,
> it is tempting to adjust irq_seqno_barrier to include the spin. The
> problem is first ensuring that the "start-of-request" seqno is coherent
> as we use that as our basis for judging when it is ok to spin. If we
> could, spinning there could dramatically shorten some sleeps, and allow
> us to make the barriers more conservative to handle missed seqno writes
> on more platforms (all gen7+ are known to have the occasional issue, at
> least).

Super clear now;

Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Regards,

Tvrtko

Patch

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index c81c27dca74d..dba298c7a8b8 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -663,7 +663,7 @@  static int i915_gem_pageflip_info(struct seq_file *m, void *data)
 					   i915_gem_request_get_seqno(work->flip_queued_req),
 					   dev_priv->next_seqno,
 					   engine->get_seqno(engine),
-					   i915_gem_request_completed(work->flip_queued_req, true));
+					   i915_gem_request_completed(work->flip_queued_req));
 			} else
 				seq_printf(m, "Flip not associated with any ring\n");
 			seq_printf(m, "Flip queued on frame %d, (was ready on frame %d), now %d\n",
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index ed92c434f4e6..4cc372df3534 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -3277,24 +3277,27 @@  i915_seqno_passed(uint32_t seq1, uint32_t seq2)
 	return (int32_t)(seq1 - seq2) >= 0;
 }
 
-static inline bool i915_gem_request_started(struct drm_i915_gem_request *req,
-					   bool lazy_coherency)
+static inline bool i915_gem_request_started(const struct drm_i915_gem_request *req)
 {
-	if (!lazy_coherency && req->engine->irq_seqno_barrier)
-		req->engine->irq_seqno_barrier(req->engine);
 	return i915_seqno_passed(req->engine->get_seqno(req->engine),
 				 req->previous_seqno);
 }
 
-static inline bool i915_gem_request_completed(struct drm_i915_gem_request *req,
-					      bool lazy_coherency)
+static inline bool i915_gem_request_completed(const struct drm_i915_gem_request *req)
 {
-	if (!lazy_coherency && req->engine->irq_seqno_barrier)
-		req->engine->irq_seqno_barrier(req->engine);
 	return i915_seqno_passed(req->engine->get_seqno(req->engine),
 				 req->seqno);
 }
 
+bool __i915_spin_request(const struct drm_i915_gem_request *request,
+			 int state, unsigned long timeout_us);
+static inline bool i915_spin_request(const struct drm_i915_gem_request *request,
+				     int state, unsigned long timeout_us)
+{
+	return (i915_gem_request_started(request) &&
+		__i915_spin_request(request, state, timeout_us));
+}
+
 int __must_check i915_gem_get_seqno(struct drm_i915_private *dev_priv, u32 *seqno);
 int __must_check i915_gem_set_seqno(struct drm_device *dev, u32 seqno);
 
@@ -3972,6 +3975,8 @@  static inline void i915_trace_irq_get(struct intel_engine_cs *engine,
 
 static inline bool __i915_request_irq_complete(struct drm_i915_gem_request *req)
 {
+	struct intel_engine_cs *engine = req->engine;
+
 	/* Ensure our read of the seqno is coherent so that we
 	 * do not "miss an interrupt" (i.e. if this is the last
 	 * request and the seqno write from the GPU is not visible
@@ -3983,7 +3988,10 @@  static inline bool __i915_request_irq_complete(struct drm_i915_gem_request *req)
 	 * but it is easier and safer to do it every time the waiter
 	 * is woken.
 	 */
-	if (i915_gem_request_completed(req, false))
+	if (engine->irq_seqno_barrier)
+		engine->irq_seqno_barrier(engine);
+
+	if (i915_gem_request_completed(req))
 		return true;
 
 	/* We need to check whether any gpu reset happened in between
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index fe5ba4aa336c..30bd0d0d917b 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1375,9 +1375,9 @@  static bool busywait_stop(unsigned long timeout, unsigned cpu)
 	return this_cpu != cpu;
 }
 
-static bool __i915_spin_request(struct drm_i915_gem_request *req, int state)
+bool __i915_spin_request(const struct drm_i915_gem_request *req,
+			 int state, unsigned long timeout_us)
 {
-	unsigned long timeout;
 	unsigned cpu;
 
 	/* When waiting for high frequency requests, e.g. during synchronous
@@ -1390,19 +1390,15 @@  static bool __i915_spin_request(struct drm_i915_gem_request *req, int state)
 	 * takes to sleep on a request, on the order of a microsecond.
 	 */
 
-	/* Only spin if we know the GPU is processing this request */
-	if (!i915_gem_request_started(req, true))
-		return false;
-
-	timeout = local_clock_us(&cpu) + 5;
+	timeout_us += local_clock_us(&cpu);
 	do {
-		if (i915_gem_request_completed(req, true))
+		if (i915_gem_request_completed(req))
 			return true;
 
 		if (signal_pending_state(state, current))
 			break;
 
-		if (busywait_stop(timeout, cpu))
+		if (busywait_stop(timeout_us, cpu))
 			break;
 
 		cpu_relax_lowlatency();
@@ -1445,7 +1441,7 @@  int __i915_wait_request(struct drm_i915_gem_request *req,
 	if (list_empty(&req->list))
 		return 0;
 
-	if (i915_gem_request_completed(req, true))
+	if (i915_gem_request_completed(req))
 		return 0;
 
 	timeout_remain = MAX_SCHEDULE_TIMEOUT;
@@ -1470,7 +1466,7 @@  int __i915_wait_request(struct drm_i915_gem_request *req,
 		gen6_rps_boost(req->i915, rps, req->emitted_jiffies);
 
 	/* Optimistic spin for the next ~jiffie before touching IRQs */
-	if (__i915_spin_request(req, state))
+	if (i915_spin_request(req, state, 5))
 		goto complete;
 
 	set_current_state(state);
@@ -1512,6 +1508,10 @@  wakeup:
 		 */
 		if (__i915_request_irq_complete(req))
 			break;
+
+		/* Only spin if we know the GPU is processing this request */
+		if (i915_spin_request(req, state, 2))
+			break;
 	}
 	remove_wait_queue(&req->i915->gpu_error.wait_queue, &reset);
 
@@ -3050,8 +3050,16 @@  i915_gem_find_active_request(struct intel_engine_cs *engine)
 {
 	struct drm_i915_gem_request *request;
 
+	/* We are called by the error capture and reset at a random
+	 * point in time. In particular, note that neither is crucially
+	 * ordered with an interrupt. After a hang, the GPU is dead and we
+	 * assume that no more writes can happen (we waited long enough for
+	 * all writes that were in transaction to be flushed) - adding an
+	 * extra delay for a recent interrupt is pointless. Hence, we do
+	 * not need an engine->irq_seqno_barrier() before the seqno reads.
+	 */
 	list_for_each_entry(request, &engine->request_list, list) {
-		if (i915_gem_request_completed(request, false))
+		if (i915_gem_request_completed(request))
 			continue;
 
 		return request;
@@ -3183,7 +3191,7 @@  i915_gem_retire_requests_ring(struct intel_engine_cs *engine)
 					   struct drm_i915_gem_request,
 					   list);
 
-		if (!i915_gem_request_completed(request, true))
+		if (!i915_gem_request_completed(request))
 			break;
 
 		i915_gem_request_retire(request);
@@ -3207,7 +3215,7 @@  i915_gem_retire_requests_ring(struct intel_engine_cs *engine)
 	}
 
 	if (unlikely(engine->trace_irq_req &&
-		     i915_gem_request_completed(engine->trace_irq_req, true))) {
+		     i915_gem_request_completed(engine->trace_irq_req))) {
 		engine->irq_put(engine);
 		i915_gem_request_assign(&engine->trace_irq_req, NULL);
 	}
@@ -3305,7 +3313,7 @@  i915_gem_object_flush_active(struct drm_i915_gem_object *obj)
 		if (req == NULL)
 			continue;
 
-		if (i915_gem_request_completed(req, true))
+		if (i915_gem_request_completed(req))
 			i915_gem_object_retire__read(obj, i);
 	}
 
@@ -3413,7 +3421,7 @@  __i915_gem_object_sync(struct drm_i915_gem_object *obj,
 	if (to == from)
 		return 0;
 
-	if (i915_gem_request_completed(from_req, true))
+	if (i915_gem_request_completed(from_req))
 		return 0;
 
 	if (!i915_semaphore_is_enabled(to_i915(obj->base.dev))) {
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index 699da067bb85..adeafbc96760 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -11594,7 +11594,7 @@  static bool __pageflip_stall_check_cs(struct drm_i915_private *dev_priv,
 	vblank = intel_crtc_get_vblank_counter(intel_crtc);
 	if (work->flip_ready_vblank == 0) {
 		if (work->flip_queued_req &&
-		    !i915_gem_request_completed(work->flip_queued_req, true))
+		    !i915_gem_request_completed(work->flip_queued_req))
 			return false;
 
 		work->flip_ready_vblank = vblank;
diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index 4f612b96224e..2f54fc5d1131 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -7739,7 +7739,7 @@  static void __intel_rps_boost_work(struct work_struct *work)
 	struct request_boost *boost = container_of(work, struct request_boost, work);
 	struct drm_i915_gem_request *req = boost->req;
 
-	if (!i915_gem_request_completed(req, true))
+	if (!i915_gem_request_completed(req))
 		gen6_rps_boost(req->i915, NULL, req->emitted_jiffies);
 
 	i915_gem_request_unreference(req);
@@ -7753,7 +7753,7 @@  void intel_queue_rps_boost_for_request(struct drm_i915_gem_request *req)
 	if (req == NULL || INTEL_GEN(req->i915) < 6)
 		return;
 
-	if (i915_gem_request_completed(req, true))
+	if (i915_gem_request_completed(req))
 		return;
 
 	boost = kmalloc(sizeof(*boost), GFP_ATOMIC);