[020/190] drm/i915: Remove the lazy_coherency parameter from request-completed?

Message ID 1452503961-14837-20-git-send-email-chris@chris-wilson.co.uk (mailing list archive)
State New, archived

Commit Message

Chris Wilson Jan. 11, 2016, 9:16 a.m. UTC
Now that we have split out the seqno-barrier from the
engine->get_seqno() callback itself, we can move the users of the
seqno-barrier to the required callsites, simplifying the common code and
making the required workaround handling much more explicit.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_debugfs.c  |  4 ++--
 drivers/gpu/drm/i915/i915_drv.h      | 17 ++++++++---------
 drivers/gpu/drm/i915/i915_gem.c      | 24 ++++++++++++++++--------
 drivers/gpu/drm/i915/intel_display.c |  2 +-
 drivers/gpu/drm/i915/intel_pm.c      |  4 ++--
 5 files changed, 29 insertions(+), 22 deletions(-)
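
The shape of the change, in brief: the coherency decision moves out of the
helper and into the callers that need it. A minimal sketch using the
functions touched by this patch (illustrative only; "engine" stands in for
req->ring and "completed" is a local):

	/* Before: coherency hidden behind a boolean on every query. */
	bool completed = i915_gem_request_completed(req, false /* !lazy_coherency */);

	/* After: the rare callers that need the workaround apply the
	 * barrier explicitly; the completion check itself stays cheap. */
	if (engine->irq_seqno_barrier)
		engine->irq_seqno_barrier(engine);
	completed = i915_gem_request_completed(req);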

Comments

Dave Gordon Jan. 11, 2016, 3:45 p.m. UTC | #1
On 11/01/16 09:16, Chris Wilson wrote:
> Now that we have split out the seqno-barrier from the
> engine->get_seqno() callback itself, we can move the users of the
> seqno-barrier to the required callsites, simplifying the common code and
> making the required workaround handling much more explicit.
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>   drivers/gpu/drm/i915/i915_debugfs.c  |  4 ++--
>   drivers/gpu/drm/i915/i915_drv.h      | 17 ++++++++---------
>   drivers/gpu/drm/i915/i915_gem.c      | 24 ++++++++++++++++--------
>   drivers/gpu/drm/i915/intel_display.c |  2 +-
>   drivers/gpu/drm/i915/intel_pm.c      |  4 ++--
>   5 files changed, 29 insertions(+), 22 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
> index 1499e2337e5d..d09e48455dcb 100644
> --- a/drivers/gpu/drm/i915/i915_debugfs.c
> +++ b/drivers/gpu/drm/i915/i915_debugfs.c
> @@ -601,7 +601,7 @@ static int i915_gem_pageflip_info(struct seq_file *m, void *data)
>   					   i915_gem_request_get_seqno(work->flip_queued_req),
>   					   dev_priv->next_seqno,
>   					   ring->get_seqno(ring),
> -					   i915_gem_request_completed(work->flip_queued_req, true));
> +					   i915_gem_request_completed(work->flip_queued_req));
>   			} else
>   				seq_printf(m, "Flip not associated with any ring\n");
>   			seq_printf(m, "Flip queued on frame %d, (was ready on frame %d), now %d\n",
> @@ -1354,8 +1354,8 @@ static int i915_hangcheck_info(struct seq_file *m, void *unused)
>   	intel_runtime_pm_get(dev_priv);
>
>   	for_each_ring(ring, dev_priv, i) {
> -		seqno[i] = ring->get_seqno(ring);
>   		acthd[i] = intel_ring_get_active_head(ring);
> +		seqno[i] = ring->get_seqno(ring);
>   	}
>
>   	i915_get_extra_instdone(dev, instdone);
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 9762aa76bb0a..44d46018ee13 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -2969,20 +2969,14 @@ i915_seqno_passed(uint32_t seq1, uint32_t seq2)
>   	return (int32_t)(seq1 - seq2) >= 0;
>   }
>
> -static inline bool i915_gem_request_started(struct drm_i915_gem_request *req,
> -					   bool lazy_coherency)
> +static inline bool i915_gem_request_started(struct drm_i915_gem_request *req)
>   {
> -	if (!lazy_coherency && req->ring->irq_seqno_barrier)
> -		req->ring->irq_seqno_barrier(req->ring);
>   	return i915_seqno_passed(req->ring->get_seqno(req->ring),
>   				 req->previous_seqno);
>   }
> -static inline bool i915_gem_request_completed(struct drm_i915_gem_request *req,
> -					      bool lazy_coherency)
> +static inline bool i915_gem_request_completed(struct drm_i915_gem_request *req)
>   {
> -	if (!lazy_coherency && req->ring->irq_seqno_barrier)
> -		req->ring->irq_seqno_barrier(req->ring);
>   	return i915_seqno_passed(req->ring->get_seqno(req->ring),
>   				 req->seqno);
>   }
> @@ -3636,6 +3630,8 @@ static inline void i915_trace_irq_get(struct intel_engine_cs *ring,
>
>   static inline bool __i915_request_irq_complete(struct drm_i915_gem_request *req)
>   {
> +	struct intel_engine_cs *engine = req->ring;
> +
>   	/* Ensure our read of the seqno is coherent so that we
>   	 * do not "miss an interrupt" (i.e. if this is the last
>   	 * request and the seqno write from the GPU is not visible
> @@ -3647,7 +3643,10 @@ static inline bool __i915_request_irq_complete(struct drm_i915_gem_request *req)
>   	 * but it is easier and safer to do it every time the waiter
>   	 * is woken.
>   	 */
> -	if (i915_gem_request_completed(req, false))
> +	if (engine->irq_seqno_barrier)
> +		engine->irq_seqno_barrier(engine);

I'm still not convinced that this is the right place for the magic, but 
at least it's preferable to having a lazy_coherency parameter. So on the 
basis that the proper review criterion is "better than before, and 
creates no new problems", and not "fixes all known issues", then

Reviewed-by: Dave Gordon <david.s.gordon@intel.com>

> +
> +	if (i915_gem_request_completed(req))
>   		return true;
Chris Wilson Jan. 11, 2016, 4:24 p.m. UTC | #2
On Mon, Jan 11, 2016 at 03:45:08PM +0000, Dave Gordon wrote:
> >@@ -3647,7 +3643,10 @@ static inline bool __i915_request_irq_complete(struct drm_i915_gem_request *req)
> >  	 * but it is easier and safer to do it every time the waiter
> >  	 * is woken.
> >  	 */
> >-	if (i915_gem_request_completed(req, false))
> >+	if (engine->irq_seqno_barrier)
> >+		engine->irq_seqno_barrier(engine);
> 
> I'm still not convinced that this is the right place for the magic,
> but at least it's preferable to having a lazy_coherency parameter.
> So on the basis that the proper review criterion is "better than
> before, and creates no new problems", and not "fixes all known
> issues", then

I've tried very hard to explain why this must be, and only be, in the
interrupt handler bottom-half. The basic premise is that we need some
delay between the interrupt and the seqno read (on certain platforms) to
compensate for the unordered nature of MI_STORE_DWORD_INDEX vs
MI_USER_INTERRUPT. There are other ways of inserting that delay, mostly
by adding commands in the ring between the data store and the interrupt,
but my preference is to insert the delay only as required on the waiter
side, rather than after every single batch (based on the current state
of affairs, where waits should be rare). Even more so if we can convince
userspace to switch over to userspace fences, rather than hitting the
ioctls every time. If you are interested, the mesa side looks like
http://cgit.freedesktop.org/~ickle/mesa/commit/?h=brw-batch2&id=f2c13b212dd30562db7b405d6ea79c87456ead51
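
For context, a minimal sketch of the waiter-side placement described
above. The callback name matches this patch, but the body is
illustrative only -- the real per-platform barriers differ (register
reads, small delays, cacheline invalidation):

	/* Give the GPU's seqno store (MI_STORE_DWORD_INDEX) time to
	 * become visible before the CPU trusts its read of the status
	 * page; an uncached posting read of an engine register forces
	 * that round trip. */
	static void example_irq_seqno_barrier(struct intel_engine_cs *engine)
	{
		/* POSTING_READ() picks up dev_priv from scope. */
		struct drm_i915_private *dev_priv = to_i915(engine->dev);

		POSTING_READ(RING_ACTHD(engine->mmio_base));
	}

The alternative placement would emit the delay into the ring after every
batch, paying the cost whether or not anyone ever waits on the request.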

The opposite question is where do you think is the right place to insert
the interrupt delay?
-Chris
Mika Kuoppala Jan. 12, 2016, 10:27 a.m. UTC | #3
Chris Wilson <chris@chris-wilson.co.uk> writes:

> Now that we have split out the seqno-barrier from the
> engine->get_seqno() callback itself, we can move the users of the
> seqno-barrier to the required callsites, simplifying the common code and
> making the required workaround handling much more explicit.
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>  drivers/gpu/drm/i915/i915_debugfs.c  |  4 ++--
>  drivers/gpu/drm/i915/i915_drv.h      | 17 ++++++++---------
>  drivers/gpu/drm/i915/i915_gem.c      | 24 ++++++++++++++++--------
>  drivers/gpu/drm/i915/intel_display.c |  2 +-
>  drivers/gpu/drm/i915/intel_pm.c      |  4 ++--
>  5 files changed, 29 insertions(+), 22 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
> index 1499e2337e5d..d09e48455dcb 100644
> --- a/drivers/gpu/drm/i915/i915_debugfs.c
> +++ b/drivers/gpu/drm/i915/i915_debugfs.c
> @@ -1354,8 +1354,8 @@ static int i915_hangcheck_info(struct seq_file *m, void *unused)
>  	intel_runtime_pm_get(dev_priv);
>  
>  	for_each_ring(ring, dev_priv, i) {
> -		seqno[i] = ring->get_seqno(ring);
>  		acthd[i] = intel_ring_get_active_head(ring);
> +		seqno[i] = ring->get_seqno(ring);

Oh! Perhaps I am overly optimistic but did you just show how to solve
the 'hangcheck elapsed <random> ring idle..' coherency issue
in hangcheck?

I would like to have a separate patch that does this ordering
here, and also in i915_hangcheck_elapsed, to be on the safe
side, regardless.

Or at a minimum, copy the acthd-read-before-seqno-read ordering
into i915_hangcheck_elapsed as well.
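
(A sketch of the requested copy as it might look inside
i915_hangcheck_elapsed -- hypothetical: only the two reads and their
order come from the hunk above; declarations and the surrounding
bookkeeping are elided, and the comment is one plausible reading of
why the order matters:)

	for_each_ring(ring, dev_priv, i) {
		/* Sample ACTHD before the seqno: the uncached register
		 * read buys any in-flight seqno write time to land, so
		 * the seqno sampled second is at least as fresh as the
		 * head pointer and hangcheck errs towards "busy". */
		acthd = intel_ring_get_active_head(ring);
		seqno = ring->get_seqno(ring);

		/* ... existing per-ring hangcheck state updates ... */
	}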

-Mika


Chris Wilson Jan. 12, 2016, 10:51 a.m. UTC | #4
On Tue, Jan 12, 2016 at 12:27:05PM +0200, Mika Kuoppala wrote:
> >  	for_each_ring(ring, dev_priv, i) {
> > -		seqno[i] = ring->get_seqno(ring);
> >  		acthd[i] = intel_ring_get_active_head(ring);
> > +		seqno[i] = ring->get_seqno(ring);
> 
> Oh! Perhaps I am overly optimistic but did you just show how to solve
> the 'hangcheck elapsed <random> ring idle..' coherency issue
> in hangcheck?

No. There are two causes: one is that we genuinely inspect the seqno
in the interrupt bottom-half before it has been written, and the other
is indeed the race you keep mentioning between hangcheck looking at
waiters and the waiter setting itself up.
-Chris

Patch

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 1499e2337e5d..d09e48455dcb 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -601,7 +601,7 @@ static int i915_gem_pageflip_info(struct seq_file *m, void *data)
 					   i915_gem_request_get_seqno(work->flip_queued_req),
 					   dev_priv->next_seqno,
 					   ring->get_seqno(ring),
-					   i915_gem_request_completed(work->flip_queued_req, true));
+					   i915_gem_request_completed(work->flip_queued_req));
 			} else
 				seq_printf(m, "Flip not associated with any ring\n");
 			seq_printf(m, "Flip queued on frame %d, (was ready on frame %d), now %d\n",
@@ -1354,8 +1354,8 @@ static int i915_hangcheck_info(struct seq_file *m, void *unused)
 	intel_runtime_pm_get(dev_priv);
 
 	for_each_ring(ring, dev_priv, i) {
-		seqno[i] = ring->get_seqno(ring);
 		acthd[i] = intel_ring_get_active_head(ring);
+		seqno[i] = ring->get_seqno(ring);
 	}
 
 	i915_get_extra_instdone(dev, instdone);
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 9762aa76bb0a..44d46018ee13 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2969,20 +2969,14 @@ i915_seqno_passed(uint32_t seq1, uint32_t seq2)
 	return (int32_t)(seq1 - seq2) >= 0;
 }
 
-static inline bool i915_gem_request_started(struct drm_i915_gem_request *req,
-					   bool lazy_coherency)
+static inline bool i915_gem_request_started(struct drm_i915_gem_request *req)
 {
-	if (!lazy_coherency && req->ring->irq_seqno_barrier)
-		req->ring->irq_seqno_barrier(req->ring);
 	return i915_seqno_passed(req->ring->get_seqno(req->ring),
 				 req->previous_seqno);
 }
 
-static inline bool i915_gem_request_completed(struct drm_i915_gem_request *req,
-					      bool lazy_coherency)
+static inline bool i915_gem_request_completed(struct drm_i915_gem_request *req)
 {
-	if (!lazy_coherency && req->ring->irq_seqno_barrier)
-		req->ring->irq_seqno_barrier(req->ring);
 	return i915_seqno_passed(req->ring->get_seqno(req->ring),
 				 req->seqno);
 }
@@ -3636,6 +3630,8 @@ static inline void i915_trace_irq_get(struct intel_engine_cs *ring,
 
 static inline bool __i915_request_irq_complete(struct drm_i915_gem_request *req)
 {
+	struct intel_engine_cs *engine = req->ring;
+
 	/* Ensure our read of the seqno is coherent so that we
 	 * do not "miss an interrupt" (i.e. if this is the last
 	 * request and the seqno write from the GPU is not visible
@@ -3647,7 +3643,10 @@ static inline bool __i915_request_irq_complete(struct drm_i915_gem_request *req)
 	 * but it is easier and safer to do it every time the waiter
 	 * is woken.
 	 */
-	if (i915_gem_request_completed(req, false))
+	if (engine->irq_seqno_barrier)
+		engine->irq_seqno_barrier(engine);
+
+	if (i915_gem_request_completed(req))
 		return true;
 
 	/* We need to check whether any gpu reset happened in between
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 4b26529f1f44..d125820c6309 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1171,12 +1171,12 @@ static bool __i915_spin_request(struct drm_i915_gem_request *req,
 	 */
 
 	/* Only spin if we know the GPU is processing this request */
-	if (!i915_gem_request_started(req, true))
+	if (!i915_gem_request_started(req))
 		return false;
 
 	timeout = local_clock_us(&cpu) + 5;
 	do {
-		if (i915_gem_request_completed(req, true))
+		if (i915_gem_request_completed(req))
 			return true;
 
 		if (signal_pending_state(state, wait->task))
@@ -1228,7 +1228,7 @@ int __i915_wait_request(struct drm_i915_gem_request *req,
 	if (list_empty(&req->list))
 		return 0;
 
-	if (i915_gem_request_completed(req, true))
+	if (i915_gem_request_completed(req))
 		return 0;
 
 	timeout_remain = MAX_SCHEDULE_TIMEOUT;
@@ -2724,8 +2724,16 @@ i915_gem_find_active_request(struct intel_engine_cs *ring)
 {
 	struct drm_i915_gem_request *request;
 
+	/* We are called by the error capture and reset at a random
+	 * point in time. In particular, note that neither is crucially
+	 * ordered with an interrupt. After a hang, the GPU is dead and we
+	 * assume that no more writes can happen (we waited long enough for
+	 * all writes that were in transaction to be flushed) - adding an
+	 * extra delay for a recent interrupt is pointless. Hence, we do
+	 * not need an engine->irq_seqno_barrier() before the seqno reads.
+	 */
 	list_for_each_entry(request, &ring->request_list, list) {
-		if (i915_gem_request_completed(request, false))
+		if (i915_gem_request_completed(request))
 			continue;
 
 		return request;
@@ -2859,7 +2867,7 @@ i915_gem_retire_requests_ring(struct intel_engine_cs *ring)
 					   struct drm_i915_gem_request,
 					   list);
 
-		if (!i915_gem_request_completed(request, true))
+		if (!i915_gem_request_completed(request))
 			break;
 
 		i915_gem_request_retire(request);
@@ -2883,7 +2891,7 @@ i915_gem_retire_requests_ring(struct intel_engine_cs *ring)
 	}
 
 	if (unlikely(ring->trace_irq_req &&
-		     i915_gem_request_completed(ring->trace_irq_req, true))) {
+		     i915_gem_request_completed(ring->trace_irq_req))) {
 		ring->irq_put(ring);
 		i915_gem_request_assign(&ring->trace_irq_req, NULL);
 	}
@@ -2995,7 +3003,7 @@ i915_gem_object_flush_active(struct drm_i915_gem_object *obj)
 		if (list_empty(&req->list))
 			goto retire;
 
-		if (i915_gem_request_completed(req, true)) {
+		if (i915_gem_request_completed(req)) {
 			__i915_gem_request_retire__upto(req);
 retire:
 			i915_gem_object_retire__read(obj, i);
@@ -3104,7 +3112,7 @@ __i915_gem_object_sync(struct drm_i915_gem_object *obj,
 	if (to == from)
 		return 0;
 
-	if (i915_gem_request_completed(from_req, true))
+	if (i915_gem_request_completed(from_req))
 		return 0;
 
 	if (!i915_semaphore_is_enabled(obj->base.dev)) {
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index 7e36f85d3109..de4d4a0d923a 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -11523,7 +11523,7 @@ static bool __intel_pageflip_stall_check(struct drm_device *dev,
 
 	if (work->flip_ready_vblank == 0) {
 		if (work->flip_queued_req &&
-		    !i915_gem_request_completed(work->flip_queued_req, true))
+		    !i915_gem_request_completed(work->flip_queued_req))
 			return false;
 
 		work->flip_ready_vblank = drm_crtc_vblank_count(crtc);
diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index 9df9e9a22f3c..401c3770057d 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -7286,7 +7286,7 @@ static void __intel_rps_boost_work(struct work_struct *work)
 	struct request_boost *boost = container_of(work, struct request_boost, work);
 	struct drm_i915_gem_request *req = boost->req;
 
-	if (!i915_gem_request_completed(req, true))
+	if (!i915_gem_request_completed(req))
 		gen6_rps_boost(to_i915(req->ring->dev), NULL,
 			       req->emitted_jiffies);
 
@@ -7302,7 +7302,7 @@ void intel_queue_rps_boost_for_request(struct drm_device *dev,
 	if (req == NULL || INTEL_INFO(dev)->gen < 6)
 		return;
 
-	if (i915_gem_request_completed(req, true))
+	if (i915_gem_request_completed(req))
 		return;
 
 	boost = kmalloc(sizeof(*boost), GFP_ATOMIC);