diff mbox series

drm/i915/gt: Report the currently active execlists request

Message ID 20200117101314.2897102-1-chris@chris-wilson.co.uk (mailing list archive)
State New, archived
Headers show
Series drm/i915/gt: Report the currently active execlists request | expand

Commit Message

Chris Wilson Jan. 17, 2020, 10:13 a.m. UTC
Since commit 22b7a426bbe1 ("drm/i915/execlists: Preempt-to-busy"), we
prune the engine->active.requests list prior to preemption, thus
removing the trace of the currently executing request. If that request
hangs rather than being preempted, we wrongly conclude that no active
request was on the GPU. Fortunately, this only impacts our debugging,
and not our means of hang detection or recovery.

References: 22b7a426bbe1 ("drm/i915/execlists: Preempt-to-busy")
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
---
 drivers/gpu/drm/i915/gt/intel_engine_cs.c | 29 ++++++++++++++++++-----
 1 file changed, 23 insertions(+), 6 deletions(-)

Comments

Mika Kuoppala Jan. 17, 2020, 11:23 a.m. UTC | #1
Chris Wilson <chris@chris-wilson.co.uk> writes:

> Since commit 22b7a426bbe1 ("drm/i915/execlists: Preempt-to-busy"), we
> prune the engine->active.requests list prior to preemption, thus
> removing the trace of the currently executing request. If that request
> hangs rather than be preempted, we conclude that no active request was
> on the GPU. Fortunately, this only impacts our debugging, and not our
> means of hang detection or recovery.
>
> References: 22b7a426bbe1 ("drm/i915/execlists: Preempt-to-busy")
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
> ---
>  drivers/gpu/drm/i915/gt/intel_engine_cs.c | 29 ++++++++++++++++++-----
>  1 file changed, 23 insertions(+), 6 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> index 06ff7695fa29..93878fd42a7a 100644
> --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> @@ -1643,7 +1643,7 @@ static bool match_ring(struct i915_request *rq)
>  struct i915_request *
>  intel_engine_find_active_request(struct intel_engine_cs *engine)
>  {
> -	struct i915_request *request, *active = NULL;
> +	struct i915_request *rq, *active = NULL;
>  
>  	/*
>  	 * We are called by the error capture, reset and to dump engine
> @@ -1657,18 +1657,35 @@ intel_engine_find_active_request(struct intel_engine_cs *engine)
>  	 * we only care about the snapshot of this moment.
>  	 */
>  	lockdep_assert_held(&engine->active.lock);
> -	list_for_each_entry(request, &engine->active.requests, sched.link) {
> -		if (i915_request_completed(request))
> +
> +	rcu_read_lock();
> +	rq = execlists_active(&engine->execlists);
> +	if (rq) {
> +		struct intel_timeline *tl = rq->context->timeline;
> +
> +		list_for_each_entry_continue_reverse(rq, &tl->requests, link) {
> +			if (i915_request_completed(rq))
> +				break;

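list_for_each_entry_continue_reverse() starts at the entry before rq,
so this walk begins at active - 1 and never looks at the active request
itself. So it makes me think that we should first check that the real
active has not completed.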

-Mika

> +
> +			active = rq;
> +		}
> +	}
> +	rcu_read_unlock();
> +	if (active)
> +		return active;
> +
> +	list_for_each_entry(rq, &engine->active.requests, sched.link) {
> +		if (i915_request_completed(rq))
>  			continue;
>  
> -		if (!i915_request_started(request))
> +		if (!i915_request_started(rq))
>  			continue;
>  
>  		/* More than one preemptible request may match! */
> -		if (!match_ring(request))
> +		if (!match_ring(rq))
>  			continue;
>  
> -		active = request;
> +		active = rq;
>  		break;
>  	}
>  
> -- 
> 2.25.0
Chris Wilson Jan. 17, 2020, 11:28 a.m. UTC | #2
Quoting Mika Kuoppala (2020-01-17 11:23:01)
> Chris Wilson <chris@chris-wilson.co.uk> writes:
> 
> > Since commit 22b7a426bbe1 ("drm/i915/execlists: Preempt-to-busy"), we
> > prune the engine->active.requests list prior to preemption, thus
> > removing the trace of the currently executing request. If that request
> > hangs rather than be preempted, we conclude that no active request was
> > on the GPU. Fortunately, this only impacts our debugging, and not our
> > means of hang detection or recovery.
> >
> > References: 22b7a426bbe1 ("drm/i915/execlists: Preempt-to-busy")
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
> > ---
> >  drivers/gpu/drm/i915/gt/intel_engine_cs.c | 29 ++++++++++++++++++-----
> >  1 file changed, 23 insertions(+), 6 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> > index 06ff7695fa29..93878fd42a7a 100644
> > --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> > +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> > @@ -1643,7 +1643,7 @@ static bool match_ring(struct i915_request *rq)
> >  struct i915_request *
> >  intel_engine_find_active_request(struct intel_engine_cs *engine)
> >  {
> > -     struct i915_request *request, *active = NULL;
> > +     struct i915_request *rq, *active = NULL;
> >  
> >       /*
> >        * We are called by the error capture, reset and to dump engine
> > @@ -1657,18 +1657,35 @@ intel_engine_find_active_request(struct intel_engine_cs *engine)
> >        * we only care about the snapshot of this moment.
> >        */
> >       lockdep_assert_held(&engine->active.lock);
> > -     list_for_each_entry(request, &engine->active.requests, sched.link) {
> > -             if (i915_request_completed(request))
> > +
> > +     rcu_read_lock();
> > +     rq = execlists_active(&engine->execlists);
> > +     if (rq) {
> > +             struct intel_timeline *tl = rq->context->timeline;
> > +
> > +             list_for_each_entry_continue_reverse(rq, &tl->requests, link) {
> > +                     if (i915_request_completed(rq))
> > +                             break;
> 
> This will be active - 1. So it makes me think that
> we should first check that the real active has not completed.

Hmm, you want list_for_each_entry_from_reverse() in this case, so that
the walk starts at the active request itself.
-Chris
diff mbox series

Patch

diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
index 06ff7695fa29..93878fd42a7a 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
@@ -1643,7 +1643,7 @@  static bool match_ring(struct i915_request *rq)
 struct i915_request *
 intel_engine_find_active_request(struct intel_engine_cs *engine)
 {
-	struct i915_request *request, *active = NULL;
+	struct i915_request *rq, *active = NULL;
 
 	/*
 	 * We are called by the error capture, reset and to dump engine
@@ -1657,18 +1657,35 @@  intel_engine_find_active_request(struct intel_engine_cs *engine)
 	 * we only care about the snapshot of this moment.
 	 */
 	lockdep_assert_held(&engine->active.lock);
-	list_for_each_entry(request, &engine->active.requests, sched.link) {
-		if (i915_request_completed(request))
+
+	rcu_read_lock();
+	rq = execlists_active(&engine->execlists);
+	if (rq) {
+		struct intel_timeline *tl = rq->context->timeline;
+
+		list_for_each_entry_continue_reverse(rq, &tl->requests, link) {
+			if (i915_request_completed(rq))
+				break;
+
+			active = rq;
+		}
+	}
+	rcu_read_unlock();
+	if (active)
+		return active;
+
+	list_for_each_entry(rq, &engine->active.requests, sched.link) {
+		if (i915_request_completed(rq))
 			continue;
 
-		if (!i915_request_started(request))
+		if (!i915_request_started(rq))
 			continue;
 
 		/* More than one preemptible request may match! */
-		if (!match_ring(request))
+		if (!match_ring(rq))
 			continue;
 
-		active = request;
+		active = rq;
 		break;
 	}