drm/i915/selftests: Pretty print the i915_active

Message ID 20191031101116.19894-1-chris@chris-wilson.co.uk (mailing list archive)
State New, archived
Series: drm/i915/selftests: Pretty print the i915_active

Commit Message

Chris Wilson Oct. 31, 2019, 10:11 a.m. UTC
If the idle_pulse fails to flush the i915_active, dump the tree to see
if that has any clues.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 .../drm/i915/gt/selftest_engine_heartbeat.c   |  4 ++
 drivers/gpu/drm/i915/i915_active.h            |  2 +
 drivers/gpu/drm/i915/selftests/i915_active.c  | 45 +++++++++++++++++++
 3 files changed, 51 insertions(+)

Comments

Mika Kuoppala Oct. 31, 2019, 2:11 p.m. UTC | #1
Chris Wilson <chris@chris-wilson.co.uk> writes:

> If the idle_pulse fails to flush the i915_active, dump the tree to see
> if that has any clues.
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>  .../drm/i915/gt/selftest_engine_heartbeat.c   |  4 ++
>  drivers/gpu/drm/i915/i915_active.h            |  2 +
>  drivers/gpu/drm/i915/selftests/i915_active.c  | 45 +++++++++++++++++++
>  3 files changed, 51 insertions(+)
>
> diff --git a/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c b/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c
> index 155c508024df..131c49ddf33f 100644
> --- a/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c
> +++ b/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c
> @@ -100,8 +100,12 @@ static int __live_idle_pulse(struct intel_engine_cs *engine,
>  	pulse_unlock_wait(p); /* synchronize with the retirement callback */
>  
>  	if (!i915_active_is_idle(&p->active)) {
> +		struct drm_printer m = drm_err_printer("pulse");
> +
>  		pr_err("%s: heartbeat pulse did not flush idle tasks\n",
>  		       engine->name);
> +		i915_active_print(&p->active, &m);
> +
>  		err = -EINVAL;
>  		goto out;
>  	}
> diff --git a/drivers/gpu/drm/i915/i915_active.h b/drivers/gpu/drm/i915/i915_active.h
> index 4f52fe6146d2..44859356ce97 100644
> --- a/drivers/gpu/drm/i915/i915_active.h
> +++ b/drivers/gpu/drm/i915/i915_active.h
> @@ -214,4 +214,6 @@ int i915_active_acquire_preallocate_barrier(struct i915_active *ref,
>  void i915_active_acquire_barrier(struct i915_active *ref);
>  void i915_request_add_active_barriers(struct i915_request *rq);
>  
> +void i915_active_print(struct i915_active *ref, struct drm_printer *m);
> +
>  #endif /* _I915_ACTIVE_H_ */
> diff --git a/drivers/gpu/drm/i915/selftests/i915_active.c b/drivers/gpu/drm/i915/selftests/i915_active.c
> index 96513a7d4739..260b0ee5d1e3 100644
> --- a/drivers/gpu/drm/i915/selftests/i915_active.c
> +++ b/drivers/gpu/drm/i915/selftests/i915_active.c
> @@ -205,3 +205,48 @@ int i915_active_live_selftests(struct drm_i915_private *i915)
>  
>  	return i915_subtests(tests, i915);
>  }
> +
> +static struct intel_engine_cs *node_to_barrier(struct active_node *it)
> +{
> +	struct intel_engine_cs *engine;
> +
> +	if (!is_barrier(&it->base))
> +		return NULL;
> +
> +	engine = __barrier_to_engine(it);
> +	smp_rmb(); /* serialise with add_active_barriers */

I did find the pair. Builds confidence.
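
The pattern being validated is the classic check/read/re-check: the
writer demotes the node before reusing its storage, so a reader that
still sees a barrier after the smp_rmb() knows the engine it read had
not yet been overwritten. A minimal sketch of that pattern, with
hypothetical names rather than the actual i915 structures:

	struct pub {
		bool is_barrier;
		int payload;	/* storage reused once demoted */
	};

	/* writer side -- cf. i915_request_add_active_barriers() */
	static void consume(struct pub *p)
	{
		WRITE_ONCE(p->is_barrier, false);	/* 1: demote */
		smp_wmb();	/* demotion visible before the reuse */
		WRITE_ONCE(p->payload, 0);		/* 2: reuse */
	}

	/* reader side -- cf. node_to_barrier() */
	static bool peek(struct pub *p, int *val)
	{
		if (!READ_ONCE(p->is_barrier))
			return false;
		*val = READ_ONCE(p->payload);
		smp_rmb();	/* pairs with the smp_wmb() above */
		/* still a barrier? then write 2 cannot have hit *val */
		return READ_ONCE(p->is_barrier);
	}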

> +	if (!is_barrier(&it->base))
> +		return NULL;
> +
> +	return engine;
> +}
> +
> +void i915_active_print(struct i915_active *ref, struct drm_printer *m)
> +{
> +	drm_printf(m, "active %pS:%pS\n", ref->active, ref->retire);
> +	drm_printf(m, "\tcount: %d\n", atomic_read(&ref->count));
> +	drm_printf(m, "\tpreallocated barriers? %s\n",
> +		   yesno(!llist_empty(&ref->preallocated_barriers)));
> +
> +	if (i915_active_acquire_if_busy(ref)) {
> +		struct active_node *it, *n;
> +
> +		rbtree_postorder_for_each_entry_safe(it, n, &ref->tree, node) {
> +			struct intel_engine_cs *engine;
> +

Does the acquire of ref keep the other leaves alive?
We seem to be safe on iteration, but the poking about
the fence set and timeline below is a question mark.

-Mika


> +			engine = node_to_barrier(it);
> +			if (engine) {
> +				drm_printf(m, "\tbarrier: %s\n", engine->name);
> +				continue;
> +			}
> +
> +			if (i915_active_fence_isset(&it->base)) {
> +				drm_printf(m,
> +					   "\ttimeline: %llx\n", it->timeline);
> +				continue;
> +			}
> +		}
> +
> +		i915_active_release(ref);
> +	}
> +}
> -- 
> 2.24.0.rc1
Chris Wilson Oct. 31, 2019, 2:18 p.m. UTC | #2
Quoting Mika Kuoppala (2019-10-31 14:11:58)
> Chris Wilson <chris@chris-wilson.co.uk> writes:
> 
> > [snip]
> > +     smp_rmb(); /* serialise with add_active_barriers */
> 
> I did find the pair. Builds confidence.
> 
> > [snip]
> > +     if (i915_active_acquire_if_busy(ref)) {
> > +             struct active_node *it, *n;
> > +
> > +             rbtree_postorder_for_each_entry_safe(it, n, &ref->tree, node) {
> > +                     struct intel_engine_cs *engine;
> > +
> 
> Does the acquire of ref keep the other leaves alive?
> We seem to be safe on iteration, but the poking about
> the fence set and timeline below is a question mark.

It prevents the tree+nodes from being freed, so we only have to worry
about whether the contents are still meaningful.

My memory says, and my assumption in this code is, that the iterator
is safe against insertions -- we won't get horribly lost if the tree
is rebalanced as we walk.
-Chris
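
A minimal sketch of that keep-alive scheme, with hypothetical names --
the real i915_active_acquire_if_busy() has more to it, but the core is
a reference that can only be taken while the count is already non-zero:

	struct example_active {
		atomic_t count;
		struct rb_root tree;	/* freed when count hits 0 */
	};

	static void walk_and_print(struct rb_root *tree);	/* stub */
	static void retire(struct example_active *ref);		/* stub */

	static bool example_acquire_if_busy(struct example_active *ref)
	{
		/* piggy-back on an existing reference, never 0 -> 1 */
		return atomic_add_unless(&ref->count, 1, 0);
	}

	static void example_dump(struct example_active *ref)
	{
		if (!example_acquire_if_busy(ref))
			return;	/* already idle, nothing to pin */

		/*
		 * The tree and its nodes cannot be freed while we hold
		 * the reference, but their contents may still change
		 * under us -- hence node_to_barrier()'s double check.
		 */
		walk_and_print(&ref->tree);

		if (atomic_dec_and_test(&ref->count))
			retire(ref);	/* we were the last user */
	}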
Chris Wilson Oct. 31, 2019, 2:33 p.m. UTC | #3
Quoting Chris Wilson (2019-10-31 14:18:56)
> My memory says, and my assumption in this code is, that the iterator
> is safe against insertions -- we won't get horribly lost if the tree
> is rebalanced as we walk.

Actually, the iterator is not perfect across rebalances. It won't matter
here in the selftest, since we are the only accessor, but the two other
users deserve thought.

In __active_retire, we have exclusive access to the tree as we are
freeing the nodes. Safe.

In i915_active_wait() [we can't take the mutex here due to shrinker
inversions!], we walk the tree to kick signaling on the nodes. So the
iterator is not perfect, but calling enable_signaling() is mostly an
optimisation so that we don't have to wait upon the background flush. So
I think we are safe to miss nodes, so long as the iterator itself is
bounded (which it must be).
-Chris
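
The safe-against-freeing property comes from
rbtree_postorder_for_each_entry_safe() in <linux/rbtree.h>, which
visits children before parents and caches the next entry up front. A
small sketch of the __active_retire-style use, with a hypothetical
node type:

	struct example_node {
		struct rb_node rb;
		u64 timeline;
	};

	static void example_retire_all(struct rb_root *root)
	{
		struct example_node *it, *n;

		/*
		 * Postorder plus a cached next pointer: kfree(it) is
		 * fine, as nothing later in the walk touches it again.
		 * What it does not survive is a concurrent rebalance,
		 * which may rotate nodes past the cached pointer --
		 * the bounded imperfection accepted above for
		 * i915_active_wait().
		 */
		rbtree_postorder_for_each_entry_safe(it, n, root, rb)
			kfree(it);

		*root = RB_ROOT;
	}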
Mika Kuoppala Oct. 31, 2019, 2:34 p.m. UTC | #4
Chris Wilson <chris@chris-wilson.co.uk> writes:

> If the idle_pulse fails to flush the i915_active, dump the tree to see
> if that has any clues.
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>

Reviewed-by: Mika Kuoppala <mika.kuoppala@linux.intel.com>

> [snip]
Patch

diff --git a/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c b/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c
index 155c508024df..131c49ddf33f 100644
--- a/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c
+++ b/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c
@@ -100,8 +100,12 @@  static int __live_idle_pulse(struct intel_engine_cs *engine,
 	pulse_unlock_wait(p); /* synchronize with the retirement callback */
 
 	if (!i915_active_is_idle(&p->active)) {
+		struct drm_printer m = drm_err_printer("pulse");
+
 		pr_err("%s: heartbeat pulse did not flush idle tasks\n",
 		       engine->name);
+		i915_active_print(&p->active, &m);
+
 		err = -EINVAL;
 		goto out;
 	}
diff --git a/drivers/gpu/drm/i915/i915_active.h b/drivers/gpu/drm/i915/i915_active.h
index 4f52fe6146d2..44859356ce97 100644
--- a/drivers/gpu/drm/i915/i915_active.h
+++ b/drivers/gpu/drm/i915/i915_active.h
@@ -214,4 +214,6 @@  int i915_active_acquire_preallocate_barrier(struct i915_active *ref,
 void i915_active_acquire_barrier(struct i915_active *ref);
 void i915_request_add_active_barriers(struct i915_request *rq);
 
+void i915_active_print(struct i915_active *ref, struct drm_printer *m);
+
 #endif /* _I915_ACTIVE_H_ */
diff --git a/drivers/gpu/drm/i915/selftests/i915_active.c b/drivers/gpu/drm/i915/selftests/i915_active.c
index 96513a7d4739..260b0ee5d1e3 100644
--- a/drivers/gpu/drm/i915/selftests/i915_active.c
+++ b/drivers/gpu/drm/i915/selftests/i915_active.c
@@ -205,3 +205,48 @@  int i915_active_live_selftests(struct drm_i915_private *i915)
 
 	return i915_subtests(tests, i915);
 }
+
+static struct intel_engine_cs *node_to_barrier(struct active_node *it)
+{
+	struct intel_engine_cs *engine;
+
+	if (!is_barrier(&it->base))
+		return NULL;
+
+	engine = __barrier_to_engine(it);
+	smp_rmb(); /* serialise with add_active_barriers */
+	if (!is_barrier(&it->base))
+		return NULL;
+
+	return engine;
+}
+
+void i915_active_print(struct i915_active *ref, struct drm_printer *m)
+{
+	drm_printf(m, "active %pS:%pS\n", ref->active, ref->retire);
+	drm_printf(m, "\tcount: %d\n", atomic_read(&ref->count));
+	drm_printf(m, "\tpreallocated barriers? %s\n",
+		   yesno(!llist_empty(&ref->preallocated_barriers)));
+
+	if (i915_active_acquire_if_busy(ref)) {
+		struct active_node *it, *n;
+
+		rbtree_postorder_for_each_entry_safe(it, n, &ref->tree, node) {
+			struct intel_engine_cs *engine;
+
+			engine = node_to_barrier(it);
+			if (engine) {
+				drm_printf(m, "\tbarrier: %s\n", engine->name);
+				continue;
+			}
+
+			if (i915_active_fence_isset(&it->base)) {
+				drm_printf(m,
+					   "\ttimeline: %llx\n", it->timeline);
+				continue;
+			}
+		}
+
+		i915_active_release(ref);
+	}
+}
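
For completeness, the shape of the dump follows directly from the
drm_printf() calls above. Modulo the prefix added by
drm_err_printer("pulse"), a failure would log along these lines (the
symbols, engine name and timeline id here are invented for
illustration):

	active pulse_active+0x0/0x10:pulse_retire+0x0/0x40
		count: 1
		preallocated barriers? no
		barrier: rcs0
		timeline: 2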