Message ID | 20191031101116.19894-1-chris@chris-wilson.co.uk (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | drm/i915/selftests: Pretty print the i915_active | expand |
Chris Wilson <chris@chris-wilson.co.uk> writes: > If the idle_pulse fails to flush the i915_active, dump the tree to see > if that has any clues. > > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> > --- > .../drm/i915/gt/selftest_engine_heartbeat.c | 4 ++ > drivers/gpu/drm/i915/i915_active.h | 2 + > drivers/gpu/drm/i915/selftests/i915_active.c | 45 +++++++++++++++++++ > 3 files changed, 51 insertions(+) > > diff --git a/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c b/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c > index 155c508024df..131c49ddf33f 100644 > --- a/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c > +++ b/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c > @@ -100,8 +100,12 @@ static int __live_idle_pulse(struct intel_engine_cs *engine, > pulse_unlock_wait(p); /* synchronize with the retirement callback */ > > if (!i915_active_is_idle(&p->active)) { > + struct drm_printer m = drm_err_printer("pulse"); > + > pr_err("%s: heartbeat pulse did not flush idle tasks\n", > engine->name); > + i915_active_print(&p->active, &m); > + > err = -EINVAL; > goto out; > } > diff --git a/drivers/gpu/drm/i915/i915_active.h b/drivers/gpu/drm/i915/i915_active.h > index 4f52fe6146d2..44859356ce97 100644 > --- a/drivers/gpu/drm/i915/i915_active.h > +++ b/drivers/gpu/drm/i915/i915_active.h > @@ -214,4 +214,6 @@ int i915_active_acquire_preallocate_barrier(struct i915_active *ref, > void i915_active_acquire_barrier(struct i915_active *ref); > void i915_request_add_active_barriers(struct i915_request *rq); > > +void i915_active_print(struct i915_active *ref, struct drm_printer *m); > + > #endif /* _I915_ACTIVE_H_ */ > diff --git a/drivers/gpu/drm/i915/selftests/i915_active.c b/drivers/gpu/drm/i915/selftests/i915_active.c > index 96513a7d4739..260b0ee5d1e3 100644 > --- a/drivers/gpu/drm/i915/selftests/i915_active.c > +++ b/drivers/gpu/drm/i915/selftests/i915_active.c > @@ -205,3 +205,48 @@ int i915_active_live_selftests(struct drm_i915_private *i915) > > 
return i915_subtests(tests, i915); > } > + > +static struct intel_engine_cs *node_to_barrier(struct active_node *it) > +{ > + struct intel_engine_cs *engine; > + > + if (!is_barrier(&it->base)) > + return NULL; > + > + engine = __barrier_to_engine(it); > + smp_rmb(); /* serialise with add_active_barriers */ I did find the pair. Builds confidence. > + if (!is_barrier(&it->base)) > + return NULL; > + > + return engine; > +} > + > +void i915_active_print(struct i915_active *ref, struct drm_printer *m) > +{ > + drm_printf(m, "active %pS:%pS\n", ref->active, ref->retire); > + drm_printf(m, "\tcount: %d\n", atomic_read(&ref->count)); > + drm_printf(m, "\tpreallocated barriers? %s\n", > + yesno(!llist_empty(&ref->preallocated_barriers))); > + > + if (i915_active_acquire_if_busy(ref)) { > + struct active_node *it, *n; > + > + rbtree_postorder_for_each_entry_safe(it, n, &ref->tree, node) { > + struct intel_engine_cs *engine; > + Does the acquire of ref keep the other leaves alive? We seem to be safe on iteration, but the poking about the fence set and timeline below is a question mark. -Mika > + engine = node_to_barrier(it); > + if (engine) { > + drm_printf(m, "\tbarrier: %s\n", engine->name); > + continue; > + } > + > + if (i915_active_fence_isset(&it->base)) { > + drm_printf(m, > + "\ttimeline: %llx\n", it->timeline); > + continue; > + } > + } > + > + i915_active_release(ref); > + } > +} > -- > 2.24.0.rc1 > > _______________________________________________ > Intel-gfx mailing list > Intel-gfx@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Quoting Mika Kuoppala (2019-10-31 14:11:58) > Chris Wilson <chris@chris-wilson.co.uk> writes: > > > If the idle_pulse fails to flush the i915_active, dump the tree to see > > if that has any clues. > > > > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> > > --- > > .../drm/i915/gt/selftest_engine_heartbeat.c | 4 ++ > > drivers/gpu/drm/i915/i915_active.h | 2 + > > drivers/gpu/drm/i915/selftests/i915_active.c | 45 +++++++++++++++++++ > > 3 files changed, 51 insertions(+) > > > > diff --git a/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c b/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c > > index 155c508024df..131c49ddf33f 100644 > > --- a/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c > > +++ b/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c > > @@ -100,8 +100,12 @@ static int __live_idle_pulse(struct intel_engine_cs *engine, > > pulse_unlock_wait(p); /* synchronize with the retirement callback */ > > > > if (!i915_active_is_idle(&p->active)) { > > + struct drm_printer m = drm_err_printer("pulse"); > > + > > pr_err("%s: heartbeat pulse did not flush idle tasks\n", > > engine->name); > > + i915_active_print(&p->active, &m); > > + > > err = -EINVAL; > > goto out; > > } > > diff --git a/drivers/gpu/drm/i915/i915_active.h b/drivers/gpu/drm/i915/i915_active.h > > index 4f52fe6146d2..44859356ce97 100644 > > --- a/drivers/gpu/drm/i915/i915_active.h > > +++ b/drivers/gpu/drm/i915/i915_active.h > > @@ -214,4 +214,6 @@ int i915_active_acquire_preallocate_barrier(struct i915_active *ref, > > void i915_active_acquire_barrier(struct i915_active *ref); > > void i915_request_add_active_barriers(struct i915_request *rq); > > > > +void i915_active_print(struct i915_active *ref, struct drm_printer *m); > > + > > #endif /* _I915_ACTIVE_H_ */ > > diff --git a/drivers/gpu/drm/i915/selftests/i915_active.c b/drivers/gpu/drm/i915/selftests/i915_active.c > > index 96513a7d4739..260b0ee5d1e3 100644 > > --- a/drivers/gpu/drm/i915/selftests/i915_active.c > > +++ 
b/drivers/gpu/drm/i915/selftests/i915_active.c > > @@ -205,3 +205,48 @@ int i915_active_live_selftests(struct drm_i915_private *i915) > > > > return i915_subtests(tests, i915); > > } > > + > > +static struct intel_engine_cs *node_to_barrier(struct active_node *it) > > +{ > > + struct intel_engine_cs *engine; > > + > > + if (!is_barrier(&it->base)) > > + return NULL; > > + > > + engine = __barrier_to_engine(it); > > + smp_rmb(); /* serialise with add_active_barriers */ > > I did find the pair. Builds confidence. > > > + if (!is_barrier(&it->base)) > > + return NULL; > > + > > + return engine; > > +} > > + > > +void i915_active_print(struct i915_active *ref, struct drm_printer *m) > > +{ > > + drm_printf(m, "active %pS:%pS\n", ref->active, ref->retire); > > + drm_printf(m, "\tcount: %d\n", atomic_read(&ref->count)); > > + drm_printf(m, "\tpreallocated barriers? %s\n", > > + yesno(!llist_empty(&ref->preallocated_barriers))); > > + > > + if (i915_active_acquire_if_busy(ref)) { > > + struct active_node *it, *n; > > + > > + rbtree_postorder_for_each_entry_safe(it, n, &ref->tree, node) { > > + struct intel_engine_cs *engine; > > + > > Does the aquire of ref keep the other lefs alive? > we seem to be safe on interation but the poking about > the fence set and timeline below is a question mark. It prevents the tree+nodes from being freed, so we only have to worry about the validity of the meaning of the contents. My memory says, and my assumption in this code, is that the iterator is safe against insertions -- we won't get horribly lost if the tree is rebalanced as we walk. -Chris
Quoting Chris Wilson (2019-10-31 14:18:56) > My memory says, and my assumption in this code, is that the > the iterator is safe against insertions -- we won't get horribly lost if > the tree is rebalanced as we walk. Actually, the iterator is not perfect across rebalances. It won't matter here in the selftest, since we are the only accessor, but the two other users deserve thought. In __active_retire, we have exclusive access to the tree as we are freeing the nodes. Safe. In i915_active_wait() [we can't take the mutex here due to shrinker inversions!], we walk the tree to kick signaling on the nodes. So the iterator is not perfect, but calling enable_signaling() is mostly an optimisation so that we don't have to wait upon the background flush. So I think we are safe to miss nodes, so long as the iterator itself is bounded (which it must be). -Chris
Chris Wilson <chris@chris-wilson.co.uk> writes: > If the idle_pulse fails to flush the i915_active, dump the tree to see > if that has any clues. > > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> Reviewed-by: Mika Kuoppala <mika.kuoppala@linux.intel.com> > --- > .../drm/i915/gt/selftest_engine_heartbeat.c | 4 ++ > drivers/gpu/drm/i915/i915_active.h | 2 + > drivers/gpu/drm/i915/selftests/i915_active.c | 45 +++++++++++++++++++ > 3 files changed, 51 insertions(+) > > diff --git a/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c b/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c > index 155c508024df..131c49ddf33f 100644 > --- a/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c > +++ b/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c > @@ -100,8 +100,12 @@ static int __live_idle_pulse(struct intel_engine_cs *engine, > pulse_unlock_wait(p); /* synchronize with the retirement callback */ > > if (!i915_active_is_idle(&p->active)) { > + struct drm_printer m = drm_err_printer("pulse"); > + > pr_err("%s: heartbeat pulse did not flush idle tasks\n", > engine->name); > + i915_active_print(&p->active, &m); > + > err = -EINVAL; > goto out; > } > diff --git a/drivers/gpu/drm/i915/i915_active.h b/drivers/gpu/drm/i915/i915_active.h > index 4f52fe6146d2..44859356ce97 100644 > --- a/drivers/gpu/drm/i915/i915_active.h > +++ b/drivers/gpu/drm/i915/i915_active.h > @@ -214,4 +214,6 @@ int i915_active_acquire_preallocate_barrier(struct i915_active *ref, > void i915_active_acquire_barrier(struct i915_active *ref); > void i915_request_add_active_barriers(struct i915_request *rq); > > +void i915_active_print(struct i915_active *ref, struct drm_printer *m); > + > #endif /* _I915_ACTIVE_H_ */ > diff --git a/drivers/gpu/drm/i915/selftests/i915_active.c b/drivers/gpu/drm/i915/selftests/i915_active.c > index 96513a7d4739..260b0ee5d1e3 100644 > --- a/drivers/gpu/drm/i915/selftests/i915_active.c > +++ b/drivers/gpu/drm/i915/selftests/i915_active.c > @@ -205,3 +205,48 @@ int 
i915_active_live_selftests(struct drm_i915_private *i915) > > return i915_subtests(tests, i915); > } > + > +static struct intel_engine_cs *node_to_barrier(struct active_node *it) > +{ > + struct intel_engine_cs *engine; > + > + if (!is_barrier(&it->base)) > + return NULL; > + > + engine = __barrier_to_engine(it); > + smp_rmb(); /* serialise with add_active_barriers */ > + if (!is_barrier(&it->base)) > + return NULL; > + > + return engine; > +} > + > +void i915_active_print(struct i915_active *ref, struct drm_printer *m) > +{ > + drm_printf(m, "active %pS:%pS\n", ref->active, ref->retire); > + drm_printf(m, "\tcount: %d\n", atomic_read(&ref->count)); > + drm_printf(m, "\tpreallocated barriers? %s\n", > + yesno(!llist_empty(&ref->preallocated_barriers))); > + > + if (i915_active_acquire_if_busy(ref)) { > + struct active_node *it, *n; > + > + rbtree_postorder_for_each_entry_safe(it, n, &ref->tree, node) { > + struct intel_engine_cs *engine; > + > + engine = node_to_barrier(it); > + if (engine) { > + drm_printf(m, "\tbarrier: %s\n", engine->name); > + continue; > + } > + > + if (i915_active_fence_isset(&it->base)) { > + drm_printf(m, > + "\ttimeline: %llx\n", it->timeline); > + continue; > + } > + } > + > + i915_active_release(ref); > + } > +} > -- > 2.24.0.rc1 > > _______________________________________________ > Intel-gfx mailing list > Intel-gfx@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/intel-gfx
diff --git a/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c b/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c index 155c508024df..131c49ddf33f 100644 --- a/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c +++ b/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c @@ -100,8 +100,12 @@ static int __live_idle_pulse(struct intel_engine_cs *engine, pulse_unlock_wait(p); /* synchronize with the retirement callback */ if (!i915_active_is_idle(&p->active)) { + struct drm_printer m = drm_err_printer("pulse"); + pr_err("%s: heartbeat pulse did not flush idle tasks\n", engine->name); + i915_active_print(&p->active, &m); + err = -EINVAL; goto out; } diff --git a/drivers/gpu/drm/i915/i915_active.h b/drivers/gpu/drm/i915/i915_active.h index 4f52fe6146d2..44859356ce97 100644 --- a/drivers/gpu/drm/i915/i915_active.h +++ b/drivers/gpu/drm/i915/i915_active.h @@ -214,4 +214,6 @@ int i915_active_acquire_preallocate_barrier(struct i915_active *ref, void i915_active_acquire_barrier(struct i915_active *ref); void i915_request_add_active_barriers(struct i915_request *rq); +void i915_active_print(struct i915_active *ref, struct drm_printer *m); + #endif /* _I915_ACTIVE_H_ */ diff --git a/drivers/gpu/drm/i915/selftests/i915_active.c b/drivers/gpu/drm/i915/selftests/i915_active.c index 96513a7d4739..260b0ee5d1e3 100644 --- a/drivers/gpu/drm/i915/selftests/i915_active.c +++ b/drivers/gpu/drm/i915/selftests/i915_active.c @@ -205,3 +205,48 @@ int i915_active_live_selftests(struct drm_i915_private *i915) return i915_subtests(tests, i915); } + +static struct intel_engine_cs *node_to_barrier(struct active_node *it) +{ + struct intel_engine_cs *engine; + + if (!is_barrier(&it->base)) + return NULL; + + engine = __barrier_to_engine(it); + smp_rmb(); /* serialise with add_active_barriers */ + if (!is_barrier(&it->base)) + return NULL; + + return engine; +} + +void i915_active_print(struct i915_active *ref, struct drm_printer *m) +{ + drm_printf(m, "active %pS:%pS\n", ref->active, 
ref->retire); + drm_printf(m, "\tcount: %d\n", atomic_read(&ref->count)); + drm_printf(m, "\tpreallocated barriers? %s\n", + yesno(!llist_empty(&ref->preallocated_barriers))); + + if (i915_active_acquire_if_busy(ref)) { + struct active_node *it, *n; + + rbtree_postorder_for_each_entry_safe(it, n, &ref->tree, node) { + struct intel_engine_cs *engine; + + engine = node_to_barrier(it); + if (engine) { + drm_printf(m, "\tbarrier: %s\n", engine->name); + continue; + } + + if (i915_active_fence_isset(&it->base)) { + drm_printf(m, + "\ttimeline: %llx\n", it->timeline); + continue; + } + } + + i915_active_release(ref); + } +}
If the idle_pulse fails to flush the i915_active, dump the tree to see if that has any clues. Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> --- .../drm/i915/gt/selftest_engine_heartbeat.c | 4 ++ drivers/gpu/drm/i915/i915_active.h | 2 + drivers/gpu/drm/i915/selftests/i915_active.c | 45 +++++++++++++++++++ 3 files changed, 51 insertions(+)