diff mbox series

[08/10] drm/i915/selftests: Measure set-priority duration

Message ID 20210120122205.2808-8-chris@chris-wilson.co.uk (mailing list archive)
State New, archived
Headers show
Series [01/10] drm/i915/gt: Do not suspend bonded requests if one hangs | expand

Commit Message

Chris Wilson Jan. 20, 2021, 12:22 p.m. UTC
As a topological sort, we expect it to run in linear graph time,
O(V+E). In removing the recursion, it is no longer a DFS but rather a
BFS, and performs as O(VE). Let's demonstrate how bad this is with a few
examples, and build a few test cases to verify a potential fix.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_scheduler.c         |   4 +
 .../drm/i915/selftests/i915_live_selftests.h  |   1 +
 .../drm/i915/selftests/i915_perf_selftests.h  |   1 +
 .../gpu/drm/i915/selftests/i915_scheduler.c   | 679 ++++++++++++++++++
 4 files changed, 685 insertions(+)
 create mode 100644 drivers/gpu/drm/i915/selftests/i915_scheduler.c

Comments

Andi Shyti Jan. 26, 2021, 6:05 p.m. UTC | #1
On Wed, Jan 20, 2021 at 12:22:03PM +0000, Chris Wilson wrote:
> As a topological sort, we expect it to run in linear graph time,
> O(V+E). In removing the recursion, it is no longer a DFS but rather a
> BFS, and performs as O(VE). Let's demonstrate how bad this is with a few
> examples, and build a few test cases to verify a potential fix.

very noble purpose, but...

[...]

> +static int sparse(struct drm_i915_private *i915,
> +		  bool (*fn)(struct i915_request *rq,
> +			     unsigned long v, unsigned long e))
> +{
> +	return chains(i915, sparse_chain, fn);
> +}

... this is quite an intricate web of functions calling each
other.

Is there any simpler way to do this? This is the kind of code
that if you go on holiday for a few days you forget what you
wrote.

I do need three drawing boards and 24 fingers for keeping track
of what's happening here. :)

> +
> +static void report(const char *what, unsigned long v, unsigned long e, u64 dt)
> +{
> +	pr_info("(%4lu, %7lu), %s:%10lluns\n", v, e, what, dt);
> +}
> +
> +static u64 __set_priority(struct i915_request *rq, int prio)
> +{
> +	u64 dt;
> +
> +	preempt_disable();
> +	dt = ktime_get_raw_fast_ns();
> +	i915_request_set_priority(rq, prio);
> +	dt = ktime_get_raw_fast_ns() - dt;
> +	preempt_enable();
> +
> +	return dt;
> +}
> +
> +static bool set_priority(struct i915_request *rq,
> +			 unsigned long v, unsigned long e)
> +{
> +	report("set-priority", v, e, __set_priority(rq, I915_PRIORITY_BARRIER));

can't we pr_info directly here and spare a function?

> +	return true;
> +}
> +
> +static int single_priority(void *arg)
> +{
> +	return single(arg, set_priority);
> +}
> +
> +static int wide_priority(void *arg)
> +{
> +	return wide(arg, set_priority);
> +}
> +
> +static int inv_priority(void *arg)
> +{
> +	return inv(arg, set_priority);
> +}
> +
> +static int sparse_priority(void *arg)
> +{
> +	return sparse(arg, set_priority);
> +}
> +
> +int i915_scheduler_perf_selftests(struct drm_i915_private *i915)
> +{
> +	static const struct i915_subtest tests[] = {
> +		SUBTEST(single_priority),
> +		SUBTEST(wide_priority),
> +		SUBTEST(inv_priority),
> +		SUBTEST(sparse_priority),
> +	};
> +	static const struct {
> +		const char *name;
> +		size_t sz;
> +	} types[] = {
> +#define T(t) { #t, sizeof(struct t) }
> +		T(i915_priolist),
> +		T(i915_sched_attr),
> +		T(i915_sched_node),
> +#undef T

is this really making the code better? Is it a big deal to
clearly use

		{ i915_priolist, sizeof(i915_priolist) },
		{ i915_sched_attr, sizeof(i915_sched_attr) },
		{ i915_sched_node, sizeof(i915_sched_node) },

> +		{}
> +	};
> +	typeof(*types) *t;
> +
> +	for (t = types; t->name; t++)
> +		pr_info("sizeof(%s): %zd\n", t->name, t->sz);
> +
> +	return i915_subtests(tests, i915);
> +}
> -- 
> 2.20.1
> 
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/intel-gfx

Andi
Chris Wilson Jan. 26, 2021, 9:16 p.m. UTC | #2
Quoting Andi Shyti (2021-01-26 18:05:38)
> On Wed, Jan 20, 2021 at 12:22:03PM +0000, Chris Wilson wrote:
> > As a topological sort, we expect it to run in linear graph time,
> > O(V+E). In removing the recursion, it is no longer a DFS but rather a
> > BFS, and performs as O(VE). Let's demonstrate how bad this is with a few
> > examples, and build a few test cases to verify a potential fix.
> 
> very noble purpose, but...
> 
> [...]
> 
> > +static int sparse(struct drm_i915_private *i915,
> > +               bool (*fn)(struct i915_request *rq,
> > +                          unsigned long v, unsigned long e))
> > +{
> > +     return chains(i915, sparse_chain, fn);
> > +}
> 
> ... this is quite an intricate web of functions calling each
> other.
> 
> Is there any simplier way to do this? This is that kind of code
> that if you go on holiday for a few days you forget what you
> wrote.

This was to remove duplication, and there's more tests to come in this
group that use the same framework and only differ in the final step.

> I do need three drawing boards and 24 fingers for keeping track
> of what's happening here. :)
> 
> > +
> > +static void report(const char *what, unsigned long v, unsigned long e, u64 dt)
> > +{
> > +     pr_info("(%4lu, %7lu), %s:%10lluns\n", v, e, what, dt);
> > +}
> > +
> > +static u64 __set_priority(struct i915_request *rq, int prio)
> > +{
> > +     u64 dt;
> > +
> > +     preempt_disable();
> > +     dt = ktime_get_raw_fast_ns();
> > +     i915_request_set_priority(rq, prio);
> > +     dt = ktime_get_raw_fast_ns() - dt;
> > +     preempt_enable();
> > +
> > +     return dt;
> > +}
> > +
> > +static bool set_priority(struct i915_request *rq,
> > +                      unsigned long v, unsigned long e)
> > +{
> > +     report("set-priority", v, e, __set_priority(rq, I915_PRIORITY_BARRIER));
> 
> can't we pr_info directly here and spare a function?

It will be reused later.

> > +     return true;
> > +}
> > +
> > +static int single_priority(void *arg)
> > +{
> > +     return single(arg, set_priority);
> > +}
> > +
> > +static int wide_priority(void *arg)
> > +{
> > +     return wide(arg, set_priority);
> > +}
> > +
> > +static int inv_priority(void *arg)
> > +{
> > +     return inv(arg, set_priority);
> > +}
> > +
> > +static int sparse_priority(void *arg)
> > +{
> > +     return sparse(arg, set_priority);
> > +}
> > +
> > +int i915_scheduler_perf_selftests(struct drm_i915_private *i915)
> > +{
> > +     static const struct i915_subtest tests[] = {
> > +             SUBTEST(single_priority),
> > +             SUBTEST(wide_priority),
> > +             SUBTEST(inv_priority),
> > +             SUBTEST(sparse_priority),
> > +     };
> > +     static const struct {
> > +             const char *name;
> > +             size_t sz;
> > +     } types[] = {
> > +#define T(t) { #t, sizeof(struct t) }
> > +             T(i915_priolist),
> > +             T(i915_sched_attr),
> > +             T(i915_sched_node),
> > +#undef T
> 
> is this really making the code better? Is it a big deal to
> clearly use
> 
>                 { i915_priolist, sizeof(i915_priolist) },
>                 { i915_sched_attr, sizeof(i915_sched_attr) },
>                 { i915_sched_node, sizeof(i915_sched_node) },

Duplication and more typing, you even left out the struct in
sizeof(struct T) :)

Did this save me more time to add stuff than it took to write #define T?
-Chris
diff mbox series

Patch

diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c
index 0ecf71a6afd4..4802c9b1081d 100644
--- a/drivers/gpu/drm/i915/i915_scheduler.c
+++ b/drivers/gpu/drm/i915/i915_scheduler.c
@@ -592,6 +592,10 @@  void i915_request_show_with_schedule(struct drm_printer *m,
 	rcu_read_unlock();
 }
 
+#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
+#include "selftests/i915_scheduler.c"
+#endif
+
 static void i915_global_scheduler_shrink(void)
 {
 	kmem_cache_shrink(global.slab_dependencies);
diff --git a/drivers/gpu/drm/i915/selftests/i915_live_selftests.h b/drivers/gpu/drm/i915/selftests/i915_live_selftests.h
index a92c0e9b7e6b..2200a5baa68e 100644
--- a/drivers/gpu/drm/i915/selftests/i915_live_selftests.h
+++ b/drivers/gpu/drm/i915/selftests/i915_live_selftests.h
@@ -26,6 +26,7 @@  selftest(gt_mocs, intel_mocs_live_selftests)
 selftest(gt_pm, intel_gt_pm_live_selftests)
 selftest(gt_heartbeat, intel_heartbeat_live_selftests)
 selftest(requests, i915_request_live_selftests)
+selftest(scheduler, i915_scheduler_live_selftests)
 selftest(active, i915_active_live_selftests)
 selftest(objects, i915_gem_object_live_selftests)
 selftest(mman, i915_gem_mman_live_selftests)
diff --git a/drivers/gpu/drm/i915/selftests/i915_perf_selftests.h b/drivers/gpu/drm/i915/selftests/i915_perf_selftests.h
index c2389f8a257d..137e35283fee 100644
--- a/drivers/gpu/drm/i915/selftests/i915_perf_selftests.h
+++ b/drivers/gpu/drm/i915/selftests/i915_perf_selftests.h
@@ -17,5 +17,6 @@ 
  */
 selftest(engine_cs, intel_engine_cs_perf_selftests)
 selftest(request, i915_request_perf_selftests)
+selftest(scheduler, i915_scheduler_perf_selftests)
 selftest(blt, i915_gem_object_blt_perf_selftests)
 selftest(region, intel_memory_region_perf_selftests)
diff --git a/drivers/gpu/drm/i915/selftests/i915_scheduler.c b/drivers/gpu/drm/i915/selftests/i915_scheduler.c
new file mode 100644
index 000000000000..cb67de304aeb
--- /dev/null
+++ b/drivers/gpu/drm/i915/selftests/i915_scheduler.c
@@ -0,0 +1,679 @@ 
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2020 Intel Corporation
+ */
+
+#include "i915_selftest.h"
+
+#include "gt/intel_context.h"
+#include "gt/intel_gpu_commands.h"
+#include "gt/selftest_engine_heartbeat.h"
+#include "selftests/igt_spinner.h"
+#include "selftests/i915_random.h"
+
+/*
+ * Zero the engine's preempt timeout and timeslice duration properties and
+ * then disable its heartbeat with st_engine_heartbeat_disable().
+ * scheduling_enable() undoes this.
+ */
+static void scheduling_disable(struct intel_engine_cs *engine)
+{
+	engine->props.timeslice_duration_ms = 0;
+	engine->props.preempt_timeout_ms = 0;
+
+	st_engine_heartbeat_disable(engine);
+}
+
+/*
+ * Re-enable the engine heartbeat with st_engine_heartbeat_enable() and then
+ * reset the preempt timeout and timeslice duration properties to the values
+ * stored in engine->defaults.
+ */
+static void scheduling_enable(struct intel_engine_cs *engine)
+{
+	st_engine_heartbeat_enable(engine);
+
+	engine->props.timeslice_duration_ms =
+		engine->defaults.timeslice_duration_ms;
+	engine->props.preempt_timeout_ms =
+		engine->defaults.preempt_timeout_ms;
+}
+
+/*
+ * Invoke @chain(engine, @param, @fn) on the first uabi engine for which
+ * intel_engine_has_scheduler() is true and return its result.
+ *
+ * Returns 0 if no such engine exists.
+ */
+static int first_engine(struct drm_i915_private *i915,
+			int (*chain)(struct intel_engine_cs *engine,
+				     unsigned long param,
+				     bool (*fn)(struct i915_request *rq,
+						unsigned long v,
+						unsigned long e)),
+			unsigned long param,
+			bool (*fn)(struct i915_request *rq,
+				   unsigned long v, unsigned long e))
+{
+	struct intel_engine_cs *engine;
+
+	for_each_uabi_engine(engine, i915) {
+		if (intel_engine_has_scheduler(engine))
+			return chain(engine, param, fn);
+	}
+
+	return 0;
+}
+
+/*
+ * Invoke @chain(engine, @param, @fn) on every uabi engine for which
+ * intel_engine_has_scheduler() is true, stopping at the first failure.
+ *
+ * Returns 0 if every call returned 0, otherwise the first non-zero result.
+ */
+static int all_engines(struct drm_i915_private *i915,
+		       int (*chain)(struct intel_engine_cs *engine,
+				    unsigned long param,
+				    bool (*fn)(struct i915_request *rq,
+					       unsigned long v,
+					       unsigned long e)),
+		       unsigned long param,
+		       bool (*fn)(struct i915_request *rq,
+				  unsigned long v, unsigned long e))
+{
+	struct intel_engine_cs *engine;
+	int err = 0;
+
+	for_each_uabi_engine(engine, i915) {
+		if (intel_engine_has_scheduler(engine))
+			err = chain(engine, param, fn);
+		if (err)
+			break;
+	}
+
+	return err;
+}
+
+/*
+ * Walk engine->execlists.queue while holding engine->active.lock and check
+ * that the queued requests appear in ascending fence.context order, with
+ * ascending fence.seqno within the same context. The first request found
+ * out of order is reported with pr_err() and ends the walk.
+ *
+ * Returns true if no request was out of order, false otherwise.
+ */
+static bool check_context_order(struct intel_engine_cs *engine)
+{
+	u64 prev_context = 0, prev_seqno = 0;
+	unsigned long nr = 0;
+	bool ordered = false;
+	struct rb_node *rb;
+	int prev_prio = 0;
+
+	/* We expect the execution order to follow ascending fence-context */
+	spin_lock_irq(&engine->active.lock);
+
+	for (rb = rb_first_cached(&engine->execlists.queue); rb; rb = rb_next(rb)) {
+		struct i915_priolist *pl = rb_entry(rb, typeof(*pl), node);
+		struct i915_request *rq;
+
+		priolist_for_each_request(rq, pl) {
+			bool backwards = rq->fence.context < prev_context;
+
+			/* Same context: fall back to comparing seqno */
+			if (rq->fence.context == prev_context)
+				backwards = rq->fence.seqno < prev_seqno;
+
+			if (backwards) {
+				pr_err("[%lu] %llx:%lld [prio:%d] after %llx:%lld [prio:%d]\n",
+				       nr,
+				       rq->fence.context,
+				       rq->fence.seqno,
+				       rq_prio(rq),
+				       prev_context,
+				       prev_seqno,
+				       prev_prio);
+				goto unlock;
+			}
+
+			prev_context = rq->fence.context;
+			prev_seqno = rq->fence.seqno;
+			prev_prio = rq_prio(rq);
+			nr++;
+		}
+	}
+	ordered = true;
+unlock:
+	spin_unlock_irq(&engine->active.lock);
+
+	return ordered;
+}
+
+/*
+ * Queue a spinner followed by up to @length - 1 further requests on a single
+ * new context of @engine, then call @fn on the last request queued. @fn is
+ * passed the number of requests (v) and that number minus one (e), and runs
+ * with the execlists tasklet and bottom halves disabled. If @fn returns true
+ * the queue order is verified with check_context_order().
+ *
+ * Scheduling (preempt timeout, timeslicing, heartbeat) is disabled for the
+ * duration and restored on every exit path past the initial capability check.
+ *
+ * Returns 0 on success or if the engine cannot store a dword, -EINVAL if the
+ * order check fails, or the error from a failed setup step.
+ */
+static int __single_chain(struct intel_engine_cs *engine, unsigned long length,
+			  bool (*fn)(struct i915_request *rq,
+				     unsigned long v, unsigned long e))
+{
+	struct intel_context *ce;
+	struct igt_spinner spin;
+	struct i915_request *rq;
+	unsigned long count;
+	unsigned long min;
+	int err = 0;
+
+	if (!intel_engine_can_store_dword(engine))
+		return 0;
+
+	scheduling_disable(engine);
+
+	if (igt_spinner_init(&spin, engine->gt)) {
+		err = -ENOMEM;
+		goto err_heartbeat;
+	}
+
+	ce = intel_context_create(engine);
+	if (IS_ERR(ce)) {
+		err = PTR_ERR(ce);
+		goto err_spin;
+	}
+	ce->ring = __intel_context_ring_size(SZ_512K);
+
+	rq = igt_spinner_create_request(&spin, ce, MI_NOOP);
+	if (IS_ERR(rq)) {
+		err = PTR_ERR(rq);
+		goto err_context;
+	}
+	i915_request_add(rq);
+
+	/* Ring space in use after the spinner; stop once less than this remains */
+	min = ce->ring->size - ce->ring->space;
+
+	count = 1;
+	while (count < length && ce->ring->space > min) {
+		struct i915_request *next;
+
+		/*
+		 * Use a temporary so that a failure here leaves rq pointing
+		 * at the last request actually queued; fn() below must never
+		 * be handed an ERR_PTR.
+		 */
+		next = intel_context_create_request(ce);
+		if (IS_ERR(next)) {
+			err = PTR_ERR(next);
+			break;
+		}
+		i915_request_add(next);
+		rq = next;
+		count++;
+	}
+	intel_engine_flush_submission(engine);
+
+	tasklet_disable(&engine->execlists.tasklet);
+	local_bh_disable();
+	if (fn(rq, count, count - 1) && !check_context_order(engine))
+		err = -EINVAL;
+	local_bh_enable();
+	tasklet_enable(&engine->execlists.tasklet);
+
+	igt_spinner_end(&spin);
+err_context:
+	intel_context_put(ce);
+err_spin:
+	igt_spinner_fini(&spin);
+err_heartbeat:
+	scheduling_enable(engine);
+	return err;
+}
+
+/*
+ * Queue up to @width requests on @engine, each on its own new context, with
+ * the first being a spinner. Request i awaits the fence of every earlier
+ * request, rq[0] through rq[i - 1], visited in ascending order. @fn is then
+ * called on the last request queued, with the number of requests (v) and the
+ * number of awaits successfully recorded (e), while the execlists tasklet and
+ * bottom halves are disabled. If @fn returns true the queue order is verified
+ * with check_context_order().
+ *
+ * Scheduling (preempt timeout, timeslicing, heartbeat) is disabled for the
+ * duration and restored on every exit path past the initial capability check.
+ *
+ * Returns 0 on success or if the engine cannot store a dword, -EINVAL if the
+ * order check fails, or the error from a failed setup or await step.
+ */
+static int __wide_chain(struct intel_engine_cs *engine, unsigned long width,
+			bool (*fn)(struct i915_request *rq,
+				   unsigned long v, unsigned long e))
+{
+	struct intel_context **ce;
+	struct i915_request **rq;
+	struct igt_spinner spin;
+	unsigned long count;
+	unsigned long i, j;
+	int err = 0;
+
+	if (!intel_engine_can_store_dword(engine))
+		return 0;
+
+	scheduling_disable(engine);
+
+	if (igt_spinner_init(&spin, engine->gt)) {
+		err = -ENOMEM;
+		goto err_heartbeat;
+	}
+
+	ce = kmalloc_array(width, sizeof(*ce), GFP_KERNEL);
+	if (!ce) {
+		err = -ENOMEM;
+		goto err_spin;
+	}
+
+	for (i = 0; i < width; i++) {
+		ce[i] = intel_context_create(engine);
+		if (IS_ERR(ce[i])) {
+			err = PTR_ERR(ce[i]);
+			/* Only the contexts created so far need releasing */
+			width = i;
+			goto err_context;
+		}
+	}
+
+	rq = kmalloc_array(width, sizeof(*rq), GFP_KERNEL);
+	if (!rq) {
+		err = -ENOMEM;
+		goto err_context;
+	}
+
+	rq[0] = igt_spinner_create_request(&spin, ce[0], MI_NOOP);
+	if (IS_ERR(rq[0])) {
+		err = PTR_ERR(rq[0]);
+		goto err_free;
+	}
+	i915_request_add(rq[0]);
+
+	count = 0;
+	for (i = 1; i < width; i++) {
+		GEM_BUG_ON(i915_request_completed(rq[0]));
+
+		rq[i] = intel_context_create_request(ce[i]);
+		if (IS_ERR(rq[i])) {
+			err = PTR_ERR(rq[i]);
+			break;
+		}
+		for (j = 0; j < i; j++) {
+			err = i915_request_await_dma_fence(rq[i],
+							   &rq[j]->fence);
+			if (err)
+				break;
+			count++;
+		}
+		i915_request_add(rq[i]);
+
+		/*
+		 * The request is still added after a failed await, but stop
+		 * building: the next iteration would overwrite err and the
+		 * failure would go unreported.
+		 */
+		if (err)
+			break;
+	}
+	intel_engine_flush_submission(engine);
+
+	tasklet_disable(&engine->execlists.tasklet);
+	local_bh_disable();
+	if (fn(rq[i - 1], i, count) && !check_context_order(engine))
+		err = -EINVAL;
+	local_bh_enable();
+	tasklet_enable(&engine->execlists.tasklet);
+
+	igt_spinner_end(&spin);
+err_free:
+	kfree(rq);
+err_context:
+	for (i = 0; i < width; i++)
+		intel_context_put(ce[i]);
+	kfree(ce);
+err_spin:
+	igt_spinner_fini(&spin);
+err_heartbeat:
+	scheduling_enable(engine);
+	return err;
+}
+
+/*
+ * Queue up to @width requests on @engine, each on its own new context, with
+ * the first being a spinner. Request i awaits the fence of every earlier
+ * request, visited in descending order from rq[i - 1] down to rq[0]. @fn is
+ * then called on the last request queued, with the number of requests (v)
+ * and the number of awaits successfully recorded (e), while the execlists
+ * tasklet and bottom halves are disabled. If @fn returns true the queue
+ * order is verified with check_context_order().
+ *
+ * Scheduling (preempt timeout, timeslicing, heartbeat) is disabled for the
+ * duration and restored on every exit path past the initial capability check.
+ *
+ * Returns 0 on success or if the engine cannot store a dword, -EINVAL if the
+ * order check fails, or the error from a failed setup or await step.
+ */
+static int __inv_chain(struct intel_engine_cs *engine, unsigned long width,
+		       bool (*fn)(struct i915_request *rq,
+				  unsigned long v, unsigned long e))
+{
+	struct intel_context **ce;
+	struct i915_request **rq;
+	struct igt_spinner spin;
+	unsigned long count;
+	unsigned long i, j;
+	int err = 0;
+
+	if (!intel_engine_can_store_dword(engine))
+		return 0;
+
+	scheduling_disable(engine);
+
+	if (igt_spinner_init(&spin, engine->gt)) {
+		err = -ENOMEM;
+		goto err_heartbeat;
+	}
+
+	ce = kmalloc_array(width, sizeof(*ce), GFP_KERNEL);
+	if (!ce) {
+		err = -ENOMEM;
+		goto err_spin;
+	}
+
+	for (i = 0; i < width; i++) {
+		ce[i] = intel_context_create(engine);
+		if (IS_ERR(ce[i])) {
+			err = PTR_ERR(ce[i]);
+			/* Only the contexts created so far need releasing */
+			width = i;
+			goto err_context;
+		}
+	}
+
+	rq = kmalloc_array(width, sizeof(*rq), GFP_KERNEL);
+	if (!rq) {
+		err = -ENOMEM;
+		goto err_context;
+	}
+
+	rq[0] = igt_spinner_create_request(&spin, ce[0], MI_NOOP);
+	if (IS_ERR(rq[0])) {
+		err = PTR_ERR(rq[0]);
+		goto err_free;
+	}
+	i915_request_add(rq[0]);
+
+	count = 0;
+	for (i = 1; i < width; i++) {
+		GEM_BUG_ON(i915_request_completed(rq[0]));
+
+		rq[i] = intel_context_create_request(ce[i]);
+		if (IS_ERR(rq[i])) {
+			err = PTR_ERR(rq[i]);
+			break;
+		}
+		for (j = i; j > 0; j--) {
+			err = i915_request_await_dma_fence(rq[i],
+							   &rq[j - 1]->fence);
+			if (err)
+				break;
+			count++;
+		}
+		i915_request_add(rq[i]);
+
+		/*
+		 * The request is still added after a failed await, but stop
+		 * building: the next iteration would overwrite err and the
+		 * failure would go unreported.
+		 */
+		if (err)
+			break;
+	}
+	intel_engine_flush_submission(engine);
+
+	tasklet_disable(&engine->execlists.tasklet);
+	local_bh_disable();
+	if (fn(rq[i - 1], i, count) && !check_context_order(engine))
+		err = -EINVAL;
+	local_bh_enable();
+	tasklet_enable(&engine->execlists.tasklet);
+
+	igt_spinner_end(&spin);
+err_free:
+	kfree(rq);
+err_context:
+	for (i = 0; i < width; i++)
+		intel_context_put(ce[i]);
+	kfree(ce);
+err_spin:
+	igt_spinner_fini(&spin);
+err_heartbeat:
+	scheduling_enable(engine);
+	return err;
+}
+
+/*
+ * Queue up to @width requests on @engine, each on its own new context, with
+ * the first being a spinner. Every later request awaits its immediate
+ * predecessor plus up to two pseudo-randomly chosen earlier requests (one
+ * picked before and one after the predecessor await). @fn is then called on
+ * the last request queued, with the number of requests (v) and the number of
+ * awaits attempted (e), while the execlists tasklet and bottom halves are
+ * disabled. If @fn returns true the queue order is verified with
+ * check_context_order().
+ *
+ * Returns 0 on success or if the engine cannot store a dword, -EINVAL if the
+ * order check fails, or the error from a failed setup or await step.
+ */
+static int __sparse_chain(struct intel_engine_cs *engine, unsigned long width,
+			  bool (*fn)(struct i915_request *rq,
+				     unsigned long v, unsigned long e))
+{
+	struct intel_context **ce;
+	struct i915_request **rq;
+	struct igt_spinner spin;
+	I915_RND_STATE(prng);
+	unsigned long edges;
+	unsigned long i, dep;
+	int err = 0;
+
+	if (!intel_engine_can_store_dword(engine))
+		return 0;
+
+	scheduling_disable(engine);
+
+	if (igt_spinner_init(&spin, engine->gt)) {
+		err = -ENOMEM;
+		goto err_heartbeat;
+	}
+
+	ce = kmalloc_array(width, sizeof(*ce), GFP_KERNEL);
+	if (!ce) {
+		err = -ENOMEM;
+		goto err_spin;
+	}
+
+	for (i = 0; i < width; i++) {
+		ce[i] = intel_context_create(engine);
+		if (!IS_ERR(ce[i]))
+			continue;
+
+		/* Only the contexts created so far need releasing */
+		err = PTR_ERR(ce[i]);
+		width = i;
+		goto err_context;
+	}
+
+	rq = kmalloc_array(width, sizeof(*rq), GFP_KERNEL);
+	if (!rq) {
+		err = -ENOMEM;
+		goto err_context;
+	}
+
+	rq[0] = igt_spinner_create_request(&spin, ce[0], MI_NOOP);
+	if (IS_ERR(rq[0])) {
+		err = PTR_ERR(rq[0]);
+		goto err_free;
+	}
+	i915_request_add(rq[0]);
+
+	edges = 0;
+	for (i = 1; i < width; i++) {
+		GEM_BUG_ON(i915_request_completed(rq[0]));
+
+		rq[i] = intel_context_create_request(ce[i]);
+		if (IS_ERR(rq[i])) {
+			err = PTR_ERR(rq[i]);
+			break;
+		}
+
+		/* err is always 0 here: every failure below leaves the loop */
+		if (i > 1) {
+			dep = i915_prandom_u32_max_state(i - 1, &prng);
+			err = i915_request_await_dma_fence(rq[i],
+							   &rq[dep]->fence);
+			edges++;
+		}
+
+		if (!err) {
+			err = i915_request_await_dma_fence(rq[i],
+							   &rq[i - 1]->fence);
+			edges++;
+		}
+
+		if (!err && i > 2) {
+			dep = i915_prandom_u32_max_state(i - 2, &prng);
+			err = i915_request_await_dma_fence(rq[i],
+							   &rq[dep]->fence);
+			edges++;
+		}
+
+		/* The request is added even if one of the awaits failed */
+		i915_request_add(rq[i]);
+		if (err)
+			break;
+	}
+	intel_engine_flush_submission(engine);
+
+	tasklet_disable(&engine->execlists.tasklet);
+	local_bh_disable();
+	if (fn(rq[i - 1], i, edges) && !check_context_order(engine))
+		err = -EINVAL;
+	local_bh_enable();
+	tasklet_enable(&engine->execlists.tasklet);
+
+	igt_spinner_end(&spin);
+err_free:
+	kfree(rq);
+err_context:
+	for (i = 0; i < width; i++)
+		intel_context_put(ce[i]);
+	kfree(ce);
+err_spin:
+	igt_spinner_fini(&spin);
+err_heartbeat:
+	scheduling_enable(engine);
+	return err;
+}
+
+/*
+ * Run each of the four chain builders (single, wide, inv, sparse) with a
+ * parameter of 17 on all engines via all_engines(), applying @fn to each
+ * chain built.
+ *
+ * Returns 0 if every run succeeded, otherwise the first error encountered.
+ */
+static int igt_schedule_chains(struct drm_i915_private *i915,
+			       bool (*fn)(struct i915_request *rq,
+					  unsigned long v, unsigned long e))
+{
+	static int (* const builders[])(struct intel_engine_cs *engine,
+					unsigned long length,
+					bool (*fn)(struct i915_request *rq,
+						   unsigned long v, unsigned long e)) = {
+		__single_chain,
+		__wide_chain,
+		__inv_chain,
+		__sparse_chain,
+	};
+	int err = 0;
+	int n;
+
+	for (n = 0; n < ARRAY_SIZE(builders) && !err; n++)
+		err = all_engines(i915, builders[n], 17, fn);
+
+	return err;
+}
+
+/*
+ * Chain callback: raise @rq to I915_PRIORITY_BARRIER and assert that
+ * rq_prio() reports that priority afterwards. @v and @e are unused.
+ *
+ * Always returns true, which makes the caller go on to verify queue order.
+ */
+static bool igt_priority(struct i915_request *rq,
+			 unsigned long v, unsigned long e)
+{
+	const int prio = I915_PRIORITY_BARRIER;
+
+	i915_request_set_priority(rq, prio);
+	GEM_BUG_ON(rq_prio(rq) != prio);
+
+	return true;
+}
+
+/* Subtest entry point: run igt_priority over every chain shape. */
+static int igt_priority_chains(void *arg)
+{
+	struct drm_i915_private *i915 = arg;
+
+	return igt_schedule_chains(i915, igt_priority);
+}
+
+/*
+ * Entry point for the scheduler live selftests: hands the subtest table to
+ * i915_subtests() and returns its result.
+ */
+int i915_scheduler_live_selftests(struct drm_i915_private *i915)
+{
+	static const struct i915_subtest tests[] = {
+		SUBTEST(igt_priority_chains),
+	};
+	int err;
+
+	err = i915_subtests(tests, i915);
+
+	return err;
+}
+
+/*
+ * Call @chain(@i915, size, @fn) for a fixed series of increasing sizes
+ * (1 through 4096). The series ends early, without error, once a single
+ * call has exceeded the timeout armed by IGT_TIMEOUT() just before it.
+ *
+ * Returns 0 on success, otherwise the first non-zero result from @chain.
+ */
+static int chains(struct drm_i915_private *i915,
+		  int (*chain)(struct drm_i915_private *i915,
+			       unsigned long length,
+			       bool (*fn)(struct i915_request *rq,
+					  unsigned long v, unsigned long e)),
+		  bool (*fn)(struct i915_request *rq,
+			     unsigned long v, unsigned long e))
+{
+	static const unsigned long sizes[] = {
+		1, 4, 16, 64, 128, 256, 512, 1024, 4096
+	};
+	int n;
+
+	for (n = 0; n < ARRAY_SIZE(sizes); n++) {
+		IGT_TIMEOUT(end_time);
+		int err;
+
+		err = chain(i915, sizes[n], fn);
+		if (err)
+			return err;
+
+		if (__igt_timeout(end_time, NULL))
+			break;
+	}
+
+	return 0;
+}
+
+/* Build one __single_chain of @length on the first engine with a scheduler. */
+static int single_chain(struct drm_i915_private *i915,
+			unsigned long length,
+			bool (*fn)(struct i915_request *rq,
+				   unsigned long v, unsigned long e))
+{
+	int err;
+
+	err = first_engine(i915, __single_chain, length, fn);
+
+	return err;
+}
+
+/* Run @fn over single chains of every size tried by chains(). */
+static int single(struct drm_i915_private *i915,
+		  bool (*fn)(struct i915_request *rq,
+			     unsigned long v, unsigned long e))
+{
+	int err;
+
+	err = chains(i915, single_chain, fn);
+
+	return err;
+}
+
+/* Build one __wide_chain of @width on the first engine with a scheduler. */
+static int wide_chain(struct drm_i915_private *i915,
+		      unsigned long width,
+		      bool (*fn)(struct i915_request *rq,
+				 unsigned long v, unsigned long e))
+{
+	int err;
+
+	err = first_engine(i915, __wide_chain, width, fn);
+
+	return err;
+}
+
+/* Run @fn over wide chains of every size tried by chains(). */
+static int wide(struct drm_i915_private *i915,
+		bool (*fn)(struct i915_request *rq,
+			   unsigned long v, unsigned long e))
+{
+	int err;
+
+	err = chains(i915, wide_chain, fn);
+
+	return err;
+}
+
+/* Build one __inv_chain of @width on the first engine with a scheduler. */
+static int inv_chain(struct drm_i915_private *i915,
+		     unsigned long width,
+		     bool (*fn)(struct i915_request *rq,
+				unsigned long v, unsigned long e))
+{
+	int err;
+
+	err = first_engine(i915, __inv_chain, width, fn);
+
+	return err;
+}
+
+/* Run @fn over inv chains of every size tried by chains(). */
+static int inv(struct drm_i915_private *i915,
+	       bool (*fn)(struct i915_request *rq,
+			  unsigned long v, unsigned long e))
+{
+	int err;
+
+	err = chains(i915, inv_chain, fn);
+
+	return err;
+}
+
+/* Build one __sparse_chain of @width on the first engine with a scheduler. */
+static int sparse_chain(struct drm_i915_private *i915,
+			unsigned long width,
+			bool (*fn)(struct i915_request *rq,
+				   unsigned long v, unsigned long e))
+{
+	int err;
+
+	err = first_engine(i915, __sparse_chain, width, fn);
+
+	return err;
+}
+
+/* Run @fn over sparse chains of every size tried by chains(). */
+static int sparse(struct drm_i915_private *i915,
+		  bool (*fn)(struct i915_request *rq,
+			     unsigned long v, unsigned long e))
+{
+	int err;
+
+	err = chains(i915, sparse_chain, fn);
+
+	return err;
+}
+
+/*
+ * Log one measurement with pr_info(): the (@v, @e) pair, the label @what and
+ * the duration @dt, which is printed with an "ns" suffix.
+ */
+static void report(const char *what, unsigned long v, unsigned long e, u64 dt)
+{
+	pr_info("(%4lu, %7lu), %s:%10lluns\n",
+		v, e,
+		what, dt);
+}
+
+/*
+ * Time a single i915_request_set_priority(@rq, @prio) call with preemption
+ * disabled, using ktime_get_raw_fast_ns() before and after.
+ *
+ * Returns the difference between the two timestamps.
+ */
+static u64 __set_priority(struct i915_request *rq, int prio)
+{
+	u64 start, end;
+
+	preempt_disable();
+	start = ktime_get_raw_fast_ns();
+	i915_request_set_priority(rq, prio);
+	end = ktime_get_raw_fast_ns();
+	preempt_enable();
+
+	return end - start;
+}
+
+/*
+ * Chain callback: time raising @rq to I915_PRIORITY_BARRIER and report the
+ * result labelled "set-priority" together with @v and @e.
+ *
+ * Always returns true, which makes the caller go on to verify queue order.
+ */
+static bool set_priority(struct i915_request *rq,
+			 unsigned long v, unsigned long e)
+{
+	u64 dt = __set_priority(rq, I915_PRIORITY_BARRIER);
+
+	report("set-priority", v, e, dt);
+
+	return true;
+}
+
+/* Subtest entry point: measure set_priority on single chains. */
+static int single_priority(void *arg)
+{
+	struct drm_i915_private *i915 = arg;
+
+	return single(i915, set_priority);
+}
+
+/* Subtest entry point: measure set_priority on wide chains. */
+static int wide_priority(void *arg)
+{
+	struct drm_i915_private *i915 = arg;
+
+	return wide(i915, set_priority);
+}
+
+/* Subtest entry point: measure set_priority on inv chains. */
+static int inv_priority(void *arg)
+{
+	struct drm_i915_private *i915 = arg;
+
+	return inv(i915, set_priority);
+}
+
+/* Subtest entry point: measure set_priority on sparse chains. */
+static int sparse_priority(void *arg)
+{
+	struct drm_i915_private *i915 = arg;
+
+	return sparse(i915, set_priority);
+}
+
+/*
+ * Entry point for the scheduler perf selftests. Logs sizeof() for
+ * struct i915_priolist, struct i915_sched_attr and struct i915_sched_node,
+ * then hands the four set-priority subtests to i915_subtests() and returns
+ * its result.
+ */
+int i915_scheduler_perf_selftests(struct drm_i915_private *i915)
+{
+	static const struct i915_subtest tests[] = {
+		SUBTEST(single_priority),
+		SUBTEST(wide_priority),
+		SUBTEST(inv_priority),
+		SUBTEST(sparse_priority),
+	};
+	static const struct {
+		const char *name;
+		size_t sz;
+	} types[] = {
+/* T(x) expands to { "x", sizeof(struct x) } */
+#define T(t) { #t, sizeof(struct t) }
+		T(i915_priolist),
+		T(i915_sched_attr),
+		T(i915_sched_node),
+#undef T
+		{} /* sentinel: a NULL name ends the loop below */
+	};
+	typeof(*types) *t;
+
+	/* sz is a size_t, so print it with %zu (%zd is for ssize_t) */
+	for (t = types; t->name; t++)
+		pr_info("sizeof(%s): %zu\n", t->name, t->sz);
+
+	return i915_subtests(tests, i915);
+}