diff mbox series

[i-g-t] i915: Add gem_exec_endless

Message ID 20200519100217.1761103-1-chris@chris-wilson.co.uk (mailing list archive)
State New, archived
Headers show
Series [i-g-t] i915: Add gem_exec_endless | expand

Commit Message

Chris Wilson May 19, 2020, 10:02 a.m. UTC
Start our preparations for guaranteeing endless execution.

First, we just want to estimate the 'ulta-low latency' dispatch overhead
by running an endless chain of batch buffers. The legacy binding process
here will be replaced by async VM_BIND, but for the moment this
suffices to construct the GTT as required for arbitrary
*user-controlled* indirect execution.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
---
 lib/igt_core.h                |   1 +
 tests/Makefile.sources        |   3 +
 tests/i915/gem_exec_endless.c | 354 ++++++++++++++++++++++++++++++++++
 tests/meson.build             |   1 +
 4 files changed, 359 insertions(+)
 create mode 100644 tests/i915/gem_exec_endless.c

Comments

Mika Kuoppala May 19, 2020, 10:43 a.m. UTC | #1
Chris Wilson <chris@chris-wilson.co.uk> writes:

> Start our preparations for guaranteeing endless execution.
>
> First, we just want to estimate the 'ulta-low latency' dispatch overhead
> by running an endless chain of batch buffers. The legacy binding process
> here will be replaced by async VM_BIND, but for the moment this
> suffices to construct the GTT as required for arbitrary
> *user-controlled* indirect execution.
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
> Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
> ---
>  lib/igt_core.h                |   1 +
>  tests/Makefile.sources        |   3 +
>  tests/i915/gem_exec_endless.c | 354 ++++++++++++++++++++++++++++++++++
>  tests/meson.build             |   1 +
>  4 files changed, 359 insertions(+)
>  create mode 100644 tests/i915/gem_exec_endless.c
>
> diff --git a/lib/igt_core.h b/lib/igt_core.h
> index b97fa2faa..c58715204 100644
> --- a/lib/igt_core.h
> +++ b/lib/igt_core.h
> @@ -1369,6 +1369,7 @@ void igt_kmsg(const char *format, ...);
>  #define KMSG_DEBUG	"<7>[IGT] "
>  
>  #define READ_ONCE(x) (*(volatile typeof(x) *)(&(x)))
> +#define WRITE_ONCE(x, v) do *(volatile typeof(x) *)(&(x)) = (v); while (0)
>  
>  #define MSEC_PER_SEC (1000)
>  #define USEC_PER_SEC (1000*MSEC_PER_SEC)
> diff --git a/tests/Makefile.sources b/tests/Makefile.sources
> index c450fa0ed..d1f7cf819 100644
> --- a/tests/Makefile.sources
> +++ b/tests/Makefile.sources
> @@ -265,6 +265,9 @@ gem_exec_schedule_SOURCES = i915/gem_exec_schedule.c
>  TESTS_progs += gem_exec_store
>  gem_exec_store_SOURCES = i915/gem_exec_store.c
>  
> +TESTS_progs += gem_exec_endless
> +gem_exec_endless_SOURCES = i915/gem_exec_endless.c
> +
>  TESTS_progs += gem_exec_suspend
>  gem_exec_suspend_SOURCES = i915/gem_exec_suspend.c
>  
> diff --git a/tests/i915/gem_exec_endless.c b/tests/i915/gem_exec_endless.c
> new file mode 100644
> index 000000000..c25c94641
> --- /dev/null
> +++ b/tests/i915/gem_exec_endless.c
> @@ -0,0 +1,354 @@
> +/*
> + * Copyright © 2019 Intel Corporation
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a
> + * copy of this software and associated documentation files (the "Software"),
> + * to deal in the Software without restriction, including without limitation
> + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
> + * and/or sell copies of the Software, and to permit persons to whom the
> + * Software is furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice (including the next
> + * paragraph) shall be included in all copies or substantial portions of the
> + * Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
> + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
> + * IN THE SOFTWARE.
> + */
> +
> +#include <sys/ioctl.h>
> +
> +#include "i915/gem.h"
> +#include "i915/gem_ring.h"
> +#include "igt.h"
> +#include "sw_sync.h"
> +
> +#define MAX_ENGINES 64
> +
> +#define MI_SEMAPHORE_WAIT		(0x1c << 23)
> +#define   MI_SEMAPHORE_POLL             (1 << 15)
> +#define   MI_SEMAPHORE_SAD_GT_SDD       (0 << 12)
> +#define   MI_SEMAPHORE_SAD_GTE_SDD      (1 << 12)
> +#define   MI_SEMAPHORE_SAD_LT_SDD       (2 << 12)
> +#define   MI_SEMAPHORE_SAD_LTE_SDD      (3 << 12)
> +#define   MI_SEMAPHORE_SAD_EQ_SDD       (4 << 12)
> +#define   MI_SEMAPHORE_SAD_NEQ_SDD      (5 << 12)
> +
> +static uint32_t batch_create(int i915)
> +{
> +	const uint32_t bbe = MI_BATCH_BUFFER_END;
> +	uint32_t handle = gem_create(i915, 4096);
> +	gem_write(i915, handle, 0, &bbe, sizeof(bbe));
> +	return handle;
> +}
> +
> +struct supervisor {
> +	int device;
> +	uint32_t handle;
> +	uint32_t context;
> +
> +	uint32_t *map;
> +	uint32_t *semaphore;
> +	uint32_t *terminate;
> +	uint64_t *dispatch;
> +};
> +
> +static unsigned int offset_in_page(void *addr)
> +{
> +	return (uintptr_t)addr & 4095;
> +}
> +
> +static uint32_t __supervisor_create_context(int i915,
> +					    const struct intel_execution_engine2 *e)
> +{
> +	I915_DEFINE_CONTEXT_PARAM_ENGINES(engines, 2);
> +	struct drm_i915_gem_context_create_ext_setparam p_ring = {
> +		{
> +			.name = I915_CONTEXT_CREATE_EXT_SETPARAM,
> +			.next_extension = 0
> +		},
> +		{
> +			.param = I915_CONTEXT_PARAM_RINGSIZE,
> +			.value = 4096,
> +		},
> +	};
> +	struct drm_i915_gem_context_create_ext_setparam p_engines = {
> +		{
> +			.name = I915_CONTEXT_CREATE_EXT_SETPARAM,
> +			.next_extension = to_user_pointer(&p_ring)
> +
> +		},
> +		{
> +			.param = I915_CONTEXT_PARAM_ENGINES,
> +			.value = to_user_pointer(&engines),
> +			.size = sizeof(engines),
> +		},
> +	};
> +	struct drm_i915_gem_context_create_ext_setparam p_persistence = {
> +		{
> +			.name = I915_CONTEXT_CREATE_EXT_SETPARAM,
> +			.next_extension = to_user_pointer(&p_engines)
> +
> +		},
> +		{
> +			.param = I915_CONTEXT_PARAM_PERSISTENCE,
> +			.value = 0
> +		},
> +	};
> +	struct drm_i915_gem_context_create_ext create = {
> +		.flags = I915_CONTEXT_CREATE_FLAGS_USE_EXTENSIONS,
> +		.extensions = to_user_pointer(&p_persistence),
> +	};
> +
> +	for (int n = 0; n < 2; n++) { /* [exec, bind] */
> +		engines.engines[n].engine_class = e->class;
> +		engines.engines[n].engine_instance = e->instance;
> +	}
> +
> +	ioctl(i915, DRM_IOCTL_I915_GEM_CONTEXT_CREATE_EXT, &create);
> +	return create.ctx_id;
> +}
> +
> +static void __supervisor_create(int i915,
> +				const struct intel_execution_engine2 *e,
> +				struct supervisor *sv)
> +{
> +	sv->device = i915;
> +	sv->context = __supervisor_create_context(i915, e);
> +	igt_require(sv->context);
> +
> +	sv->handle = gem_create(i915, 4096);
> +	sv->map = gem_mmap__device_coherent(i915, sv->handle,
> +					    0, 4096, PROT_WRITE);
> +}
> +
> +static void __supervisor_run(struct supervisor *sv)
> +{
> +	struct drm_i915_gem_exec_object2 obj = {
> +		.handle = sv->handle,
> +		.flags = EXEC_OBJECT_PINNED
> +	};
> +	struct drm_i915_gem_execbuffer2 execbuf = {
> +		.buffers_ptr = to_user_pointer(&obj),
> +		.buffer_count = 1,
> +		.rsvd1 = sv->context,
> +	};
> +	uint32_t *cs = sv->map;
> +
> +	sv->semaphore = cs + 1000;
> +
> +	*cs++ = MI_SEMAPHORE_WAIT |
> +		MI_SEMAPHORE_POLL |
> +		MI_SEMAPHORE_SAD_EQ_SDD |
> +		(4 - 2);
> +	*cs++ = 1;
> +	*cs++ = offset_in_page(sv->semaphore);
> +	*cs++ = 0;
> +
> +	sv->terminate = cs;
> +	*cs++ = MI_STORE_DWORD_IMM;
> +	*cs++ = offset_in_page(sv->semaphore);
> +	*cs++ = 0;
> +	*cs++ = 0;
> +
> +	*cs++ = MI_BATCH_BUFFER_START | 1 << 8 | 1;
> +	sv->dispatch = (uint64_t *)cs; /* to be filled in later */
> +
> +	gem_execbuf(sv->device, &execbuf);
> +	igt_assert_eq_u64(obj.offset, 0);
> +}
> +
> +static void supervisor_open(int i915,
> +			    const struct intel_execution_engine2 *e,
> +			    struct supervisor *sv)
> +{
> +	__supervisor_create(i915, e, sv);
> +	__supervisor_run(sv);
> +}
> +
> +static void supervisor_dispatch(struct supervisor *sv, uint64_t addr)
> +{
> +	WRITE_ONCE(*sv->dispatch, 64 << 10);

addr << 10 ?

-Mika

> +	WRITE_ONCE(*sv->semaphore, 1);
> +	__sync_synchronize();
> +}
> +
> +static void legacy_supervisor_bind(struct supervisor *sv, uint32_t handle, uint64_t addr)
> +{
> +	struct drm_i915_gem_exec_object2 obj[2] = {
> +		{
> +			.handle = handle,
> +			.offset = addr,
> +			.flags = EXEC_OBJECT_PINNED
> +		},
> +		{
> +			.handle = batch_create(sv->device)
> +		}
> +	};
> +	struct drm_i915_gem_execbuffer2 execbuf = {
> +		.buffers_ptr = to_user_pointer(obj),
> +		.buffer_count = ARRAY_SIZE(obj),
> +		.rsvd1 = sv->context,
> +		.flags = 1, /* legacy bind engine */
> +	};
> +
> +	gem_execbuf(sv->device, &execbuf);
> +	gem_close(sv->device, obj[1].handle);
> +
> +	gem_sync(sv->device, handle); /* must wait for async binds */
> +}
> +
> +static void emit_bbe_chain(uint32_t *cs)
> +{
> +	*cs++ = MI_BATCH_BUFFER_START | 1 << 8 | 1;
> +	*cs++ = 0;
> +	*cs++ = 0;
> +}
> +
> +static void supervisor_close(struct supervisor *sv)
> +{
> +	WRITE_ONCE(*sv->terminate, MI_BATCH_BUFFER_END);
> +	WRITE_ONCE(*sv->semaphore, 1);
> +	__sync_synchronize();
> +	munmap(sv->map, 4096);
> +
> +	gem_sync(sv->device, sv->handle);
> +	gem_close(sv->device, sv->handle);
> +
> +	gem_context_destroy(sv->device, sv->context);
> +}
> +
> +static int read_timestamp_frequency(int i915)
> +{
> +	int value = 0;
> +	drm_i915_getparam_t gp = {
> +		.value = &value,
> +		.param = I915_PARAM_CS_TIMESTAMP_FREQUENCY,
> +	};
> +	ioctl(i915, DRM_IOCTL_I915_GETPARAM, &gp);
> +	return value;
> +}
> +
> +static int cmp_u32(const void *A, const void *B)
> +{
> +	const uint32_t *a = A, *b = B;
> +
> +	if (*a < *b)
> +		return -1;
> +	else if (*a > *b)
> +		return 1;
> +	else
> +		return 0;
> +}
> +
> +static uint32_t trifilter(uint32_t *x)
> +{
> +	qsort(x, 5, sizeof(*x), cmp_u32);
> +	return (x[1] + 2 * x[2] + x[3]) / 4;
> +}
> +
> +#define TIMESTAMP (0x358)
> +static void endless_dispatch(int i915, const struct intel_execution_engine2 *e)
> +{
> +	const uint32_t mmio_base = gem_engine_mmio_base(i915, e->name);
> +	const int cs_timestamp_freq = read_timestamp_frequency(i915);
> +	uint32_t handle, *cs, *map;
> +	struct supervisor sv;
> +	uint32_t latency[5];
> +	uint32_t *timestamp;
> +	uint32_t *result;
> +
> +	/*
> +	 * Launch a supervisor bb.
> +	 * Wait on semaphore.
> +	 * Bind second bb.
> +	 * Write new address into MI_BB_START
> +	 * Release semaphore.
> +	 *
> +	 * Check we see the second bb execute.
> +	 *
> +	 * Chain MI_BB_START to supervisor bb (replacing BBE).
> +	 *
> +	 * Final dispatch is BBE.
> +	 */
> +
> +	igt_require(gem_class_has_mutable_submission(i915, e->class));
> +
> +	igt_require(mmio_base);
> +	timestamp = (void *)igt_global_mmio + mmio_base + TIMESTAMP;
> +
> +	supervisor_open(i915, e, &sv);
> +	result = sv.semaphore + 1;
> +
> +	handle = gem_create(i915, 4096);
> +	cs = map = gem_mmap__device_coherent(i915, handle, 0, 4096, PROT_WRITE);
> +	*cs++ = 0x24 << 23 | 2; /* SRM */
> +	*cs++ = mmio_base + TIMESTAMP;
> +	*cs++ = offset_in_page(result);
> +	*cs++ = 0;
> +	emit_bbe_chain(cs);
> +	munmap(map, 4096);
> +	legacy_supervisor_bind(&sv, handle, 64 << 10);
> +
> +	for (int pass = 0; pass < ARRAY_SIZE(latency); pass++) {
> +		uint32_t start, end;
> +
> +		WRITE_ONCE(*result, 0);
> +		start = READ_ONCE(*timestamp);
> +		supervisor_dispatch(&sv, 64 << 10);
> +		while (!(end = READ_ONCE(*result)))
> +			;
> +
> +		igt_assert_eq(READ_ONCE(*sv.semaphore), 0);
> +		latency[pass] = end - start;
> +	}
> +
> +	latency[0] = trifilter(latency);
> +	igt_info("Dispatch latency: %u cycles, %.0fns\n",
> +		 latency[0], latency[0] * 1e9 / cs_timestamp_freq);
> +
> +	supervisor_close(&sv);
> +
> +	gem_close(i915, handle);
> +}
> +
> +#define test_each_engine(T, i915, e) \
> +	igt_subtest_with_dynamic(T) __for_each_physical_engine(i915, e) \
> +		for_each_if(gem_class_can_store_dword(i915, (e)->class)) \
> +			igt_dynamic_f("%s", (e)->name)
> +igt_main
> +{
> +	const struct intel_execution_engine2 *e;
> +	int i915 = -1;
> +
> +	igt_skip_on_simulation();
> +
> +	igt_fixture {
> +		i915 = drm_open_driver(DRIVER_INTEL);
> +		igt_require_gem(i915);
> +	}
> +
> +	igt_subtest_group {
> +		struct intel_mmio_data mmio;
> +
> +		igt_fixture {
> +			igt_require(gem_scheduler_enabled(i915));
> +			igt_require(gem_scheduler_has_preemption(i915));
> +
> +			intel_register_access_init(&mmio,
> +						   intel_get_pci_device(),
> +						   false, i915);
> +		}
> +
> +		test_each_engine("dispatch", i915, e)
> +				endless_dispatch(i915, e);
> +
> +		igt_fixture
> +			intel_register_access_fini(&mmio);
> +	}
> +}
> diff --git a/tests/meson.build b/tests/meson.build
> index 88e4875b6..9312b6944 100644
> --- a/tests/meson.build
> +++ b/tests/meson.build
> @@ -140,6 +140,7 @@ i915_progs = [
>  	'gem_exec_big',
>  	'gem_exec_capture',
>  	'gem_exec_create',
> +	'gem_exec_endless',
>  	'gem_exec_fence',
>  	'gem_exec_flush',
>  	'gem_exec_gttfill',
> -- 
> 2.26.2
Chris Wilson May 19, 2020, 10:47 a.m. UTC | #2
Quoting Mika Kuoppala (2020-05-19 11:43:16)
> Chris Wilson <chris@chris-wilson.co.uk> writes:
> > +static void supervisor_dispatch(struct supervisor *sv, uint64_t addr)
> > +{
> > +     WRITE_ONCE(*sv->dispatch, 64 << 10);
> 
> addr << 10 ?

addr :)
-Chris
diff mbox series

Patch

diff --git a/lib/igt_core.h b/lib/igt_core.h
index b97fa2faa..c58715204 100644
--- a/lib/igt_core.h
+++ b/lib/igt_core.h
@@ -1369,6 +1369,7 @@  void igt_kmsg(const char *format, ...);
 #define KMSG_DEBUG	"<7>[IGT] "
 
 #define READ_ONCE(x) (*(volatile typeof(x) *)(&(x)))
+#define WRITE_ONCE(x, v) do *(volatile typeof(x) *)(&(x)) = (v); while (0)
 
 #define MSEC_PER_SEC (1000)
 #define USEC_PER_SEC (1000*MSEC_PER_SEC)
diff --git a/tests/Makefile.sources b/tests/Makefile.sources
index c450fa0ed..d1f7cf819 100644
--- a/tests/Makefile.sources
+++ b/tests/Makefile.sources
@@ -265,6 +265,9 @@  gem_exec_schedule_SOURCES = i915/gem_exec_schedule.c
 TESTS_progs += gem_exec_store
 gem_exec_store_SOURCES = i915/gem_exec_store.c
 
+TESTS_progs += gem_exec_endless
+gem_exec_endless_SOURCES = i915/gem_exec_endless.c
+
 TESTS_progs += gem_exec_suspend
 gem_exec_suspend_SOURCES = i915/gem_exec_suspend.c
 
diff --git a/tests/i915/gem_exec_endless.c b/tests/i915/gem_exec_endless.c
new file mode 100644
index 000000000..c25c94641
--- /dev/null
+++ b/tests/i915/gem_exec_endless.c
@@ -0,0 +1,354 @@ 
+/*
+ * Copyright © 2019 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include <sys/ioctl.h>
+
+#include "i915/gem.h"
+#include "i915/gem_ring.h"
+#include "igt.h"
+#include "sw_sync.h"
+
+#define MAX_ENGINES 64
+
+#define MI_SEMAPHORE_WAIT		(0x1c << 23)
+#define   MI_SEMAPHORE_POLL             (1 << 15)
+#define   MI_SEMAPHORE_SAD_GT_SDD       (0 << 12)
+#define   MI_SEMAPHORE_SAD_GTE_SDD      (1 << 12)
+#define   MI_SEMAPHORE_SAD_LT_SDD       (2 << 12)
+#define   MI_SEMAPHORE_SAD_LTE_SDD      (3 << 12)
+#define   MI_SEMAPHORE_SAD_EQ_SDD       (4 << 12)
+#define   MI_SEMAPHORE_SAD_NEQ_SDD      (5 << 12)
+
+static uint32_t batch_create(int i915)
+{
+	const uint32_t bbe = MI_BATCH_BUFFER_END;
+	uint32_t handle = gem_create(i915, 4096);
+	gem_write(i915, handle, 0, &bbe, sizeof(bbe));
+	return handle;
+}
+
+struct supervisor {
+	int device;
+	uint32_t handle;
+	uint32_t context;
+
+	uint32_t *map;
+	uint32_t *semaphore;
+	uint32_t *terminate;
+	uint64_t *dispatch;
+};
+
+static unsigned int offset_in_page(void *addr)
+{
+	return (uintptr_t)addr & 4095;
+}
+
+static uint32_t __supervisor_create_context(int i915,
+					    const struct intel_execution_engine2 *e)
+{
+	I915_DEFINE_CONTEXT_PARAM_ENGINES(engines, 2);
+	struct drm_i915_gem_context_create_ext_setparam p_ring = {
+		{
+			.name = I915_CONTEXT_CREATE_EXT_SETPARAM,
+			.next_extension = 0
+		},
+		{
+			.param = I915_CONTEXT_PARAM_RINGSIZE,
+			.value = 4096,
+		},
+	};
+	struct drm_i915_gem_context_create_ext_setparam p_engines = {
+		{
+			.name = I915_CONTEXT_CREATE_EXT_SETPARAM,
+			.next_extension = to_user_pointer(&p_ring)
+
+		},
+		{
+			.param = I915_CONTEXT_PARAM_ENGINES,
+			.value = to_user_pointer(&engines),
+			.size = sizeof(engines),
+		},
+	};
+	struct drm_i915_gem_context_create_ext_setparam p_persistence = {
+		{
+			.name = I915_CONTEXT_CREATE_EXT_SETPARAM,
+			.next_extension = to_user_pointer(&p_engines)
+
+		},
+		{
+			.param = I915_CONTEXT_PARAM_PERSISTENCE,
+			.value = 0
+		},
+	};
+	struct drm_i915_gem_context_create_ext create = {
+		.flags = I915_CONTEXT_CREATE_FLAGS_USE_EXTENSIONS,
+		.extensions = to_user_pointer(&p_persistence),
+	};
+
+	for (int n = 0; n < 2; n++) { /* [exec, bind] */
+		engines.engines[n].engine_class = e->class;
+		engines.engines[n].engine_instance = e->instance;
+	}
+
+	ioctl(i915, DRM_IOCTL_I915_GEM_CONTEXT_CREATE_EXT, &create);
+	return create.ctx_id;
+}
+
+static void __supervisor_create(int i915,
+				const struct intel_execution_engine2 *e,
+				struct supervisor *sv)
+{
+	sv->device = i915;
+	sv->context = __supervisor_create_context(i915, e);
+	igt_require(sv->context);
+
+	sv->handle = gem_create(i915, 4096);
+	sv->map = gem_mmap__device_coherent(i915, sv->handle,
+					    0, 4096, PROT_WRITE);
+}
+
+static void __supervisor_run(struct supervisor *sv)
+{
+	struct drm_i915_gem_exec_object2 obj = {
+		.handle = sv->handle,
+		.flags = EXEC_OBJECT_PINNED
+	};
+	struct drm_i915_gem_execbuffer2 execbuf = {
+		.buffers_ptr = to_user_pointer(&obj),
+		.buffer_count = 1,
+		.rsvd1 = sv->context,
+	};
+	uint32_t *cs = sv->map;
+
+	sv->semaphore = cs + 1000;
+
+	*cs++ = MI_SEMAPHORE_WAIT |
+		MI_SEMAPHORE_POLL |
+		MI_SEMAPHORE_SAD_EQ_SDD |
+		(4 - 2);
+	*cs++ = 1;
+	*cs++ = offset_in_page(sv->semaphore);
+	*cs++ = 0;
+
+	sv->terminate = cs;
+	*cs++ = MI_STORE_DWORD_IMM;
+	*cs++ = offset_in_page(sv->semaphore);
+	*cs++ = 0;
+	*cs++ = 0;
+
+	*cs++ = MI_BATCH_BUFFER_START | 1 << 8 | 1;
+	sv->dispatch = (uint64_t *)cs; /* to be filled in later */
+
+	gem_execbuf(sv->device, &execbuf);
+	igt_assert_eq_u64(obj.offset, 0);
+}
+
+static void supervisor_open(int i915,
+			    const struct intel_execution_engine2 *e,
+			    struct supervisor *sv)
+{
+	__supervisor_create(i915, e, sv);
+	__supervisor_run(sv);
+}
+
+static void supervisor_dispatch(struct supervisor *sv, uint64_t addr)
+{
+	WRITE_ONCE(*sv->dispatch, 64 << 10);
+	WRITE_ONCE(*sv->semaphore, 1);
+	__sync_synchronize();
+}
+
+static void legacy_supervisor_bind(struct supervisor *sv, uint32_t handle, uint64_t addr)
+{
+	struct drm_i915_gem_exec_object2 obj[2] = {
+		{
+			.handle = handle,
+			.offset = addr,
+			.flags = EXEC_OBJECT_PINNED
+		},
+		{
+			.handle = batch_create(sv->device)
+		}
+	};
+	struct drm_i915_gem_execbuffer2 execbuf = {
+		.buffers_ptr = to_user_pointer(obj),
+		.buffer_count = ARRAY_SIZE(obj),
+		.rsvd1 = sv->context,
+		.flags = 1, /* legacy bind engine */
+	};
+
+	gem_execbuf(sv->device, &execbuf);
+	gem_close(sv->device, obj[1].handle);
+
+	gem_sync(sv->device, handle); /* must wait for async binds */
+}
+
+static void emit_bbe_chain(uint32_t *cs)
+{
+	*cs++ = MI_BATCH_BUFFER_START | 1 << 8 | 1;
+	*cs++ = 0;
+	*cs++ = 0;
+}
+
+static void supervisor_close(struct supervisor *sv)
+{
+	WRITE_ONCE(*sv->terminate, MI_BATCH_BUFFER_END);
+	WRITE_ONCE(*sv->semaphore, 1);
+	__sync_synchronize();
+	munmap(sv->map, 4096);
+
+	gem_sync(sv->device, sv->handle);
+	gem_close(sv->device, sv->handle);
+
+	gem_context_destroy(sv->device, sv->context);
+}
+
+static int read_timestamp_frequency(int i915)
+{
+	int value = 0;
+	drm_i915_getparam_t gp = {
+		.value = &value,
+		.param = I915_PARAM_CS_TIMESTAMP_FREQUENCY,
+	};
+	ioctl(i915, DRM_IOCTL_I915_GETPARAM, &gp);
+	return value;
+}
+
+static int cmp_u32(const void *A, const void *B)
+{
+	const uint32_t *a = A, *b = B;
+
+	if (*a < *b)
+		return -1;
+	else if (*a > *b)
+		return 1;
+	else
+		return 0;
+}
+
+static uint32_t trifilter(uint32_t *x)
+{
+	qsort(x, 5, sizeof(*x), cmp_u32);
+	return (x[1] + 2 * x[2] + x[3]) / 4;
+}
+
+#define TIMESTAMP (0x358)
+static void endless_dispatch(int i915, const struct intel_execution_engine2 *e)
+{
+	const uint32_t mmio_base = gem_engine_mmio_base(i915, e->name);
+	const int cs_timestamp_freq = read_timestamp_frequency(i915);
+	uint32_t handle, *cs, *map;
+	struct supervisor sv;
+	uint32_t latency[5];
+	uint32_t *timestamp;
+	uint32_t *result;
+
+	/*
+	 * Launch a supervisor bb.
+	 * Wait on semaphore.
+	 * Bind second bb.
+	 * Write new address into MI_BB_START
+	 * Release semaphore.
+	 *
+	 * Check we see the second bb execute.
+	 *
+	 * Chain MI_BB_START to supervisor bb (replacing BBE).
+	 *
+	 * Final dispatch is BBE.
+	 */
+
+	igt_require(gem_class_has_mutable_submission(i915, e->class));
+
+	igt_require(mmio_base);
+	timestamp = (void *)igt_global_mmio + mmio_base + TIMESTAMP;
+
+	supervisor_open(i915, e, &sv);
+	result = sv.semaphore + 1;
+
+	handle = gem_create(i915, 4096);
+	cs = map = gem_mmap__device_coherent(i915, handle, 0, 4096, PROT_WRITE);
+	*cs++ = 0x24 << 23 | 2; /* SRM */
+	*cs++ = mmio_base + TIMESTAMP;
+	*cs++ = offset_in_page(result);
+	*cs++ = 0;
+	emit_bbe_chain(cs);
+	munmap(map, 4096);
+	legacy_supervisor_bind(&sv, handle, 64 << 10);
+
+	for (int pass = 0; pass < ARRAY_SIZE(latency); pass++) {
+		uint32_t start, end;
+
+		WRITE_ONCE(*result, 0);
+		start = READ_ONCE(*timestamp);
+		supervisor_dispatch(&sv, 64 << 10);
+		while (!(end = READ_ONCE(*result)))
+			;
+
+		igt_assert_eq(READ_ONCE(*sv.semaphore), 0);
+		latency[pass] = end - start;
+	}
+
+	latency[0] = trifilter(latency);
+	igt_info("Dispatch latency: %u cycles, %.0fns\n",
+		 latency[0], latency[0] * 1e9 / cs_timestamp_freq);
+
+	supervisor_close(&sv);
+
+	gem_close(i915, handle);
+}
+
+#define test_each_engine(T, i915, e) \
+	igt_subtest_with_dynamic(T) __for_each_physical_engine(i915, e) \
+		for_each_if(gem_class_can_store_dword(i915, (e)->class)) \
+			igt_dynamic_f("%s", (e)->name)
+igt_main
+{
+	const struct intel_execution_engine2 *e;
+	int i915 = -1;
+
+	igt_skip_on_simulation();
+
+	igt_fixture {
+		i915 = drm_open_driver(DRIVER_INTEL);
+		igt_require_gem(i915);
+	}
+
+	igt_subtest_group {
+		struct intel_mmio_data mmio;
+
+		igt_fixture {
+			igt_require(gem_scheduler_enabled(i915));
+			igt_require(gem_scheduler_has_preemption(i915));
+
+			intel_register_access_init(&mmio,
+						   intel_get_pci_device(),
+						   false, i915);
+		}
+
+		test_each_engine("dispatch", i915, e)
+				endless_dispatch(i915, e);
+
+		igt_fixture
+			intel_register_access_fini(&mmio);
+	}
+}
diff --git a/tests/meson.build b/tests/meson.build
index 88e4875b6..9312b6944 100644
--- a/tests/meson.build
+++ b/tests/meson.build
@@ -140,6 +140,7 @@  i915_progs = [
 	'gem_exec_big',
 	'gem_exec_capture',
 	'gem_exec_create',
+	'gem_exec_endless',
 	'gem_exec_fence',
 	'gem_exec_flush',
 	'gem_exec_gttfill',