[i-g-t,08/16] i915: Exercise creating context with shared GTT

Message ID 20190508100958.32637-8-chris@chris-wilson.co.uk
State New, archived
Series [i-g-t,01/16] i915/gem_exec_schedule: Semaphore priority fixups

Commit Message

Chris Wilson May 8, 2019, 10:09 a.m. UTC
v2: Test that each shared context is its own timeline and allows request
reordering between shared contexts.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Cc: Michal Wajdeczko <michal.wajdeczko@intel.com>
---
 lib/i915/gem_context.c        |  68 +++
 lib/i915/gem_context.h        |  13 +
 tests/Makefile.sources        |   1 +
 tests/i915/gem_ctx_shared.c   | 856 ++++++++++++++++++++++++++++++++++
 tests/i915/gem_exec_whisper.c |  32 +-
 tests/meson.build             |   1 +
 6 files changed, 962 insertions(+), 9 deletions(-)
 create mode 100644 tests/i915/gem_ctx_shared.c

Comments

Tvrtko Ursulin May 15, 2019, 6:37 a.m. UTC | #1
On 08/05/2019 11:09, Chris Wilson wrote:
> v2: Test that each shared context is its own timeline and allows request
> reordering between shared contexts.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
> Cc: Michal Wajdeczko <michal.wajdeczko@intel.com>
> ---
>   lib/i915/gem_context.c        |  68 +++
>   lib/i915/gem_context.h        |  13 +
>   tests/Makefile.sources        |   1 +
>   tests/i915/gem_ctx_shared.c   | 856 ++++++++++++++++++++++++++++++++++
>   tests/i915/gem_exec_whisper.c |  32 +-
>   tests/meson.build             |   1 +
>   6 files changed, 962 insertions(+), 9 deletions(-)
>   create mode 100644 tests/i915/gem_ctx_shared.c
> 
> diff --git a/lib/i915/gem_context.c b/lib/i915/gem_context.c
> index f94d89cb4..8fb8984d1 100644
> --- a/lib/i915/gem_context.c
> +++ b/lib/i915/gem_context.c
> @@ -272,6 +272,74 @@ void gem_context_set_priority(int fd, uint32_t ctx_id, int prio)
>   	igt_assert_eq(__gem_context_set_priority(fd, ctx_id, prio), 0);
>   }
>   
> +int
> +__gem_context_clone(int i915,
> +		    uint32_t src, unsigned int share,
> +		    unsigned int flags,
> +		    uint32_t *out)
> +{
> +	struct drm_i915_gem_context_create_ext_clone clone = {
> +		{ .name = I915_CONTEXT_CREATE_EXT_CLONE },
> +		.clone_id = src,
> +		.flags = share,
> +	};
> +	struct drm_i915_gem_context_create_ext arg = {
> +		.flags = flags | I915_CONTEXT_CREATE_FLAGS_USE_EXTENSIONS,
> +		.extensions = to_user_pointer(&clone),
> +	};
> +	int err = 0;
> +
> +	if (igt_ioctl(i915, DRM_IOCTL_I915_GEM_CONTEXT_CREATE_EXT, &arg))
> +		err = -errno;
> +
> +	*out = arg.ctx_id;
> +
> +	errno = 0;
> +	return err;
> +}
> +
> +static bool __gem_context_has(int i915, uint32_t share, unsigned int flags)
> +{
> +	uint32_t ctx;
> +
> +	__gem_context_clone(i915, 0, share, flags, &ctx);
> +	if (ctx)
> +		gem_context_destroy(i915, ctx);
> +
> +	errno = 0;
> +	return ctx;
> +}
> +
> +bool gem_contexts_has_shared_gtt(int i915)
> +{
> +	return __gem_context_has(i915, I915_CONTEXT_CLONE_VM, 0);
> +}
> +
> +bool gem_has_queues(int i915)
> +{
> +	return __gem_context_has(i915,
> +				 I915_CONTEXT_CLONE_VM,
> +				 I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE);
> +}
> +
> +uint32_t gem_context_clone(int i915,
> +			   uint32_t src, unsigned int share,
> +			   unsigned int flags)
> +{
> +	uint32_t ctx;
> +
> +	igt_assert_eq(__gem_context_clone(i915, src, share, flags, &ctx), 0);
> +
> +	return ctx;
> +}
> +
> +uint32_t gem_queue_create(int i915)
> +{
> +	return gem_context_clone(i915, 0,
> +				 I915_CONTEXT_CLONE_VM,
> +				 I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE);
> +}
> +
>   bool gem_context_has_engine(int fd, uint32_t ctx, uint64_t engine)
>   {
>   	struct drm_i915_gem_exec_object2 exec = {};
> diff --git a/lib/i915/gem_context.h b/lib/i915/gem_context.h
> index a052714d4..8043c3401 100644
> --- a/lib/i915/gem_context.h
> +++ b/lib/i915/gem_context.h
> @@ -29,6 +29,19 @@ int __gem_context_create(int fd, uint32_t *ctx_id);
>   void gem_context_destroy(int fd, uint32_t ctx_id);
>   int __gem_context_destroy(int fd, uint32_t ctx_id);
>   
> +int __gem_context_clone(int i915,
> +			uint32_t src, unsigned int share,
> +			unsigned int flags,
> +			uint32_t *out);
> +uint32_t gem_context_clone(int i915,
> +			   uint32_t src, unsigned int share,
> +			   unsigned int flags);
> +
> +uint32_t gem_queue_create(int i915);
> +
> +bool gem_contexts_has_shared_gtt(int i915);
> +bool gem_has_queues(int i915);
> +
>   bool gem_has_contexts(int fd);
>   void gem_require_contexts(int fd);
>   void gem_context_require_bannable(int fd);
> diff --git a/tests/Makefile.sources b/tests/Makefile.sources
> index e1b7feeb2..3552e895b 100644
> --- a/tests/Makefile.sources
> +++ b/tests/Makefile.sources
> @@ -22,6 +22,7 @@ TESTS_progs = \
>   	drm_mm \
>   	drm_read \
>   	i915/gem_ctx_clone \
> +	i915/gem_ctx_shared \
>   	i915/gem_vm_create \
>   	kms_3d \
>   	kms_addfb_basic \
> diff --git a/tests/i915/gem_ctx_shared.c b/tests/i915/gem_ctx_shared.c
> new file mode 100644
> index 000000000..0076f5e9d
> --- /dev/null
> +++ b/tests/i915/gem_ctx_shared.c
> @@ -0,0 +1,856 @@
> +/*
> + * Copyright © 2017 Intel Corporation

2019

> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a
> + * copy of this software and associated documentation files (the "Software"),
> + * to deal in the Software without restriction, including without limitation
> + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
> + * and/or sell copies of the Software, and to permit persons to whom the
> + * Software is furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice (including the next
> + * paragraph) shall be included in all copies or substantial portions of the
> + * Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
> + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
> + * IN THE SOFTWARE.
> + *
> + */
> +
> +#include "igt.h"
> +
> +#include <unistd.h>
> +#include <stdlib.h>
> +#include <stdint.h>
> +#include <stdio.h>
> +#include <string.h>
> +#include <fcntl.h>
> +#include <inttypes.h>
> +#include <errno.h>
> +#include <sys/stat.h>
> +#include <sys/ioctl.h>
> +#include <sys/time.h>
> +
> +#include <drm.h>
> +
> +#include "igt_rand.h"
> +#include "igt_vgem.h"
> +#include "sync_file.h"
> +
> +#define LO 0
> +#define HI 1
> +#define NOISE 2
> +
> +#define MAX_PRIO LOCAL_I915_CONTEXT_MAX_USER_PRIORITY
> +#define MIN_PRIO LOCAL_I915_CONTEXT_MIN_USER_PRIORITY
> +
> +static int priorities[] = {
> +	[LO] = MIN_PRIO / 2,
> +	[HI] = MAX_PRIO / 2,
> +};
> +
> +#define MAX_ELSP_QLEN 16
> +
> +IGT_TEST_DESCRIPTION("Test shared contexts.");
> +
> +static void create_shared_gtt(int i915, unsigned int flags)
> +#define DETACHED 0x1
> +{
> +	const uint32_t bbe = MI_BATCH_BUFFER_END;
> +	struct drm_i915_gem_exec_object2 obj = {
> +		.handle = gem_create(i915, 4096),
> +	};
> +	struct drm_i915_gem_execbuffer2 execbuf = {
> +		.buffers_ptr = to_user_pointer(&obj),
> +		.buffer_count = 1,
> +	};
> +	uint32_t parent, child;
> +
> +	gem_write(i915, obj.handle, 0, &bbe, sizeof(bbe));
> +	gem_execbuf(i915, &execbuf);
> +	gem_sync(i915, obj.handle);
> +
> +	child = flags & DETACHED ? gem_context_create(i915) : 0;
> +	igt_until_timeout(2) {
> +		parent = flags & DETACHED ? child : 0;
> +		child = gem_context_clone(i915,
> +					  parent, I915_CONTEXT_CLONE_VM,
> +					  0);
> +		execbuf.rsvd1 = child;
> +		gem_execbuf(i915, &execbuf);
> +
> +		if (flags & DETACHED) {
> +			gem_context_destroy(i915, parent);
> +			gem_execbuf(i915, &execbuf);
> +		} else {
> +			parent = child;
> +			gem_context_destroy(i915, parent);
> +		}
> +
> +		execbuf.rsvd1 = parent;
> +		igt_assert_eq(__gem_execbuf(i915, &execbuf), -ENOENT);
> +		igt_assert_eq(__gem_context_clone(i915,
> +						  parent, I915_CONTEXT_CLONE_VM,
> +						  0, &parent), -ENOENT);
> +	}
> +	if (flags & DETACHED)
> +		gem_context_destroy(i915, child);
> +
> +	gem_sync(i915, obj.handle);
> +	gem_close(i915, obj.handle);
> +}
> +
> +static void disjoint_timelines(int i915)
> +{
> +	IGT_CORK_HANDLE(cork);
> +	igt_spin_t *spin[2];
> +	uint32_t plug, child;
> +
> +	igt_require(gem_has_execlists(i915));
> +
> +	/*
> +	 * Each context, although they share a vm, are expected to be
> +	 * distinct timelines. A request queued to one context should be
> +	 * independent of any shared contexts.
> +	 */
> +	child = gem_context_clone(i915, 0, I915_CONTEXT_CLONE_VM, 0);
> +	plug = igt_cork_plug(&cork, i915);
> +
> +	spin[0] = __igt_spin_new(i915, .ctx = 0, .dependency = plug);
> +	spin[1] = __igt_spin_new(i915, .ctx = child);
> +
> +	/* Wait for the second spinner, will hang if stuck behind the first */
> +	igt_spin_end(spin[1]);
> +	gem_sync(i915, spin[1]->handle);
> +
> +	igt_cork_unplug(&cork);
> +
> +	igt_spin_free(i915, spin[1]);
> +	igt_spin_free(i915, spin[0]);
> +}
> +
> +static void exhaust_shared_gtt(int i915, unsigned int flags)
> +#define EXHAUST_LRC 0x1
> +{
> +	i915 = gem_reopen_driver(i915);
> +
> +	igt_fork(pid, 1) {
> +		const uint32_t bbe = MI_BATCH_BUFFER_END;
> +		struct drm_i915_gem_exec_object2 obj = {
> +			.handle = gem_create(i915, 4096)
> +		};
> +		struct drm_i915_gem_execbuffer2 execbuf = {
> +			.buffers_ptr = to_user_pointer(&obj),
> +			.buffer_count = 1,
> +		};
> +		uint32_t parent, child;
> +		unsigned long count = 0;
> +		int err;
> +
> +		gem_write(i915, obj.handle, 0, &bbe, sizeof(bbe));
> +
> +		child = 0;
> +		for (;;) {
> +			parent = child;
> +			err = __gem_context_clone(i915,
> +						  parent, I915_CONTEXT_CLONE_VM,
> +						  0, &child);
> +			if (err)
> +				break;
> +
> +			if (flags & EXHAUST_LRC) {
> +				execbuf.rsvd1 = child;
> +				err = __gem_execbuf(i915, &execbuf);
> +				if (err)
> +					break;
> +			}

What are the stop conditions in this test, with and without the 
EXHAUST_LRC flag? It would be good to put that in a comment.

Especially since AFAIR this one was causing OOM for me so might need to 
be tweaked.

> +
> +			count++;
> +		}
> +		gem_sync(i915, obj.handle);
> +
> +		igt_info("Created %lu shared contexts, before %d (%s)\n",
> +			 count, err, strerror(-err));
> +	}
> +	close(i915);
> +	igt_waitchildren();
> +}
> +
> +static void exec_shared_gtt(int i915, unsigned int ring)
> +{
> +	const int gen = intel_gen(intel_get_drm_devid(i915));
> +	const uint32_t bbe = MI_BATCH_BUFFER_END;
> +	struct drm_i915_gem_exec_object2 obj = {
> +		.handle = gem_create(i915, 4096)
> +	};
> +	struct drm_i915_gem_execbuffer2 execbuf = {
> +		.buffers_ptr = to_user_pointer(&obj),
> +		.buffer_count = 1,
> +		.flags = ring,
> +	};
> +	uint32_t scratch = obj.handle;
> +	uint32_t batch[16];
> +	int i;
> +
> +	gem_require_ring(i915, ring);
> +	igt_require(gem_can_store_dword(i915, ring));
> +
> +	/* Load object into place in the GTT */
> +	gem_write(i915, obj.handle, 0, &bbe, sizeof(bbe));
> +	gem_execbuf(i915, &execbuf);
> +
> +	/* Presume nothing causes an eviction in the meantime */
> +
> +	obj.handle = gem_create(i915, 4096);
> +
> +	i = 0;
> +	batch[i] = MI_STORE_DWORD_IMM | (gen < 6 ? 1 << 22 : 0);
> +	if (gen >= 8) {
> +		batch[++i] = obj.offset;
> +		batch[++i] = 0;
> +	} else if (gen >= 4) {
> +		batch[++i] = 0;
> +		batch[++i] = obj.offset;
> +	} else {
> +		batch[i]--;
> +		batch[++i] = obj.offset;
> +	}
> +	batch[++i] = 0xc0ffee;
> +	batch[++i] = MI_BATCH_BUFFER_END;
> +	gem_write(i915, obj.handle, 0, batch, sizeof(batch));
> +
> +	obj.offset += 4096; /* make sure we don't cause an eviction! */

Is 4k apart safe?

A short comment on how this test works would be good.
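
Something like this, perhaps, as a summary of my reading of it (so the
details are my guesses):

/*
 * Bind a batch into the GTT with the default context and remember its
 * offset. Then, from a context cloned with I915_CONTEXT_CLONE_VM, pin
 * a second batch one page above that offset and have it store a dword
 * into the first object. If the two contexts really do share a GTT,
 * the readback of the first object sees 0xc0ffee.
 */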

> +	obj.flags |= EXEC_OBJECT_PINNED;
> +	execbuf.rsvd1 = gem_context_clone(i915, 0, I915_CONTEXT_CLONE_VM, 0);
> +	if (gen > 3 && gen < 6)
> +		execbuf.flags |= I915_EXEC_SECURE;
> +
> +	gem_execbuf(i915, &execbuf);
> +	gem_context_destroy(i915, execbuf.rsvd1);
> +	gem_sync(i915, obj.handle); /* write hazard lies */
> +	gem_close(i915, obj.handle);
> +
> +	gem_read(i915, scratch, 0, batch, sizeof(uint32_t));
> +	gem_close(i915, scratch);
> +
> +	igt_assert_eq_u32(*batch, 0xc0ffee);
> +}
> +
> +static int nop_sync(int i915, uint32_t ctx, unsigned int ring, int64_t timeout)
> +{
> +	const uint32_t bbe = MI_BATCH_BUFFER_END;
> +	struct drm_i915_gem_exec_object2 obj = {
> +		.handle = gem_create(i915, 4096),
> +	};
> +	struct drm_i915_gem_execbuffer2 execbuf = {
> +		.buffers_ptr = to_user_pointer(&obj),
> +		.buffer_count = 1,
> +		.flags = ring,
> +		.rsvd1 = ctx,
> +	};
> +	int err;
> +
> +	gem_write(i915, obj.handle, 0, &bbe, sizeof(bbe));
> +	gem_execbuf(i915, &execbuf);
> +	err = gem_wait(i915, obj.handle, &timeout);
> +	gem_close(i915, obj.handle);
> +
> +	return err;
> +}
> +
> +static bool has_single_timeline(int i915)
> +{
> +	uint32_t ctx;
> +
> +	__gem_context_clone(i915, 0, 0,
> +			    I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE,
> +			    &ctx);
> +	if (ctx)
> +		gem_context_destroy(i915, ctx);
> +
> +	return ctx != 0;
> +}
> +
> +static bool ignore_engine(unsigned engine)
> +{
> +	if (engine == 0)
> +		return true;
> +
> +	if (engine == I915_EXEC_BSD)
> +		return true;
> +
> +	return false;
> +}
> +
> +static void single_timeline(int i915)
> +{
> +	const uint32_t bbe = MI_BATCH_BUFFER_END;
> +	struct drm_i915_gem_exec_object2 obj = {
> +		.handle = gem_create(i915, 4096),
> +	};
> +	struct drm_i915_gem_execbuffer2 execbuf = {
> +		.buffers_ptr = to_user_pointer(&obj),
> +		.buffer_count = 1,
> +	};
> +	struct sync_fence_info rings[16];

Could use for_each_physical_engine to count the engines. But we probably 
have plenty of this around the code base.
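
E.g. (a sketch, assuming the for_each_physical_engine(fd, e) iterator
from igt_gt.h):

const struct intel_execution_engine *e;
unsigned int nengine = 0;

/* count the engines actually present instead of hardcoding 16 */
for_each_physical_engine(i915, e)
	nengine++;

struct sync_fence_info *rings = calloc(nengine, sizeof(*rings));
igt_assert(rings);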

> +	struct sync_file_info sync_file_info = {
> +		.num_fences = 1,
> +	};
> +	unsigned int engine;
> +	int n;
> +
> +	igt_require(has_single_timeline(i915));
> +
> +	gem_write(i915, obj.handle, 0, &bbe, sizeof(bbe));
> +	gem_execbuf(i915, &execbuf);
> +	gem_sync(i915, obj.handle);
> +
> +	/*
> +	 * For a "single timeline" context, each ring is on the common
> +	 * timeline, unlike a normal context where each ring has an
> +	 * independent timeline. That is no matter which engine we submit
> +	 * to, it reports the same timeline name and fence context. However,
> +	 * the fence context is not reported through the sync_fence_info.

Is the test useful then? There was one I reviewed earlier in this series 
which tested for execution ordering, which sounds like what's needed.

> +	 */
> +	execbuf.rsvd1 =
> +		gem_context_clone(i915, 0, 0,
> +				  I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE);
> +	execbuf.flags = I915_EXEC_FENCE_OUT;
> +	n = 0;
> +	for_each_engine(i915, engine) {

for_each_physical_engine to align with Andi's work?

> +		gem_execbuf_wr(i915, &execbuf);
> +		sync_file_info.sync_fence_info = to_user_pointer(&rings[n]);
> +		do_ioctl(execbuf.rsvd2 >> 32, SYNC_IOC_FILE_INFO, &sync_file_info);
> +		close(execbuf.rsvd2 >> 32);
> +
> +		igt_info("ring[%d] fence: %s %s\n",
> +			 n, rings[n].driver_name, rings[n].obj_name);
> +		n++;
> +	}
> +	gem_sync(i915, obj.handle);
> +	gem_close(i915, obj.handle);
> +
> +	for (int i = 1; i < n; i++) {
> +		igt_assert(!strcmp(rings[0].driver_name, rings[i].driver_name));
> +		igt_assert(!strcmp(rings[0].obj_name, rings[i].obj_name));

What is in obj_name?

> +	}
> +}
> +
> +static void exec_single_timeline(int i915, unsigned int ring)
> +{
> +	unsigned int other;
> +	igt_spin_t *spin;
> +	uint32_t ctx;
> +
> +	gem_require_ring(i915, ring);
> +	igt_require(has_single_timeline(i915));
> +
> +	/*
> +	 * On an ordinary context, a blockage on one ring doesn't prevent
> +	 * execution on another.
> +	 */
> +	ctx = 0;
> +	spin = NULL;
> +	for_each_engine(i915, other) {

for_each_physical

> +		if (other == ring || ignore_engine(other))
> +			continue;
> +
> +		if (spin == NULL) {
> +			spin = __igt_spin_new(i915, .ctx = ctx, .engine = other);
> +		} else {
> +			struct drm_i915_gem_execbuffer2 execbuf = {
> +				.buffers_ptr = spin->execbuf.buffers_ptr,
> +				.buffer_count = spin->execbuf.buffer_count,
> +				.flags = other,
> +				.rsvd1 = ctx,
> +			};
> +			gem_execbuf(i915, &execbuf);
> +		}
> +	}
> +	igt_require(spin);
> +	igt_assert_eq(nop_sync(i915, ctx, ring, NSEC_PER_SEC), 0);
> +	igt_spin_free(i915, spin);
> +
> +	/*
> +	 * But if we create a context with just a single shared timeline,
> +	 * then it will block waiting for the earlier requests on the
> +	 * other engines.
> +	 */
> +	ctx = gem_context_clone(i915, 0, 0,
> +				I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE);
> +	spin = NULL;
> +	for_each_engine(i915, other) {

Ditto.

> +		if (other == ring || ignore_engine(other))
> +			continue;
> +
> +		if (spin == NULL) {
> +			spin = __igt_spin_new(i915, .ctx = ctx, .engine = other);
> +		} else {
> +			struct drm_i915_gem_execbuffer2 execbuf = {
> +				.buffers_ptr = spin->execbuf.buffers_ptr,
> +				.buffer_count = spin->execbuf.buffer_count,
> +				.flags = other,
> +				.rsvd1 = ctx,
> +			};
> +			gem_execbuf(i915, &execbuf);
> +		}
> +	}
> +	igt_assert(spin);
> +	igt_assert_eq(nop_sync(i915, ctx, ring, NSEC_PER_SEC), -ETIME);
> +	igt_spin_free(i915, spin);
> +}
> +
> +static void store_dword(int i915, uint32_t ctx, unsigned ring,
> +			uint32_t target, uint32_t offset, uint32_t value,
> +			uint32_t cork, unsigned write_domain)
> +{
> +	const int gen = intel_gen(intel_get_drm_devid(i915));
> +	struct drm_i915_gem_exec_object2 obj[3];
> +	struct drm_i915_gem_relocation_entry reloc;
> +	struct drm_i915_gem_execbuffer2 execbuf;
> +	uint32_t batch[16];
> +	int i;
> +
> +	memset(&execbuf, 0, sizeof(execbuf));
> +	execbuf.buffers_ptr = to_user_pointer(obj + !cork);
> +	execbuf.buffer_count = 2 + !!cork;
> +	execbuf.flags = ring;
> +	if (gen < 6)
> +		execbuf.flags |= I915_EXEC_SECURE;
> +	execbuf.rsvd1 = ctx;
> +
> +	memset(obj, 0, sizeof(obj));
> +	obj[0].handle = cork;
> +	obj[1].handle = target;
> +	obj[2].handle = gem_create(i915, 4096);
> +
> +	memset(&reloc, 0, sizeof(reloc));
> +	reloc.target_handle = obj[1].handle;
> +	reloc.presumed_offset = 0;
> +	reloc.offset = sizeof(uint32_t);
> +	reloc.delta = offset;
> +	reloc.read_domains = I915_GEM_DOMAIN_INSTRUCTION;
> +	reloc.write_domain = write_domain;
> +	obj[2].relocs_ptr = to_user_pointer(&reloc);
> +	obj[2].relocation_count = 1;
> +
> +	i = 0;
> +	batch[i] = MI_STORE_DWORD_IMM | (gen < 6 ? 1 << 22 : 0);
> +	if (gen >= 8) {
> +		batch[++i] = offset;
> +		batch[++i] = 0;
> +	} else if (gen >= 4) {
> +		batch[++i] = 0;
> +		batch[++i] = offset;
> +		reloc.offset += sizeof(uint32_t);
> +	} else {
> +		batch[i]--;
> +		batch[++i] = offset;
> +	}
> +	batch[++i] = value;
> +	batch[++i] = MI_BATCH_BUFFER_END;
> +	gem_write(i915, obj[2].handle, 0, batch, sizeof(batch));
> +	gem_execbuf(i915, &execbuf);
> +	gem_close(i915, obj[2].handle);
> +}
> +
> +static uint32_t create_highest_priority(int i915)
> +{
> +	uint32_t ctx = gem_context_create(i915);
> +
> +	/*
> +	 * If there is no priority support, all contexts will have equal
> +	 * priority (and therefore the max user priority), so no context
> +	 * can overtake us, and we effectively can form a plug.
> +	 */
> +	__gem_context_set_priority(i915, ctx, MAX_PRIO);
> +
> +	return ctx;
> +}
> +
> +static void unplug_show_queue(int i915, struct igt_cork *c, unsigned int engine)
> +{
> +	igt_spin_t *spin[MAX_ELSP_QLEN];

Why is this 16?

> +
> +	for (int n = 0; n < ARRAY_SIZE(spin); n++) {
> +		const struct igt_spin_factory opts = {
> +			.ctx = create_highest_priority(i915),
> +			.engine = engine,
> +		};
> +		spin[n] = __igt_spin_factory(i915, &opts);
> +		gem_context_destroy(i915, opts.ctx);
> +	}
> +
> +	igt_cork_unplug(c); /* batches will now be queued on the engine */
> +	igt_debugfs_dump(i915, "i915_engine_info");
> +
> +	for (int n = 0; n < ARRAY_SIZE(spin); n++)
> +		igt_spin_free(i915, spin[n]);
> +}
> +
> +static uint32_t store_timestamp(int i915,
> +				uint32_t ctx, unsigned ring,
> +				unsigned mmio_base)
> +{
> +	const bool r64b = intel_gen(intel_get_drm_devid(i915)) >= 8;
> +	struct drm_i915_gem_exec_object2 obj = {
> +		.handle = gem_create(i915, 4096),
> +		.relocation_count = 1,
> +	};
> +	struct drm_i915_gem_relocation_entry reloc = {
> +		.target_handle = obj.handle,
> +		.offset = 2 * sizeof(uint32_t),
> +		.delta = 4092,
> +		.read_domains = I915_GEM_DOMAIN_INSTRUCTION,
> +	};
> +	struct drm_i915_gem_execbuffer2 execbuf = {
> +		.buffers_ptr = to_user_pointer(&obj),
> +		.buffer_count = 1,
> +		.flags = ring,
> +		.rsvd1 = ctx,
> +	};
> +	uint32_t batch[] = {
> +		0x24 << 23 | (1 + r64b), /* SRM */
> +		mmio_base + 0x358,
> +		4092,
> +		0,
> +		MI_BATCH_BUFFER_END
> +	};
> +
> +	igt_require(intel_gen(intel_get_drm_devid(i915)) >= 7);
> +
> +	gem_write(i915, obj.handle, 0, batch, sizeof(batch));
> +	obj.relocs_ptr = to_user_pointer(&reloc);
> +
> +	gem_execbuf(i915, &execbuf);
> +
> +	return obj.handle;
> +}
> +
> +static void independent(int i915, unsigned ring, unsigned flags)
> +{
> +	uint32_t handle[ARRAY_SIZE(priorities)];
> +	igt_spin_t *spin[MAX_ELSP_QLEN];
> +	unsigned int mmio_base;
> +
> +	/* XXX i915_query()! */
> +	switch (ring) {
> +	case I915_EXEC_DEFAULT:
> +	case I915_EXEC_RENDER:
> +		mmio_base = 0x2000;
> +		break;
> +#if 0
> +	case I915_EXEC_BSD:
> +		mmio_base = 0x12000;
> +		break;
> +#endif
> +	case I915_EXEC_BLT:
> +		mmio_base = 0x22000;
> +		break;
> +
> +	case I915_EXEC_VEBOX:
> +		if (intel_gen(intel_get_drm_devid(i915)) >= 11)
> +			mmio_base = 0x1d8000;
> +		else
> +			mmio_base = 0x1a000;
> +		break;
> +
> +	default:
> +		igt_skip("mmio base not known\n");
> +	}

Ufff this is quite questionable. Should we rather have this subtest in 
selftests only?

> +
> +	for (int n = 0; n < ARRAY_SIZE(spin); n++) {
> +		const struct igt_spin_factory opts = {
> +			.ctx = create_highest_priority(i915),
> +			.engine = ring,
> +		};
> +		spin[n] = __igt_spin_factory(i915, &opts);
> +		gem_context_destroy(i915, opts.ctx);
> +	}
> +
> +	for (int i = 0; i < ARRAY_SIZE(priorities); i++) {
> +		uint32_t ctx = gem_queue_create(i915);
> +		gem_context_set_priority(i915, ctx, priorities[i]);
> +		handle[i] = store_timestamp(i915, ctx, ring, mmio_base);
> +		gem_context_destroy(i915, ctx);
> +	}
> +
> +	for (int n = 0; n < ARRAY_SIZE(spin); n++)
> +		igt_spin_free(i915, spin[n]);
> +
> +	for (int i = 0; i < ARRAY_SIZE(priorities); i++) {
> +		uint32_t *ptr;
> +
> +		ptr = gem_mmap__gtt(i915, handle[i], 4096, PROT_READ);
> +		gem_set_domain(i915, handle[i], /* no write hazard lies! */
> +			       I915_GEM_DOMAIN_GTT, I915_GEM_DOMAIN_GTT);
> +		gem_close(i915, handle[i]);
> +
> +		handle[i] = ptr[1023];

1023 relates to 4092 from store_timestamp I gather. The two need to be 
defined closer together.
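
E.g. a shared define would do (TIMESTAMP_OFFSET is a name I have just
made up):

#define TIMESTAMP_OFFSET 4092 /* last dword of the 4KiB object */

so that store_timestamp() uses .delta = TIMESTAMP_OFFSET and writes the
SRM result there, and the readback becomes

	handle[i] = ptr[TIMESTAMP_OFFSET / sizeof(uint32_t)]; /* == ptr[1023] */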

> +		munmap(ptr, 4096);
> +
> +		igt_debug("ctx[%d] .prio=%d, timestamp=%u\n",
> +			  i, priorities[i], handle[i]);
> +	}
> +
> +	igt_assert((int32_t)(handle[HI] - handle[LO]) < 0);
> +}
> +
> +static void reorder(int i915, unsigned ring, unsigned flags)
> +#define EQUAL 1
> +{
> +	IGT_CORK_HANDLE(cork);
> +	uint32_t scratch;
> +	uint32_t *ptr;
> +	uint32_t ctx[2];
> +	uint32_t plug;
> +
> +	ctx[LO] = gem_queue_create(i915);
> +	gem_context_set_priority(i915, ctx[LO], MIN_PRIO);
> +
> +	ctx[HI] = gem_queue_create(i915);
> +	gem_context_set_priority(i915, ctx[HI], flags & EQUAL ? MIN_PRIO : 0);
> +
> +	scratch = gem_create(i915, 4096);
> +	plug = igt_cork_plug(&cork, i915);
> +
> +	/* We expect the high priority context to be executed first, and
> +	 * so the final result will be value from the low priority context.
> +	 */
> +	store_dword(i915, ctx[LO], ring, scratch, 0, ctx[LO], plug, 0);
> +	store_dword(i915, ctx[HI], ring, scratch, 0, ctx[HI], plug, 0);
> +
> +	unplug_show_queue(i915, &cork, ring);
> +	gem_close(i915, plug);
> +
> +	gem_context_destroy(i915, ctx[LO]);
> +	gem_context_destroy(i915, ctx[HI]);
> +
> +	ptr = gem_mmap__gtt(i915, scratch, 4096, PROT_READ);
> +	gem_set_domain(i915, scratch, /* no write hazard lies! */
> +		       I915_GEM_DOMAIN_GTT, I915_GEM_DOMAIN_GTT);
> +	gem_close(i915, scratch);
> +
> +	if (flags & EQUAL) /* equal priority, result will be fifo */
> +		igt_assert_eq_u32(ptr[0], ctx[HI]);
> +	else
> +		igt_assert_eq_u32(ptr[0], ctx[LO]);
> +	munmap(ptr, 4096);
> +}
> +
> +static void promotion(int i915, unsigned ring)
> +{
> +	IGT_CORK_HANDLE(cork);
> +	uint32_t result, dep;
> +	uint32_t *ptr;
> +	uint32_t ctx[3];
> +	uint32_t plug;
> +
> +	ctx[LO] = gem_queue_create(i915);
> +	gem_context_set_priority(i915, ctx[LO], MIN_PRIO);
> +
> +	ctx[HI] = gem_queue_create(i915);
> +	gem_context_set_priority(i915, ctx[HI], 0);
> +
> +	ctx[NOISE] = gem_queue_create(i915);
> +	gem_context_set_priority(i915, ctx[NOISE], MIN_PRIO/2);
> +
> +	result = gem_create(i915, 4096);
> +	dep = gem_create(i915, 4096);
> +
> +	plug = igt_cork_plug(&cork, i915);
> +
> +	/* Expect that HI promotes LO, so the order will be LO, HI, NOISE.
> +	 *
> +	 * fifo would be NOISE, LO, HI.
> +	 * strict priority would be  HI, NOISE, LO
> +	 */
> +	store_dword(i915, ctx[NOISE], ring, result, 0, ctx[NOISE], plug, 0);
> +	store_dword(i915, ctx[LO], ring, result, 0, ctx[LO], plug, 0);
> +
> +	/* link LO <-> HI via a dependency on another buffer */
> +	store_dword(i915, ctx[LO], ring, dep, 0, ctx[LO], 0, I915_GEM_DOMAIN_INSTRUCTION);
> +	store_dword(i915, ctx[HI], ring, dep, 0, ctx[HI], 0, 0);
> +
> +	store_dword(i915, ctx[HI], ring, result, 0, ctx[HI], 0, 0);
> +
> +	unplug_show_queue(i915, &cork, ring);
> +	gem_close(i915, plug);
> +
> +	gem_context_destroy(i915, ctx[NOISE]);
> +	gem_context_destroy(i915, ctx[LO]);
> +	gem_context_destroy(i915, ctx[HI]);
> +
> +	ptr = gem_mmap__gtt(i915, dep, 4096, PROT_READ);
> +	gem_set_domain(i915, dep, /* no write hazard lies! */
> +			I915_GEM_DOMAIN_GTT, I915_GEM_DOMAIN_GTT);
> +	gem_close(i915, dep);
> +
> +	igt_assert_eq_u32(ptr[0], ctx[HI]);
> +	munmap(ptr, 4096);
> +
> +	ptr = gem_mmap__gtt(i915, result, 4096, PROT_READ);
> +	gem_set_domain(i915, result, /* no write hazard lies! */
> +			I915_GEM_DOMAIN_GTT, I915_GEM_DOMAIN_GTT);
> +	gem_close(i915, result);
> +
> +	igt_assert_eq_u32(ptr[0], ctx[NOISE]);
> +	munmap(ptr, 4096);
> +}
> +
> +static void smoketest(int i915, unsigned ring, unsigned timeout)
> +{
> +	const int ncpus = sysconf(_SC_NPROCESSORS_ONLN);
> +	unsigned engines[16];
> +	unsigned nengine;
> +	unsigned engine;
> +	uint32_t scratch;
> +	uint32_t *ptr;
> +
> +	nengine = 0;
> +	for_each_engine(i915, engine) {
> +		if (ignore_engine(engine))
> +			continue;
> +
> +		engines[nengine++] = engine;
> +	}
> +	igt_require(nengine);

for_each_physical_engine and counting the engines for the engines array
would be better, I think.

> +
> +	scratch = gem_create(i915, 4096);
> +	igt_fork(child, ncpus) {
> +		unsigned long count = 0;
> +		uint32_t ctx;
> +
> +		hars_petruska_f54_1_random_perturb(child);
> +
> +		ctx = gem_queue_create(i915);
> +		igt_until_timeout(timeout) {
> +			int prio;
> +
> +			prio = hars_petruska_f54_1_random_unsafe_max(MAX_PRIO - MIN_PRIO) + MIN_PRIO;
> +			gem_context_set_priority(i915, ctx, prio);
> +
> +			engine = engines[hars_petruska_f54_1_random_unsafe_max(nengine)];
> +			store_dword(i915, ctx, engine, scratch,
> +				    8*child + 0, ~child,
> +				    0, 0);
> +			for (unsigned int step = 0; step < 8; step++)
> +				store_dword(i915, ctx, engine, scratch,
> +					    8*child + 4, count++,
> +					    0, 0);
> +		}
> +		gem_context_destroy(i915, ctx);
> +	}
> +	igt_waitchildren();
> +
> +	ptr = gem_mmap__gtt(i915, scratch, 4096, PROT_READ);
> +	gem_set_domain(i915, scratch, /* no write hazard lies! */
> +			I915_GEM_DOMAIN_GTT, I915_GEM_DOMAIN_GTT);
> +	gem_close(i915, scratch);
> +
> +	for (unsigned n = 0; n < ncpus; n++) {
> +		igt_assert_eq_u32(ptr[2*n], ~n);
> +		/*
> +		 * Note this count is approximate due to unconstrained
> +		 * ordering of the dword writes between engines.
> +		 *
> +		 * Take the result with a pinch of salt.
> +		 */
> +		igt_info("Child[%d] completed %u cycles\n",  n, ptr[2*n+1]);
> +	}
> +	munmap(ptr, 4096);
> +}
> +
> +igt_main
> +{
> +	const struct intel_execution_engine *e;
> +	int i915 = -1;
> +
> +	igt_fixture {
> +		i915 = drm_open_driver(DRIVER_INTEL);
> +		igt_require_gem(i915);
> +	}
> +
> +	igt_subtest_group {
> +		igt_fixture {
> +			igt_require(gem_contexts_has_shared_gtt(i915));
> +			igt_fork_hang_detector(i915);
> +		}
> +
> +		igt_subtest("create-shared-gtt")
> +			create_shared_gtt(i915, 0);
> +
> +		igt_subtest("detached-shared-gtt")
> +			create_shared_gtt(i915, DETACHED);
> +
> +		igt_subtest("disjoint-timelines")
> +			disjoint_timelines(i915);
> +
> +		igt_subtest("single-timeline")
> +			single_timeline(i915);
> +
> +		igt_subtest("exhaust-shared-gtt")
> +			exhaust_shared_gtt(i915, 0);
> +
> +		igt_subtest("exhaust-shared-gtt-lrc")
> +			exhaust_shared_gtt(i915, EXHAUST_LRC);
> +
> +		for (e = intel_execution_engines; e->name; e++) {
> +			igt_subtest_f("exec-shared-gtt-%s", e->name)
> +				exec_shared_gtt(i915, e->exec_id | e->flags);

The same previously raised question on whether it should iterate the
legacy execbuf engines or the physical engines. Maybe you want different
subtests to do both?

> +
> +			if (!ignore_engine(e->exec_id | e->flags)) {
> +				igt_subtest_f("exec-single-timeline-%s",
> +					      e->name)
> +					exec_single_timeline(i915,
> +							     e->exec_id | e->flags);
> +			}
> +
> +			/*
> +			 * Check that the shared contexts operate independently,
> +			 * that is requests on one ("queue") can be scheduled
> +			 * around another queue. We only check the basics here,
> +			 * enough to reduce the queue into just another context,
> +			 * and so rely on gem_exec_schedule to prove the rest.
> +			 */
> +			igt_subtest_group {
> +				igt_fixture {
> +					gem_require_ring(i915, e->exec_id | e->flags);
> +					igt_require(gem_can_store_dword(i915, e->exec_id | e->flags));
> +					igt_require(gem_scheduler_enabled(i915));
> +					igt_require(gem_scheduler_has_ctx_priority(i915));
> +				}
> +
> +				igt_subtest_f("Q-independent-%s", e->name)
> +					independent(i915, e->exec_id | e->flags, 0);
> +
> +				igt_subtest_f("Q-in-order-%s", e->name)
> +					reorder(i915, e->exec_id | e->flags, EQUAL);
> +
> +				igt_subtest_f("Q-out-order-%s", e->name)
> +					reorder(i915, e->exec_id | e->flags, 0);
> +
> +				igt_subtest_f("Q-promotion-%s", e->name)
> +					promotion(i915, e->exec_id | e->flags);
> +
> +				igt_subtest_f("Q-smoketest-%s", e->name)
> +					smoketest(i915, e->exec_id | e->flags, 5);
> +			}
> +		}
> +
> +		igt_subtest("Q-smoketest-all") {
> +			igt_require(gem_scheduler_enabled(i915));
> +			igt_require(gem_scheduler_has_ctx_priority(i915));
> +			smoketest(i915, -1, 30);
> +		}
> +
> +		igt_fixture {
> +			igt_stop_hang_detector();
> +		}
> +	}
> +}
> diff --git a/tests/i915/gem_exec_whisper.c b/tests/i915/gem_exec_whisper.c
> index 6c3b53756..d3e0b0ba2 100644
> --- a/tests/i915/gem_exec_whisper.c
> +++ b/tests/i915/gem_exec_whisper.c
> @@ -87,6 +87,7 @@ static void verify_reloc(int fd, uint32_t handle,
>   #define HANG 0x20
>   #define SYNC 0x40
>   #define PRIORITY 0x80
> +#define QUEUES 0x100
>   
>   struct hang {
>   	struct drm_i915_gem_exec_object2 obj;
> @@ -171,7 +172,7 @@ static void ctx_set_random_priority(int fd, uint32_t ctx)
>   {
>   	int prio = hars_petruska_f54_1_random_unsafe_max(1024) - 512;
>   	gem_context_set_priority(fd, ctx, prio);
> -};
> +}
>   
>   static void whisper(int fd, unsigned engine, unsigned flags)
>   {
> @@ -226,6 +227,9 @@ static void whisper(int fd, unsigned engine, unsigned flags)
>   	if (flags & CONTEXTS)
>   		gem_require_contexts(fd);
>   
> +	if (flags & QUEUES)
> +		igt_require(gem_has_queues(fd));
> +
>   	if (flags & HANG)
>   		init_hang(&hang);
>   
> @@ -290,6 +294,10 @@ static void whisper(int fd, unsigned engine, unsigned flags)
>   			for (n = 0; n < 64; n++)
>   				contexts[n] = gem_context_create(fd);
>   		}
> +		if (flags & QUEUES) {
> +			for (n = 0; n < 64; n++)
> +				contexts[n] = gem_queue_create(fd);
> +		}
>   		if (flags & FDS) {
>   			for (n = 0; n < 64; n++)
>   				fds[n] = drm_open_driver(DRIVER_INTEL);
> @@ -403,7 +411,7 @@ static void whisper(int fd, unsigned engine, unsigned flags)
>   						execbuf.flags &= ~ENGINE_MASK;
>   						execbuf.flags |= engines[rand() % nengine];
>   					}
> -					if (flags & CONTEXTS) {
> +					if (flags & (CONTEXTS | QUEUES)) {
>   						execbuf.rsvd1 = contexts[rand() % 64];
>   						if (flags & PRIORITY)
>   							ctx_set_random_priority(this_fd, execbuf.rsvd1);
> @@ -486,7 +494,7 @@ static void whisper(int fd, unsigned engine, unsigned flags)
>   			for (n = 0; n < 64; n++)
>   				close(fds[n]);
>   		}
> -		if (flags & CONTEXTS) {
> +		if (flags & (CONTEXTS | QUEUES)) {
>   			for (n = 0; n < 64; n++)
>   				gem_context_destroy(fd, contexts[n]);
>   		}
> @@ -522,18 +530,24 @@ igt_main
>   		{ "chain-forked", CHAIN | FORKED },
>   		{ "chain-interruptible", CHAIN | INTERRUPTIBLE },
>   		{ "chain-sync", CHAIN | SYNC },
> -		{ "contexts", CONTEXTS },
> -		{ "contexts-interruptible", CONTEXTS | INTERRUPTIBLE},
> -		{ "contexts-forked", CONTEXTS | FORKED},
> -		{ "contexts-priority", CONTEXTS | FORKED | PRIORITY },
> -		{ "contexts-chain", CONTEXTS | CHAIN },
> -		{ "contexts-sync", CONTEXTS | SYNC },
>   		{ "fds", FDS },
>   		{ "fds-interruptible", FDS | INTERRUPTIBLE},
>   		{ "fds-forked", FDS | FORKED},
>   		{ "fds-priority", FDS | FORKED | PRIORITY },
>   		{ "fds-chain", FDS | CHAIN},
>   		{ "fds-sync", FDS | SYNC},
> +		{ "contexts", CONTEXTS },
> +		{ "contexts-interruptible", CONTEXTS | INTERRUPTIBLE},
> +		{ "contexts-forked", CONTEXTS | FORKED},
> +		{ "contexts-priority", CONTEXTS | FORKED | PRIORITY },
> +		{ "contexts-chain", CONTEXTS | CHAIN },
> +		{ "contexts-sync", CONTEXTS | SYNC },
> +		{ "queues", QUEUES },
> +		{ "queues-interruptible", QUEUES | INTERRUPTIBLE},
> +		{ "queues-forked", QUEUES | FORKED},
> +		{ "queues-priority", QUEUES | FORKED | PRIORITY },
> +		{ "queues-chain", QUEUES | CHAIN },
> +		{ "queues-sync", QUEUES | SYNC },
>   		{ NULL }
>   	};
>   	int fd;
> diff --git a/tests/meson.build b/tests/meson.build
> index 3810bd760..3883ae127 100644
> --- a/tests/meson.build
> +++ b/tests/meson.build
> @@ -114,6 +114,7 @@ i915_progs = [
>   	'gem_ctx_exec',
>   	'gem_ctx_isolation',
>   	'gem_ctx_param',
> +	'gem_ctx_shared',
>   	'gem_ctx_switch',
>   	'gem_ctx_thrash',
>   	'gem_double_irq_loop',
> 

Regards,

Tvrtko
Chris Wilson May 15, 2019, 7:33 p.m. UTC | #2
Quoting Tvrtko Ursulin (2019-05-15 07:37:18)
> 
> On 08/05/2019 11:09, Chris Wilson wrote:
> > v2: Test that each shared context is its own timeline and allows request
> > reordering between shared contexts.
> > 
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
> > Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> > Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
> > Cc: Michal Wajdeczko <michal.wajdeczko@intel.com>
> > ---
> >   lib/i915/gem_context.c        |  68 +++
> >   lib/i915/gem_context.h        |  13 +
> >   tests/Makefile.sources        |   1 +
> >   tests/i915/gem_ctx_shared.c   | 856 ++++++++++++++++++++++++++++++++++
> >   tests/i915/gem_exec_whisper.c |  32 +-
> >   tests/meson.build             |   1 +
> >   6 files changed, 962 insertions(+), 9 deletions(-)
> >   create mode 100644 tests/i915/gem_ctx_shared.c
> > 
> > diff --git a/lib/i915/gem_context.c b/lib/i915/gem_context.c
> > index f94d89cb4..8fb8984d1 100644
> > --- a/lib/i915/gem_context.c
> > +++ b/lib/i915/gem_context.c
> > @@ -272,6 +272,74 @@ void gem_context_set_priority(int fd, uint32_t ctx_id, int prio)
> >       igt_assert_eq(__gem_context_set_priority(fd, ctx_id, prio), 0);
> >   }
> >   
> > +int
> > +__gem_context_clone(int i915,
> > +                 uint32_t src, unsigned int share,
> > +                 unsigned int flags,
> > +                 uint32_t *out)
> > +{
> > +     struct drm_i915_gem_context_create_ext_clone clone = {
> > +             { .name = I915_CONTEXT_CREATE_EXT_CLONE },
> > +             .clone_id = src,
> > +             .flags = share,
> > +     };
> > +     struct drm_i915_gem_context_create_ext arg = {
> > +             .flags = flags | I915_CONTEXT_CREATE_FLAGS_USE_EXTENSIONS,
> > +             .extensions = to_user_pointer(&clone),
> > +     };
> > +     int err = 0;
> > +
> > +     if (igt_ioctl(i915, DRM_IOCTL_I915_GEM_CONTEXT_CREATE_EXT, &arg))
> > +             err = -errno;
> > +
> > +     *out = arg.ctx_id;
> > +
> > +     errno = 0;
> > +     return err;
> > +}
> > +
> > +static bool __gem_context_has(int i915, uint32_t share, unsigned int flags)
> > +{
> > +     uint32_t ctx;
> > +
> > +     __gem_context_clone(i915, 0, share, flags, &ctx);
> > +     if (ctx)
> > +             gem_context_destroy(i915, ctx);
> > +
> > +     errno = 0;
> > +     return ctx;
> > +}
> > +
> > +bool gem_contexts_has_shared_gtt(int i915)
> > +{
> > +     return __gem_context_has(i915, I915_CONTEXT_CLONE_VM, 0);
> > +}
> > +
> > +bool gem_has_queues(int i915)
> > +{
> > +     return __gem_context_has(i915,
> > +                              I915_CONTEXT_CLONE_VM,
> > +                              I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE);
> > +}
> > +
> > +uint32_t gem_context_clone(int i915,
> > +                        uint32_t src, unsigned int share,
> > +                        unsigned int flags)
> > +{
> > +     uint32_t ctx;
> > +
> > +     igt_assert_eq(__gem_context_clone(i915, src, share, flags, &ctx), 0);
> > +
> > +     return ctx;
> > +}
> > +
> > +uint32_t gem_queue_create(int i915)
> > +{
> > +     return gem_context_clone(i915, 0,
> > +                              I915_CONTEXT_CLONE_VM,
> > +                              I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE);
> > +}
> > +
> >   bool gem_context_has_engine(int fd, uint32_t ctx, uint64_t engine)
> >   {
> >       struct drm_i915_gem_exec_object2 exec = {};
> > diff --git a/lib/i915/gem_context.h b/lib/i915/gem_context.h
> > index a052714d4..8043c3401 100644
> > --- a/lib/i915/gem_context.h
> > +++ b/lib/i915/gem_context.h
> > @@ -29,6 +29,19 @@ int __gem_context_create(int fd, uint32_t *ctx_id);
> >   void gem_context_destroy(int fd, uint32_t ctx_id);
> >   int __gem_context_destroy(int fd, uint32_t ctx_id);
> >   
> > +int __gem_context_clone(int i915,
> > +                     uint32_t src, unsigned int share,
> > +                     unsigned int flags,
> > +                     uint32_t *out);
> > +uint32_t gem_context_clone(int i915,
> > +                        uint32_t src, unsigned int share,
> > +                        unsigned int flags);
> > +
> > +uint32_t gem_queue_create(int i915);
> > +
> > +bool gem_contexts_has_shared_gtt(int i915);
> > +bool gem_has_queues(int i915);
> > +
> >   bool gem_has_contexts(int fd);
> >   void gem_require_contexts(int fd);
> >   void gem_context_require_bannable(int fd);
> > diff --git a/tests/Makefile.sources b/tests/Makefile.sources
> > index e1b7feeb2..3552e895b 100644
> > --- a/tests/Makefile.sources
> > +++ b/tests/Makefile.sources
> > @@ -22,6 +22,7 @@ TESTS_progs = \
> >       drm_mm \
> >       drm_read \
> >       i915/gem_ctx_clone \
> > +     i915/gem_ctx_shared \
> >       i915/gem_vm_create \
> >       kms_3d \
> >       kms_addfb_basic \
> > diff --git a/tests/i915/gem_ctx_shared.c b/tests/i915/gem_ctx_shared.c
> > new file mode 100644
> > index 000000000..0076f5e9d
> > --- /dev/null
> > +++ b/tests/i915/gem_ctx_shared.c
> > @@ -0,0 +1,856 @@
> > +/*
> > + * Copyright © 2017 Intel Corporation
> 
> 2019

Nah, that would imply I put any thought into touching it since.

> > +static void exhaust_shared_gtt(int i915, unsigned int flags)
> > +#define EXHAUST_LRC 0x1
> > +{
> > +     i915 = gem_reopen_driver(i915);
> > +
> > +     igt_fork(pid, 1) {
> > +             const uint32_t bbe = MI_BATCH_BUFFER_END;
> > +             struct drm_i915_gem_exec_object2 obj = {
> > +                     .handle = gem_create(i915, 4096)
> > +             };
> > +             struct drm_i915_gem_execbuffer2 execbuf = {
> > +                     .buffers_ptr = to_user_pointer(&obj),
> > +                     .buffer_count = 1,
> > +             };
> > +             uint32_t parent, child;
> > +             unsigned long count = 0;
> > +             int err;
> > +
> > +             gem_write(i915, obj.handle, 0, &bbe, sizeof(bbe));
> > +
> > +             child = 0;
> > +             for (;;) {
> > +                     parent = child;
> > +                     err = __gem_context_clone(i915,
> > +                                               parent, I915_CONTEXT_CLONE_VM,
> > +                                               0, &child);
> > +                     if (err)
> > +                             break;
> > +
> > +                     if (flags & EXHAUST_LRC) {
> > +                             execbuf.rsvd1 = child;
> > +                             err = __gem_execbuf(i915, &execbuf);
> > +                             if (err)
> > +                                     break;
> > +                     }
> 
> What are the stop conditions in this test, with and without the 
> EXHAUST_LRC flag? It would be good to put that in a comment.

It runs until the kernel dies. The giveaway is meant to be the test name.
 
> Especially since AFAIR this one was causing OOM for me so might need to 
> be tweaked.

It runs until the kernel dies.
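
Could spell that out at the top of the loop, something like (whichever
errno the kernel first fails with):

/*
 * No explicit bound: keep cloning contexts, and with EXHAUST_LRC also
 * force a logical ring context allocation for each with a nop batch,
 * until the kernel refuses (e.g. -ENOMEM). We then report how many
 * clones we managed before hitting the failure.
 */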

> > +
> > +                     count++;
> > +             }
> > +             gem_sync(i915, obj.handle);
> > +
> > +             igt_info("Created %lu shared contexts, before %d (%s)\n",
> > +                      count, err, strerror(-err));
> > +     }
> > +     close(i915);
> > +     igt_waitchildren();
> > +}
> > +
> > +static void exec_shared_gtt(int i915, unsigned int ring)
> > +{
> > +     const int gen = intel_gen(intel_get_drm_devid(i915));
> > +     const uint32_t bbe = MI_BATCH_BUFFER_END;
> > +     struct drm_i915_gem_exec_object2 obj = {
> > +             .handle = gem_create(i915, 4096)
> > +     };
> > +     struct drm_i915_gem_execbuffer2 execbuf = {
> > +             .buffers_ptr = to_user_pointer(&obj),
> > +             .buffer_count = 1,
> > +             .flags = ring,
> > +     };
> > +     uint32_t scratch = obj.handle;
> > +     uint32_t batch[16];
> > +     int i;
> > +
> > +     gem_require_ring(i915, ring);
> > +     igt_require(gem_can_store_dword(i915, ring));
> > +
> > +     /* Load object into place in the GTT */
> > +     gem_write(i915, obj.handle, 0, &bbe, sizeof(bbe));
> > +     gem_execbuf(i915, &execbuf);
> > +
> > +     /* Presume nothing causes an eviction in the meantime */
> > +
> > +     obj.handle = gem_create(i915, 4096);
> > +
> > +     i = 0;
> > +     batch[i] = MI_STORE_DWORD_IMM | (gen < 6 ? 1 << 22 : 0);
> > +     if (gen >= 8) {
> > +             batch[++i] = obj.offset;
> > +             batch[++i] = 0;
> > +     } else if (gen >= 4) {
> > +             batch[++i] = 0;
> > +             batch[++i] = obj.offset;
> > +     } else {
> > +             batch[i]--;
> > +             batch[++i] = obj.offset;
> > +     }
> > +     batch[++i] = 0xc0ffee;
> > +     batch[++i] = MI_BATCH_BUFFER_END;
> > +     gem_write(i915, obj.handle, 0, batch, sizeof(batch));
> > +
> > +     obj.offset += 4096; /* make sure we don't cause an eviction! */
> 
> Is 4k apart safe?

Since changing it would imply an ABI break, and I see no param indicating
an ABI change; Joonas keeps on refusing to add such information.
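
For the paranoid, both objects could be soft-pinned so nothing is
presumed (a sketch, not what the test does; the handles are invented
names):

struct drm_i915_gem_exec_object2 obj[2] = {
	{
		.handle = scratch, /* the object written by the batch */
		.offset = 0,
		.flags = EXEC_OBJECT_PINNED,
	},
	{
		.handle = batch, /* pinned one page above the scratch */
		.offset = 4096,
		.flags = EXEC_OBJECT_PINNED,
	},
};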
 
> A short comment on how this test works would be good.
> 
> > +     obj.flags |= EXEC_OBJECT_PINNED;
> > +     execbuf.rsvd1 = gem_context_clone(i915, 0, I915_CONTEXT_CLONE_VM, 0);
> > +     if (gen > 3 && gen < 6)
> > +             execbuf.flags |= I915_EXEC_SECURE;
> > +
> > +     gem_execbuf(i915, &execbuf);
> > +     gem_context_destroy(i915, execbuf.rsvd1);
> > +     gem_sync(i915, obj.handle); /* write hazard lies */
> > +     gem_close(i915, obj.handle);
> > +
> > +     gem_read(i915, scratch, 0, batch, sizeof(uint32_t));
> > +     gem_close(i915, scratch);
> > +
> > +     igt_assert_eq_u32(*batch, 0xc0ffee);
> > +}
> > +
> > +static int nop_sync(int i915, uint32_t ctx, unsigned int ring, int64_t timeout)
> > +{
> > +     const uint32_t bbe = MI_BATCH_BUFFER_END;
> > +     struct drm_i915_gem_exec_object2 obj = {
> > +             .handle = gem_create(i915, 4096),
> > +     };
> > +     struct drm_i915_gem_execbuffer2 execbuf = {
> > +             .buffers_ptr = to_user_pointer(&obj),
> > +             .buffer_count = 1,
> > +             .flags = ring,
> > +             .rsvd1 = ctx,
> > +     };
> > +     int err;
> > +
> > +     gem_write(i915, obj.handle, 0, &bbe, sizeof(bbe));
> > +     gem_execbuf(i915, &execbuf);
> > +     err = gem_wait(i915, obj.handle, &timeout);
> > +     gem_close(i915, obj.handle);
> > +
> > +     return err;
> > +}
> > +
> > +static bool has_single_timeline(int i915)
> > +{
> > +     uint32_t ctx;
> > +
> > +     __gem_context_clone(i915, 0, 0,
> > +                         I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE,
> > +                         &ctx);
> > +     if (ctx)
> > +             gem_context_destroy(i915, ctx);
> > +
> > +     return ctx != 0;
> > +}
> > +
> > +static bool ignore_engine(unsigned engine)
> > +{
> > +     if (engine == 0)
> > +             return true;
> > +
> > +     if (engine == I915_EXEC_BSD)
> > +             return true;
> > +
> > +     return false;
> > +}
> > +
> > +static void single_timeline(int i915)
> > +{
> > +     const uint32_t bbe = MI_BATCH_BUFFER_END;
> > +     struct drm_i915_gem_exec_object2 obj = {
> > +             .handle = gem_create(i915, 4096),
> > +     };
> > +     struct drm_i915_gem_execbuffer2 execbuf = {
> > +             .buffers_ptr = to_user_pointer(&obj),
> > +             .buffer_count = 1,
> > +     };
> > +     struct sync_fence_info rings[16];
> 
> Could use for_each_physical_engine to count the engines. But we probably 
> have plenty of this around the code base.
> 
> > +     struct sync_file_info sync_file_info = {
> > +             .num_fences = 1,
> > +     };
> > +     unsigned int engine;
> > +     int n;
> > +
> > +     igt_require(has_single_timeline(i915));
> > +
> > +     gem_write(i915, obj.handle, 0, &bbe, sizeof(bbe));
> > +     gem_execbuf(i915, &execbuf);
> > +     gem_sync(i915, obj.handle);
> > +
> > +     /*
> > +      * For a "single timeline" context, each ring is on the common
> > +      * timeline, unlike a normal context where each ring has an
> > +      * independent timeline. That is no matter which engine we submit
> > +      * to, it reports the same timeline name and fence context. However,
> > +      * the fence context is not reported through the sync_fence_info.
> 
> Is the test useful then? There was one I reviewed earlier in this series 
> which tested for execution ordering, which sounds like what's needed.

It is a variant. This one is a couple of years older. Both accomplish
similar things through very different means, all the more serendipitous.

> 
> > +      */
> > +     execbuf.rsvd1 =
> > +             gem_context_clone(i915, 0, 0,
> > +                               I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE);
> > +     execbuf.flags = I915_EXEC_FENCE_OUT;
> > +     n = 0;
> > +     for_each_engine(i915, engine) {
> 
> for_each_physical_engine to align with Andi's work?

No, this would be an ABI iterator, not a physical iterator.
 
> > +             gem_execbuf_wr(i915, &execbuf);
> > +             sync_file_info.sync_fence_info = to_user_pointer(&rings[n]);
> > +             do_ioctl(execbuf.rsvd2 >> 32, SYNC_IOC_FILE_INFO, &sync_file_info);
> > +             close(execbuf.rsvd2 >> 32);
> > +
> > +             igt_info("ring[%d] fence: %s %s\n",
> > +                      n, rings[n].driver_name, rings[n].obj_name);
> > +             n++;
> > +     }
> > +     gem_sync(i915, obj.handle);
> > +     gem_close(i915, obj.handle);
> > +
> > +     for (int i = 1; i < n; i++) {
> > +             igt_assert(!strcmp(rings[0].driver_name, rings[i].driver_name));
> > +             igt_assert(!strcmp(rings[0].obj_name, rings[i].obj_name));
> 
> What is in obj_name?

The timeline name. sync_file is plain old useless. The asserts here are
drivel.
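
For reference, everything sync_fence_info reports per fence (from the
sync_file uapi, as I recall it):

struct sync_fence_info {
	char obj_name[32];	/* fence->ops->get_timeline_name() */
	char driver_name[32];	/* fence->ops->get_driver_name() */
	__s32 status;
	__u32 flags;
	__u64 timestamp_ns;
};

No fence context in there, hence the comment in the test.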

> > +     }
> > +}
> > +
> > +static void exec_single_timeline(int i915, unsigned int ring)
> > +{
> > +     unsigned int other;
> > +     igt_spin_t *spin;
> > +     uint32_t ctx;
> > +
> > +     gem_require_ring(i915, ring);
> > +     igt_require(has_single_timeline(i915));
> > +
> > +     /*
> > +      * On an ordinary context, a blockage on one ring doesn't prevent
> > +      * execution on another.
> > +      */
> > +     ctx = 0;
> > +     spin = NULL;
> > +     for_each_engine(i915, other) {
> 
> for_each_physical

Modern inventions.
 
> > +             if (other == ring || ignore_engine(other))
> > +                     continue;
> > +
> > +             if (spin == NULL) {
> > +                     spin = __igt_spin_new(i915, .ctx = ctx, .engine = other);
> > +             } else {
> > +                     struct drm_i915_gem_execbuffer2 execbuf = {
> > +                             .buffers_ptr = spin->execbuf.buffers_ptr,
> > +                             .buffer_count = spin->execbuf.buffer_count,
> > +                             .flags = other,
> > +                             .rsvd1 = ctx,
> > +                     };
> > +                     gem_execbuf(i915, &execbuf);
> > +             }
> > +     }
> > +     igt_require(spin);
> > +     igt_assert_eq(nop_sync(i915, ctx, ring, NSEC_PER_SEC), 0);
> > +     igt_spin_free(i915, spin);
> > +
> > +     /*
> > +      * But if we create a context with just a single shared timeline,
> > +      * then it will block waiting for the earlier requests on the
> > +      * other engines.
> > +      */
> > +     ctx = gem_context_clone(i915, 0, 0,
> > +                             I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE);
> > +     spin = NULL;
> > +     for_each_engine(i915, other) {
> 
> Ditto.

Hey! Where was that when this code was written^W copied :-p
 
> > +             if (other == ring || ignore_engine(other))
> > +                     continue;
> > +
> > +             if (spin == NULL) {
> > +                     spin = __igt_spin_new(i915, .ctx = ctx, .engine = other);
> > +             } else {
> > +                     struct drm_i915_gem_execbuffer2 execbuf = {
> > +                             .buffers_ptr = spin->execbuf.buffers_ptr,
> > +                             .buffer_count = spin->execbuf.buffer_count,
> > +                             .flags = other,
> > +                             .rsvd1 = ctx,
> > +                     };
> > +                     gem_execbuf(i915, &execbuf);
> > +             }
> > +     }
> > +     igt_assert(spin);
> > +     igt_assert_eq(nop_sync(i915, ctx, ring, NSEC_PER_SEC), -ETIME);
> > +     igt_spin_free(i915, spin);
> > +}
> > +
> > +static void store_dword(int i915, uint32_t ctx, unsigned ring,
> > +                     uint32_t target, uint32_t offset, uint32_t value,
> > +                     uint32_t cork, unsigned write_domain)
> > +{
> > +     const int gen = intel_gen(intel_get_drm_devid(i915));
> > +     struct drm_i915_gem_exec_object2 obj[3];
> > +     struct drm_i915_gem_relocation_entry reloc;
> > +     struct drm_i915_gem_execbuffer2 execbuf;
> > +     uint32_t batch[16];
> > +     int i;
> > +
> > +     memset(&execbuf, 0, sizeof(execbuf));
> > +     execbuf.buffers_ptr = to_user_pointer(obj + !cork);
> > +     execbuf.buffer_count = 2 + !!cork;
> > +     execbuf.flags = ring;
> > +     if (gen < 6)
> > +             execbuf.flags |= I915_EXEC_SECURE;
> > +     execbuf.rsvd1 = ctx;
> > +
> > +     memset(obj, 0, sizeof(obj));
> > +     obj[0].handle = cork;
> > +     obj[1].handle = target;
> > +     obj[2].handle = gem_create(i915, 4096);
> > +
> > +     memset(&reloc, 0, sizeof(reloc));
> > +     reloc.target_handle = obj[1].handle;
> > +     reloc.presumed_offset = 0;
> > +     reloc.offset = sizeof(uint32_t);
> > +     reloc.delta = offset;
> > +     reloc.read_domains = I915_GEM_DOMAIN_INSTRUCTION;
> > +     reloc.write_domain = write_domain;
> > +     obj[2].relocs_ptr = to_user_pointer(&reloc);
> > +     obj[2].relocation_count = 1;
> > +
> > +     i = 0;
> > +     batch[i] = MI_STORE_DWORD_IMM | (gen < 6 ? 1 << 22 : 0);
> > +     if (gen >= 8) {
> > +             batch[++i] = offset;
> > +             batch[++i] = 0;
> > +     } else if (gen >= 4) {
> > +             batch[++i] = 0;
> > +             batch[++i] = offset;
> > +             reloc.offset += sizeof(uint32_t);
> > +     } else {
> > +             batch[i]--;
> > +             batch[++i] = offset;
> > +     }
> > +     batch[++i] = value;
> > +     batch[++i] = MI_BATCH_BUFFER_END;
> > +     gem_write(i915, obj[2].handle, 0, batch, sizeof(batch));
> > +     gem_execbuf(i915, &execbuf);
> > +     gem_close(i915, obj[2].handle);
> > +}
> > +
> > +static uint32_t create_highest_priority(int i915)
> > +{
> > +     uint32_t ctx = gem_context_create(i915);
> > +
> > +     /*
> > +      * If there is no priority support, all contexts will have equal
> > +      * priority (and therefore the max user priority), so no context
> > +      * can overtake us, and we effectively can form a plug.
> > +      */
> > +     __gem_context_set_priority(i915, ctx, MAX_PRIO);
> > +
> > +     return ctx;
> > +}
> > +
> > +static void unplug_show_queue(int i915, struct igt_cork *c, unsigned int engine)
> > +{
> > +     igt_spin_t *spin[MAX_ELSP_QLEN];
> 
> Why is this 16?

2x as big as the deepest known qlen. And 16 is the number that crops up
everywhere as a "just big enough" number.
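
i.e. if we want it self-documenting:

/* 2x the deepest known ELSP submission queue, i.e. "just big enough" */
#define MAX_ELSP_QLEN 16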
 
> > +
> > +     for (int n = 0; n < ARRAY_SIZE(spin); n++) {
> > +             const struct igt_spin_factory opts = {
> > +                     .ctx = create_highest_priority(i915),
> > +                     .engine = engine,
> > +             };
> > +             spin[n] = __igt_spin_factory(i915, &opts);
> > +             gem_context_destroy(i915, opts.ctx);
> > +     }
> > +
> > +     igt_cork_unplug(c); /* batches will now be queued on the engine */
> > +     igt_debugfs_dump(i915, "i915_engine_info");
> > +
> > +     for (int n = 0; n < ARRAY_SIZE(spin); n++)
> > +             igt_spin_free(i915, spin[n]);
> > +}
> > +
> > +static uint32_t store_timestamp(int i915,
> > +                             uint32_t ctx, unsigned ring,
> > +                             unsigned mmio_base)
> > +{
> > +     const bool r64b = intel_gen(intel_get_drm_devid(i915)) >= 8;
> > +     struct drm_i915_gem_exec_object2 obj = {
> > +             .handle = gem_create(i915, 4096),
> > +             .relocation_count = 1,
> > +     };
> > +     struct drm_i915_gem_relocation_entry reloc = {
> > +             .target_handle = obj.handle,
> > +             .offset = 2 * sizeof(uint32_t),
> > +             .delta = 4092,
> > +             .read_domains = I915_GEM_DOMAIN_INSTRUCTION,
> > +     };
> > +     struct drm_i915_gem_execbuffer2 execbuf = {
> > +             .buffers_ptr = to_user_pointer(&obj),
> > +             .buffer_count = 1,
> > +             .flags = ring,
> > +             .rsvd1 = ctx,
> > +     };
> > +     uint32_t batch[] = {
> > +             0x24 << 23 | (1 + r64b), /* SRM */
> > +             mmio_base + 0x358,
> > +             4092,
> > +             0,
> > +             MI_BATCH_BUFFER_END
> > +     };
> > +
> > +     igt_require(intel_gen(intel_get_drm_devid(i915)) >= 7);
> > +
> > +     gem_write(i915, obj.handle, 0, batch, sizeof(batch));
> > +     obj.relocs_ptr = to_user_pointer(&reloc);
> > +
> > +     gem_execbuf(i915, &execbuf);
> > +
> > +     return obj.handle;
> > +}
> > +
> > +static void independent(int i915, unsigned ring, unsigned flags)
> > +{
> > +     uint32_t handle[ARRAY_SIZE(priorities)];
> > +     igt_spin_t *spin[MAX_ELSP_QLEN];
> > +     unsigned int mmio_base;
> > +
> > +     /* XXX i915_query()! */
> > +     switch (ring) {
> > +     case I915_EXEC_DEFAULT:
> > +     case I915_EXEC_RENDER:
> > +             mmio_base = 0x2000;
> > +             break;
> > +#if 0
> > +     case I915_EXEC_BSD:
> > +             mmio_base = 0x12000;
> > +             break;
> > +#endif
> > +     case I915_EXEC_BLT:
> > +             mmio_base = 0x22000;
> > +             break;
> > +
> > +     case I915_EXEC_VEBOX:
> > +             if (intel_gen(intel_get_drm_devid(i915)) >= 11)
> > +                     mmio_base = 0x1d8000;
> > +             else
> > +                     mmio_base = 0x1a000;
> > +             break;
> > +
> > +     default:
> > +             igt_skip("mmio base not known\n");
> > +     }
> 
> Ufff this is quite questionable. Should we rather have this subtest in 
> selftests only?

We should be exporting this information. It is a non-privileged register
that is used by normal clients to measure elapsed time.
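
For example, a client samples it twice and subtracts; a minimal sketch,
assuming 32-bit reads (the helper name is illustrative):

	#include <stdint.h>

	/*
	 * Elapsed ticks between two TIMESTAMP reads; unsigned
	 * arithmetic stays correct across a single counter wraparound.
	 */
	static inline uint32_t timestamp_delta(uint32_t start, uint32_t end)
	{
		return end - start;
	}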

> > +
> > +     for (int n = 0; n < ARRAY_SIZE(spin); n++) {
> > +             const struct igt_spin_factory opts = {
> > +                     .ctx = create_highest_priority(i915),
> > +                     .engine = ring,
> > +             };
> > +             spin[n] = __igt_spin_factory(i915, &opts);
> > +             gem_context_destroy(i915, opts.ctx);
> > +     }
> > +
> > +     for (int i = 0; i < ARRAY_SIZE(priorities); i++) {
> > +             uint32_t ctx = gem_queue_create(i915);
> > +             gem_context_set_priority(i915, ctx, priorities[i]);
> > +             handle[i] = store_timestamp(i915, ctx, ring, mmio_base);
> > +             gem_context_destroy(i915, ctx);
> > +     }
> > +
> > +     for (int n = 0; n < ARRAY_SIZE(spin); n++)
> > +             igt_spin_free(i915, spin[n]);
> > +
> > +     for (int i = 0; i < ARRAY_SIZE(priorities); i++) {
> > +             uint32_t *ptr;
> > +
> > +             ptr = gem_mmap__gtt(i915, handle[i], 4096, PROT_READ);
> > +             gem_set_domain(i915, handle[i], /* no write hazard lies! */
> > +                            I915_GEM_DOMAIN_GTT, I915_GEM_DOMAIN_GTT);
> > +             gem_close(i915, handle[i]);
> > +
> > +             handle[i] = ptr[1023];
> 
> 1023 relates to 4092 from store_timestamp I gather. The two need to be 
> defined closer together.
> 
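
Something like a shared define would tie the two together; a sketch (the
macro names are illustrative):

	/* shared by store_timestamp() and independent() */
	#define TIMESTAMP_OFFSET 4092 /* byte offset of the saved value */
	#define TIMESTAMP_DWORD (TIMESTAMP_OFFSET / sizeof(uint32_t)) /* 1023 */
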
> > +             munmap(ptr, 4096);
> > +
> > +             igt_debug("ctx[%d] .prio=%d, timestamp=%u\n",
> > +                       i, priorities[i], handle[i]);
> > +     }
> > +
> > +     igt_assert((int32_t)(handle[HI] - handle[LO]) < 0);
> > +}
> > +
> > +static void reorder(int i915, unsigned ring, unsigned flags)
> > +#define EQUAL 1
> > +{
> > +     IGT_CORK_HANDLE(cork);
> > +     uint32_t scratch;
> > +     uint32_t *ptr;
> > +     uint32_t ctx[2];
> > +     uint32_t plug;
> > +
> > +     ctx[LO] = gem_queue_create(i915);
> > +     gem_context_set_priority(i915, ctx[LO], MIN_PRIO);
> > +
> > +     ctx[HI] = gem_queue_create(i915);
> > +     gem_context_set_priority(i915, ctx[HI], flags & EQUAL ? MIN_PRIO : 0);
> > +
> > +     scratch = gem_create(i915, 4096);
> > +     plug = igt_cork_plug(&cork, i915);
> > +
> > +     /* We expect the high priority context to be executed first, and
> > +      * so the final result will be value from the low priority context.
> > +      */
> > +     store_dword(i915, ctx[LO], ring, scratch, 0, ctx[LO], plug, 0);
> > +     store_dword(i915, ctx[HI], ring, scratch, 0, ctx[HI], plug, 0);
> > +
> > +     unplug_show_queue(i915, &cork, ring);
> > +     gem_close(i915, plug);
> > +
> > +     gem_context_destroy(i915, ctx[LO]);
> > +     gem_context_destroy(i915, ctx[HI]);
> > +
> > +     ptr = gem_mmap__gtt(i915, scratch, 4096, PROT_READ);
> > +     gem_set_domain(i915, scratch, /* no write hazard lies! */
> > +                    I915_GEM_DOMAIN_GTT, I915_GEM_DOMAIN_GTT);
> > +     gem_close(i915, scratch);
> > +
> > +     if (flags & EQUAL) /* equal priority, result will be fifo */
> > +             igt_assert_eq_u32(ptr[0], ctx[HI]);
> > +     else
> > +             igt_assert_eq_u32(ptr[0], ctx[LO]);
> > +     munmap(ptr, 4096);
> > +}
> > +
> > +static void promotion(int i915, unsigned ring)
> > +{
> > +     IGT_CORK_HANDLE(cork);
> > +     uint32_t result, dep;
> > +     uint32_t *ptr;
> > +     uint32_t ctx[3];
> > +     uint32_t plug;
> > +
> > +     ctx[LO] = gem_queue_create(i915);
> > +     gem_context_set_priority(i915, ctx[LO], MIN_PRIO);
> > +
> > +     ctx[HI] = gem_queue_create(i915);
> > +     gem_context_set_priority(i915, ctx[HI], 0);
> > +
> > +     ctx[NOISE] = gem_queue_create(i915);
> > +     gem_context_set_priority(i915, ctx[NOISE], MIN_PRIO/2);
> > +
> > +     result = gem_create(i915, 4096);
> > +     dep = gem_create(i915, 4096);
> > +
> > +     plug = igt_cork_plug(&cork, i915);
> > +
> > +     /* Expect that HI promotes LO, so the order will be LO, HI, NOISE.
> > +      *
> > +      * fifo would be NOISE, LO, HI.
> > +      * strict priority would be  HI, NOISE, LO
> > +      */
> > +     store_dword(i915, ctx[NOISE], ring, result, 0, ctx[NOISE], plug, 0);
> > +     store_dword(i915, ctx[LO], ring, result, 0, ctx[LO], plug, 0);
> > +
> > +     /* link LO <-> HI via a dependency on another buffer */
> > +     store_dword(i915, ctx[LO], ring, dep, 0, ctx[LO], 0, I915_GEM_DOMAIN_INSTRUCTION);
> > +     store_dword(i915, ctx[HI], ring, dep, 0, ctx[HI], 0, 0);
> > +
> > +     store_dword(i915, ctx[HI], ring, result, 0, ctx[HI], 0, 0);
> > +
> > +     unplug_show_queue(i915, &cork, ring);
> > +     gem_close(i915, plug);
> > +
> > +     gem_context_destroy(i915, ctx[NOISE]);
> > +     gem_context_destroy(i915, ctx[LO]);
> > +     gem_context_destroy(i915, ctx[HI]);
> > +
> > +     ptr = gem_mmap__gtt(i915, dep, 4096, PROT_READ);
> > +     gem_set_domain(i915, dep, /* no write hazard lies! */
> > +                     I915_GEM_DOMAIN_GTT, I915_GEM_DOMAIN_GTT);
> > +     gem_close(i915, dep);
> > +
> > +     igt_assert_eq_u32(ptr[0], ctx[HI]);
> > +     munmap(ptr, 4096);
> > +
> > +     ptr = gem_mmap__gtt(i915, result, 4096, PROT_READ);
> > +     gem_set_domain(i915, result, /* no write hazard lies! */
> > +                     I915_GEM_DOMAIN_GTT, I915_GEM_DOMAIN_GTT);
> > +     gem_close(i915, result);
> > +
> > +     igt_assert_eq_u32(ptr[0], ctx[NOISE]);
> > +     munmap(ptr, 4096);
> > +}
> > +
> > +static void smoketest(int i915, unsigned ring, unsigned timeout)
> > +{
> > +     const int ncpus = sysconf(_SC_NPROCESSORS_ONLN);
> > +     unsigned engines[16];
> > +     unsigned nengine;
> > +     unsigned engine;
> > +     uint32_t scratch;
> > +     uint32_t *ptr;
> > +
> > +     nengine = 0;
> > +     for_each_engine(i915, engine) {
> > +             if (ignore_engine(engine))
> > +                     continue;
> > +
> > +             engines[nengine++] = engine;
> > +     }
> > +     igt_require(nengine);
> 
> for_each_physical and counting the engines for the engines array would be
> better I think.
> 
> > +
> > +     scratch = gem_create(i915, 4096);
> > +     igt_fork(child, ncpus) {
> > +             unsigned long count = 0;
> > +             uint32_t ctx;
> > +
> > +             hars_petruska_f54_1_random_perturb(child);
> > +
> > +             ctx = gem_queue_create(i915);
> > +             igt_until_timeout(timeout) {
> > +                     int prio;
> > +
> > +                     prio = hars_petruska_f54_1_random_unsafe_max(MAX_PRIO - MIN_PRIO) + MIN_PRIO;
> > +                     gem_context_set_priority(i915, ctx, prio);
> > +
> > +                     engine = engines[hars_petruska_f54_1_random_unsafe_max(nengine)];
> > +                     store_dword(i915, ctx, engine, scratch,
> > +                                 8*child + 0, ~child,
> > +                                 0, 0);
> > +                     for (unsigned int step = 0; step < 8; step++)
> > +                             store_dword(i915, ctx, engine, scratch,
> > +                                         8*child + 4, count++,
> > +                                         0, 0);
> > +             }
> > +             gem_context_destroy(i915, ctx);
> > +     }
> > +     igt_waitchildren();
> > +
> > +     ptr = gem_mmap__gtt(i915, scratch, 4096, PROT_READ);
> > +     gem_set_domain(i915, scratch, /* no write hazard lies! */
> > +                     I915_GEM_DOMAIN_GTT, I915_GEM_DOMAIN_GTT);
> > +     gem_close(i915, scratch);
> > +
> > +     for (unsigned n = 0; n < ncpus; n++) {
> > +             igt_assert_eq_u32(ptr[2*n], ~n);
> > +             /*
> > +              * Note this count is approximate due to unconstrained
> > +              * ordering of the dword writes between engines.
> > +              *
> > +              * Take the result with a pinch of salt.
> > +              */
> > +             igt_info("Child[%d] completed %u cycles\n",  n, ptr[2*n+1]);
> > +     }
> > +     munmap(ptr, 4096);
> > +}
> > +
> > +igt_main
> > +{
> > +     const struct intel_execution_engine *e;
> > +     int i915 = -1;
> > +
> > +     igt_fixture {
> > +             i915 = drm_open_driver(DRIVER_INTEL);
> > +             igt_require_gem(i915);
> > +     }
> > +
> > +     igt_subtest_group {
> > +             igt_fixture {
> > +                     igt_require(gem_contexts_has_shared_gtt(i915));
> > +                     igt_fork_hang_detector(i915);
> > +             }
> > +
> > +             igt_subtest("create-shared-gtt")
> > +                     create_shared_gtt(i915, 0);
> > +
> > +             igt_subtest("detached-shared-gtt")
> > +                     create_shared_gtt(i915, DETACHED);
> > +
> > +             igt_subtest("disjoint-timelines")
> > +                     disjoint_timelines(i915);
> > +
> > +             igt_subtest("single-timeline")
> > +                     single_timeline(i915);
> > +
> > +             igt_subtest("exhaust-shared-gtt")
> > +                     exhaust_shared_gtt(i915, 0);
> > +
> > +             igt_subtest("exhaust-shared-gtt-lrc")
> > +                     exhaust_shared_gtt(i915, EXHAUST_LRC);
> > +
> > +             for (e = intel_execution_engines; e->name; e++) {
> > +                     igt_subtest_f("exec-shared-gtt-%s", e->name)
> > +                             exec_shared_gtt(i915, e->exec_id | e->flags);
> 
> The same previously raised question: should it iterate the legacy
> execbuf engines or the physical engines? Maybe you want different subtests
> to do both?

It should be testing the cross between the context and execbuf uABI, not
physical.
-Chris
Tvrtko Ursulin May 16, 2019, 8:51 a.m. UTC | #3
On 15/05/2019 20:33, Chris Wilson wrote:
> Quoting Tvrtko Ursulin (2019-05-15 07:37:18)
>>
>> On 08/05/2019 11:09, Chris Wilson wrote:
>>> v2: Test each shared context is its own timeline and allows request
>>> reordering between shared contexts.
>>>
>>> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
>>> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
>>> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>>> Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
>>> Cc: Michal Wajdeczko <michal.wajdeczko@intel.com>
>>> ---
>>>    lib/i915/gem_context.c        |  68 +++
>>>    lib/i915/gem_context.h        |  13 +
>>>    tests/Makefile.sources        |   1 +
>>>    tests/i915/gem_ctx_shared.c   | 856 ++++++++++++++++++++++++++++++++++
>>>    tests/i915/gem_exec_whisper.c |  32 +-
>>>    tests/meson.build             |   1 +
>>>    6 files changed, 962 insertions(+), 9 deletions(-)
>>>    create mode 100644 tests/i915/gem_ctx_shared.c
>>>
>>> diff --git a/lib/i915/gem_context.c b/lib/i915/gem_context.c
>>> index f94d89cb4..8fb8984d1 100644
>>> --- a/lib/i915/gem_context.c
>>> +++ b/lib/i915/gem_context.c
>>> @@ -272,6 +272,74 @@ void gem_context_set_priority(int fd, uint32_t ctx_id, int prio)
>>>        igt_assert_eq(__gem_context_set_priority(fd, ctx_id, prio), 0);
>>>    }
>>>    
>>> +int
>>> +__gem_context_clone(int i915,
>>> +                 uint32_t src, unsigned int share,
>>> +                 unsigned int flags,
>>> +                 uint32_t *out)
>>> +{
>>> +     struct drm_i915_gem_context_create_ext_clone clone = {
>>> +             { .name = I915_CONTEXT_CREATE_EXT_CLONE },
>>> +             .clone_id = src,
>>> +             .flags = share,
>>> +     };
>>> +     struct drm_i915_gem_context_create_ext arg = {
>>> +             .flags = flags | I915_CONTEXT_CREATE_FLAGS_USE_EXTENSIONS,
>>> +             .extensions = to_user_pointer(&clone),
>>> +     };
>>> +     int err = 0;
>>> +
>>> +     if (igt_ioctl(i915, DRM_IOCTL_I915_GEM_CONTEXT_CREATE_EXT, &arg))
>>> +             err = -errno;
>>> +
>>> +     *out = arg.ctx_id;
>>> +
>>> +     errno = 0;
>>> +     return err;
>>> +}
>>> +
>>> +static bool __gem_context_has(int i915, uint32_t share, unsigned int flags)
>>> +{
>>> +     uint32_t ctx;
>>> +
>>> +     __gem_context_clone(i915, 0, share, flags, &ctx);
>>> +     if (ctx)
>>> +             gem_context_destroy(i915, ctx);
>>> +
>>> +     errno = 0;
>>> +     return ctx;
>>> +}
>>> +
>>> +bool gem_contexts_has_shared_gtt(int i915)
>>> +{
>>> +     return __gem_context_has(i915, I915_CONTEXT_CLONE_VM, 0);
>>> +}
>>> +
>>> +bool gem_has_queues(int i915)
>>> +{
>>> +     return __gem_context_has(i915,
>>> +                              I915_CONTEXT_CLONE_VM,
>>> +                              I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE);
>>> +}
>>> +
>>> +uint32_t gem_context_clone(int i915,
>>> +                        uint32_t src, unsigned int share,
>>> +                        unsigned int flags)
>>> +{
>>> +     uint32_t ctx;
>>> +
>>> +     igt_assert_eq(__gem_context_clone(i915, src, share, flags, &ctx), 0);
>>> +
>>> +     return ctx;
>>> +}
>>> +
>>> +uint32_t gem_queue_create(int i915)
>>> +{
>>> +     return gem_context_clone(i915, 0,
>>> +                              I915_CONTEXT_CLONE_VM,
>>> +                              I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE);
>>> +}
>>> +
>>>    bool gem_context_has_engine(int fd, uint32_t ctx, uint64_t engine)
>>>    {
>>>        struct drm_i915_gem_exec_object2 exec = {};
>>> diff --git a/lib/i915/gem_context.h b/lib/i915/gem_context.h
>>> index a052714d4..8043c3401 100644
>>> --- a/lib/i915/gem_context.h
>>> +++ b/lib/i915/gem_context.h
>>> @@ -29,6 +29,19 @@ int __gem_context_create(int fd, uint32_t *ctx_id);
>>>    void gem_context_destroy(int fd, uint32_t ctx_id);
>>>    int __gem_context_destroy(int fd, uint32_t ctx_id);
>>>    
>>> +int __gem_context_clone(int i915,
>>> +                     uint32_t src, unsigned int share,
>>> +                     unsigned int flags,
>>> +                     uint32_t *out);
>>> +uint32_t gem_context_clone(int i915,
>>> +                        uint32_t src, unsigned int share,
>>> +                        unsigned int flags);
>>> +
>>> +uint32_t gem_queue_create(int i915);
>>> +
>>> +bool gem_contexts_has_shared_gtt(int i915);
>>> +bool gem_has_queues(int i915);
>>> +
>>>    bool gem_has_contexts(int fd);
>>>    void gem_require_contexts(int fd);
>>>    void gem_context_require_bannable(int fd);
>>> diff --git a/tests/Makefile.sources b/tests/Makefile.sources
>>> index e1b7feeb2..3552e895b 100644
>>> --- a/tests/Makefile.sources
>>> +++ b/tests/Makefile.sources
>>> @@ -22,6 +22,7 @@ TESTS_progs = \
>>>        drm_mm \
>>>        drm_read \
>>>        i915/gem_ctx_clone \
>>> +     i915/gem_ctx_shared \
>>>        i915/gem_vm_create \
>>>        kms_3d \
>>>        kms_addfb_basic \
>>> diff --git a/tests/i915/gem_ctx_shared.c b/tests/i915/gem_ctx_shared.c
>>> new file mode 100644
>>> index 000000000..0076f5e9d
>>> --- /dev/null
>>> +++ b/tests/i915/gem_ctx_shared.c
>>> @@ -0,0 +1,856 @@
>>> +/*
>>> + * Copyright © 2017 Intel Corporation
>>
>> 2019
> 
> Nah, that would imply I put any thought into touching it since.
> 
>>> +static void exhaust_shared_gtt(int i915, unsigned int flags)
>>> +#define EXHAUST_LRC 0x1
>>> +{
>>> +     i915 = gem_reopen_driver(i915);
>>> +
>>> +     igt_fork(pid, 1) {
>>> +             const uint32_t bbe = MI_BATCH_BUFFER_END;
>>> +             struct drm_i915_gem_exec_object2 obj = {
>>> +                     .handle = gem_create(i915, 4096)
>>> +             };
>>> +             struct drm_i915_gem_execbuffer2 execbuf = {
>>> +                     .buffers_ptr = to_user_pointer(&obj),
>>> +                     .buffer_count = 1,
>>> +             };
>>> +             uint32_t parent, child;
>>> +             unsigned long count = 0;
>>> +             int err;
>>> +
>>> +             gem_write(i915, obj.handle, 0, &bbe, sizeof(bbe));
>>> +
>>> +             child = 0;
>>> +             for (;;) {
>>> +                     parent = child;
>>> +                     err = __gem_context_clone(i915,
>>> +                                               parent, I915_CONTEXT_CLONE_VM,
>>> +                                               0, &child);
>>> +                     if (err)
>>> +                             break;
>>> +
>>> +                     if (flags & EXHAUST_LRC) {
>>> +                             execbuf.rsvd1 = child;
>>> +                             err = __gem_execbuf(i915, &execbuf);
>>> +                             if (err)
>>> +                                     break;
>>> +                     }
>>
>> What are the stop conditions in this test, with and without the
>> EXHAUST_LRC flag? It would be good to put that in a comment.
> 
> It runs until the kernel dies. The giveaway is meant to be the test name.
>   
>> Especially since AFAIR this one was causing OOM for me, so it might need
>> to be tweaked.
> 
> It runs until the kernel dies.

Aren't we not allowed to add failing tests?
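
If it stays, the loop's stop conditions could at least be documented as
asked above; a sketch of such a comment, worded from what the loop
visibly does (the exact errno values are assumptions):

	/*
	 * Clone contexts sharing a single VM until context creation
	 * fails (presumably -ENOMEM or -ENOSPC); with EXHAUST_LRC we
	 * also submit a nop batch with each clone, so we stop as soon
	 * as execbuf fails as well.
	 */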

> 
>>> +
>>> +                     count++;
>>> +             }
>>> +             gem_sync(i915, obj.handle);
>>> +
>>> +             igt_info("Created %lu shared contexts, before %d (%s)\n",
>>> +                      count, err, strerror(-err));
>>> +     }
>>> +     close(i915);
>>> +     igt_waitchildren();
>>> +}
>>> +
>>> +static void exec_shared_gtt(int i915, unsigned int ring)
>>> +{
>>> +     const int gen = intel_gen(intel_get_drm_devid(i915));
>>> +     const uint32_t bbe = MI_BATCH_BUFFER_END;
>>> +     struct drm_i915_gem_exec_object2 obj = {
>>> +             .handle = gem_create(i915, 4096)
>>> +     };
>>> +     struct drm_i915_gem_execbuffer2 execbuf = {
>>> +             .buffers_ptr = to_user_pointer(&obj),
>>> +             .buffer_count = 1,
>>> +             .flags = ring,
>>> +     };
>>> +     uint32_t scratch = obj.handle;
>>> +     uint32_t batch[16];
>>> +     int i;
>>> +
>>> +     gem_require_ring(i915, ring);
>>> +     igt_require(gem_can_store_dword(i915, ring));
>>> +
>>> +     /* Load object into place in the GTT */
>>> +     gem_write(i915, obj.handle, 0, &bbe, sizeof(bbe));
>>> +     gem_execbuf(i915, &execbuf);
>>> +
>>> +     /* Presume nothing causes an eviction in the meantime */
>>> +
>>> +     obj.handle = gem_create(i915, 4096);
>>> +
>>> +     i = 0;
>>> +     batch[i] = MI_STORE_DWORD_IMM | (gen < 6 ? 1 << 22 : 0);
>>> +     if (gen >= 8) {
>>> +             batch[++i] = obj.offset;
>>> +             batch[++i] = 0;
>>> +     } else if (gen >= 4) {
>>> +             batch[++i] = 0;
>>> +             batch[++i] = obj.offset;
>>> +     } else {
>>> +             batch[i]--;
>>> +             batch[++i] = obj.offset;
>>> +     }
>>> +     batch[++i] = 0xc0ffee;
>>> +     batch[++i] = MI_BATCH_BUFFER_END;
>>> +     gem_write(i915, obj.handle, 0, batch, sizeof(batch));
>>> +
>>> +     obj.offset += 4096; /* make sure we don't cause an eviction! */
>>
>> Is 4k apart safe?
> 
> Since changing it would imply an ABI break, and I see no param indicating
> an ABI change, and Joonas keeps on refusing to add such information.

Why would it be an ABI break? Why would the driver not be allowed to add
arbitrary padding, or to use a larger pgtable entry or something, so that if
you assume +4k is empty it could actually not be?

>   
>> A short comment on how this test works would be good.
>>
>>> +     obj.flags |= EXEC_OBJECT_PINNED;
>>> +     execbuf.rsvd1 = gem_context_clone(i915, 0, I915_CONTEXT_CLONE_VM, 0);
>>> +     if (gen > 3 && gen < 6)
>>> +             execbuf.flags |= I915_EXEC_SECURE;
>>> +
>>> +     gem_execbuf(i915, &execbuf);
>>> +     gem_context_destroy(i915, execbuf.rsvd1);
>>> +     gem_sync(i915, obj.handle); /* write hazard lies */
>>> +     gem_close(i915, obj.handle);
>>> +
>>> +     gem_read(i915, scratch, 0, batch, sizeof(uint32_t));
>>> +     gem_close(i915, scratch);
>>> +
>>> +     igt_assert_eq_u32(*batch, 0xc0ffee);
>>> +}
>>> +
>>> +static int nop_sync(int i915, uint32_t ctx, unsigned int ring, int64_t timeout)
>>> +{
>>> +     const uint32_t bbe = MI_BATCH_BUFFER_END;
>>> +     struct drm_i915_gem_exec_object2 obj = {
>>> +             .handle = gem_create(i915, 4096),
>>> +     };
>>> +     struct drm_i915_gem_execbuffer2 execbuf = {
>>> +             .buffers_ptr = to_user_pointer(&obj),
>>> +             .buffer_count = 1,
>>> +             .flags = ring,
>>> +             .rsvd1 = ctx,
>>> +     };
>>> +     int err;
>>> +
>>> +     gem_write(i915, obj.handle, 0, &bbe, sizeof(bbe));
>>> +     gem_execbuf(i915, &execbuf);
>>> +     err = gem_wait(i915, obj.handle, &timeout);
>>> +     gem_close(i915, obj.handle);
>>> +
>>> +     return err;
>>> +}
>>> +
>>> +static bool has_single_timeline(int i915)
>>> +{
>>> +     uint32_t ctx;
>>> +
>>> +     __gem_context_clone(i915, 0, 0,
>>> +                         I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE,
>>> +                         &ctx);
>>> +     if (ctx)
>>> +             gem_context_destroy(i915, ctx);
>>> +
>>> +     return ctx != 0;
>>> +}
>>> +
>>> +static bool ignore_engine(unsigned engine)
>>> +{
>>> +     if (engine == 0)
>>> +             return true;
>>> +
>>> +     if (engine == I915_EXEC_BSD)
>>> +             return true;
>>> +
>>> +     return false;
>>> +}
>>> +
>>> +static void single_timeline(int i915)
>>> +{
>>> +     const uint32_t bbe = MI_BATCH_BUFFER_END;
>>> +     struct drm_i915_gem_exec_object2 obj = {
>>> +             .handle = gem_create(i915, 4096),
>>> +     };
>>> +     struct drm_i915_gem_execbuffer2 execbuf = {
>>> +             .buffers_ptr = to_user_pointer(&obj),
>>> +             .buffer_count = 1,
>>> +     };
>>> +     struct sync_fence_info rings[16];
>>
>> Could use for_each_physical_engine to count the engines. But we probably
>> have plenty of this around the code base.
>>
>>> +     struct sync_file_info sync_file_info = {
>>> +             .num_fences = 1,
>>> +     };
>>> +     unsigned int engine;
>>> +     int n;
>>> +
>>> +     igt_require(has_single_timeline(i915));
>>> +
>>> +     gem_write(i915, obj.handle, 0, &bbe, sizeof(bbe));
>>> +     gem_execbuf(i915, &execbuf);
>>> +     gem_sync(i915, obj.handle);
>>> +
>>> +     /*
>>> +      * For a "single timeline" context, each ring is on the common
>>> +      * timeline, unlike a normal context where each ring has an
>>> +      * independent timeline. That is no matter which engine we submit
>>> +      * to, it reports the same timeline name and fence context. However,
>>> +      * the fence context is not reported through the sync_fence_info.
>>
>> Is the test useful then? There was one I reviewed earlier in this series
>> which tested for execution ordering, which sounds like what's needed.
> 
> It is a variant. This one is a couple of years older. Both accomplish
> similar things through very different means, which is all the more
> serendipitous.
> 
>>
>>> +      */
>>> +     execbuf.rsvd1 =
>>> +             gem_context_clone(i915, 0, 0,
>>> +                               I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE);
>>> +     execbuf.flags = I915_EXEC_FENCE_OUT;
>>> +     n = 0;
>>> +     for_each_engine(i915, engine) {
>>
>> for_each_physical_engine to align with Andi's work?
> 
> No, this would be an ABI iterator not a physical iterator.

Legacy ABI, why? It then covers a subset of engines, so is it useful to
cover more than one but not all?

>   
>>> +             gem_execbuf_wr(i915, &execbuf);
>>> +             sync_file_info.sync_fence_info = to_user_pointer(&rings[n]);
>>> +             do_ioctl(execbuf.rsvd2 >> 32, SYNC_IOC_FILE_INFO, &sync_file_info);
>>> +             close(execbuf.rsvd2 >> 32);
>>> +
>>> +             igt_info("ring[%d] fence: %s %s\n",
>>> +                      n, rings[n].driver_name, rings[n].obj_name);
>>> +             n++;
>>> +     }
>>> +     gem_sync(i915, obj.handle);
>>> +     gem_close(i915, obj.handle);
>>> +
>>> +     for (int i = 1; i < n; i++) {
>>> +             igt_assert(!strcmp(rings[0].driver_name, rings[i].driver_name));
>>> +             igt_assert(!strcmp(rings[0].obj_name, rings[i].obj_name));
>>
>> What is in obj_name?
> 
> The timeline name. sync_file is plain old useless. The asserts here are
> drivel.
> 
>>> +     }
>>> +}
>>> +
>>> +static void exec_single_timeline(int i915, unsigned int ring)
>>> +{
>>> +     unsigned int other;
>>> +     igt_spin_t *spin;
>>> +     uint32_t ctx;
>>> +
>>> +     gem_require_ring(i915, ring);
>>> +     igt_require(has_single_timeline(i915));
>>> +
>>> +     /*
>>> +      * On an ordinary context, a blockage on one ring doesn't prevent
>>> +      * execution on an other.
>>> +      */
>>> +     ctx = 0;
>>> +     spin = NULL;
>>> +     for_each_engine(i915, other) {
>>
>> for_each_physical
> 
> Modern inventions.
>   
>>> +             if (other == ring || ignore_engine(other))
>>> +                     continue;
>>> +
>>> +             if (spin == NULL) {
>>> +                     spin = __igt_spin_new(i915, .ctx = ctx, .engine = other);
>>> +             } else {
>>> +                     struct drm_i915_gem_execbuffer2 execbuf = {
>>> +                             .buffers_ptr = spin->execbuf.buffers_ptr,
>>> +                             .buffer_count = spin->execbuf.buffer_count,
>>> +                             .flags = other,
>>> +                             .rsvd1 = ctx,
>>> +                     };
>>> +                     gem_execbuf(i915, &execbuf);
>>> +             }
>>> +     }
>>> +     igt_require(spin);
>>> +     igt_assert_eq(nop_sync(i915, ctx, ring, NSEC_PER_SEC), 0);
>>> +     igt_spin_free(i915, spin);
>>> +
>>> +     /*
>>> +      * But if we create a context with just a single shared timeline,
>>> +      * then it will block waiting for the earlier requests on the
>>> +      * other engines.
>>> +      */
>>> +     ctx = gem_context_clone(i915, 0, 0,
>>> +                             I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE);
>>> +     spin = NULL;
>>> +     for_each_engine(i915, other) {
>>
>> Ditto.
> 
> Hey! Where was that when this code was written^W copied :-p
>   
>>> +             if (other == ring || ignore_engine(other))
>>> +                     continue;
>>> +
>>> +             if (spin == NULL) {
>>> +                     spin = __igt_spin_new(i915, .ctx = ctx, .engine = other);
>>> +             } else {
>>> +                     struct drm_i915_gem_execbuffer2 execbuf = {
>>> +                             .buffers_ptr = spin->execbuf.buffers_ptr,
>>> +                             .buffer_count = spin->execbuf.buffer_count,
>>> +                             .flags = other,
>>> +                             .rsvd1 = ctx,
>>> +                     };
>>> +                     gem_execbuf(i915, &execbuf);
>>> +             }
>>> +     }
>>> +     igt_assert(spin);
>>> +     igt_assert_eq(nop_sync(i915, ctx, ring, NSEC_PER_SEC), -ETIME);
>>> +     igt_spin_free(i915, spin);
>>> +}
>>> +
>>> +static void store_dword(int i915, uint32_t ctx, unsigned ring,
>>> +                     uint32_t target, uint32_t offset, uint32_t value,
>>> +                     uint32_t cork, unsigned write_domain)
>>> +{
>>> +     const int gen = intel_gen(intel_get_drm_devid(i915));
>>> +     struct drm_i915_gem_exec_object2 obj[3];
>>> +     struct drm_i915_gem_relocation_entry reloc;
>>> +     struct drm_i915_gem_execbuffer2 execbuf;
>>> +     uint32_t batch[16];
>>> +     int i;
>>> +
>>> +     memset(&execbuf, 0, sizeof(execbuf));
>>> +     execbuf.buffers_ptr = to_user_pointer(obj + !cork);
>>> +     execbuf.buffer_count = 2 + !!cork;
>>> +     execbuf.flags = ring;
>>> +     if (gen < 6)
>>> +             execbuf.flags |= I915_EXEC_SECURE;
>>> +     execbuf.rsvd1 = ctx;
>>> +
>>> +     memset(obj, 0, sizeof(obj));
>>> +     obj[0].handle = cork;
>>> +     obj[1].handle = target;
>>> +     obj[2].handle = gem_create(i915, 4096);
>>> +
>>> +     memset(&reloc, 0, sizeof(reloc));
>>> +     reloc.target_handle = obj[1].handle;
>>> +     reloc.presumed_offset = 0;
>>> +     reloc.offset = sizeof(uint32_t);
>>> +     reloc.delta = offset;
>>> +     reloc.read_domains = I915_GEM_DOMAIN_INSTRUCTION;
>>> +     reloc.write_domain = write_domain;
>>> +     obj[2].relocs_ptr = to_user_pointer(&reloc);
>>> +     obj[2].relocation_count = 1;
>>> +
>>> +     i = 0;
>>> +     batch[i] = MI_STORE_DWORD_IMM | (gen < 6 ? 1 << 22 : 0);
>>> +     if (gen >= 8) {
>>> +             batch[++i] = offset;
>>> +             batch[++i] = 0;
>>> +     } else if (gen >= 4) {
>>> +             batch[++i] = 0;
>>> +             batch[++i] = offset;
>>> +             reloc.offset += sizeof(uint32_t);
>>> +     } else {
>>> +             batch[i]--;
>>> +             batch[++i] = offset;
>>> +     }
>>> +     batch[++i] = value;
>>> +     batch[++i] = MI_BATCH_BUFFER_END;
>>> +     gem_write(i915, obj[2].handle, 0, batch, sizeof(batch));
>>> +     gem_execbuf(i915, &execbuf);
>>> +     gem_close(i915, obj[2].handle);
>>> +}
>>> +
>>> +static uint32_t create_highest_priority(int i915)
>>> +{
>>> +     uint32_t ctx = gem_context_create(i915);
>>> +
>>> +     /*
>>> +      * If there is no priority support, all contexts will have equal
>>> +      * priority (and therefore the max user priority), so no context
>>> +      * can overtake us, and we effectively can form a plug.
>>> +      */
>>> +     __gem_context_set_priority(i915, ctx, MAX_PRIO);
>>> +
>>> +     return ctx;
>>> +}
>>> +
>>> +static void unplug_show_queue(int i915, struct igt_cork *c, unsigned int engine)
>>> +{
>>> +     igt_spin_t *spin[MAX_ELSP_QLEN];
>>
>> Why is this 16?
> 
> 2x as big as the deepest known qlen. And 16 is the number that crops up
> everywhere as a "just big enough" number.
>   
>>> +
>>> +     for (int n = 0; n < ARRAY_SIZE(spin); n++) {
>>> +             const struct igt_spin_factory opts = {
>>> +                     .ctx = create_highest_priority(i915),
>>> +                     .engine = engine,
>>> +             };
>>> +             spin[n] = __igt_spin_factory(i915, &opts);
>>> +             gem_context_destroy(i915, opts.ctx);
>>> +     }
>>> +
>>> +     igt_cork_unplug(c); /* batches will now be queued on the engine */
>>> +     igt_debugfs_dump(i915, "i915_engine_info");
>>> +
>>> +     for (int n = 0; n < ARRAY_SIZE(spin); n++)
>>> +             igt_spin_free(i915, spin[n]);
>>> +}
>>> +
>>> +static uint32_t store_timestamp(int i915,
>>> +                             uint32_t ctx, unsigned ring,
>>> +                             unsigned mmio_base)
>>> +{
>>> +     const bool r64b = intel_gen(intel_get_drm_devid(i915)) >= 8;
>>> +     struct drm_i915_gem_exec_object2 obj = {
>>> +             .handle = gem_create(i915, 4096),
>>> +             .relocation_count = 1,
>>> +     };
>>> +     struct drm_i915_gem_relocation_entry reloc = {
>>> +             .target_handle = obj.handle,
>>> +             .offset = 2 * sizeof(uint32_t),
>>> +             .delta = 4092,
>>> +             .read_domains = I915_GEM_DOMAIN_INSTRUCTION,
>>> +     };
>>> +     struct drm_i915_gem_execbuffer2 execbuf = {
>>> +             .buffers_ptr = to_user_pointer(&obj),
>>> +             .buffer_count = 1,
>>> +             .flags = ring,
>>> +             .rsvd1 = ctx,
>>> +     };
>>> +     uint32_t batch[] = {
>>> +             0x24 << 23 | (1 + r64b), /* SRM */
>>> +             mmio_base + 0x358,
>>> +             4092,
>>> +             0,
>>> +             MI_BATCH_BUFFER_END
>>> +     };
>>> +
>>> +     igt_require(intel_gen(intel_get_drm_devid(i915)) >= 7);
>>> +
>>> +     gem_write(i915, obj.handle, 0, batch, sizeof(batch));
>>> +     obj.relocs_ptr = to_user_pointer(&reloc);
>>> +
>>> +     gem_execbuf(i915, &execbuf);
>>> +
>>> +     return obj.handle;
>>> +}
>>> +
>>> +static void independent(int i915, unsigned ring, unsigned flags)
>>> +{
>>> +     uint32_t handle[ARRAY_SIZE(priorities)];
>>> +     igt_spin_t *spin[MAX_ELSP_QLEN];
>>> +     unsigned int mmio_base;
>>> +
>>> +     /* XXX i915_query()! */
>>> +     switch (ring) {
>>> +     case I915_EXEC_DEFAULT:
>>> +     case I915_EXEC_RENDER:
>>> +             mmio_base = 0x2000;
>>> +             break;
>>> +#if 0
>>> +     case I915_EXEC_BSD:
>>> +             mmio_base = 0x12000;
>>> +             break;
>>> +#endif
>>> +     case I915_EXEC_BLT:
>>> +             mmio_base = 0x22000;
>>> +             break;
>>> +
>>> +     case I915_EXEC_VEBOX:
>>> +             if (intel_gen(intel_get_drm_devid(i915)) >= 11)
>>> +                     mmio_base = 0x1d8000;
>>> +             else
>>> +                     mmio_base = 0x1a000;
>>> +             break;
>>> +
>>> +     default:
>>> +             igt_skip("mmio base not known\n");
>>> +     }
>>
>> Ufff this is quite questionable. Should we rather have this subtest in
>> selftests only?
> 
> We should be exporting this information. It is a non-privileged register
> that is used by normal clients to measure elapsed time.

I see... who uses it? Mesa? Is it just one register? What would make more
sense: a query to read this register, a query to get the register address,
or the mmio_base (as you were proposing some time ago)?
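
For the last option, a hypothetical query item could look like this
(sketch only, this struct is not an existing uAPI):

	struct drm_i915_query_engine_mmio_base {
		__u16 engine_class;    /* in */
		__u16 engine_instance; /* in */
		__u32 mmio_base;       /* out */
	};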

> 
>>> +
>>> +     for (int n = 0; n < ARRAY_SIZE(spin); n++) {
>>> +             const struct igt_spin_factory opts = {
>>> +                     .ctx = create_highest_priority(i915),
>>> +                     .engine = ring,
>>> +             };
>>> +             spin[n] = __igt_spin_factory(i915, &opts);
>>> +             gem_context_destroy(i915, opts.ctx);
>>> +     }
>>> +
>>> +     for (int i = 0; i < ARRAY_SIZE(priorities); i++) {
>>> +             uint32_t ctx = gem_queue_create(i915);
>>> +             gem_context_set_priority(i915, ctx, priorities[i]);
>>> +             handle[i] = store_timestamp(i915, ctx, ring, mmio_base);
>>> +             gem_context_destroy(i915, ctx);
>>> +     }
>>> +
>>> +     for (int n = 0; n < ARRAY_SIZE(spin); n++)
>>> +             igt_spin_free(i915, spin[n]);
>>> +
>>> +     for (int i = 0; i < ARRAY_SIZE(priorities); i++) {
>>> +             uint32_t *ptr;
>>> +
>>> +             ptr = gem_mmap__gtt(i915, handle[i], 4096, PROT_READ);
>>> +             gem_set_domain(i915, handle[i], /* no write hazard lies! */
>>> +                            I915_GEM_DOMAIN_GTT, I915_GEM_DOMAIN_GTT);
>>> +             gem_close(i915, handle[i]);
>>> +
>>> +             handle[i] = ptr[1023];
>>
>> 1023 relates to 4092 from store_timestamp I gather. The two need to be
>> defined closer together.
>>
>>> +             munmap(ptr, 4096);
>>> +
>>> +             igt_debug("ctx[%d] .prio=%d, timestamp=%u\n",
>>> +                       i, priorities[i], handle[i]);
>>> +     }
>>> +
>>> +     igt_assert((int32_t)(handle[HI] - handle[LO]) < 0);
>>> +}
>>> +
>>> +static void reorder(int i915, unsigned ring, unsigned flags)
>>> +#define EQUAL 1
>>> +{
>>> +     IGT_CORK_HANDLE(cork);
>>> +     uint32_t scratch;
>>> +     uint32_t *ptr;
>>> +     uint32_t ctx[2];
>>> +     uint32_t plug;
>>> +
>>> +     ctx[LO] = gem_queue_create(i915);
>>> +     gem_context_set_priority(i915, ctx[LO], MIN_PRIO);
>>> +
>>> +     ctx[HI] = gem_queue_create(i915);
>>> +     gem_context_set_priority(i915, ctx[HI], flags & EQUAL ? MIN_PRIO : 0);
>>> +
>>> +     scratch = gem_create(i915, 4096);
>>> +     plug = igt_cork_plug(&cork, i915);
>>> +
>>> +     /* We expect the high priority context to be executed first, and
>>> +      * so the final result will be value from the low priority context.
>>> +      */
>>> +     store_dword(i915, ctx[LO], ring, scratch, 0, ctx[LO], plug, 0);
>>> +     store_dword(i915, ctx[HI], ring, scratch, 0, ctx[HI], plug, 0);
>>> +
>>> +     unplug_show_queue(i915, &cork, ring);
>>> +     gem_close(i915, plug);
>>> +
>>> +     gem_context_destroy(i915, ctx[LO]);
>>> +     gem_context_destroy(i915, ctx[HI]);
>>> +
>>> +     ptr = gem_mmap__gtt(i915, scratch, 4096, PROT_READ);
>>> +     gem_set_domain(i915, scratch, /* no write hazard lies! */
>>> +                    I915_GEM_DOMAIN_GTT, I915_GEM_DOMAIN_GTT);
>>> +     gem_close(i915, scratch);
>>> +
>>> +     if (flags & EQUAL) /* equal priority, result will be fifo */
>>> +             igt_assert_eq_u32(ptr[0], ctx[HI]);
>>> +     else
>>> +             igt_assert_eq_u32(ptr[0], ctx[LO]);
>>> +     munmap(ptr, 4096);
>>> +}
>>> +
>>> +static void promotion(int i915, unsigned ring)
>>> +{
>>> +     IGT_CORK_HANDLE(cork);
>>> +     uint32_t result, dep;
>>> +     uint32_t *ptr;
>>> +     uint32_t ctx[3];
>>> +     uint32_t plug;
>>> +
>>> +     ctx[LO] = gem_queue_create(i915);
>>> +     gem_context_set_priority(i915, ctx[LO], MIN_PRIO);
>>> +
>>> +     ctx[HI] = gem_queue_create(i915);
>>> +     gem_context_set_priority(i915, ctx[HI], 0);
>>> +
>>> +     ctx[NOISE] = gem_queue_create(i915);
>>> +     gem_context_set_priority(i915, ctx[NOISE], MIN_PRIO/2);
>>> +
>>> +     result = gem_create(i915, 4096);
>>> +     dep = gem_create(i915, 4096);
>>> +
>>> +     plug = igt_cork_plug(&cork, i915);
>>> +
>>> +     /* Expect that HI promotes LO, so the order will be LO, HI, NOISE.
>>> +      *
>>> +      * fifo would be NOISE, LO, HI.
>>> +      * strict priority would be  HI, NOISE, LO
>>> +      */
>>> +     store_dword(i915, ctx[NOISE], ring, result, 0, ctx[NOISE], plug, 0);
>>> +     store_dword(i915, ctx[LO], ring, result, 0, ctx[LO], plug, 0);
>>> +
>>> +     /* link LO <-> HI via a dependency on another buffer */
>>> +     store_dword(i915, ctx[LO], ring, dep, 0, ctx[LO], 0, I915_GEM_DOMAIN_INSTRUCTION);
>>> +     store_dword(i915, ctx[HI], ring, dep, 0, ctx[HI], 0, 0);
>>> +
>>> +     store_dword(i915, ctx[HI], ring, result, 0, ctx[HI], 0, 0);
>>> +
>>> +     unplug_show_queue(i915, &cork, ring);
>>> +     gem_close(i915, plug);
>>> +
>>> +     gem_context_destroy(i915, ctx[NOISE]);
>>> +     gem_context_destroy(i915, ctx[LO]);
>>> +     gem_context_destroy(i915, ctx[HI]);
>>> +
>>> +     ptr = gem_mmap__gtt(i915, dep, 4096, PROT_READ);
>>> +     gem_set_domain(i915, dep, /* no write hazard lies! */
>>> +                     I915_GEM_DOMAIN_GTT, I915_GEM_DOMAIN_GTT);
>>> +     gem_close(i915, dep);
>>> +
>>> +     igt_assert_eq_u32(ptr[0], ctx[HI]);
>>> +     munmap(ptr, 4096);
>>> +
>>> +     ptr = gem_mmap__gtt(i915, result, 4096, PROT_READ);
>>> +     gem_set_domain(i915, result, /* no write hazard lies! */
>>> +                     I915_GEM_DOMAIN_GTT, I915_GEM_DOMAIN_GTT);
>>> +     gem_close(i915, result);
>>> +
>>> +     igt_assert_eq_u32(ptr[0], ctx[NOISE]);
>>> +     munmap(ptr, 4096);
>>> +}
>>> +
>>> +static void smoketest(int i915, unsigned ring, unsigned timeout)
>>> +{
>>> +     const int ncpus = sysconf(_SC_NPROCESSORS_ONLN);
>>> +     unsigned engines[16];
>>> +     unsigned nengine;
>>> +     unsigned engine;
>>> +     uint32_t scratch;
>>> +     uint32_t *ptr;
>>> +
>>> +     nengine = 0;
>>> +     for_each_engine(i915, engine) {
>>> +             if (ignore_engine(engine))
>>> +                     continue;
>>> +
>>> +             engines[nengine++] = engine;
>>> +     }
>>> +     igt_require(nengine);
>>
>> for_each_physical and counting the engines for the engines array would be
>> better I think.
>>
>>> +
>>> +     scratch = gem_create(i915, 4096);
>>> +     igt_fork(child, ncpus) {
>>> +             unsigned long count = 0;
>>> +             uint32_t ctx;
>>> +
>>> +             hars_petruska_f54_1_random_perturb(child);
>>> +
>>> +             ctx = gem_queue_create(i915);
>>> +             igt_until_timeout(timeout) {
>>> +                     int prio;
>>> +
>>> +                     prio = hars_petruska_f54_1_random_unsafe_max(MAX_PRIO - MIN_PRIO) + MIN_PRIO;
>>> +                     gem_context_set_priority(i915, ctx, prio);
>>> +
>>> +                     engine = engines[hars_petruska_f54_1_random_unsafe_max(nengine)];
>>> +                     store_dword(i915, ctx, engine, scratch,
>>> +                                 8*child + 0, ~child,
>>> +                                 0, 0);
>>> +                     for (unsigned int step = 0; step < 8; step++)
>>> +                             store_dword(i915, ctx, engine, scratch,
>>> +                                         8*child + 4, count++,
>>> +                                         0, 0);
>>> +             }
>>> +             gem_context_destroy(i915, ctx);
>>> +     }
>>> +     igt_waitchildren();
>>> +
>>> +     ptr = gem_mmap__gtt(i915, scratch, 4096, PROT_READ);
>>> +     gem_set_domain(i915, scratch, /* no write hazard lies! */
>>> +                     I915_GEM_DOMAIN_GTT, I915_GEM_DOMAIN_GTT);
>>> +     gem_close(i915, scratch);
>>> +
>>> +     for (unsigned n = 0; n < ncpus; n++) {
>>> +             igt_assert_eq_u32(ptr[2*n], ~n);
>>> +             /*
>>> +              * Note this count is approximate due to unconstrained
>>> +              * ordering of the dword writes between engines.
>>> +              *
>>> +              * Take the result with a pinch of salt.
>>> +              */
>>> +             igt_info("Child[%d] completed %u cycles\n",  n, ptr[2*n+1]);
>>> +     }
>>> +     munmap(ptr, 4096);
>>> +}
>>> +
>>> +igt_main
>>> +{
>>> +     const struct intel_execution_engine *e;
>>> +     int i915 = -1;
>>> +
>>> +     igt_fixture {
>>> +             i915 = drm_open_driver(DRIVER_INTEL);
>>> +             igt_require_gem(i915);
>>> +     }
>>> +
>>> +     igt_subtest_group {
>>> +             igt_fixture {
>>> +                     igt_require(gem_contexts_has_shared_gtt(i915));
>>> +                     igt_fork_hang_detector(i915);
>>> +             }
>>> +
>>> +             igt_subtest("create-shared-gtt")
>>> +                     create_shared_gtt(i915, 0);
>>> +
>>> +             igt_subtest("detached-shared-gtt")
>>> +                     create_shared_gtt(i915, DETACHED);
>>> +
>>> +             igt_subtest("disjoint-timelines")
>>> +                     disjoint_timelines(i915);
>>> +
>>> +             igt_subtest("single-timeline")
>>> +                     single_timeline(i915);
>>> +
>>> +             igt_subtest("exhaust-shared-gtt")
>>> +                     exhaust_shared_gtt(i915, 0);
>>> +
>>> +             igt_subtest("exhaust-shared-gtt-lrc")
>>> +                     exhaust_shared_gtt(i915, EXHAUST_LRC);
>>> +
>>> +             for (e = intel_execution_engines; e->name; e++) {
>>> +                     igt_subtest_f("exec-shared-gtt-%s", e->name)
>>> +                             exec_shared_gtt(i915, e->exec_id | e->flags);
>>
>> The same previously raised question: should it iterate the legacy
>> execbuf engines or the physical engines? Maybe you want different subtests
>> to do both?
> 
> It should be testing the cross between the context and execbuf uABI, not
> physical.

Same question as earlier: why is this cross interesting? I mean, what is
interesting in the intersection between legacy execbuf engine selection and
ppGTT sharing? And why, at the same time, is it not relevant to exercise the
new execbuf engine selection ABI?

Regards,

Tvrtko
diff mbox series

Patch

diff --git a/lib/i915/gem_context.c b/lib/i915/gem_context.c
index f94d89cb4..8fb8984d1 100644
--- a/lib/i915/gem_context.c
+++ b/lib/i915/gem_context.c
@@ -272,6 +272,74 @@  void gem_context_set_priority(int fd, uint32_t ctx_id, int prio)
 	igt_assert_eq(__gem_context_set_priority(fd, ctx_id, prio), 0);
 }
 
+int
+__gem_context_clone(int i915,
+		    uint32_t src, unsigned int share,
+		    unsigned int flags,
+		    uint32_t *out)
+{
+	struct drm_i915_gem_context_create_ext_clone clone = {
+		{ .name = I915_CONTEXT_CREATE_EXT_CLONE },
+		.clone_id = src,
+		.flags = share,
+	};
+	struct drm_i915_gem_context_create_ext arg = {
+		.flags = flags | I915_CONTEXT_CREATE_FLAGS_USE_EXTENSIONS,
+		.extensions = to_user_pointer(&clone),
+	};
+	int err = 0;
+
+	if (igt_ioctl(i915, DRM_IOCTL_I915_GEM_CONTEXT_CREATE_EXT, &arg))
+		err = -errno;
+
+	*out = arg.ctx_id;
+
+	errno = 0;
+	return err;
+}
+
+static bool __gem_context_has(int i915, uint32_t share, unsigned int flags)
+{
+	uint32_t ctx;
+
+	__gem_context_clone(i915, 0, share, flags, &ctx);
+	if (ctx)
+		gem_context_destroy(i915, ctx);
+
+	errno = 0;
+	return ctx;
+}
+
+bool gem_contexts_has_shared_gtt(int i915)
+{
+	return __gem_context_has(i915, I915_CONTEXT_CLONE_VM, 0);
+}
+
+bool gem_has_queues(int i915)
+{
+	return __gem_context_has(i915,
+				 I915_CONTEXT_CLONE_VM,
+				 I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE);
+}
+
+uint32_t gem_context_clone(int i915,
+			   uint32_t src, unsigned int share,
+			   unsigned int flags)
+{
+	uint32_t ctx;
+
+	igt_assert_eq(__gem_context_clone(i915, src, share, flags, &ctx), 0);
+
+	return ctx;
+}
+
+uint32_t gem_queue_create(int i915)
+{
+	return gem_context_clone(i915, 0,
+				 I915_CONTEXT_CLONE_VM,
+				 I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE);
+}
+
 bool gem_context_has_engine(int fd, uint32_t ctx, uint64_t engine)
 {
 	struct drm_i915_gem_exec_object2 exec = {};
diff --git a/lib/i915/gem_context.h b/lib/i915/gem_context.h
index a052714d4..8043c3401 100644
--- a/lib/i915/gem_context.h
+++ b/lib/i915/gem_context.h
@@ -29,6 +29,19 @@  int __gem_context_create(int fd, uint32_t *ctx_id);
 void gem_context_destroy(int fd, uint32_t ctx_id);
 int __gem_context_destroy(int fd, uint32_t ctx_id);
 
+int __gem_context_clone(int i915,
+			uint32_t src, unsigned int share,
+			unsigned int flags,
+			uint32_t *out);
+uint32_t gem_context_clone(int i915,
+			   uint32_t src, unsigned int share,
+			   unsigned int flags);
+
+uint32_t gem_queue_create(int i915);
+
+bool gem_contexts_has_shared_gtt(int i915);
+bool gem_has_queues(int i915);
+
 bool gem_has_contexts(int fd);
 void gem_require_contexts(int fd);
 void gem_context_require_bannable(int fd);
diff --git a/tests/Makefile.sources b/tests/Makefile.sources
index e1b7feeb2..3552e895b 100644
--- a/tests/Makefile.sources
+++ b/tests/Makefile.sources
@@ -22,6 +22,7 @@  TESTS_progs = \
 	drm_mm \
 	drm_read \
 	i915/gem_ctx_clone \
+	i915/gem_ctx_shared \
 	i915/gem_vm_create \
 	kms_3d \
 	kms_addfb_basic \
diff --git a/tests/i915/gem_ctx_shared.c b/tests/i915/gem_ctx_shared.c
new file mode 100644
index 000000000..0076f5e9d
--- /dev/null
+++ b/tests/i915/gem_ctx_shared.c
@@ -0,0 +1,856 @@ 
+/*
+ * Copyright © 2017 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ */
+
+#include "igt.h"
+
+#include <unistd.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <string.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <errno.h>
+#include <sys/stat.h>
+#include <sys/ioctl.h>
+#include <sys/time.h>
+
+#include <drm.h>
+
+#include "igt_rand.h"
+#include "igt_vgem.h"
+#include "sync_file.h"
+
+#define LO 0
+#define HI 1
+#define NOISE 2
+
+#define MAX_PRIO LOCAL_I915_CONTEXT_MAX_USER_PRIORITY
+#define MIN_PRIO LOCAL_I915_CONTEXT_MIN_USER_PRIORITY
+
+static int priorities[] = {
+	[LO] = MIN_PRIO / 2,
+	[HI] = MAX_PRIO / 2,
+};
+
+#define MAX_ELSP_QLEN 16
+
+IGT_TEST_DESCRIPTION("Test shared contexts.");
+
+static void create_shared_gtt(int i915, unsigned int flags)
+#define DETACHED 0x1
+{
+	const uint32_t bbe = MI_BATCH_BUFFER_END;
+	struct drm_i915_gem_exec_object2 obj = {
+		.handle = gem_create(i915, 4096),
+	};
+	struct drm_i915_gem_execbuffer2 execbuf = {
+		.buffers_ptr = to_user_pointer(&obj),
+		.buffer_count = 1,
+	};
+	uint32_t parent, child;
+
+	gem_write(i915, obj.handle, 0, &bbe, sizeof(bbe));
+	gem_execbuf(i915, &execbuf);
+	gem_sync(i915, obj.handle);
+
+	child = flags & DETACHED ? gem_context_create(i915) : 0;
+	igt_until_timeout(2) {
+		parent = flags & DETACHED ? child : 0;
+		child = gem_context_clone(i915,
+					  parent, I915_CONTEXT_CLONE_VM,
+					  0);
+		execbuf.rsvd1 = child;
+		gem_execbuf(i915, &execbuf);
+
+		if (flags & DETACHED) {
+			gem_context_destroy(i915, parent);
+			gem_execbuf(i915, &execbuf);
+		} else {
+			parent = child;
+			gem_context_destroy(i915, parent);
+		}
+
+		execbuf.rsvd1 = parent;
+		igt_assert_eq(__gem_execbuf(i915, &execbuf), -ENOENT);
+		igt_assert_eq(__gem_context_clone(i915,
+						  parent, I915_CONTEXT_CLONE_VM,
+						  0, &parent), -ENOENT);
+	}
+	if (flags & DETACHED)
+		gem_context_destroy(i915, child);
+
+	gem_sync(i915, obj.handle);
+	gem_close(i915, obj.handle);
+}
+
+static void disjoint_timelines(int i915)
+{
+	IGT_CORK_HANDLE(cork);
+	igt_spin_t *spin[2];
+	uint32_t plug, child;
+
+	igt_require(gem_has_execlists(i915));
+
+	/*
+	 * Each context, although they share a vm, are expected to be
+	 * distinct timelines. A request queued to one context should be
+	 * independent of any shared contexts.
+	 */
+	child = gem_context_clone(i915, 0, I915_CONTEXT_CLONE_VM, 0);
+	plug = igt_cork_plug(&cork, i915);
+
+	spin[0] = __igt_spin_new(i915, .ctx = 0, .dependency = plug);
+	spin[1] = __igt_spin_new(i915, .ctx = child);
+
+	/* Wait for the second spinner, will hang if stuck behind the first */
+	igt_spin_end(spin[1]);
+	gem_sync(i915, spin[1]->handle);
+
+	igt_cork_unplug(&cork);
+
+	igt_spin_free(i915, spin[1]);
+	igt_spin_free(i915, spin[0]);
+}
+
+static void exhaust_shared_gtt(int i915, unsigned int flags)
+#define EXHAUST_LRC 0x1
+{
+	i915 = gem_reopen_driver(i915);
+
+	igt_fork(pid, 1) {
+		const uint32_t bbe = MI_BATCH_BUFFER_END;
+		struct drm_i915_gem_exec_object2 obj = {
+			.handle = gem_create(i915, 4096)
+		};
+		struct drm_i915_gem_execbuffer2 execbuf = {
+			.buffers_ptr = to_user_pointer(&obj),
+			.buffer_count = 1,
+		};
+		uint32_t parent, child;
+		unsigned long count = 0;
+		int err;
+
+		gem_write(i915, obj.handle, 0, &bbe, sizeof(bbe));
+
+		child = 0;
+		for (;;) {
+			parent = child;
+			err = __gem_context_clone(i915,
+						  parent, I915_CONTEXT_CLONE_VM,
+						  0, &child);
+			if (err)
+				break;
+
+			if (flags & EXHAUST_LRC) {
+				execbuf.rsvd1 = child;
+				err = __gem_execbuf(i915, &execbuf);
+				if (err)
+					break;
+			}
+
+			count++;
+		}
+		gem_sync(i915, obj.handle);
+
+		igt_info("Created %lu shared contexts, before %d (%s)\n",
+			 count, err, strerror(-err));
+	}
+	close(i915);
+	igt_waitchildren();
+}
+
+static void exec_shared_gtt(int i915, unsigned int ring)
+{
+	const int gen = intel_gen(intel_get_drm_devid(i915));
+	const uint32_t bbe = MI_BATCH_BUFFER_END;
+	struct drm_i915_gem_exec_object2 obj = {
+		.handle = gem_create(i915, 4096)
+	};
+	struct drm_i915_gem_execbuffer2 execbuf = {
+		.buffers_ptr = to_user_pointer(&obj),
+		.buffer_count = 1,
+		.flags = ring,
+	};
+	uint32_t scratch = obj.handle;
+	uint32_t batch[16];
+	int i;
+
+	gem_require_ring(i915, ring);
+	igt_require(gem_can_store_dword(i915, ring));
+
+	/* Load object into place in the GTT */
+	gem_write(i915, obj.handle, 0, &bbe, sizeof(bbe));
+	gem_execbuf(i915, &execbuf);
+
+	/* Presume nothing causes an eviction in the meantime */
+
+	obj.handle = gem_create(i915, 4096);
+
+	i = 0;
+	batch[i] = MI_STORE_DWORD_IMM | (gen < 6 ? 1 << 22 : 0);
+	if (gen >= 8) {
+		batch[++i] = obj.offset;
+		batch[++i] = 0;
+	} else if (gen >= 4) {
+		batch[++i] = 0;
+		batch[++i] = obj.offset;
+	} else {
+		batch[i]--;
+		batch[++i] = obj.offset;
+	}
+	batch[++i] = 0xc0ffee;
+	batch[++i] = MI_BATCH_BUFFER_END;
+	gem_write(i915, obj.handle, 0, batch, sizeof(batch));
+
+	obj.offset += 4096; /* make sure we don't cause an eviction! */
+	obj.flags |= EXEC_OBJECT_PINNED;
+	execbuf.rsvd1 = gem_context_clone(i915, 0, I915_CONTEXT_CLONE_VM, 0);
+	if (gen > 3 && gen < 6)
+		execbuf.flags |= I915_EXEC_SECURE;
+
+	gem_execbuf(i915, &execbuf);
+	gem_context_destroy(i915, execbuf.rsvd1);
+	gem_sync(i915, obj.handle); /* write hazard lies */
+	gem_close(i915, obj.handle);
+
+	gem_read(i915, scratch, 0, batch, sizeof(uint32_t));
+	gem_close(i915, scratch);
+
+	igt_assert_eq_u32(*batch, 0xc0ffee);
+}
+
+static int nop_sync(int i915, uint32_t ctx, unsigned int ring, int64_t timeout)
+{
+	const uint32_t bbe = MI_BATCH_BUFFER_END;
+	struct drm_i915_gem_exec_object2 obj = {
+		.handle = gem_create(i915, 4096),
+	};
+	struct drm_i915_gem_execbuffer2 execbuf = {
+		.buffers_ptr = to_user_pointer(&obj),
+		.buffer_count = 1,
+		.flags = ring,
+		.rsvd1 = ctx,
+	};
+	int err;
+
+	gem_write(i915, obj.handle, 0, &bbe, sizeof(bbe));
+	gem_execbuf(i915, &execbuf);
+	err = gem_wait(i915, obj.handle, &timeout);
+	gem_close(i915, obj.handle);
+
+	return err;
+}
+
+static bool has_single_timeline(int i915)
+{
+	uint32_t ctx;
+
+	__gem_context_clone(i915, 0, 0,
+			    I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE,
+			    &ctx);
+	if (ctx)
+		gem_context_destroy(i915, ctx);
+
+	return ctx != 0;
+}
+
+static bool ignore_engine(unsigned engine)
+{
+	if (engine == 0)
+		return true;
+
+	if (engine == I915_EXEC_BSD)
+		return true;
+
+	return false;
+}
+
+static void single_timeline(int i915)
+{
+	const uint32_t bbe = MI_BATCH_BUFFER_END;
+	struct drm_i915_gem_exec_object2 obj = {
+		.handle = gem_create(i915, 4096),
+	};
+	struct drm_i915_gem_execbuffer2 execbuf = {
+		.buffers_ptr = to_user_pointer(&obj),
+		.buffer_count = 1,
+	};
+	struct sync_fence_info rings[16];
+	struct sync_file_info sync_file_info = {
+		.num_fences = 1,
+	};
+	unsigned int engine;
+	int n;
+
+	igt_require(has_single_timeline(i915));
+
+	gem_write(i915, obj.handle, 0, &bbe, sizeof(bbe));
+	gem_execbuf(i915, &execbuf);
+	gem_sync(i915, obj.handle);
+
+	/*
+	 * For a "single timeline" context, each ring is on the common
+	 * timeline, unlike a normal context where each ring has an
+	 * independent timeline. That is no matter which engine we submit
+	 * to, it reports the same timeline name and fence context. However,
+	 * the fence context is not reported through the sync_fence_info.
+	 */
+	execbuf.rsvd1 =
+		gem_context_clone(i915, 0, 0,
+				  I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE);
+	execbuf.flags = I915_EXEC_FENCE_OUT;
+	n = 0;
+	for_each_engine(i915, engine) {
+		gem_execbuf_wr(i915, &execbuf);
+		sync_file_info.sync_fence_info = to_user_pointer(&rings[n]);
+		do_ioctl(execbuf.rsvd2 >> 32, SYNC_IOC_FILE_INFO, &sync_file_info);
+		close(execbuf.rsvd2 >> 32);
+
+		igt_info("ring[%d] fence: %s %s\n",
+			 n, rings[n].driver_name, rings[n].obj_name);
+		n++;
+	}
+	gem_sync(i915, obj.handle);
+	gem_close(i915, obj.handle);
+
+	for (int i = 1; i < n; i++) {
+		igt_assert(!strcmp(rings[0].driver_name, rings[i].driver_name));
+		igt_assert(!strcmp(rings[0].obj_name, rings[i].obj_name));
+	}
+}
+
+static void exec_single_timeline(int i915, unsigned int ring)
+{
+	unsigned int other;
+	igt_spin_t *spin;
+	uint32_t ctx;
+
+	gem_require_ring(i915, ring);
+	igt_require(has_single_timeline(i915));
+
+	/*
+	 * On an ordinary context, a blockage on one ring doesn't prevent
+	 * execution on an other.
+	 */
+	ctx = 0;
+	spin = NULL;
+	for_each_engine(i915, other) {
+		if (other == ring || ignore_engine(other))
+			continue;
+
+		if (spin == NULL) {
+			spin = __igt_spin_new(i915, .ctx = ctx, .engine = other);
+		} else {
+			struct drm_i915_gem_execbuffer2 execbuf = {
+				.buffers_ptr = spin->execbuf.buffers_ptr,
+				.buffer_count = spin->execbuf.buffer_count,
+				.flags = other,
+				.rsvd1 = ctx,
+			};
+			gem_execbuf(i915, &execbuf);
+		}
+	}
+	igt_require(spin);
+	igt_assert_eq(nop_sync(i915, ctx, ring, NSEC_PER_SEC), 0);
+	igt_spin_free(i915, spin);
+
+	/*
+	 * But if we create a context with just a single shared timeline,
+	 * then it will block waiting for the earlier requests on the
+	 * other engines.
+	 */
+	ctx = gem_context_clone(i915, 0, 0,
+				I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE);
+	spin = NULL;
+	for_each_engine(i915, other) {
+		if (other == ring || ignore_engine(other))
+			continue;
+
+		if (spin == NULL) {
+			spin = __igt_spin_new(i915, .ctx = ctx, .engine = other);
+		} else {
+			struct drm_i915_gem_execbuffer2 execbuf = {
+				.buffers_ptr = spin->execbuf.buffers_ptr,
+				.buffer_count = spin->execbuf.buffer_count,
+				.flags = other,
+				.rsvd1 = ctx,
+			};
+			gem_execbuf(i915, &execbuf);
+		}
+	}
+	igt_assert(spin);
+	igt_assert_eq(nop_sync(i915, ctx, ring, NSEC_PER_SEC), -ETIME);
+	igt_spin_free(i915, spin);
+}
+
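+/*
+ * Emit a MI_STORE_DWORD_IMM on @ring writing @value into @target at
+ * @offset. If @cork is non-zero it is included in the execbuf as a
+ * dependency, stalling the write until the cork is unplugged.
+ */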
+static void store_dword(int i915, uint32_t ctx, unsigned ring,
+			uint32_t target, uint32_t offset, uint32_t value,
+			uint32_t cork, unsigned write_domain)
+{
+	const int gen = intel_gen(intel_get_drm_devid(i915));
+	struct drm_i915_gem_exec_object2 obj[3];
+	struct drm_i915_gem_relocation_entry reloc;
+	struct drm_i915_gem_execbuffer2 execbuf;
+	uint32_t batch[16];
+	int i;
+
+	memset(&execbuf, 0, sizeof(execbuf));
+	execbuf.buffers_ptr = to_user_pointer(obj + !cork);
+	execbuf.buffer_count = 2 + !!cork;
+	execbuf.flags = ring;
+	if (gen < 6)
+		execbuf.flags |= I915_EXEC_SECURE;
+	execbuf.rsvd1 = ctx;
+
+	memset(obj, 0, sizeof(obj));
+	obj[0].handle = cork;
+	obj[1].handle = target;
+	obj[2].handle = gem_create(i915, 4096);
+
+	memset(&reloc, 0, sizeof(reloc));
+	reloc.target_handle = obj[1].handle;
+	reloc.presumed_offset = 0;
+	reloc.offset = sizeof(uint32_t);
+	reloc.delta = offset;
+	reloc.read_domains = I915_GEM_DOMAIN_INSTRUCTION;
+	reloc.write_domain = write_domain;
+	obj[2].relocs_ptr = to_user_pointer(&reloc);
+	obj[2].relocation_count = 1;
+
+	i = 0;
+	batch[i] = MI_STORE_DWORD_IMM | (gen < 6 ? 1 << 22 : 0);
+	if (gen >= 8) {
+		batch[++i] = offset;
+		batch[++i] = 0;
+	} else if (gen >= 4) {
+		batch[++i] = 0;
+		batch[++i] = offset;
+		reloc.offset += sizeof(uint32_t);
+	} else {
+		batch[i]--;
+		batch[++i] = offset;
+	}
+	batch[++i] = value;
+	batch[++i] = MI_BATCH_BUFFER_END;
+	gem_write(i915, obj[2].handle, 0, batch, sizeof(batch));
+	gem_execbuf(i915, &execbuf);
+	gem_close(i915, obj[2].handle);
+}
+
+static uint32_t create_highest_priority(int i915)
+{
+	uint32_t ctx = gem_context_create(i915);
+
+	/*
+	 * If there is no priority support, all contexts will have equal
+	 * priority (and therefore the max user priority), so no context
+	 * can overtake us, and we can still form an effective plug.
+	 */
+	__gem_context_set_priority(i915, ctx, MAX_PRIO);
+
+	return ctx;
+}
+
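+/*
+ * Occupy the engine with a stack of maximum priority spinners so that,
+ * when the cork is popped, the test batches stay queued behind them and
+ * show up in the i915_engine_info dump.
+ */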
+static void unplug_show_queue(int i915, struct igt_cork *c, unsigned int engine)
+{
+	igt_spin_t *spin[MAX_ELSP_QLEN];
+
+	for (int n = 0; n < ARRAY_SIZE(spin); n++) {
+		const struct igt_spin_factory opts = {
+			.ctx = create_highest_priority(i915),
+			.engine = engine,
+		};
+		spin[n] = __igt_spin_factory(i915, &opts);
+		gem_context_destroy(i915, opts.ctx);
+	}
+
+	igt_cork_unplug(c); /* batches will now be queued on the engine */
+	igt_debugfs_dump(i915, "i915_engine_info");
+
+	for (int n = 0; n < ARRAY_SIZE(spin); n++)
+		igt_spin_free(i915, spin[n]);
+}
+
+static uint32_t store_timestamp(int i915,
+				uint32_t ctx, unsigned ring,
+				unsigned mmio_base)
+{
+	const bool r64b = intel_gen(intel_get_drm_devid(i915)) >= 8;
+	struct drm_i915_gem_exec_object2 obj = {
+		.handle = gem_create(i915, 4096),
+		.relocation_count = 1,
+	};
+	struct drm_i915_gem_relocation_entry reloc = {
+		.target_handle = obj.handle,
+		.offset = 2 * sizeof(uint32_t),
+		.delta = 4092,
+		.read_domains = I915_GEM_DOMAIN_INSTRUCTION,
+	};
+	struct drm_i915_gem_execbuffer2 execbuf = {
+		.buffers_ptr = to_user_pointer(&obj),
+		.buffer_count = 1,
+		.flags = ring,
+		.rsvd1 = ctx,
+	};
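+	/*
+	 * Use SRM to copy the engine's TIMESTAMP register (mmio_base + 0x358)
+	 * into the last dword of the batch object, recording when the batch
+	 * was executed.
+	 */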
+	uint32_t batch[] = {
+		0x24 << 23 | (1 + r64b), /* SRM */
+		mmio_base + 0x358,
+		4092,
+		0,
+		MI_BATCH_BUFFER_END
+	};
+
+	igt_require(intel_gen(intel_get_drm_devid(i915)) >= 7);
+
+	gem_write(i915, obj.handle, 0, batch, sizeof(batch));
+	obj.relocs_ptr = to_user_pointer(&reloc);
+
+	gem_execbuf(i915, &execbuf);
+
+	return obj.handle;
+}
+
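+/*
+ * While the engine is blocked by max priority spinners, submit a
+ * timestamp read from queues of differing priority; once the spinners
+ * are released, the higher priority queue must have run first, i.e.
+ * captured the earlier timestamp.
+ */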
+static void independent(int i915, unsigned ring, unsigned flags)
+{
+	uint32_t handle[ARRAY_SIZE(priorities)];
+	igt_spin_t *spin[MAX_ELSP_QLEN];
+	unsigned int mmio_base;
+
+	/* XXX use i915_query() to discover the mmio bases instead */
+	switch (ring) {
+	case I915_EXEC_DEFAULT:
+	case I915_EXEC_RENDER:
+		mmio_base = 0x2000;
+		break;
+#if 0
+	case I915_EXEC_BSD:
+		mmio_base = 0x12000;
+		break;
+#endif
+	case I915_EXEC_BLT:
+		mmio_base = 0x22000;
+		break;
+
+	case I915_EXEC_VEBOX:
+		if (intel_gen(intel_get_drm_devid(i915)) >= 11)
+			mmio_base = 0x1d8000;
+		else
+			mmio_base = 0x1a000;
+		break;
+
+	default:
+		igt_skip("mmio base not known\n");
+	}
+
+	for (int n = 0; n < ARRAY_SIZE(spin); n++) {
+		const struct igt_spin_factory opts = {
+			.ctx = create_highest_priority(i915),
+			.engine = ring,
+		};
+		spin[n] = __igt_spin_factory(i915, &opts);
+		gem_context_destroy(i915, opts.ctx);
+	}
+
+	for (int i = 0; i < ARRAY_SIZE(priorities); i++) {
+		uint32_t ctx = gem_queue_create(i915);
+		gem_context_set_priority(i915, ctx, priorities[i]);
+		handle[i] = store_timestamp(i915, ctx, ring, mmio_base);
+		gem_context_destroy(i915, ctx);
+	}
+
+	for (int n = 0; n < ARRAY_SIZE(spin); n++)
+		igt_spin_free(i915, spin[n]);
+
+	for (int i = 0; i < ARRAY_SIZE(priorities); i++) {
+		uint32_t *ptr;
+
+		ptr = gem_mmap__gtt(i915, handle[i], 4096, PROT_READ);
+		gem_set_domain(i915, handle[i], /* no write hazard lies! */
+			       I915_GEM_DOMAIN_GTT, I915_GEM_DOMAIN_GTT);
+		gem_close(i915, handle[i]);
+
+		handle[i] = ptr[1023];
+		munmap(ptr, 4096);
+
+		igt_debug("ctx[%d] .prio=%d, timestamp=%u\n",
+			  i, priorities[i], handle[i]);
+	}
+
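+	/* HI ran first and so must have sampled the earlier timestamp. */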
+	igt_assert((int32_t)(handle[HI] - handle[LO]) < 0);
+}
+
+static void reorder(int i915, unsigned ring, unsigned flags)
+#define EQUAL 1
+{
+	IGT_CORK_HANDLE(cork);
+	uint32_t scratch;
+	uint32_t *ptr;
+	uint32_t ctx[2];
+	uint32_t plug;
+
+	ctx[LO] = gem_queue_create(i915);
+	gem_context_set_priority(i915, ctx[LO], MIN_PRIO);
+
+	ctx[HI] = gem_queue_create(i915);
+	gem_context_set_priority(i915, ctx[HI], flags & EQUAL ? MIN_PRIO : 0);
+
+	scratch = gem_create(i915, 4096);
+	plug = igt_cork_plug(&cork, i915);
+
+	/*
+	 * We expect the high priority context to be executed first, and
+	 * so the final result will be the value from the low priority
+	 * context.
+	 */
+	store_dword(i915, ctx[LO], ring, scratch, 0, ctx[LO], plug, 0);
+	store_dword(i915, ctx[HI], ring, scratch, 0, ctx[HI], plug, 0);
+
+	unplug_show_queue(i915, &cork, ring);
+	gem_close(i915, plug);
+
+	gem_context_destroy(i915, ctx[LO]);
+	gem_context_destroy(i915, ctx[HI]);
+
+	ptr = gem_mmap__gtt(i915, scratch, 4096, PROT_READ);
+	gem_set_domain(i915, scratch, /* no write hazard lies! */
+		       I915_GEM_DOMAIN_GTT, I915_GEM_DOMAIN_GTT);
+	gem_close(i915, scratch);
+
+	if (flags & EQUAL) /* equal priority, result will be fifo */
+		igt_assert_eq_u32(ptr[0], ctx[HI]);
+	else
+		igt_assert_eq_u32(ptr[0], ctx[LO]);
+	munmap(ptr, 4096);
+}
+
+static void promotion(int i915, unsigned ring)
+{
+	IGT_CORK_HANDLE(cork);
+	uint32_t result, dep;
+	uint32_t *ptr;
+	uint32_t ctx[3];
+	uint32_t plug;
+
+	ctx[LO] = gem_queue_create(i915);
+	gem_context_set_priority(i915, ctx[LO], MIN_PRIO);
+
+	ctx[HI] = gem_queue_create(i915);
+	gem_context_set_priority(i915, ctx[HI], 0);
+
+	ctx[NOISE] = gem_queue_create(i915);
+	gem_context_set_priority(i915, ctx[NOISE], MIN_PRIO/2);
+
+	result = gem_create(i915, 4096);
+	dep = gem_create(i915, 4096);
+
+	plug = igt_cork_plug(&cork, i915);
+
+	/*
+	 * Expect that HI promotes LO, so the order will be LO, HI, NOISE.
+	 *
+	 * fifo would be NOISE, LO, HI.
+	 * strict priority would be HI, NOISE, LO.
+	 */
+	store_dword(i915, ctx[NOISE], ring, result, 0, ctx[NOISE], plug, 0);
+	store_dword(i915, ctx[LO], ring, result, 0, ctx[LO], plug, 0);
+
+	/* link LO <-> HI via a dependency on another buffer */
+	store_dword(i915, ctx[LO], ring, dep, 0, ctx[LO], 0, I915_GEM_DOMAIN_INSTRUCTION);
+	store_dword(i915, ctx[HI], ring, dep, 0, ctx[HI], 0, 0);
+
+	store_dword(i915, ctx[HI], ring, result, 0, ctx[HI], 0, 0);
+
+	unplug_show_queue(i915, &cork, ring);
+	gem_close(i915, plug);
+
+	gem_context_destroy(i915, ctx[NOISE]);
+	gem_context_destroy(i915, ctx[LO]);
+	gem_context_destroy(i915, ctx[HI]);
+
+	ptr = gem_mmap__gtt(i915, dep, 4096, PROT_READ);
+	gem_set_domain(i915, dep, /* no write hazard lies! */
+			I915_GEM_DOMAIN_GTT, I915_GEM_DOMAIN_GTT);
+	gem_close(i915, dep);
+
+	igt_assert_eq_u32(ptr[0], ctx[HI]);
+	munmap(ptr, 4096);
+
+	ptr = gem_mmap__gtt(i915, result, 4096, PROT_READ);
+	gem_set_domain(i915, result, /* no write hazard lies! */
+			I915_GEM_DOMAIN_GTT, I915_GEM_DOMAIN_GTT);
+	gem_close(i915, result);
+
+	igt_assert_eq_u32(ptr[0], ctx[NOISE]);
+	munmap(ptr, 4096);
+}
+
+static void smoketest(int i915, unsigned ring, unsigned timeout)
+{
+	const int ncpus = sysconf(_SC_NPROCESSORS_ONLN);
+	unsigned engines[16];
+	unsigned nengine;
+	unsigned engine;
+	uint32_t scratch;
+	uint32_t *ptr;
+
+	nengine = 0;
+	for_each_engine(i915, engine) {
+		if (ignore_engine(engine))
+			continue;
+
+		engines[nengine++] = engine;
+	}
+	igt_require(nengine);
+
+	scratch = gem_create(i915, 4096);
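+	/*
+	 * Each child hammers a randomly chosen engine from a queue whose
+	 * priority is re-randomised every iteration, writing its signature
+	 * (~child) and a running count into its slot of the scratch page.
+	 */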
+	igt_fork(child, ncpus) {
+		unsigned long count = 0;
+		uint32_t ctx;
+
+		hars_petruska_f54_1_random_perturb(child);
+
+		ctx = gem_queue_create(i915);
+		igt_until_timeout(timeout) {
+			int prio;
+
+			prio = hars_petruska_f54_1_random_unsafe_max(MAX_PRIO - MIN_PRIO) + MIN_PRIO;
+			gem_context_set_priority(i915, ctx, prio);
+
+			engine = engines[hars_petruska_f54_1_random_unsafe_max(nengine)];
+			store_dword(i915, ctx, engine, scratch,
+				    8*child + 0, ~child,
+				    0, 0);
+			for (unsigned int step = 0; step < 8; step++)
+				store_dword(i915, ctx, engine, scratch,
+					    8*child + 4, count++,
+					    0, 0);
+		}
+		gem_context_destroy(i915, ctx);
+	}
+	igt_waitchildren();
+
+	ptr = gem_mmap__gtt(i915, scratch, 4096, PROT_READ);
+	gem_set_domain(i915, scratch, /* no write hazard lies! */
+			I915_GEM_DOMAIN_GTT, I915_GEM_DOMAIN_GTT);
+	gem_close(i915, scratch);
+
+	for (unsigned n = 0; n < ncpus; n++) {
+		igt_assert_eq_u32(ptr[2*n], ~n);
+		/*
+		 * Note this count is approximate due to unconstrained
+		 * ordering of the dword writes between engines.
+		 *
+		 * Take the result with a pinch of salt.
+		 */
+		igt_info("Child[%d] completed %u cycles\n",  n, ptr[2*n+1]);
+	}
+	munmap(ptr, 4096);
+}
+
+igt_main
+{
+	const struct intel_execution_engine *e;
+	int i915 = -1;
+
+	igt_fixture {
+		i915 = drm_open_driver(DRIVER_INTEL);
+		igt_require_gem(i915);
+	}
+
+	igt_subtest_group {
+		igt_fixture {
+			igt_require(gem_contexts_has_shared_gtt(i915));
+			igt_fork_hang_detector(i915);
+		}
+
+		igt_subtest("create-shared-gtt")
+			create_shared_gtt(i915, 0);
+
+		igt_subtest("detached-shared-gtt")
+			create_shared_gtt(i915, DETACHED);
+
+		igt_subtest("disjoint-timelines")
+			disjoint_timelines(i915);
+
+		igt_subtest("single-timeline")
+			single_timeline(i915);
+
+		igt_subtest("exhaust-shared-gtt")
+			exhaust_shared_gtt(i915, 0);
+
+		igt_subtest("exhaust-shared-gtt-lrc")
+			exhaust_shared_gtt(i915, EXHAUST_LRC);
+
+		for (e = intel_execution_engines; e->name; e++) {
+			igt_subtest_f("exec-shared-gtt-%s", e->name)
+				exec_shared_gtt(i915, e->exec_id | e->flags);
+
+			if (!ignore_engine(e->exec_id | e->flags)) {
+				igt_subtest_f("exec-single-timeline-%s",
+					      e->name)
+					exec_single_timeline(i915,
+							     e->exec_id | e->flags);
+			}
+
+			/*
+			 * Check that the shared contexts operate independently,
+			 * that is requests on one ("queue") can be scheduled
+			 * around another queue. We only check the basics here,
+			 * enough to reduce the queue into just another context,
+			 * and so rely on gem_exec_schedule to prove the rest.
+			 */
+			igt_subtest_group {
+				igt_fixture {
+					gem_require_ring(i915, e->exec_id | e->flags);
+					igt_require(gem_can_store_dword(i915, e->exec_id | e->flags));
+					igt_require(gem_scheduler_enabled(i915));
+					igt_require(gem_scheduler_has_ctx_priority(i915));
+				}
+
+				igt_subtest_f("Q-independent-%s", e->name)
+					independent(i915, e->exec_id | e->flags, 0);
+
+				igt_subtest_f("Q-in-order-%s", e->name)
+					reorder(i915, e->exec_id | e->flags, EQUAL);
+
+				igt_subtest_f("Q-out-order-%s", e->name)
+					reorder(i915, e->exec_id | e->flags, 0);
+
+				igt_subtest_f("Q-promotion-%s", e->name)
+					promotion(i915, e->exec_id | e->flags);
+
+				igt_subtest_f("Q-smoketest-%s", e->name)
+					smoketest(i915, e->exec_id | e->flags, 5);
+			}
+		}
+
+		igt_subtest("Q-smoketest-all") {
+			igt_require(gem_scheduler_enabled(i915));
+			igt_require(gem_scheduler_has_ctx_priority(i915));
+			smoketest(i915, -1, 30);
+		}
+
+		igt_fixture {
+			igt_stop_hang_detector();
+		}
+	}
+}
diff --git a/tests/i915/gem_exec_whisper.c b/tests/i915/gem_exec_whisper.c
index 6c3b53756..d3e0b0ba2 100644
--- a/tests/i915/gem_exec_whisper.c
+++ b/tests/i915/gem_exec_whisper.c
@@ -87,6 +87,7 @@  static void verify_reloc(int fd, uint32_t handle,
 #define HANG 0x20
 #define SYNC 0x40
 #define PRIORITY 0x80
+#define QUEUES 0x100
 
 struct hang {
 	struct drm_i915_gem_exec_object2 obj;
@@ -171,7 +172,7 @@  static void ctx_set_random_priority(int fd, uint32_t ctx)
 {
 	int prio = hars_petruska_f54_1_random_unsafe_max(1024) - 512;
 	gem_context_set_priority(fd, ctx, prio);
-};
+}
 
 static void whisper(int fd, unsigned engine, unsigned flags)
 {
@@ -226,6 +227,9 @@  static void whisper(int fd, unsigned engine, unsigned flags)
 	if (flags & CONTEXTS)
 		gem_require_contexts(fd);
 
+	if (flags & QUEUES)
+		igt_require(gem_has_queues(fd));
+
 	if (flags & HANG)
 		init_hang(&hang);
 
@@ -290,6 +294,10 @@  static void whisper(int fd, unsigned engine, unsigned flags)
 			for (n = 0; n < 64; n++)
 				contexts[n] = gem_context_create(fd);
 		}
+		if (flags & QUEUES) {
+			for (n = 0; n < 64; n++)
+				contexts[n] = gem_queue_create(fd);
+		}
 		if (flags & FDS) {
 			for (n = 0; n < 64; n++)
 				fds[n] = drm_open_driver(DRIVER_INTEL);
@@ -403,7 +411,7 @@  static void whisper(int fd, unsigned engine, unsigned flags)
 						execbuf.flags &= ~ENGINE_MASK;
 						execbuf.flags |= engines[rand() % nengine];
 					}
-					if (flags & CONTEXTS) {
+					if (flags & (CONTEXTS | QUEUES)) {
 						execbuf.rsvd1 = contexts[rand() % 64];
 						if (flags & PRIORITY)
 							ctx_set_random_priority(this_fd, execbuf.rsvd1);
@@ -486,7 +494,7 @@  static void whisper(int fd, unsigned engine, unsigned flags)
 			for (n = 0; n < 64; n++)
 				close(fds[n]);
 		}
-		if (flags & CONTEXTS) {
+		if (flags & (CONTEXTS | QUEUES)) {
 			for (n = 0; n < 64; n++)
 				gem_context_destroy(fd, contexts[n]);
 		}
@@ -522,18 +530,24 @@  igt_main
 		{ "chain-forked", CHAIN | FORKED },
 		{ "chain-interruptible", CHAIN | INTERRUPTIBLE },
 		{ "chain-sync", CHAIN | SYNC },
-		{ "contexts", CONTEXTS },
-		{ "contexts-interruptible", CONTEXTS | INTERRUPTIBLE},
-		{ "contexts-forked", CONTEXTS | FORKED},
-		{ "contexts-priority", CONTEXTS | FORKED | PRIORITY },
-		{ "contexts-chain", CONTEXTS | CHAIN },
-		{ "contexts-sync", CONTEXTS | SYNC },
 		{ "fds", FDS },
 		{ "fds-interruptible", FDS | INTERRUPTIBLE},
 		{ "fds-forked", FDS | FORKED},
 		{ "fds-priority", FDS | FORKED | PRIORITY },
 		{ "fds-chain", FDS | CHAIN},
 		{ "fds-sync", FDS | SYNC},
+		{ "contexts", CONTEXTS },
+		{ "contexts-interruptible", CONTEXTS | INTERRUPTIBLE},
+		{ "contexts-forked", CONTEXTS | FORKED},
+		{ "contexts-priority", CONTEXTS | FORKED | PRIORITY },
+		{ "contexts-chain", CONTEXTS | CHAIN },
+		{ "contexts-sync", CONTEXTS | SYNC },
+		{ "queues", QUEUES },
+		{ "queues-interruptible", QUEUES | INTERRUPTIBLE},
+		{ "queues-forked", QUEUES | FORKED},
+		{ "queues-priority", QUEUES | FORKED | PRIORITY },
+		{ "queues-chain", QUEUES | CHAIN },
+		{ "queues-sync", QUEUES | SYNC },
 		{ NULL }
 	};
 	int fd;
diff --git a/tests/meson.build b/tests/meson.build
index 3810bd760..3883ae127 100644
--- a/tests/meson.build
+++ b/tests/meson.build
@@ -114,6 +114,7 @@  i915_progs = [
 	'gem_ctx_exec',
 	'gem_ctx_isolation',
 	'gem_ctx_param',
+	'gem_ctx_shared',
 	'gem_ctx_switch',
 	'gem_ctx_thrash',
 	'gem_double_irq_loop',