diff mbox series

[29/38] drm/i915: Expose user control over the ppGTT associated with a context

Message ID 20190118140109.25261-30-chris@chris-wilson.co.uk (mailing list archive)
State New, archived
Headers show
Series [01/38] drm/i915/execlists: Store the highest priority context | expand

Commit Message

Chris Wilson Jan. 18, 2019, 2:01 p.m. UTC
Allow the user to share ppGTT between contexts on the same fd. This
gives the user the ability to relax their context isolation to share vm
between their own contexts, but does not allow them to import a vm from
another fd. The use case for sharing a vm is for the proposed virtual
engine work where a context may be created explicitly to setup a load
balancing engine, but always be run in conjunction with a second context
for rcs/compute etc. By giving control to the user on how they setup the
vm allows for them to have a single vm between all kernel contexts being
used to emulate a single client context, similarly to how we use a
single vm across all engines within a single kernel context today. It
also allows for future specification a separate vm between engines
inside a single kernel context should that be desired.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
---
 drivers/gpu/drm/i915/i915_gem_context.c       | 118 ++++++++-
 drivers/gpu/drm/i915/i915_gem_gtt.c           |  17 +-
 drivers/gpu/drm/i915/i915_gem_gtt.h           |  14 +-
 drivers/gpu/drm/i915/selftests/huge_pages.c   |   1 -
 .../gpu/drm/i915/selftests/i915_gem_context.c | 247 ++++++++++++++----
 drivers/gpu/drm/i915/selftests/i915_gem_gtt.c |   1 -
 drivers/gpu/drm/i915/selftests/mock_context.c |   8 +-
 include/uapi/drm/i915_drm.h                   |   1 +
 8 files changed, 339 insertions(+), 68 deletions(-)

Comments

Tvrtko Ursulin Jan. 23, 2019, noon UTC | #1
On 18/01/2019 14:01, Chris Wilson wrote:
> Allow the user to share ppGTT between contexts on the same fd. This
> gives the user the ability to relax their context isolation to share vm
> between their own contexts, but does not allow them to import a vm from
> another fd. The use case for sharing a vm is for the proposed virtual
> engine work where a context may be created explicitly to setup a load
> balancing engine, but always be run in conjunction with a second context
> for rcs/compute etc. By giving control to the user on how they setup the
> vm allows for them to have a single vm between all kernel contexts being
> used to emulate a single client context, similarly to how we use a
> single vm across all engines within a single kernel context today. It
> also allows for future specification a separate vm between engines
> inside a single kernel context should that be desired.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
> ---
>   drivers/gpu/drm/i915/i915_gem_context.c       | 118 ++++++++-
>   drivers/gpu/drm/i915/i915_gem_gtt.c           |  17 +-
>   drivers/gpu/drm/i915/i915_gem_gtt.h           |  14 +-
>   drivers/gpu/drm/i915/selftests/huge_pages.c   |   1 -
>   .../gpu/drm/i915/selftests/i915_gem_context.c | 247 ++++++++++++++----
>   drivers/gpu/drm/i915/selftests/i915_gem_gtt.c |   1 -
>   drivers/gpu/drm/i915/selftests/mock_context.c |   8 +-
>   include/uapi/drm/i915_drm.h                   |   1 +
>   8 files changed, 339 insertions(+), 68 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
> index 7c90981704bf..f707241dbc78 100644
> --- a/drivers/gpu/drm/i915/i915_gem_context.c
> +++ b/drivers/gpu/drm/i915/i915_gem_context.c
> @@ -109,6 +109,8 @@ static void lut_close(struct i915_gem_context *ctx)
>   		struct i915_vma *vma = rcu_dereference_raw(*slot);
>   
>   		radix_tree_iter_delete(&ctx->handles_vma, &iter, slot);
> +
> +		vma->open_count--;

I did not figure out what is this. A) why open coded vma management 
without any comments, and b) rest of the patch doesn't seem to touch 
this tree.

>   		__i915_gem_object_release_unless_active(vma->obj);
>   	}
>   	rcu_read_unlock();
> @@ -291,7 +293,7 @@ static void context_close(struct i915_gem_context *ctx)
>   	 */
>   	lut_close(ctx);
>   	if (ctx->ppgtt)
> -		i915_ppgtt_close(&ctx->ppgtt->vm);
> +		i915_ppgtt_close(ctx->ppgtt);

I'll need to figure out if it is okay for context to close the ppgtt 
instead of just dropping references to it. Like two contexts sharing 
ppgtt and one closes it, the other one should continue to work fine, no? 
Or even a third context is created sharing the same ppgtt.

>   
>   	ctx->file_priv = ERR_PTR(-EBADF);
>   	i915_gem_context_put(ctx);
> @@ -401,6 +403,23 @@ static void __destroy_hw_context(struct i915_gem_context *ctx,
>   	context_close(ctx);
>   }
>   
> +static void __set_ppgtt(struct i915_gem_context *ctx,
> +			struct i915_hw_ppgtt *ppgtt)
> +{
> +	if (ppgtt == ctx->ppgtt)
> +		return;
> +
> +	if (ctx->ppgtt) {
> +		i915_ppgtt_close(ctx->ppgtt);

Feels incorrect to close it if it could be shared and in use elsewhere.

> +		i915_ppgtt_put(ctx->ppgtt);
> +	}
> +
> +	i915_ppgtt_open(ppgtt);

Do we need some protection against trying to re-open a closed ppgtt here?

> +	ctx->ppgtt = i915_ppgtt_get(ppgtt);
> +
> +	ctx->desc_template = default_desc_template(ctx->i915, ppgtt);
> +}
> +
>   static struct i915_gem_context *
>   i915_gem_create_context(struct drm_i915_private *dev_priv,
>   			struct drm_i915_file_private *file_priv)
> @@ -427,8 +446,8 @@ i915_gem_create_context(struct drm_i915_private *dev_priv,
>   			return ERR_CAST(ppgtt);
>   		}
>   
> -		ctx->ppgtt = ppgtt;
> -		ctx->desc_template = default_desc_template(dev_priv, ppgtt);
> +		__set_ppgtt(ctx, ppgtt);
> +		i915_ppgtt_put(ppgtt);
>   	}
>   
>   	trace_i915_context_create(ctx);
> @@ -784,6 +803,87 @@ int i915_gem_switch_to_kernel_context(struct drm_i915_private *i915)
>   	return 0;
>   }
>   
> +static int get_ppgtt(struct i915_gem_context *ctx, u64 *out)

u32 *out ?

> +{
> +	struct drm_i915_file_private *file_priv = ctx->file_priv;
> +	struct i915_hw_ppgtt *ppgtt;
> +	int ret;
> +
> +	/* XXX rcu acquire? */
> +	ppgtt = ctx->ppgtt;
> +	if (!ppgtt)
> +		return -ENODEV;
> +
> +	ret = mutex_lock_interruptible(&file_priv->vm_lock);
> +	if (ret)
> +		return ret;
> +
> +	ret = ppgtt->user_handle;
> +	if (!ret) {
> +		ret = idr_alloc(&file_priv->vm_idr, ppgtt, 0, 0, GFP_KERNEL);

GEM_WARN_ON(ret == 0) just in case?

> +		if (ret > 0) {
> +			ppgtt->user_handle = ret;
> +			i915_ppgtt_get(ppgtt);
> +		}
> +	}
> +
> +	mutex_unlock(&file_priv->vm_lock);
> +	if (ret < 0)
> +		return ret;
> +
> +	*out = ret;
> +	return 0;
> +}
> +
> +static int set_ppgtt(struct i915_gem_context *ctx, u32 id)
> +{
> +	struct drm_i915_file_private *file_priv = ctx->file_priv;
> +	struct i915_hw_ppgtt *ppgtt;
> +	int err;
> +
> +	err = mutex_lock_interruptible(&file_priv->vm_lock);
> +	if (err)
> +		return err;
> +
> +	ppgtt = idr_find(&file_priv->vm_idr, id);
> +	if (ppgtt)
> +		i915_ppgtt_get(ppgtt);
> +	mutex_unlock(&file_priv->vm_lock);
> +	if (!ppgtt)
> +		return -ENOENT;
> +
> +	err = mutex_lock_interruptible(&ctx->i915->drm.struct_mutex);
> +	if (err)
> +		goto out;
> +
> +	/*
> +	 * We need to flush any requests using the current ppgtt before
> +	 * we release it as the requests do not hold a reference themselves,
> +	 * only indirectly through the context. By switching to the kernel
> +	 * context, we ensure that the TLBs are reloaded before using the
> +	 * same context again -- an extra layer of paranoia over wait_for_idle.
> +	 */
> +	err = i915_gem_switch_to_kernel_context(ctx->i915);
> +	if (err)
> +		goto out_unlock;
> +
> +	err = i915_gem_wait_for_idle(ctx->i915,
> +				     I915_WAIT_LOCKED |
> +				     I915_WAIT_INTERRUPTIBLE,
> +				     MAX_SCHEDULE_TIMEOUT);

This is a bit worrying. Every new client setting up their contexts 
causes a global sync point. Sounds bad for scalability. It may be that 
in practice the event might be happening only every few seconds, rather 
than multiple times per second, but I am only guessing. How difficult to 
make requests own ppgtt directly?

> +	if (err)
> +		goto out_unlock;
> +
> +	__set_ppgtt(ctx, ppgtt);
> +
> +out_unlock:
> +	mutex_unlock(&ctx->i915->drm.struct_mutex);
> +
> +out:
> +	i915_ppgtt_put(ppgtt);
> +	return err;
> +}
> +
>   static bool client_is_banned(struct drm_i915_file_private *file_priv)
>   {
>   	return atomic_read(&file_priv->ban_score) >= I915_CLIENT_SCORE_BANNED;
> @@ -896,6 +996,9 @@ int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data,
>   	case I915_CONTEXT_PARAM_PRIORITY:
>   		args->value = ctx->sched.priority >> I915_USER_PRIORITY_SHIFT;
>   		break;
> +	case I915_CONTEXT_PARAM_VM:
> +		ret = get_ppgtt(ctx, &args->value);
> +		break;
>   	default:
>   		ret = -EINVAL;
>   		break;
> @@ -968,6 +1071,15 @@ int i915_gem_context_setparam_ioctl(struct drm_device *dev, void *data,
>   		}
>   		break;
>   
> +	case I915_CONTEXT_PARAM_VM:
> +		if (args->size)
> +			ret = -EINVAL;
> +		else if (upper_32_bits(args->value))
> +			ret = -EINVAL;
> +		else
> +			ret = set_ppgtt(ctx, lower_32_bits(args->value));
> +		break;
> +
>   	default:
>   		ret = -EINVAL;
>   		break;
> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
> index 49b00996a15e..27b40c14e745 100644
> --- a/drivers/gpu/drm/i915/i915_gem_gtt.c
> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
> @@ -2104,10 +2104,21 @@ i915_ppgtt_create(struct drm_i915_private *i915,
>   	return ppgtt;
>   }
>   
> -void i915_ppgtt_close(struct i915_address_space *vm)
> +void i915_ppgtt_open(struct i915_hw_ppgtt *ppgtt)
>   {
> -	GEM_BUG_ON(vm->closed);
> -	vm->closed = true;
> +	GEM_BUG_ON(ppgtt->vm.closed);
> +
> +	ppgtt->open_count++;
> +}
> +
> +void i915_ppgtt_close(struct i915_hw_ppgtt *ppgtt)
> +{
> +	GEM_BUG_ON(!ppgtt->open_count);
> +	if (--ppgtt->open_count)
> +		return;
> +
> +	GEM_BUG_ON(ppgtt->vm.closed);
> +	ppgtt->vm.closed = true;
>   }
>   
>   static void ppgtt_destroy_vma(struct i915_address_space *vm)
> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
> index 61698609a62c..bb750318f52a 100644
> --- a/drivers/gpu/drm/i915/i915_gem_gtt.h
> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
> @@ -391,6 +391,8 @@ struct i915_hw_ppgtt {
>   	struct kref ref;
>   
>   	unsigned long pd_dirty_rings;
> +	unsigned int open_count;
> +
>   	union {
>   		struct i915_pml4 pml4;		/* GEN8+ & 48b PPGTT */
>   		struct i915_page_directory_pointer pdp;	/* GEN8+ */
> @@ -608,12 +610,16 @@ int i915_ppgtt_init_hw(struct drm_i915_private *dev_priv);
>   void i915_ppgtt_release(struct kref *kref);
>   struct i915_hw_ppgtt *i915_ppgtt_create(struct drm_i915_private *dev_priv,
>   					struct drm_i915_file_private *fpriv);
> -void i915_ppgtt_close(struct i915_address_space *vm);
> -static inline void i915_ppgtt_get(struct i915_hw_ppgtt *ppgtt)
> +
> +void i915_ppgtt_open(struct i915_hw_ppgtt *ppgtt);
> +void i915_ppgtt_close(struct i915_hw_ppgtt *ppgtt);
> +
> +static inline struct i915_hw_ppgtt *i915_ppgtt_get(struct i915_hw_ppgtt *ppgtt)
>   {
> -	if (ppgtt)
> -		kref_get(&ppgtt->ref);
> +	kref_get(&ppgtt->ref);
> +	return ppgtt;

Unrelated hunk?

>   }
> +
>   static inline void i915_ppgtt_put(struct i915_hw_ppgtt *ppgtt)
>   {
>   	if (ppgtt)
> diff --git a/drivers/gpu/drm/i915/selftests/huge_pages.c b/drivers/gpu/drm/i915/selftests/huge_pages.c
> index a9a2fa35876f..a7ee8e97bcee 100644
> --- a/drivers/gpu/drm/i915/selftests/huge_pages.c
> +++ b/drivers/gpu/drm/i915/selftests/huge_pages.c
> @@ -1734,7 +1734,6 @@ int i915_gem_huge_page_mock_selftests(void)
>   	err = i915_subtests(tests, ppgtt);
>   
>   out_close:
> -	i915_ppgtt_close(&ppgtt->vm);
>   	i915_ppgtt_put(ppgtt);
>   
>   out_unlock:
> diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/selftests/i915_gem_context.c
> index 6a241745e78a..2864cfb82325 100644
> --- a/drivers/gpu/drm/i915/selftests/i915_gem_context.c
> +++ b/drivers/gpu/drm/i915/selftests/i915_gem_context.c
> @@ -243,6 +243,12 @@ static int live_nop_switch(void *arg)
>   	return err;
>   }
>   
> +#define GEN8_HIGH_ADDRESS_BIT 47
> +static inline u64 gen8_canonical_addr(u64 address)
> +{
> +	return sign_extend64(address, GEN8_HIGH_ADDRESS_BIT);
> +}

We could move the copy from i915_gem_execbuffer.c to i915_gem_utils.h or 
somewhere.

> +
>   static struct i915_vma *
>   gpu_fill_dw(struct i915_vma *vma, u64 offset, unsigned long count, u32 value)
>   {
> @@ -266,6 +272,7 @@ gpu_fill_dw(struct i915_vma *vma, u64 offset, unsigned long count, u32 value)
>   
>   	GEM_BUG_ON(offset + (count - 1) * PAGE_SIZE > vma->node.size);
>   	offset += vma->node.start;
> +	offset = gen8_canonical_addr(offset);
>   
>   	for (n = 0; n < count; n++) {
>   		if (gen >= 8) {
> @@ -391,6 +398,7 @@ static int gpu_fill(struct drm_i915_gem_object *obj,
>   		goto skip_request;
>   
>   	i915_gem_object_set_active_reference(batch->obj);
> +
>   	i915_vma_unpin(batch);
>   	i915_vma_close(batch);
>   
> @@ -439,7 +447,8 @@ static int cpu_fill(struct drm_i915_gem_object *obj, u32 value)
>   	return 0;
>   }
>   
> -static int cpu_check(struct drm_i915_gem_object *obj, unsigned int max)
> +static noinline int cpu_check(struct drm_i915_gem_object *obj,
> +			      unsigned int idx, unsigned int max)
>   {
>   	unsigned int n, m, needs_flush;
>   	int err;
> @@ -457,8 +466,10 @@ static int cpu_check(struct drm_i915_gem_object *obj, unsigned int max)
>   
>   		for (m = 0; m < max; m++) {
>   			if (map[m] != m) {
> -				pr_err("Invalid value at page %d, offset %d: found %x expected %x\n",
> -				       n, m, map[m], m);
> +				pr_err("%pS: Invalid value at object %d page %d/%ld, offset %d/%d: found %x expected %x\n",
> +				       __builtin_return_address(0), idx,
> +				       n, real_page_count(obj), m, max,
> +				       map[m], m);
>   				err = -EINVAL;
>   				goto out_unmap;
>   			}
> @@ -466,8 +477,9 @@ static int cpu_check(struct drm_i915_gem_object *obj, unsigned int max)
>   
>   		for (; m < DW_PER_PAGE; m++) {
>   			if (map[m] != STACK_MAGIC) {
> -				pr_err("Invalid value at page %d, offset %d: found %x expected %x\n",
> -				       n, m, map[m], STACK_MAGIC);
> +				pr_err("%pS: Invalid value at object %d page %d, offset %d: found %x expected %x (uninitialised)\n",
> +				       __builtin_return_address(0), idx, n, m,
> +				       map[m], STACK_MAGIC);
>   				err = -EINVAL;
>   				goto out_unmap;
>   			}
> @@ -545,12 +557,8 @@ static unsigned long max_dwords(struct drm_i915_gem_object *obj)
>   static int igt_ctx_exec(void *arg)
>   {
>   	struct drm_i915_private *i915 = arg;
> -	struct drm_i915_gem_object *obj = NULL;
> -	unsigned long ncontexts, ndwords, dw;
> -	struct drm_file *file;
> -	IGT_TIMEOUT(end_time);
> -	LIST_HEAD(objects);
> -	struct live_test t;
> +	struct intel_engine_cs *engine;
> +	enum intel_engine_id id;
>   	int err = -ENODEV;
>   
>   	/*
> @@ -562,38 +570,42 @@ static int igt_ctx_exec(void *arg)
>   	if (!DRIVER_CAPS(i915)->has_logical_contexts)
>   		return 0;
>   
> -	file = mock_file(i915);
> -	if (IS_ERR(file))
> -		return PTR_ERR(file);
> +	for_each_engine(engine, i915, id) {
> +		struct drm_i915_gem_object *obj = NULL;
> +		unsigned long ncontexts, ndwords, dw;
> +		struct drm_file *file;
> +		IGT_TIMEOUT(end_time);
> +		LIST_HEAD(objects);
> +		struct live_test t;
>   
> -	mutex_lock(&i915->drm.struct_mutex);
> +		if (!intel_engine_can_store_dword(engine))
> +			continue;
>   
> -	err = begin_live_test(&t, i915, __func__, "");
> -	if (err)
> -		goto out_unlock;
> +		if (!engine->context_size)
> +			continue; /* No logical context support in HW */
>   
> -	ncontexts = 0;
> -	ndwords = 0;
> -	dw = 0;
> -	while (!time_after(jiffies, end_time)) {
> -		struct intel_engine_cs *engine;
> -		struct i915_gem_context *ctx;
> -		unsigned int id;
> +		file = mock_file(i915);
> +		if (IS_ERR(file))
> +			return PTR_ERR(file);
>   
> -		ctx = i915_gem_create_context(i915, file->driver_priv);
> -		if (IS_ERR(ctx)) {
> -			err = PTR_ERR(ctx);
> +		mutex_lock(&i915->drm.struct_mutex);
> +
> +		err = begin_live_test(&t, i915, __func__, engine->name);
> +		if (err)
>   			goto out_unlock;
> -		}
>   
> -		for_each_engine(engine, i915, id) {
> +		ncontexts = 0;
> +		ndwords = 0;
> +		dw = 0;
> +		while (!time_after(jiffies, end_time)) {
> +			struct i915_gem_context *ctx;
>   			intel_wakeref_t wakeref;
>   
> -			if (!engine->context_size)
> -				continue; /* No logical context support in HW */
> -
> -			if (!intel_engine_can_store_dword(engine))
> -				continue;
> +			ctx = i915_gem_create_context(i915, file->driver_priv);
> +			if (IS_ERR(ctx)) {
> +				err = PTR_ERR(ctx);
> +				goto out_unlock;
> +			}
>   
>   			if (!obj) {
>   				obj = create_test_object(ctx, file, &objects);
> @@ -603,7 +615,6 @@ static int igt_ctx_exec(void *arg)
>   				}
>   			}
>   
> -			err = 0;
>   			with_intel_runtime_pm(i915, wakeref)
>   				err = gpu_fill(obj, ctx, engine, dw);
>   			if (err) {
> @@ -618,32 +629,158 @@ static int igt_ctx_exec(void *arg)
>   				obj = NULL;
>   				dw = 0;
>   			}
> +
>   			ndwords++;
> +			ncontexts++;
> +		}
> +
> +		pr_info("Submitted %lu contexts to %s, filling %lu dwords\n",
> +			ncontexts, engine->name, ndwords);
> +
> +		ncontexts = dw = 0;
> +		list_for_each_entry(obj, &objects, st_link) {
> +			unsigned int rem =
> +				min_t(unsigned int, ndwords - dw, max_dwords(obj));
> +
> +			err = cpu_check(obj, ncontexts++, rem);
> +			if (err)
> +				break;
> +
> +			dw += rem;
>   		}
> -		ncontexts++;
> +
> +out_unlock:
> +		if (end_live_test(&t))
> +			err = -EIO;
> +		mutex_unlock(&i915->drm.struct_mutex);
> +
> +		mock_file_free(i915, file);
> +		if (err)
> +			return err;
>   	}
> -	pr_info("Submitted %lu contexts (across %u engines), filling %lu dwords\n",
> -		ncontexts, RUNTIME_INFO(i915)->num_rings, ndwords);
>   
> -	dw = 0;
> -	list_for_each_entry(obj, &objects, st_link) {
> -		unsigned int rem =
> -			min_t(unsigned int, ndwords - dw, max_dwords(obj));
> +	return 0;
> +}
>   
> -		err = cpu_check(obj, rem);
> +static int igt_shared_ctx_exec(void *arg)
> +{
> +	struct drm_i915_private *i915 = arg;
> +	struct intel_engine_cs *engine;
> +	enum intel_engine_id id;
> +	int err = -ENODEV;
> +
> +	/*
> +	 * Create a few different contexts with the same mm and write
> +	 * through each ctx using the GPU making sure those writes end
> +	 * up in the expected pages of our obj.
> +	 */
> +
> +	for_each_engine(engine, i915, id) {
> +		unsigned long ncontexts, ndwords, dw;
> +		struct drm_i915_gem_object *obj = NULL;
> +		struct i915_gem_context *ctx = NULL;
> +		struct i915_gem_context *parent;
> +		struct drm_file *file;
> +		IGT_TIMEOUT(end_time);
> +		LIST_HEAD(objects);
> +		struct live_test t;
> +
> +		if (!intel_engine_can_store_dword(engine))
> +			continue;
> +
> +		file = mock_file(i915);
> +		if (IS_ERR(file))
> +			return PTR_ERR(file);
> +
> +		mutex_lock(&i915->drm.struct_mutex);
> +
> +		err = begin_live_test(&t, i915, __func__, engine->name);
>   		if (err)
> -			break;
> +			goto out_unlock;
>   
> -		dw += rem;
> -	}
> +		parent = i915_gem_create_context(i915, file->driver_priv);
> +		if (IS_ERR(parent)) {
> +			err = PTR_ERR(parent);
> +			if (err == -ENODEV) /* no logical ctx support */
> +				err = 0;
> +			goto out_unlock;
> +		}
> +
> +		if (!parent->ppgtt) {
> +			err = 0;
> +			goto out_unlock;
> +		}
> +
> +		ncontexts = 0;
> +		ndwords = 0;
> +		dw = 0;
> +		while (!time_after(jiffies, end_time)) {
> +			intel_wakeref_t wakeref;
> +
> +			if (ctx)
> +				__destroy_hw_context(ctx, file->driver_priv);
> +
> +			ctx = i915_gem_create_context(i915, file->driver_priv);
> +			if (IS_ERR(ctx)) {
> +				err = PTR_ERR(ctx);
> +				goto out_unlock;
> +			}
> +
> +			__set_ppgtt(ctx, parent->ppgtt);
> +
> +			if (!obj) {
> +				obj = create_test_object(parent, file, &objects);
> +				if (IS_ERR(obj)) {
> +					err = PTR_ERR(obj);
> +					goto out_unlock;
> +				}
> +			}
> +
> +			err = 0;
> +			with_intel_runtime_pm(i915, wakeref)
> +				err = gpu_fill(obj, ctx, engine, dw);
> +			if (err) {
> +				pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) in ctx %u [full-ppgtt? %s], err=%d\n",
> +				       ndwords, dw, max_dwords(obj),
> +				       engine->name, ctx->hw_id,
> +				       yesno(!!ctx->ppgtt), err);
> +				goto out_unlock;
> +			}
> +
> +			if (++dw == max_dwords(obj)) {
> +				obj = NULL;
> +				dw = 0;
> +			}
> +
> +			ndwords++;
> +			ncontexts++;
> +		}
> +		pr_info("Submitted %lu contexts to %s, filling %lu dwords\n",
> +			ncontexts, engine->name, ndwords);
> +
> +		ncontexts = dw = 0;
> +		list_for_each_entry(obj, &objects, st_link) {
> +			unsigned int rem =
> +				min_t(unsigned int, ndwords - dw, max_dwords(obj));
> +
> +			err = cpu_check(obj, ncontexts++, rem);
> +			if (err)
> +				break;
> +
> +			dw += rem;
> +		}
>   
>   out_unlock:
> -	if (end_live_test(&t))
> -		err = -EIO;
> -	mutex_unlock(&i915->drm.struct_mutex);
> +		if (end_live_test(&t))
> +			err = -EIO;
> +		mutex_unlock(&i915->drm.struct_mutex);
>   
> -	mock_file_free(i915, file);
> -	return err;
> +		mock_file_free(i915, file);
> +		if (err)
> +			return err;
> +	}
> +
> +	return 0;
>   }
>   
>   static int igt_ctx_readonly(void *arg)
> @@ -652,7 +789,7 @@ static int igt_ctx_readonly(void *arg)
>   	struct drm_i915_gem_object *obj = NULL;
>   	struct i915_gem_context *ctx;
>   	struct i915_hw_ppgtt *ppgtt;
> -	unsigned long ndwords, dw;
> +	unsigned long idx, ndwords, dw;
>   	struct drm_file *file;
>   	I915_RND_STATE(prng);
>   	IGT_TIMEOUT(end_time);
> @@ -733,6 +870,7 @@ static int igt_ctx_readonly(void *arg)
>   		ndwords, RUNTIME_INFO(i915)->num_rings);
>   
>   	dw = 0;
> +	idx = 0;
>   	list_for_each_entry(obj, &objects, st_link) {
>   		unsigned int rem =
>   			min_t(unsigned int, ndwords - dw, max_dwords(obj));
> @@ -742,7 +880,7 @@ static int igt_ctx_readonly(void *arg)
>   		if (i915_gem_object_is_readonly(obj))
>   			num_writes = 0;
>   
> -		err = cpu_check(obj, num_writes);
> +		err = cpu_check(obj, idx++, num_writes);
>   		if (err)
>   			break;
>   
> @@ -1214,6 +1352,7 @@ int i915_gem_context_live_selftests(struct drm_i915_private *dev_priv)
>   		SUBTEST(live_nop_switch),
>   		SUBTEST(igt_ctx_exec),
>   		SUBTEST(igt_ctx_readonly),
> +		SUBTEST(igt_shared_ctx_exec),
>   		SUBTEST(igt_vm_isolation),
>   	};
>   
> diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c
> index 35eb40e5de91..298c4e11deda 100644
> --- a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c
> +++ b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c
> @@ -1020,7 +1020,6 @@ static int exercise_ppgtt(struct drm_i915_private *dev_priv,
>   
>   	err = func(dev_priv, &ppgtt->vm, 0, ppgtt->vm.total, end_time);
>   
> -	i915_ppgtt_close(&ppgtt->vm);
>   	i915_ppgtt_put(ppgtt);
>   out_unlock:
>   	mutex_unlock(&dev_priv->drm.struct_mutex);
> diff --git a/drivers/gpu/drm/i915/selftests/mock_context.c b/drivers/gpu/drm/i915/selftests/mock_context.c
> index 2009e776b5d8..b9fd2a4b95e9 100644
> --- a/drivers/gpu/drm/i915/selftests/mock_context.c
> +++ b/drivers/gpu/drm/i915/selftests/mock_context.c
> @@ -59,13 +59,17 @@ mock_context(struct drm_i915_private *i915,
>   		goto err_handles;
>   
>   	if (name) {
> +		struct i915_hw_ppgtt *ppgtt;
> +
>   		ctx->name = kstrdup(name, GFP_KERNEL);
>   		if (!ctx->name)
>   			goto err_put;
>   
> -		ctx->ppgtt = mock_ppgtt(i915, name);
> -		if (!ctx->ppgtt)
> +		ppgtt = mock_ppgtt(i915, name);
> +		if (!ppgtt)
>   			goto err_put;
> +
> +		__set_ppgtt(ctx, ppgtt);
>   	}
>   
>   	return ctx;
> diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
> index c3336c931a95..eb19cf8a77ef 100644
> --- a/include/uapi/drm/i915_drm.h
> +++ b/include/uapi/drm/i915_drm.h
> @@ -1516,6 +1516,7 @@ struct drm_i915_gem_context_param {
>   #define   I915_CONTEXT_MAX_USER_PRIORITY	1023 /* inclusive */
>   #define   I915_CONTEXT_DEFAULT_PRIORITY		0
>   #define   I915_CONTEXT_MIN_USER_PRIORITY	-1023 /* inclusive */
> +#define I915_CONTEXT_PARAM_VM		0x7

And some kernel doc for the param of course. :)

>   	__u64 value;
>   };
>   
> 

Okay I went a bit back and forth with this patch since I didn't really 
understand the ctx and ppggt lifetime/open-close rules.

Regards,

Tvrtko
Chris Wilson Jan. 23, 2019, 12:15 p.m. UTC | #2
Quoting Tvrtko Ursulin (2019-01-23 12:00:49)
> 
> On 18/01/2019 14:01, Chris Wilson wrote:
> > Allow the user to share ppGTT between contexts on the same fd. This
> > gives the user the ability to relax their context isolation to share vm
> > between their own contexts, but does not allow them to import a vm from
> > another fd. The use case for sharing a vm is for the proposed virtual
> > engine work where a context may be created explicitly to setup a load
> > balancing engine, but always be run in conjunction with a second context
> > for rcs/compute etc. By giving control to the user on how they setup the
> > vm allows for them to have a single vm between all kernel contexts being
> > used to emulate a single client context, similarly to how we use a
> > single vm across all engines within a single kernel context today. It
> > also allows for future specification a separate vm between engines
> > inside a single kernel context should that be desired.
> > 
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
> > ---
> >   drivers/gpu/drm/i915/i915_gem_context.c       | 118 ++++++++-
> >   drivers/gpu/drm/i915/i915_gem_gtt.c           |  17 +-
> >   drivers/gpu/drm/i915/i915_gem_gtt.h           |  14 +-
> >   drivers/gpu/drm/i915/selftests/huge_pages.c   |   1 -
> >   .../gpu/drm/i915/selftests/i915_gem_context.c | 247 ++++++++++++++----
> >   drivers/gpu/drm/i915/selftests/i915_gem_gtt.c |   1 -
> >   drivers/gpu/drm/i915/selftests/mock_context.c |   8 +-
> >   include/uapi/drm/i915_drm.h                   |   1 +
> >   8 files changed, 339 insertions(+), 68 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
> > index 7c90981704bf..f707241dbc78 100644
> > --- a/drivers/gpu/drm/i915/i915_gem_context.c
> > +++ b/drivers/gpu/drm/i915/i915_gem_context.c
> > @@ -109,6 +109,8 @@ static void lut_close(struct i915_gem_context *ctx)
> >               struct i915_vma *vma = rcu_dereference_raw(*slot);
> >   
> >               radix_tree_iter_delete(&ctx->handles_vma, &iter, slot);
> > +
> > +             vma->open_count--;
> 
> I did not figure out what is this. A) why open coded vma management 
> without any comments, and b) rest of the patch doesn't seem to touch 
> this tree.

As the vm may be shared between multiple contexts, we may then open the
same vma and record it in the different context lut. Each instance needs
to be accounted for.

> >               __i915_gem_object_release_unless_active(vma->obj);
> >       }
> >       rcu_read_unlock();
> > @@ -291,7 +293,7 @@ static void context_close(struct i915_gem_context *ctx)
> >        */
> >       lut_close(ctx);
> >       if (ctx->ppgtt)
> > -             i915_ppgtt_close(&ctx->ppgtt->vm);
> > +             i915_ppgtt_close(ctx->ppgtt);
> 
> I'll need to figure out if it is okay for context to close the ppgtt 
> instead of just dropping references to it. Like two contexts sharing 
> ppgtt and one closes it, the other one should continue to work fine, no? 
> Or even a third context is created sharing the same ppgtt.

ppgtt->open_count? We don't close until everyone agrees.

> >       ctx->file_priv = ERR_PTR(-EBADF);
> >       i915_gem_context_put(ctx);
> > @@ -401,6 +403,23 @@ static void __destroy_hw_context(struct i915_gem_context *ctx,
> >       context_close(ctx);
> >   }
> >   
> > +static void __set_ppgtt(struct i915_gem_context *ctx,
> > +                     struct i915_hw_ppgtt *ppgtt)
> > +{
> > +     if (ppgtt == ctx->ppgtt)
> > +             return;
> > +
> > +     if (ctx->ppgtt) {
> > +             i915_ppgtt_close(ctx->ppgtt);
> 
> Feels incorrect to close it if it could be shared and in use elsewhere.
> 
> > +             i915_ppgtt_put(ctx->ppgtt);
> > +     }
> > +
> > +     i915_ppgtt_open(ppgtt);
> 
> Do we need some protection against trying to re-open a closed ppgtt here?

We BUG_ON as a closed ppgtt shouldn't have been accessible via the file_priv.

> > +     ctx->ppgtt = i915_ppgtt_get(ppgtt);
> > +
> > +     ctx->desc_template = default_desc_template(ctx->i915, ppgtt);
> > +}
> > +
> >   static struct i915_gem_context *
> >   i915_gem_create_context(struct drm_i915_private *dev_priv,
> >                       struct drm_i915_file_private *file_priv)
> > @@ -427,8 +446,8 @@ i915_gem_create_context(struct drm_i915_private *dev_priv,
> >                       return ERR_CAST(ppgtt);
> >               }
> >   
> > -             ctx->ppgtt = ppgtt;
> > -             ctx->desc_template = default_desc_template(dev_priv, ppgtt);
> > +             __set_ppgtt(ctx, ppgtt);
> > +             i915_ppgtt_put(ppgtt);
> >       }
> >   
> >       trace_i915_context_create(ctx);
> > @@ -784,6 +803,87 @@ int i915_gem_switch_to_kernel_context(struct drm_i915_private *i915)
> >       return 0;
> >   }
> >   
> > +static int get_ppgtt(struct i915_gem_context *ctx, u64 *out)
> 
> u32 *out ?

Oh, left over from planning for setting different vm on each engine. In
the end, I thought a separate setter/getter was sensible rather than
trying to overload a simple interface with a more complex one.

> > +{
> > +     struct drm_i915_file_private *file_priv = ctx->file_priv;
> > +     struct i915_hw_ppgtt *ppgtt;
> > +     int ret;
> > +
> > +     /* XXX rcu acquire? */
> > +     ppgtt = ctx->ppgtt;
> > +     if (!ppgtt)
> > +             return -ENODEV;
> > +
> > +     ret = mutex_lock_interruptible(&file_priv->vm_lock);
> > +     if (ret)
> > +             return ret;
> > +
> > +     ret = ppgtt->user_handle;
> > +     if (!ret) {
> > +             ret = idr_alloc(&file_priv->vm_idr, ppgtt, 0, 0, GFP_KERNEL);
> 
> GEM_WARN_ON(ret == 0) just in case?
> 
> > +             if (ret > 0) {
> > +                     ppgtt->user_handle = ret;
> > +                     i915_ppgtt_get(ppgtt);
> > +             }
> > +     }
> > +
> > +     mutex_unlock(&file_priv->vm_lock);
> > +     if (ret < 0)
> > +             return ret;
> > +
> > +     *out = ret;
> > +     return 0;
> > +}
> > +
> > +static int set_ppgtt(struct i915_gem_context *ctx, u32 id)
> > +{
> > +     struct drm_i915_file_private *file_priv = ctx->file_priv;
> > +     struct i915_hw_ppgtt *ppgtt;
> > +     int err;
> > +
> > +     err = mutex_lock_interruptible(&file_priv->vm_lock);
> > +     if (err)
> > +             return err;
> > +
> > +     ppgtt = idr_find(&file_priv->vm_idr, id);
> > +     if (ppgtt)
> > +             i915_ppgtt_get(ppgtt);
> > +     mutex_unlock(&file_priv->vm_lock);
> > +     if (!ppgtt)
> > +             return -ENOENT;
> > +
> > +     err = mutex_lock_interruptible(&ctx->i915->drm.struct_mutex);
> > +     if (err)
> > +             goto out;
> > +
> > +     /*
> > +      * We need to flush any requests using the current ppgtt before
> > +      * we release it as the requests do not hold a reference themselves,
> > +      * only indirectly through the context. By switching to the kernel
> > +      * context, we ensure that the TLBs are reloaded before using the
> > +      * same context again -- an extra layer of paranoia over wait_for_idle.
> > +      */
> > +     err = i915_gem_switch_to_kernel_context(ctx->i915);
> > +     if (err)
> > +             goto out_unlock;
> > +
> > +     err = i915_gem_wait_for_idle(ctx->i915,
> > +                                  I915_WAIT_LOCKED |
> > +                                  I915_WAIT_INTERRUPTIBLE,
> > +                                  MAX_SCHEDULE_TIMEOUT);
> 
> This is a bit worrying. Every new client setting up their contexts 
> causes a global sync point. Sounds bad for scalability. It may be that 
> in practice the event might be happening only every few seconds, rather 
> than multiple times per second, but I am only guessing. How difficult to 
> make requests own ppgtt directly?

Or just add a retire callback from the kernel-context. The more we look,
the more use cases we find.

> > +static inline struct i915_hw_ppgtt *i915_ppgtt_get(struct i915_hw_ppgtt *ppgtt)
> >   {
> > -     if (ppgtt)
> > -             kref_get(&ppgtt->ref);
> > +     kref_get(&ppgtt->ref);
> > +     return ppgtt;
> 
> Unrelated hunk?

Am I not allowed to tidy up as I go along! I use it in this patch :)

> > diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/selftests/i915_gem_context.c
> > index 6a241745e78a..2864cfb82325 100644
> > --- a/drivers/gpu/drm/i915/selftests/i915_gem_context.c
> > +++ b/drivers/gpu/drm/i915/selftests/i915_gem_context.c
> > @@ -243,6 +243,12 @@ static int live_nop_switch(void *arg)
> >       return err;
> >   }
> >   
> > +#define GEN8_HIGH_ADDRESS_BIT 47
> > +static inline u64 gen8_canonical_addr(u64 address)
> > +{
> > +     return sign_extend64(address, GEN8_HIGH_ADDRESS_BIT);
> > +}
> 
> We could move the copy from i915_gem_execbuffer.c to i915_gem_utils.h or 
> somewhere.

It's probably not even worth it, this is a debug hunk for another
problem, now it's own selftest to expose the issue.

> Okay I went a bit back and forth with this patch since I didn't really 
> understand the ctx and ppggt lifetime/open-close rules.

And I didn't even notice I was replying to a reply.
-Chris
diff mbox series

Patch

diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index 7c90981704bf..f707241dbc78 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -109,6 +109,8 @@  static void lut_close(struct i915_gem_context *ctx)
 		struct i915_vma *vma = rcu_dereference_raw(*slot);
 
 		radix_tree_iter_delete(&ctx->handles_vma, &iter, slot);
+
+		vma->open_count--;
 		__i915_gem_object_release_unless_active(vma->obj);
 	}
 	rcu_read_unlock();
@@ -291,7 +293,7 @@  static void context_close(struct i915_gem_context *ctx)
 	 */
 	lut_close(ctx);
 	if (ctx->ppgtt)
-		i915_ppgtt_close(&ctx->ppgtt->vm);
+		i915_ppgtt_close(ctx->ppgtt);
 
 	ctx->file_priv = ERR_PTR(-EBADF);
 	i915_gem_context_put(ctx);
@@ -401,6 +403,23 @@  static void __destroy_hw_context(struct i915_gem_context *ctx,
 	context_close(ctx);
 }
 
+static void __set_ppgtt(struct i915_gem_context *ctx,
+			struct i915_hw_ppgtt *ppgtt)
+{
+	if (ppgtt == ctx->ppgtt)
+		return;
+
+	if (ctx->ppgtt) {
+		i915_ppgtt_close(ctx->ppgtt);
+		i915_ppgtt_put(ctx->ppgtt);
+	}
+
+	i915_ppgtt_open(ppgtt);
+	ctx->ppgtt = i915_ppgtt_get(ppgtt);
+
+	ctx->desc_template = default_desc_template(ctx->i915, ppgtt);
+}
+
 static struct i915_gem_context *
 i915_gem_create_context(struct drm_i915_private *dev_priv,
 			struct drm_i915_file_private *file_priv)
@@ -427,8 +446,8 @@  i915_gem_create_context(struct drm_i915_private *dev_priv,
 			return ERR_CAST(ppgtt);
 		}
 
-		ctx->ppgtt = ppgtt;
-		ctx->desc_template = default_desc_template(dev_priv, ppgtt);
+		__set_ppgtt(ctx, ppgtt);
+		i915_ppgtt_put(ppgtt);
 	}
 
 	trace_i915_context_create(ctx);
@@ -784,6 +803,87 @@  int i915_gem_switch_to_kernel_context(struct drm_i915_private *i915)
 	return 0;
 }
 
+static int get_ppgtt(struct i915_gem_context *ctx, u64 *out)
+{
+	struct drm_i915_file_private *file_priv = ctx->file_priv;
+	struct i915_hw_ppgtt *ppgtt;
+	int ret;
+
+	/* XXX rcu acquire? */
+	ppgtt = ctx->ppgtt;
+	if (!ppgtt)
+		return -ENODEV;
+
+	ret = mutex_lock_interruptible(&file_priv->vm_lock);
+	if (ret)
+		return ret;
+
+	ret = ppgtt->user_handle;
+	if (!ret) {
+		ret = idr_alloc(&file_priv->vm_idr, ppgtt, 0, 0, GFP_KERNEL);
+		if (ret > 0) {
+			ppgtt->user_handle = ret;
+			i915_ppgtt_get(ppgtt);
+		}
+	}
+
+	mutex_unlock(&file_priv->vm_lock);
+	if (ret < 0)
+		return ret;
+
+	*out = ret;
+	return 0;
+}
+
+static int set_ppgtt(struct i915_gem_context *ctx, u32 id)
+{
+	struct drm_i915_file_private *file_priv = ctx->file_priv;
+	struct i915_hw_ppgtt *ppgtt;
+	int err;
+
+	err = mutex_lock_interruptible(&file_priv->vm_lock);
+	if (err)
+		return err;
+
+	ppgtt = idr_find(&file_priv->vm_idr, id);
+	if (ppgtt)
+		i915_ppgtt_get(ppgtt);
+	mutex_unlock(&file_priv->vm_lock);
+	if (!ppgtt)
+		return -ENOENT;
+
+	err = mutex_lock_interruptible(&ctx->i915->drm.struct_mutex);
+	if (err)
+		goto out;
+
+	/*
+	 * We need to flush any requests using the current ppgtt before
+	 * we release it as the requests do not hold a reference themselves,
+	 * only indirectly through the context. By switching to the kernel
+	 * context, we ensure that the TLBs are reloaded before using the
+	 * same context again -- an extra layer of paranoia over wait_for_idle.
+	 */
+	err = i915_gem_switch_to_kernel_context(ctx->i915);
+	if (err)
+		goto out_unlock;
+
+	err = i915_gem_wait_for_idle(ctx->i915,
+				     I915_WAIT_LOCKED |
+				     I915_WAIT_INTERRUPTIBLE,
+				     MAX_SCHEDULE_TIMEOUT);
+	if (err)
+		goto out_unlock;
+
+	__set_ppgtt(ctx, ppgtt);
+
+out_unlock:
+	mutex_unlock(&ctx->i915->drm.struct_mutex);
+
+out:
+	i915_ppgtt_put(ppgtt);
+	return err;
+}
+
 static bool client_is_banned(struct drm_i915_file_private *file_priv)
 {
 	return atomic_read(&file_priv->ban_score) >= I915_CLIENT_SCORE_BANNED;
@@ -896,6 +996,9 @@  int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data,
 	case I915_CONTEXT_PARAM_PRIORITY:
 		args->value = ctx->sched.priority >> I915_USER_PRIORITY_SHIFT;
 		break;
+	case I915_CONTEXT_PARAM_VM:
+		ret = get_ppgtt(ctx, &args->value);
+		break;
 	default:
 		ret = -EINVAL;
 		break;
@@ -968,6 +1071,15 @@  int i915_gem_context_setparam_ioctl(struct drm_device *dev, void *data,
 		}
 		break;
 
+	case I915_CONTEXT_PARAM_VM:
+		if (args->size)
+			ret = -EINVAL;
+		else if (upper_32_bits(args->value))
+			ret = -EINVAL;
+		else
+			ret = set_ppgtt(ctx, lower_32_bits(args->value));
+		break;
+
 	default:
 		ret = -EINVAL;
 		break;
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 49b00996a15e..27b40c14e745 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -2104,10 +2104,21 @@  i915_ppgtt_create(struct drm_i915_private *i915,
 	return ppgtt;
 }
 
-void i915_ppgtt_close(struct i915_address_space *vm)
+void i915_ppgtt_open(struct i915_hw_ppgtt *ppgtt)
 {
-	GEM_BUG_ON(vm->closed);
-	vm->closed = true;
+	GEM_BUG_ON(ppgtt->vm.closed);
+
+	ppgtt->open_count++;
+}
+
+void i915_ppgtt_close(struct i915_hw_ppgtt *ppgtt)
+{
+	GEM_BUG_ON(!ppgtt->open_count);
+	if (--ppgtt->open_count)
+		return;
+
+	GEM_BUG_ON(ppgtt->vm.closed);
+	ppgtt->vm.closed = true;
 }
 
 static void ppgtt_destroy_vma(struct i915_address_space *vm)
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
index 61698609a62c..bb750318f52a 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.h
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
@@ -391,6 +391,8 @@  struct i915_hw_ppgtt {
 	struct kref ref;
 
 	unsigned long pd_dirty_rings;
+	unsigned int open_count;
+
 	union {
 		struct i915_pml4 pml4;		/* GEN8+ & 48b PPGTT */
 		struct i915_page_directory_pointer pdp;	/* GEN8+ */
@@ -608,12 +610,16 @@  int i915_ppgtt_init_hw(struct drm_i915_private *dev_priv);
 void i915_ppgtt_release(struct kref *kref);
 struct i915_hw_ppgtt *i915_ppgtt_create(struct drm_i915_private *dev_priv,
 					struct drm_i915_file_private *fpriv);
-void i915_ppgtt_close(struct i915_address_space *vm);
-static inline void i915_ppgtt_get(struct i915_hw_ppgtt *ppgtt)
+
+void i915_ppgtt_open(struct i915_hw_ppgtt *ppgtt);
+void i915_ppgtt_close(struct i915_hw_ppgtt *ppgtt);
+
+static inline struct i915_hw_ppgtt *i915_ppgtt_get(struct i915_hw_ppgtt *ppgtt)
 {
-	if (ppgtt)
-		kref_get(&ppgtt->ref);
+	kref_get(&ppgtt->ref);
+	return ppgtt;
 }
+
 static inline void i915_ppgtt_put(struct i915_hw_ppgtt *ppgtt)
 {
 	if (ppgtt)
diff --git a/drivers/gpu/drm/i915/selftests/huge_pages.c b/drivers/gpu/drm/i915/selftests/huge_pages.c
index a9a2fa35876f..a7ee8e97bcee 100644
--- a/drivers/gpu/drm/i915/selftests/huge_pages.c
+++ b/drivers/gpu/drm/i915/selftests/huge_pages.c
@@ -1734,7 +1734,6 @@  int i915_gem_huge_page_mock_selftests(void)
 	err = i915_subtests(tests, ppgtt);
 
 out_close:
-	i915_ppgtt_close(&ppgtt->vm);
 	i915_ppgtt_put(ppgtt);
 
 out_unlock:
diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/selftests/i915_gem_context.c
index 6a241745e78a..2864cfb82325 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/selftests/i915_gem_context.c
@@ -243,6 +243,12 @@  static int live_nop_switch(void *arg)
 	return err;
 }
 
+#define GEN8_HIGH_ADDRESS_BIT 47
+static inline u64 gen8_canonical_addr(u64 address)
+{
+	return sign_extend64(address, GEN8_HIGH_ADDRESS_BIT);
+}
+
 static struct i915_vma *
 gpu_fill_dw(struct i915_vma *vma, u64 offset, unsigned long count, u32 value)
 {
@@ -266,6 +272,7 @@  gpu_fill_dw(struct i915_vma *vma, u64 offset, unsigned long count, u32 value)
 
 	GEM_BUG_ON(offset + (count - 1) * PAGE_SIZE > vma->node.size);
 	offset += vma->node.start;
+	offset = gen8_canonical_addr(offset);
 
 	for (n = 0; n < count; n++) {
 		if (gen >= 8) {
@@ -391,6 +398,7 @@  static int gpu_fill(struct drm_i915_gem_object *obj,
 		goto skip_request;
 
 	i915_gem_object_set_active_reference(batch->obj);
+
 	i915_vma_unpin(batch);
 	i915_vma_close(batch);
 
@@ -439,7 +447,8 @@  static int cpu_fill(struct drm_i915_gem_object *obj, u32 value)
 	return 0;
 }
 
-static int cpu_check(struct drm_i915_gem_object *obj, unsigned int max)
+static noinline int cpu_check(struct drm_i915_gem_object *obj,
+			      unsigned int idx, unsigned int max)
 {
 	unsigned int n, m, needs_flush;
 	int err;
@@ -457,8 +466,10 @@  static int cpu_check(struct drm_i915_gem_object *obj, unsigned int max)
 
 		for (m = 0; m < max; m++) {
 			if (map[m] != m) {
-				pr_err("Invalid value at page %d, offset %d: found %x expected %x\n",
-				       n, m, map[m], m);
+				pr_err("%pS: Invalid value at object %d page %d/%ld, offset %d/%d: found %x expected %x\n",
+				       __builtin_return_address(0), idx,
+				       n, real_page_count(obj), m, max,
+				       map[m], m);
 				err = -EINVAL;
 				goto out_unmap;
 			}
@@ -466,8 +477,9 @@  static int cpu_check(struct drm_i915_gem_object *obj, unsigned int max)
 
 		for (; m < DW_PER_PAGE; m++) {
 			if (map[m] != STACK_MAGIC) {
-				pr_err("Invalid value at page %d, offset %d: found %x expected %x\n",
-				       n, m, map[m], STACK_MAGIC);
+				pr_err("%pS: Invalid value at object %d page %d, offset %d: found %x expected %x (uninitialised)\n",
+				       __builtin_return_address(0), idx, n, m,
+				       map[m], STACK_MAGIC);
 				err = -EINVAL;
 				goto out_unmap;
 			}
@@ -545,12 +557,8 @@  static unsigned long max_dwords(struct drm_i915_gem_object *obj)
 static int igt_ctx_exec(void *arg)
 {
 	struct drm_i915_private *i915 = arg;
-	struct drm_i915_gem_object *obj = NULL;
-	unsigned long ncontexts, ndwords, dw;
-	struct drm_file *file;
-	IGT_TIMEOUT(end_time);
-	LIST_HEAD(objects);
-	struct live_test t;
+	struct intel_engine_cs *engine;
+	enum intel_engine_id id;
 	int err = -ENODEV;
 
 	/*
@@ -562,38 +570,42 @@  static int igt_ctx_exec(void *arg)
 	if (!DRIVER_CAPS(i915)->has_logical_contexts)
 		return 0;
 
-	file = mock_file(i915);
-	if (IS_ERR(file))
-		return PTR_ERR(file);
+	for_each_engine(engine, i915, id) {
+		struct drm_i915_gem_object *obj = NULL;
+		unsigned long ncontexts, ndwords, dw;
+		struct drm_file *file;
+		IGT_TIMEOUT(end_time);
+		LIST_HEAD(objects);
+		struct live_test t;
 
-	mutex_lock(&i915->drm.struct_mutex);
+		if (!intel_engine_can_store_dword(engine))
+			continue;
 
-	err = begin_live_test(&t, i915, __func__, "");
-	if (err)
-		goto out_unlock;
+		if (!engine->context_size)
+			continue; /* No logical context support in HW */
 
-	ncontexts = 0;
-	ndwords = 0;
-	dw = 0;
-	while (!time_after(jiffies, end_time)) {
-		struct intel_engine_cs *engine;
-		struct i915_gem_context *ctx;
-		unsigned int id;
+		file = mock_file(i915);
+		if (IS_ERR(file))
+			return PTR_ERR(file);
 
-		ctx = i915_gem_create_context(i915, file->driver_priv);
-		if (IS_ERR(ctx)) {
-			err = PTR_ERR(ctx);
+		mutex_lock(&i915->drm.struct_mutex);
+
+		err = begin_live_test(&t, i915, __func__, engine->name);
+		if (err)
 			goto out_unlock;
-		}
 
-		for_each_engine(engine, i915, id) {
+		ncontexts = 0;
+		ndwords = 0;
+		dw = 0;
+		while (!time_after(jiffies, end_time)) {
+			struct i915_gem_context *ctx;
 			intel_wakeref_t wakeref;
 
-			if (!engine->context_size)
-				continue; /* No logical context support in HW */
-
-			if (!intel_engine_can_store_dword(engine))
-				continue;
+			ctx = i915_gem_create_context(i915, file->driver_priv);
+			if (IS_ERR(ctx)) {
+				err = PTR_ERR(ctx);
+				goto out_unlock;
+			}
 
 			if (!obj) {
 				obj = create_test_object(ctx, file, &objects);
@@ -603,7 +615,6 @@  static int igt_ctx_exec(void *arg)
 				}
 			}
 
-			err = 0;
 			with_intel_runtime_pm(i915, wakeref)
 				err = gpu_fill(obj, ctx, engine, dw);
 			if (err) {
@@ -618,32 +629,158 @@  static int igt_ctx_exec(void *arg)
 				obj = NULL;
 				dw = 0;
 			}
+
 			ndwords++;
+			ncontexts++;
+		}
+
+		pr_info("Submitted %lu contexts to %s, filling %lu dwords\n",
+			ncontexts, engine->name, ndwords);
+
+		ncontexts = dw = 0;
+		list_for_each_entry(obj, &objects, st_link) {
+			unsigned int rem =
+				min_t(unsigned int, ndwords - dw, max_dwords(obj));
+
+			err = cpu_check(obj, ncontexts++, rem);
+			if (err)
+				break;
+
+			dw += rem;
 		}
-		ncontexts++;
+
+out_unlock:
+		if (end_live_test(&t))
+			err = -EIO;
+		mutex_unlock(&i915->drm.struct_mutex);
+
+		mock_file_free(i915, file);
+		if (err)
+			return err;
 	}
-	pr_info("Submitted %lu contexts (across %u engines), filling %lu dwords\n",
-		ncontexts, RUNTIME_INFO(i915)->num_rings, ndwords);
 
-	dw = 0;
-	list_for_each_entry(obj, &objects, st_link) {
-		unsigned int rem =
-			min_t(unsigned int, ndwords - dw, max_dwords(obj));
+	return 0;
+}
 
-		err = cpu_check(obj, rem);
+static int igt_shared_ctx_exec(void *arg)
+{
+	struct drm_i915_private *i915 = arg;
+	struct intel_engine_cs *engine;
+	enum intel_engine_id id;
+	int err = -ENODEV;
+
+	/*
+	 * Create a few different contexts with the same mm and write
+	 * through each ctx using the GPU making sure those writes end
+	 * up in the expected pages of our obj.
+	 */
+
+	for_each_engine(engine, i915, id) {
+		unsigned long ncontexts, ndwords, dw;
+		struct drm_i915_gem_object *obj = NULL;
+		struct i915_gem_context *ctx = NULL;
+		struct i915_gem_context *parent;
+		struct drm_file *file;
+		IGT_TIMEOUT(end_time);
+		LIST_HEAD(objects);
+		struct live_test t;
+
+		if (!intel_engine_can_store_dword(engine))
+			continue;
+
+		file = mock_file(i915);
+		if (IS_ERR(file))
+			return PTR_ERR(file);
+
+		mutex_lock(&i915->drm.struct_mutex);
+
+		err = begin_live_test(&t, i915, __func__, engine->name);
 		if (err)
-			break;
+			goto out_unlock;
 
-		dw += rem;
-	}
+		parent = i915_gem_create_context(i915, file->driver_priv);
+		if (IS_ERR(parent)) {
+			err = PTR_ERR(parent);
+			if (err == -ENODEV) /* no logical ctx support */
+				err = 0;
+			goto out_unlock;
+		}
+
+		if (!parent->ppgtt) {
+			err = 0;
+			goto out_unlock;
+		}
+
+		ncontexts = 0;
+		ndwords = 0;
+		dw = 0;
+		while (!time_after(jiffies, end_time)) {
+			intel_wakeref_t wakeref;
+
+			if (ctx)
+				__destroy_hw_context(ctx, file->driver_priv);
+
+			ctx = i915_gem_create_context(i915, file->driver_priv);
+			if (IS_ERR(ctx)) {
+				err = PTR_ERR(ctx);
+				goto out_unlock;
+			}
+
+			__set_ppgtt(ctx, parent->ppgtt);
+
+			if (!obj) {
+				obj = create_test_object(parent, file, &objects);
+				if (IS_ERR(obj)) {
+					err = PTR_ERR(obj);
+					goto out_unlock;
+				}
+			}
+
+			err = 0;
+			with_intel_runtime_pm(i915, wakeref)
+				err = gpu_fill(obj, ctx, engine, dw);
+			if (err) {
+				pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) in ctx %u [full-ppgtt? %s], err=%d\n",
+				       ndwords, dw, max_dwords(obj),
+				       engine->name, ctx->hw_id,
+				       yesno(!!ctx->ppgtt), err);
+				goto out_unlock;
+			}
+
+			if (++dw == max_dwords(obj)) {
+				obj = NULL;
+				dw = 0;
+			}
+
+			ndwords++;
+			ncontexts++;
+		}
+		pr_info("Submitted %lu contexts to %s, filling %lu dwords\n",
+			ncontexts, engine->name, ndwords);
+
+		ncontexts = dw = 0;
+		list_for_each_entry(obj, &objects, st_link) {
+			unsigned int rem =
+				min_t(unsigned int, ndwords - dw, max_dwords(obj));
+
+			err = cpu_check(obj, ncontexts++, rem);
+			if (err)
+				break;
+
+			dw += rem;
+		}
 
 out_unlock:
-	if (end_live_test(&t))
-		err = -EIO;
-	mutex_unlock(&i915->drm.struct_mutex);
+		if (end_live_test(&t))
+			err = -EIO;
+		mutex_unlock(&i915->drm.struct_mutex);
 
-	mock_file_free(i915, file);
-	return err;
+		mock_file_free(i915, file);
+		if (err)
+			return err;
+	}
+
+	return 0;
 }
 
 static int igt_ctx_readonly(void *arg)
@@ -652,7 +789,7 @@  static int igt_ctx_readonly(void *arg)
 	struct drm_i915_gem_object *obj = NULL;
 	struct i915_gem_context *ctx;
 	struct i915_hw_ppgtt *ppgtt;
-	unsigned long ndwords, dw;
+	unsigned long idx, ndwords, dw;
 	struct drm_file *file;
 	I915_RND_STATE(prng);
 	IGT_TIMEOUT(end_time);
@@ -733,6 +870,7 @@  static int igt_ctx_readonly(void *arg)
 		ndwords, RUNTIME_INFO(i915)->num_rings);
 
 	dw = 0;
+	idx = 0;
 	list_for_each_entry(obj, &objects, st_link) {
 		unsigned int rem =
 			min_t(unsigned int, ndwords - dw, max_dwords(obj));
@@ -742,7 +880,7 @@  static int igt_ctx_readonly(void *arg)
 		if (i915_gem_object_is_readonly(obj))
 			num_writes = 0;
 
-		err = cpu_check(obj, num_writes);
+		err = cpu_check(obj, idx++, num_writes);
 		if (err)
 			break;
 
@@ -1214,6 +1352,7 @@  int i915_gem_context_live_selftests(struct drm_i915_private *dev_priv)
 		SUBTEST(live_nop_switch),
 		SUBTEST(igt_ctx_exec),
 		SUBTEST(igt_ctx_readonly),
+		SUBTEST(igt_shared_ctx_exec),
 		SUBTEST(igt_vm_isolation),
 	};
 
diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c
index 35eb40e5de91..298c4e11deda 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c
@@ -1020,7 +1020,6 @@  static int exercise_ppgtt(struct drm_i915_private *dev_priv,
 
 	err = func(dev_priv, &ppgtt->vm, 0, ppgtt->vm.total, end_time);
 
-	i915_ppgtt_close(&ppgtt->vm);
 	i915_ppgtt_put(ppgtt);
 out_unlock:
 	mutex_unlock(&dev_priv->drm.struct_mutex);
diff --git a/drivers/gpu/drm/i915/selftests/mock_context.c b/drivers/gpu/drm/i915/selftests/mock_context.c
index 2009e776b5d8..b9fd2a4b95e9 100644
--- a/drivers/gpu/drm/i915/selftests/mock_context.c
+++ b/drivers/gpu/drm/i915/selftests/mock_context.c
@@ -59,13 +59,17 @@  mock_context(struct drm_i915_private *i915,
 		goto err_handles;
 
 	if (name) {
+		struct i915_hw_ppgtt *ppgtt;
+
 		ctx->name = kstrdup(name, GFP_KERNEL);
 		if (!ctx->name)
 			goto err_put;
 
-		ctx->ppgtt = mock_ppgtt(i915, name);
-		if (!ctx->ppgtt)
+		ppgtt = mock_ppgtt(i915, name);
+		if (!ppgtt)
 			goto err_put;
+
+		__set_ppgtt(ctx, ppgtt);
 	}
 
 	return ctx;
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index c3336c931a95..eb19cf8a77ef 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -1516,6 +1516,7 @@  struct drm_i915_gem_context_param {
 #define   I915_CONTEXT_MAX_USER_PRIORITY	1023 /* inclusive */
 #define   I915_CONTEXT_DEFAULT_PRIORITY		0
 #define   I915_CONTEXT_MIN_USER_PRIORITY	-1023 /* inclusive */
+#define I915_CONTEXT_PARAM_VM		0x7
 	__u64 value;
 };