[2/2] drm/i915: Unshare the idle-barrier from other kernel requests

Message ID 20190725131447.27515-2-chris@chris-wilson.co.uk (mailing list archive)
State New, archived
Series [1/2] drm/i915/gt: Add to timeline requires the timeline mutex

Commit Message

Chris Wilson July 25, 2019, 1:14 p.m. UTC
Under some circumstances (see intel_context_prepare_remote_request), we
may use a request on the kernel context to modify the logical state of
another context. To keep the target context in place while the request
executes, we take an active reference on it using the kernel timeline.
This is the same timeline as we use for the idle-barrier, and so we end
up reusing the same active node. Except that the idle-barrier is special
and cannot be reused in this manner! Give the idle-barrier a reserved
timeline index (0) so that it will always be unique (give or take, we
may issue multiple idle-barriers across multiple engines).

Reported-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Fixes: ce476c80b8bf ("drm/i915: Keep contexts pinned until after the next kernel context switch")
Fixes: a9877da2d629 ("drm/i915/oa: Reconfigure contexts on the fly")
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Tested-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
---
 drivers/gpu/drm/i915/gt/intel_context.c       |  40 ++-
 drivers/gpu/drm/i915/gt/intel_context.h       |  13 +-
 drivers/gpu/drm/i915/gt/selftest_context.c    | 322 ++++++++++++++++++
 drivers/gpu/drm/i915/i915_active.c            |  67 +++-
 .../drm/i915/selftests/i915_live_selftests.h  |   3 +-
 5 files changed, 408 insertions(+), 37 deletions(-)
 create mode 100644 drivers/gpu/drm/i915/gt/selftest_context.c
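
For readers skimming the thread, here is a minimal, self-contained
sketch of the idea (simplified, hypothetical types — not the driver's
real i915_active implementation): tracking nodes are keyed by timeline
index, and reserving index 0 for idle-barriers guarantees that a
request on the kernel timeline can never alias an idle-barrier node.

#include <assert.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

struct node {
	uint64_t timeline;		/* 0 is reserved for idle-barriers */
	struct node *next;
};

struct active {
	struct node *head;		/* sorted by timeline, barriers first */
};

/* Normal activity tracking: look up (or create) the node for a
 * timeline; index 0 is never a valid key here, so a request on the
 * kernel timeline always gets a node of its own. */
static struct node *active_instance(struct active *ref, uint64_t idx)
{
	struct node **p, *node;

	assert(idx);			/* reserved for idle-barrier */
	for (p = &ref->head; *p; p = &(*p)->next) {
		if ((*p)->timeline == idx)
			return *p;	/* reuse the tracking node */
		if ((*p)->timeline > idx)
			break;
	}

	node = calloc(1, sizeof(*node));
	assert(node);
	node->timeline = idx;
	node->next = *p;
	*p = node;
	return node;
}

/* Idle-barriers always use timeline == 0 nodes of their own. */
static struct node *idle_barrier_instance(struct active *ref)
{
	struct node *node = calloc(1, sizeof(*node));

	assert(node);
	node->timeline = 0;
	node->next = ref->head;
	ref->head = node;
	return node;
}

int main(void)
{
	struct active ref = { 0 };
	uint64_t kernel_timeline = 42;	/* hypothetical fence context */

	struct node *barrier = idle_barrier_instance(&ref);
	struct node *remote = active_instance(&ref, kernel_timeline);

	/* Before the fix, both lookups keyed on the kernel timeline
	 * and landed on the same node. */
	assert(barrier != remote);
	printf("idle-barrier and kernel-timeline nodes are distinct\n");
	return 0;
}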

Comments

Tvrtko Ursulin July 29, 2019, 7:46 a.m. UTC | #1
On 25/07/2019 14:14, Chris Wilson wrote:
> Under some circumstances (see intel_context_prepare_remote_request), we
> may use a request on the kernel context to modify the logical state of
> another context. To keep the target context in place while the request
> executes, we take an active reference on it using the kernel timeline.
> This is the same timeline as we use for the idle-barrier, and so we end
> up reusing the same active node. Except that the idle-barrier is special
> and cannot be reused in this manner! Give the idle-barrier a reserved
> timeline index (0) so that it will always be unique (give or take, we
> may issue multiple idle-barriers across multiple engines).
> 
> Reported-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
> Fixes: ce476c80b8bf ("drm/i915: Keep contexts pinned until after the next kernel context switch")
> Fixes: a9877da2d629 ("drm/i915/oa: Reconfigure contexts on the fly")
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> Tested-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
> ---
>   drivers/gpu/drm/i915/gt/intel_context.c       |  40 ++-
>   drivers/gpu/drm/i915/gt/intel_context.h       |  13 +-
>   drivers/gpu/drm/i915/gt/selftest_context.c    | 322 ++++++++++++++++++
>   drivers/gpu/drm/i915/i915_active.c            |  67 +++-
>   .../drm/i915/selftests/i915_live_selftests.h  |   3 +-
>   5 files changed, 408 insertions(+), 37 deletions(-)
>   create mode 100644 drivers/gpu/drm/i915/gt/selftest_context.c
> 
> diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c
> index d64b45f7ec6d..211ac6568a5d 100644
> --- a/drivers/gpu/drm/i915/gt/intel_context.c
> +++ b/drivers/gpu/drm/i915/gt/intel_context.c
> @@ -162,23 +162,41 @@ static int __intel_context_active(struct i915_active *active)
>   	if (err)
>   		goto err_ring;
>   
> +	return 0;
> +
> +err_ring:
> +	intel_ring_unpin(ce->ring);
> +err_put:
> +	intel_context_put(ce);
> +	return err;
> +}
> +
> +int intel_context_active_acquire(struct intel_context *ce)
> +{
> +	int err;
> +
> +	err = i915_active_acquire(&ce->active);
> +	if (err)
> +		return err;
> +
>   	/* Preallocate tracking nodes */
>   	if (!i915_gem_context_is_kernel(ce->gem_context)) {
>   		err = i915_active_acquire_preallocate_barrier(&ce->active,
>   							      ce->engine);
> -		if (err)
> -			goto err_state;
> +		if (err) {
> +			i915_active_release(&ce->active);
> +			return err;
> +		}
>   	}
>   
>   	return 0;
> +}
>   
> -err_state:
> -	__context_unpin_state(ce->state);
> -err_ring:
> -	intel_ring_unpin(ce->ring);
> -err_put:
> -	intel_context_put(ce);
> -	return err;
> +void intel_context_active_release(struct intel_context *ce)
> +{
> +	/* Nodes preallocated in intel_context_active() */
> +	i915_active_acquire_barrier(&ce->active);
> +	i915_active_release(&ce->active);
>   }
>   
>   void
> @@ -297,3 +315,7 @@ struct i915_request *intel_context_create_request(struct intel_context *ce)
>   
>   	return rq;
>   }
> +
> +#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
> +#include "selftest_context.c"
> +#endif
> diff --git a/drivers/gpu/drm/i915/gt/intel_context.h b/drivers/gpu/drm/i915/gt/intel_context.h
> index 23c7e4c0ce7c..07f9924de48f 100644
> --- a/drivers/gpu/drm/i915/gt/intel_context.h
> +++ b/drivers/gpu/drm/i915/gt/intel_context.h
> @@ -104,17 +104,8 @@ static inline void intel_context_exit(struct intel_context *ce)
>   		ce->ops->exit(ce);
>   }
>   
> -static inline int intel_context_active_acquire(struct intel_context *ce)
> -{
> -	return i915_active_acquire(&ce->active);
> -}
> -
> -static inline void intel_context_active_release(struct intel_context *ce)
> -{
> -	/* Nodes preallocated in intel_context_active() */
> -	i915_active_acquire_barrier(&ce->active);
> -	i915_active_release(&ce->active);
> -}
> +int intel_context_active_acquire(struct intel_context *ce);
> +void intel_context_active_release(struct intel_context *ce);
>   
>   static inline struct intel_context *intel_context_get(struct intel_context *ce)
>   {
> diff --git a/drivers/gpu/drm/i915/gt/selftest_context.c b/drivers/gpu/drm/i915/gt/selftest_context.c
> new file mode 100644
> index 000000000000..b40efdaabdd5
> --- /dev/null
> +++ b/drivers/gpu/drm/i915/gt/selftest_context.c
> @@ -0,0 +1,322 @@
> +/*
> + * SPDX-License-Identifier: GPL-2.0
> + *
> + * Copyright © 2019 Intel Corporation
> + */
> +
> +#include "i915_selftest.h"
> +#include "intel_gt.h"
> +
> +#include "gem/selftests/mock_context.h"
> +#include "selftests/igt_flush_test.h"
> +#include "selftests/mock_drm.h"
> +
> +static int request_sync(struct i915_request *rq)
> +{
> +	long timeout;
> +	int err = 0;
> +
> +	i915_request_get(rq);
> +
> +	i915_request_add(rq);
> +	timeout = i915_request_wait(rq, 0, HZ / 10);
> +	if (timeout < 0)
> +		err = timeout;
> +	else
> +		i915_request_retire_upto(rq);
> +
> +	i915_request_put(rq);
> +
> +	return err;
> +}
> +
> +static int context_sync(struct intel_context *ce)
> +{
> +	struct intel_timeline *tl = ce->ring->timeline;
> +	int err = 0;
> +
> +	do {
> +		struct i915_request *rq;
> +		long timeout;
> +
> +		rcu_read_lock();
> +		rq = rcu_dereference(tl->last_request.request);
> +		if (rq)
> +			rq = i915_request_get_rcu(rq);
> +		rcu_read_unlock();
> +		if (!rq)
> +			break;
> +
> +		timeout = i915_request_wait(rq, 0, HZ / 10);
> +		if (timeout < 0)
> +			err = timeout;
> +		else
> +			i915_request_retire_upto(rq);
> +
> +		i915_request_put(rq);
> +	} while (!err);
> +
> +	return err;
> +}
> +
> +static int __live_active_context(struct intel_engine_cs *engine,
> +				 struct i915_gem_context *fixme)
> +{
> +	struct intel_context *ce;
> +	int pass;
> +	int err;
> +
> +	/*
> +	 * We keep active contexts alive until after a subsequent context
> +	 * switch as the final write from the context-save will be after
> +	 * we retire the final request. We track when we unpin the context,
> +	 * under the presumption that the final pin is from the last request,
> +	 * and instead of immediately unpinning the context, we add a task
> +	 * to unpin the context from the next idle-barrier.
> +	 *
> +	 * This test makes sure that the context is kept alive until a
> +	 * subsequent idle-barrier (emitted when the engine wakeref hits 0
> +	 * with no more outstanding requests).
> +	 */
> +
> +	if (intel_engine_pm_is_awake(engine)) {
> +		pr_err("%s is awake before starting %s!\n",
> +		       engine->name, __func__);
> +		return -EINVAL;
> +	}
> +
> +	ce = intel_context_create(fixme, engine);
> +	if (!ce)
> +		return -ENOMEM;
> +
> +	for (pass = 0; pass <= 2; pass++) {
> +		struct i915_request *rq;
> +
> +		rq = intel_context_create_request(ce);
> +		if (IS_ERR(rq)) {
> +			err = PTR_ERR(rq);
> +			goto err;
> +		}
> +
> +		err = request_sync(rq);
> +		if (err)
> +			goto err;
> +
> +		/* Context will be kept active until after an idle-barrier. */
> +		if (i915_active_is_idle(&ce->active)) {
> +			pr_err("context is not active; expected idle-barrier (%s pass %d)\n",
> +			       engine->name, pass);
> +			err = -EINVAL;
> +			goto err;
> +		}
> +
> +		if (!intel_engine_pm_is_awake(engine)) {
> +			pr_err("%s is asleep before idle-barrier\n",
> +			       engine->name);
> +			err = -EINVAL;
> +			goto err;
> +		}
> +	}
> +
> +	/* Now make sure our idle-barriers are flushed */
> +	err = context_sync(engine->kernel_context);
> +	if (err)
> +		goto err;
> +
> +	if (!i915_active_is_idle(&ce->active)) {
> +		pr_err("context is still active!");
> +		err = -EINVAL;
> +	}
> +
> +	if (intel_engine_pm_is_awake(engine)) {
> +		struct drm_printer p = drm_debug_printer(__func__);
> +
> +		intel_engine_dump(engine, &p,
> +				  "%s is still awake after idle-barriers\n",
> +				  engine->name);
> +		GEM_TRACE_DUMP();
> +
> +		err = -EINVAL;
> +		goto err;
> +	}
> +
> +err:
> +	intel_context_put(ce);
> +	return err;
> +}
> +
> +static int live_active_context(void *arg)
> +{
> +	struct intel_gt *gt = arg;
> +	struct intel_engine_cs *engine;
> +	struct i915_gem_context *fixme;
> +	enum intel_engine_id id;
> +	struct drm_file *file;
> +	int err = 0;
> +
> +	file = mock_file(gt->i915);
> +	if (IS_ERR(file))
> +		return PTR_ERR(file);
> +
> +	mutex_lock(&gt->i915->drm.struct_mutex);
> +
> +	fixme = live_context(gt->i915, file);
> +	if (!fixme) {
> +		err = -ENOMEM;
> +		goto unlock;
> +	}
> +
> +	for_each_engine(engine, gt->i915, id) {
> +		err = __live_active_context(engine, fixme);
> +		if (err)
> +			break;
> +
> +		err = igt_flush_test(gt->i915, I915_WAIT_LOCKED);
> +		if (err)
> +			break;
> +	}
> +
> +unlock:
> +	mutex_unlock(&gt->i915->drm.struct_mutex);
> +	mock_file_free(gt->i915, file);
> +	return err;
> +}
> +
> +static int __live_remote_context(struct intel_engine_cs *engine,
> +				 struct i915_gem_context *fixme)
> +{
> +	struct intel_context *local, *remote;
> +	struct i915_request *rq;
> +	int pass;
> +	int err;
> +
> +	/*
> +	 * Check that our idle barriers do not interfere with normal
> +	 * activity tracking. In particular, check that operating
> +	 * on the context image remotely (intel_context_prepare_remote_request)
> +	 * which inserts foriegn fences into intel_context.active are not

typo in foreign

"operating ... are not .." ? Foreign fences are not clobbered?

> +	 * clobbered.
> +	 */
> +
> +	remote = intel_context_create(fixme, engine);
> +	if (!remote)
> +		return -ENOMEM;
> +
> +	local = intel_context_create(fixme, engine);
> +	if (!local) {
> +		err = -ENOMEM;
> +		goto err_remote;
> +	}
> +
> +	for (pass = 0; pass <= 2; pass++) {
> +		err = intel_context_pin(remote);
> +		if (err)
> +			goto err_local;
> +
> +		rq = intel_context_create_request(local);
> +		if (IS_ERR(rq)) {
> +			err = PTR_ERR(rq);
> +			goto err_unpin;
> +		}
> +
> +		err = intel_context_prepare_remote_request(remote, rq);
> +		if (err) {
> +			i915_request_add(rq);
> +			goto err_unpin;
> +		}
> +
> +		err = request_sync(rq);
> +		if (err)
> +			goto err_unpin;
> +
> +		intel_context_unpin(remote);
> +		err = intel_context_pin(remote);

unpin-pin is to trigger transfer of idle barriers and maybe trigger some 
asserts?

> +		if (err)
> +			goto err_local;
> +
> +		rq = i915_request_create(engine->kernel_context);

Why a request on the kernel context here, a third context?

> +		if (IS_ERR(rq)) {
> +			err = PTR_ERR(rq);
> +			goto err_unpin;
> +		}
> +
> +		err = intel_context_prepare_remote_request(remote, rq);
> +		if (err) {
> +			i915_request_add(rq);
> +			goto err_unpin;
> +		}
> +
> +		err = request_sync(rq);
> +		if (err)
> +			goto err_unpin;
> +
> +		intel_context_unpin(remote);
> +
> +		if (i915_active_is_idle(&remote->active)) {
> +			pr_err("remote context is not active; expected idle-barrier (pass %d)\n", pass);
> +			err = -EINVAL;
> +			goto err_local;
> +		}
> +	}
> +
> +	goto err_local;
> +
> +err_unpin:
> +	intel_context_unpin(remote);
> +err_local:
> +	intel_context_put(local);
> +err_remote:
> +	intel_context_put(remote);
> +	return err;
> +}
> +
> +static int live_remote_context(void *arg)
> +{
> +	struct intel_gt *gt = arg;
> +	struct intel_engine_cs *engine;
> +	struct i915_gem_context *fixme;
> +	enum intel_engine_id id;
> +	struct drm_file *file;
> +	int err = 0;
> +
> +	file = mock_file(gt->i915);
> +	if (IS_ERR(file))
> +		return PTR_ERR(file);
> +
> +	mutex_lock(&gt->i915->drm.struct_mutex);
> +
> +	fixme = live_context(gt->i915, file);
> +	if (!fixme) {
> +		err = -ENOMEM;
> +		goto unlock;
> +	}
> +
> +	for_each_engine(engine, gt->i915, id) {
> +		err = __live_remote_context(engine, fixme);
> +		if (err)
> +			break;
> +
> +		err = igt_flush_test(gt->i915, I915_WAIT_LOCKED);
> +		if (err)
> +			break;
> +	}
> +
> +unlock:
> +	mutex_unlock(&gt->i915->drm.struct_mutex);
> +	mock_file_free(gt->i915, file);
> +	return err;
> +}
> +
> +int intel_context_live_selftests(struct drm_i915_private *i915)
> +{
> +	static const struct i915_subtest tests[] = {
> +		SUBTEST(live_active_context),
> +		SUBTEST(live_remote_context),
> +	};
> +	struct intel_gt *gt = &i915->gt;
> +
> +	if (intel_gt_is_wedged(gt))
> +		return 0;
> +
> +	return intel_gt_live_subtests(tests, gt);
> +}
> diff --git a/drivers/gpu/drm/i915/i915_active.c b/drivers/gpu/drm/i915/i915_active.c
> index 13f304a29fc8..e04afb519264 100644
> --- a/drivers/gpu/drm/i915/i915_active.c
> +++ b/drivers/gpu/drm/i915/i915_active.c
> @@ -184,6 +184,7 @@ active_instance(struct i915_active *ref, u64 idx)
>   	ref->cache = node;
>   	mutex_unlock(&ref->mutex);
>   
> +	BUILD_BUG_ON(offsetof(typeof(*node), base));
>   	return &node->base;
>   }
>   
> @@ -212,6 +213,8 @@ int i915_active_ref(struct i915_active *ref,
>   	struct i915_active_request *active;
>   	int err;
>   
> +	GEM_BUG_ON(!timeline); /* reserved for idle-barrier */
> +
>   	/* Prevent reaping in case we malloc/wait while building the tree */
>   	err = i915_active_acquire(ref);
>   	if (err)
> @@ -222,6 +225,7 @@ int i915_active_ref(struct i915_active *ref,
>   		err = -ENOMEM;
>   		goto out;
>   	}
> +	GEM_BUG_ON(IS_ERR(active->request));
>   
>   	if (!i915_active_request_isset(active))
>   		atomic_inc(&ref->count);
> @@ -342,6 +346,34 @@ void i915_active_fini(struct i915_active *ref)
>   }
>   #endif
>   
> +static struct active_node *idle_barrier(struct i915_active *ref)
> +{
> +	struct active_node *idle = NULL;
> +	struct rb_node *rb;
> +
> +	if (RB_EMPTY_ROOT(&ref->tree))
> +		return NULL;
> +
> +	mutex_lock(&ref->mutex);
> +	for (rb = rb_first(&ref->tree); rb; rb = rb_next(rb)) {
> +		struct active_node *node;
> +
> +		node = rb_entry(rb, typeof(*node), node);
> +		if (node->timeline)
> +			break;
> +
> +		if (!i915_active_request_isset(&node->base)) {
> +			GEM_BUG_ON(!list_empty(&node->base.link));
> +			rb_erase(rb, &ref->tree);
> +			idle = node;
> +			break;
> +		}

Under what circumstances does the walk continue? There can be two idle 
barriers (timeline == 0) in the tree?

> +	}
> +	mutex_unlock(&ref->mutex);
> +
> +	return idle;
> +}
> +
>   int i915_active_acquire_preallocate_barrier(struct i915_active *ref,
>   					    struct intel_engine_cs *engine)
>   {
> @@ -352,22 +384,29 @@ int i915_active_acquire_preallocate_barrier(struct i915_active *ref,
>   
>   	GEM_BUG_ON(!engine->mask);
>   	for_each_engine_masked(engine, i915, engine->mask, tmp) {
> -		struct intel_context *kctx = engine->kernel_context;
>   		struct active_node *node;
>   
> -		node = kmem_cache_alloc(global.slab_cache, GFP_KERNEL);
> -		if (unlikely(!node)) {
> -			err = -ENOMEM;
> -			goto unwind;
> +		node = idle_barrier(ref);
> +		if (!node) {
> +			node = kmem_cache_alloc(global.slab_cache,
> +						GFP_KERNEL |
> +						__GFP_RETRY_MAYFAIL |
> +						__GFP_NOWARN);
> +			if (unlikely(!node)) {
> +				err = -ENOMEM;
> +				goto unwind;
> +			}
> +
> +			node->ref = ref;
> +			node->timeline = 0;
> +			node->base.retire = node_retire;
>   		}
>   
> -		i915_active_request_init(&node->base,
> -					 (void *)engine, node_retire);
> -		node->timeline = kctx->ring->timeline->fence_context;
> -		node->ref = ref;
> +		intel_engine_pm_get(engine);
> +
> +		RCU_INIT_POINTER(node->base.request, (void *)engine);
>   		atomic_inc(&ref->count);
>   
> -		intel_engine_pm_get(engine);
>   		llist_add((struct llist_node *)&node->base.link,
>   			  &ref->barriers);
>   	}
> @@ -402,6 +441,7 @@ void i915_active_acquire_barrier(struct i915_active *ref)
>   
>   		node = container_of((struct list_head *)pos,
>   				    typeof(*node), base.link);
> +		GEM_BUG_ON(node->timeline);
>   
>   		engine = (void *)rcu_access_pointer(node->base.request);
>   		RCU_INIT_POINTER(node->base.request, ERR_PTR(-EAGAIN));
> @@ -410,12 +450,7 @@ void i915_active_acquire_barrier(struct i915_active *ref)
>   		p = &ref->tree.rb_node;
>   		while (*p) {
>   			parent = *p;
> -			if (rb_entry(parent,
> -				     struct active_node,
> -				     node)->timeline < node->timeline)
> -				p = &parent->rb_right;
> -			else
> -				p = &parent->rb_left;
> +			p = &parent->rb_left;
>   		}
>   		rb_link_node(&node->node, parent, p);
>   		rb_insert_color(&node->node, &ref->tree);
> diff --git a/drivers/gpu/drm/i915/selftests/i915_live_selftests.h b/drivers/gpu/drm/i915/selftests/i915_live_selftests.h
> index 2b31a4ee0b4c..a841d3f9bedc 100644
> --- a/drivers/gpu/drm/i915/selftests/i915_live_selftests.h
> +++ b/drivers/gpu/drm/i915/selftests/i915_live_selftests.h
> @@ -15,6 +15,7 @@ selftest(workarounds, intel_workarounds_live_selftests)
>   selftest(timelines, intel_timeline_live_selftests)
>   selftest(requests, i915_request_live_selftests)
>   selftest(active, i915_active_live_selftests)
> +selftest(gt_contexts, intel_context_live_selftests)
>   selftest(objects, i915_gem_object_live_selftests)
>   selftest(mman, i915_gem_mman_live_selftests)
>   selftest(dmabuf, i915_gem_dmabuf_live_selftests)
> @@ -24,7 +25,7 @@ selftest(gtt, i915_gem_gtt_live_selftests)
>   selftest(gem, i915_gem_live_selftests)
>   selftest(evict, i915_gem_evict_live_selftests)
>   selftest(hugepages, i915_gem_huge_page_live_selftests)
> -selftest(contexts, i915_gem_context_live_selftests)
> +selftest(gem_contexts, i915_gem_context_live_selftests)
>   selftest(blt, i915_gem_object_blt_live_selftests)
>   selftest(client, i915_gem_client_blt_live_selftests)
>   selftest(reset, intel_reset_live_selftests)
> 

Regards,

Tvrtko
Chris Wilson July 29, 2019, 8:54 a.m. UTC | #2
Quoting Tvrtko Ursulin (2019-07-29 08:46:59)
> 
> On 25/07/2019 14:14, Chris Wilson wrote:
> > +static int __live_remote_context(struct intel_engine_cs *engine,
> > +                              struct i915_gem_context *fixme)
> > +{
> > +     struct intel_context *local, *remote;
> > +     struct i915_request *rq;
> > +     int pass;
> > +     int err;
> > +
> > +     /*
> > +      * Check that our idle barriers do not interfere with normal
> > +      * activity tracking. In particular, check that operating
> > +      * on the context image remotely (intel_context_prepare_remote_request)
> > +      * which inserts foriegn fences into intel_context.active are not
> 
> typo in foreign
> 
> "operating ... are not .." ? Foreign fences are not clobbered?

, does not clobber the active tracking.
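
i.e. the corrected comment would read (suggested wording, folding in
the typo fix):

	/*
	 * Check that our idle barriers do not interfere with normal
	 * activity tracking. In particular, check that operating on
	 * the context image remotely (intel_context_prepare_remote_request),
	 * which inserts foreign fences into intel_context.active, does
	 * not clobber the active tracking.
	 */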

> 
> > +      * clobbered.
> > +      */
> > +
> > +     remote = intel_context_create(fixme, engine);
> > +     if (!remote)
> > +             return -ENOMEM;
> > +
> > +     local = intel_context_create(fixme, engine);
> > +     if (!local) {
> > +             err = -ENOMEM;
> > +             goto err_remote;
> > +     }
> > +
> > +     for (pass = 0; pass <= 2; pass++) {
> > +             err = intel_context_pin(remote);
> > +             if (err)
> > +                     goto err_local;
> > +
> > +             rq = intel_context_create_request(local);
> > +             if (IS_ERR(rq)) {
> > +                     err = PTR_ERR(rq);
> > +                     goto err_unpin;
> > +             }
> > +
> > +             err = intel_context_prepare_remote_request(remote, rq);
> > +             if (err) {
> > +                     i915_request_add(rq);
> > +                     goto err_unpin;
> > +             }
> > +
> > +             err = request_sync(rq);
> > +             if (err)
> > +                     goto err_unpin;
> > +
> > +             intel_context_unpin(remote);
> > +             err = intel_context_pin(remote);
> 
> unpin-pin is to trigger transfer of idle barriers and maybe trigger some 
> asserts?

unpin is to trigger the idle-barrier. The pin is just the start of the
next phase with another context. At first I tried doing two remote
requests within one pin-phase, but that doesn't hit the bug. It needed
the idle barrier in the middle of the test, not between passes.

v2 wrapped it with another subroutine so the unpin-pin is not so
glaringly obvious.
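
A hypothetical sketch of that v2 helper (name __remote_sync and details
assumed from the description above, not taken from posted code); the
kernel-context pass would call it with engine->kernel_context as the
local context:

static int __remote_sync(struct intel_context *local,
			 struct intel_context *remote)
{
	struct i915_request *rq;
	int err;

	/* One pin-phase: the final unpin is what queues the
	 * idle-barrier the test needs in the middle of each pass. */
	err = intel_context_pin(remote);
	if (err)
		return err;

	rq = intel_context_create_request(local);
	if (IS_ERR(rq)) {
		err = PTR_ERR(rq);
		goto unpin;
	}

	err = intel_context_prepare_remote_request(remote, rq);
	if (err) {
		i915_request_add(rq);
		goto unpin;
	}

	err = request_sync(rq);

unpin:
	intel_context_unpin(remote);
	return err;
}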

> > +             if (err)
> > +                     goto err_local;
> > +
> > +             rq = i915_request_create(engine->kernel_context);
> 
> Why a request on the kernel context here, a third context?

The kernel_context is most important since that's the one used by the
idle barrier. I included the normal context as well for completeness as
the intel_context_prepare_remote_request() interface should not assume
it is working from the kernel context.
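
For instance, a comment along these lines (wording assumed) above the
kernel-context request would capture that:

		/*
		 * The kernel context is the interesting case: the
		 * idle-barrier itself lives on the kernel timeline, so
		 * a remote request from here is the one that may alias
		 * it. The user context above is kept for completeness,
		 * as intel_context_prepare_remote_request() must not
		 * assume it is called from the kernel context.
		 */
		rq = i915_request_create(engine->kernel_context);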

> > +             if (IS_ERR(rq)) {
> > +                     err = PTR_ERR(rq);
> > +                     goto err_unpin;
> > +             }
> > +
> > +             err = intel_context_prepare_remote_request(remote, rq);
> > +             if (err) {
> > +                     i915_request_add(rq);
> > +                     goto err_unpin;
> > +             }
> > +
> > +             err = request_sync(rq);
> > +             if (err)
> > +                     goto err_unpin;
> > +
> > +             intel_context_unpin(remote);
> > +
> > +             if (i915_active_is_idle(&remote->active)) {
> > +                     pr_err("remote context is not active; expected idle-barrier (pass %d)\n", pass);
> > +                     err = -EINVAL;
> > +                     goto err_local;
> > +             }
> > +     }
> > +
> > +     goto err_local;
> > +
> > +err_unpin:
> > +     intel_context_unpin(remote);
> > +err_local:
> > +     intel_context_put(local);
> > +err_remote:
> > +     intel_context_put(remote);
> > +     return err;
> > +}
> > +
> > +static int live_remote_context(void *arg)
> > +{
> > +     struct intel_gt *gt = arg;
> > +     struct intel_engine_cs *engine;
> > +     struct i915_gem_context *fixme;
> > +     enum intel_engine_id id;
> > +     struct drm_file *file;
> > +     int err = 0;
> > +
> > +     file = mock_file(gt->i915);
> > +     if (IS_ERR(file))
> > +             return PTR_ERR(file);
> > +
> > +     mutex_lock(&gt->i915->drm.struct_mutex);
> > +
> > +     fixme = live_context(gt->i915, file);
> > +     if (!fixme) {
> > +             err = -ENOMEM;
> > +             goto unlock;
> > +     }
> > +
> > +     for_each_engine(engine, gt->i915, id) {
> > +             err = __live_remote_context(engine, fixme);
> > +             if (err)
> > +                     break;
> > +
> > +             err = igt_flush_test(gt->i915, I915_WAIT_LOCKED);
> > +             if (err)
> > +                     break;
> > +     }
> > +
> > +unlock:
> > +     mutex_unlock(&gt->i915->drm.struct_mutex);
> > +     mock_file_free(gt->i915, file);
> > +     return err;
> > +}
> > +
> > +int intel_context_live_selftests(struct drm_i915_private *i915)
> > +{
> > +     static const struct i915_subtest tests[] = {
> > +             SUBTEST(live_active_context),
> > +             SUBTEST(live_remote_context),
> > +     };
> > +     struct intel_gt *gt = &i915->gt;
> > +
> > +     if (intel_gt_is_wedged(gt))
> > +             return 0;
> > +
> > +     return intel_gt_live_subtests(tests, gt);
> > +}
> > diff --git a/drivers/gpu/drm/i915/i915_active.c b/drivers/gpu/drm/i915/i915_active.c
> > index 13f304a29fc8..e04afb519264 100644
> > --- a/drivers/gpu/drm/i915/i915_active.c
> > +++ b/drivers/gpu/drm/i915/i915_active.c
> > @@ -184,6 +184,7 @@ active_instance(struct i915_active *ref, u64 idx)
> >       ref->cache = node;
> >       mutex_unlock(&ref->mutex);
> >   
> > +     BUILD_BUG_ON(offsetof(typeof(*node), base));
> >       return &node->base;
> >   }
> >   
> > @@ -212,6 +213,8 @@ int i915_active_ref(struct i915_active *ref,
> >       struct i915_active_request *active;
> >       int err;
> >   
> > +     GEM_BUG_ON(!timeline); /* reserved for idle-barrier */
> > +
> >       /* Prevent reaping in case we malloc/wait while building the tree */
> >       err = i915_active_acquire(ref);
> >       if (err)
> > @@ -222,6 +225,7 @@ int i915_active_ref(struct i915_active *ref,
> >               err = -ENOMEM;
> >               goto out;
> >       }
> > +     GEM_BUG_ON(IS_ERR(active->request));
> >   
> >       if (!i915_active_request_isset(active))
> >               atomic_inc(&ref->count);
> > @@ -342,6 +346,34 @@ void i915_active_fini(struct i915_active *ref)
> >   }
> >   #endif
> >   
> > +static struct active_node *idle_barrier(struct i915_active *ref)
> > +{
> > +     struct active_node *idle = NULL;
> > +     struct rb_node *rb;
> > +
> > +     if (RB_EMPTY_ROOT(&ref->tree))
> > +             return NULL;
> > +
> > +     mutex_lock(&ref->mutex);
> > +     for (rb = rb_first(&ref->tree); rb; rb = rb_next(rb)) {
> > +             struct active_node *node;
> > +
> > +             node = rb_entry(rb, typeof(*node), node);
> > +             if (node->timeline)
> > +                     break;
> > +
> > +             if (!i915_active_request_isset(&node->base)) {
> > +                     GEM_BUG_ON(!list_empty(&node->base.link));
> > +                     rb_erase(rb, &ref->tree);
> > +                     idle = node;
> > +                     break;
> > +             }
> 
> Under what circumstances does the walk continue? There can be two idle 
> barriers (timeline == 0) in the tree?

Yes, there can be more than one (virtual engines). It should be the case
that when i915_active becomes idle (all idle barriers are idle) the
tree is reaped. But... if we overlap active phases, we will get multiple
idle barriers, some idle, some active, which we want to reuse to avoid
having a potentially unbounded allocation.
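
That rationale could sit directly above idle_barrier(), e.g. (suggested
wording, not posted code):

/*
 * There can be more than one idle-barrier (timeline == 0) in the
 * tree, e.g. with virtual engines or when active phases overlap:
 * some idle, some still active. Skip over the still-busy barriers
 * at the front of the tree and reuse an idle one if we can, so
 * that overlapping phases do not grow the tree without bound.
 */
static struct active_node *idle_barrier(struct i915_active *ref)
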
-Chris
Tvrtko Ursulin July 29, 2019, 9:43 a.m. UTC | #3
On 29/07/2019 09:54, Chris Wilson wrote:
> Quoting Tvrtko Ursulin (2019-07-29 08:46:59)
>>
>> On 25/07/2019 14:14, Chris Wilson wrote:
>>> +static int __live_remote_context(struct intel_engine_cs *engine,
>>> +                              struct i915_gem_context *fixme)
>>> +{
>>> +     struct intel_context *local, *remote;
>>> +     struct i915_request *rq;
>>> +     int pass;
>>> +     int err;
>>> +
>>> +     /*
>>> +      * Check that our idle barriers do not interfere with normal
>>> +      * activity tracking. In particular, check that operating
>>> +      * on the context image remotely (intel_context_prepare_remote_request)
>>> +      * which inserts foriegn fences into intel_context.active are not
>>
>> typo in foreign
>>
>> "operating ... are not .." ? Foreign fences are not clobbered?
> 
> , does not clobber the active tracking.
> 
>>
>>> +      * clobbered.
>>> +      */
>>> +
>>> +     remote = intel_context_create(fixme, engine);
>>> +     if (!remote)
>>> +             return -ENOMEM;
>>> +
>>> +     local = intel_context_create(fixme, engine);
>>> +     if (!local) {
>>> +             err = -ENOMEM;
>>> +             goto err_remote;
>>> +     }
>>> +
>>> +     for (pass = 0; pass <= 2; pass++) {
>>> +             err = intel_context_pin(remote);
>>> +             if (err)
>>> +                     goto err_local;
>>> +
>>> +             rq = intel_context_create_request(local);
>>> +             if (IS_ERR(rq)) {
>>> +                     err = PTR_ERR(rq);
>>> +                     goto err_unpin;
>>> +             }
>>> +
>>> +             err = intel_context_prepare_remote_request(remote, rq);
>>> +             if (err) {
>>> +                     i915_request_add(rq);
>>> +                     goto err_unpin;
>>> +             }
>>> +
>>> +             err = request_sync(rq);
>>> +             if (err)
>>> +                     goto err_unpin;
>>> +
>>> +             intel_context_unpin(remote);
>>> +             err = intel_context_pin(remote);
>>
>> unpin-pin is to trigger transfer of idle barriers and maybe trigger some
>> asserts?
> 
> unpin is to trigger the idle-barrier. The pin is just the start of the
> next phase with another context. At first I tried doing two remote
> requests within one pin-phase, but that doesn't hit the bug. It needed
> the idle barrier in the middle of the test, not between passes.
> 
> v2 wrapped it with another subroutine so the unpin-pin is not so
> glaringly obvious.

v*2*? :D Hard to know without revisioning and multiple sends... I've 
found it now, I've been looking at a very different version here.

> 
>>> +             if (err)
>>> +                     goto err_local;
>>> +
>>> +             rq = i915_request_create(engine->kernel_context);
>>
>> Why a request on the kernel context here, a third context?
> 
> The kernel_context is most important since that's the one used by the
> idle barrier. I included the normal context as well for completeness as
> the intel_context_prepare_remote_request() interface should not assume
> it is working from the kernel context.

Most important = worthy of a comment? ;)

> 
>>> +             if (IS_ERR(rq)) {
>>> +                     err = PTR_ERR(rq);
>>> +                     goto err_unpin;
>>> +             }
>>> +
>>> +             err = intel_context_prepare_remote_request(remote, rq);
>>> +             if (err) {
>>> +                     i915_request_add(rq);
>>> +                     goto err_unpin;
>>> +             }
>>> +
>>> +             err = request_sync(rq);
>>> +             if (err)
>>> +                     goto err_unpin;
>>> +
>>> +             intel_context_unpin(remote);
>>> +
>>> +             if (i915_active_is_idle(&remote->active)) {
>>> +                     pr_err("remote context is not active; expected idle-barrier (pass %d)\n", pass);
>>> +                     err = -EINVAL;
>>> +                     goto err_local;
>>> +             }
>>> +     }
>>> +
>>> +     goto err_local;
>>> +
>>> +err_unpin:
>>> +     intel_context_unpin(remote);
>>> +err_local:
>>> +     intel_context_put(local);
>>> +err_remote:
>>> +     intel_context_put(remote);
>>> +     return err;
>>> +}
>>> +
>>> +static int live_remote_context(void *arg)
>>> +{
>>> +     struct intel_gt *gt = arg;
>>> +     struct intel_engine_cs *engine;
>>> +     struct i915_gem_context *fixme;
>>> +     enum intel_engine_id id;
>>> +     struct drm_file *file;
>>> +     int err = 0;
>>> +
>>> +     file = mock_file(gt->i915);
>>> +     if (IS_ERR(file))
>>> +             return PTR_ERR(file);
>>> +
>>> +     mutex_lock(&gt->i915->drm.struct_mutex);
>>> +
>>> +     fixme = live_context(gt->i915, file);
>>> +     if (!fixme) {
>>> +             err = -ENOMEM;
>>> +             goto unlock;
>>> +     }
>>> +
>>> +     for_each_engine(engine, gt->i915, id) {
>>> +             err = __live_remote_context(engine, fixme);
>>> +             if (err)
>>> +                     break;
>>> +
>>> +             err = igt_flush_test(gt->i915, I915_WAIT_LOCKED);
>>> +             if (err)
>>> +                     break;
>>> +     }
>>> +
>>> +unlock:
>>> +     mutex_unlock(&gt->i915->drm.struct_mutex);
>>> +     mock_file_free(gt->i915, file);
>>> +     return err;
>>> +}
>>> +
>>> +int intel_context_live_selftests(struct drm_i915_private *i915)
>>> +{
>>> +     static const struct i915_subtest tests[] = {
>>> +             SUBTEST(live_active_context),
>>> +             SUBTEST(live_remote_context),
>>> +     };
>>> +     struct intel_gt *gt = &i915->gt;
>>> +
>>> +     if (intel_gt_is_wedged(gt))
>>> +             return 0;
>>> +
>>> +     return intel_gt_live_subtests(tests, gt);
>>> +}
>>> diff --git a/drivers/gpu/drm/i915/i915_active.c b/drivers/gpu/drm/i915/i915_active.c
>>> index 13f304a29fc8..e04afb519264 100644
>>> --- a/drivers/gpu/drm/i915/i915_active.c
>>> +++ b/drivers/gpu/drm/i915/i915_active.c
>>> @@ -184,6 +184,7 @@ active_instance(struct i915_active *ref, u64 idx)
>>>        ref->cache = node;
>>>        mutex_unlock(&ref->mutex);
>>>    
>>> +     BUILD_BUG_ON(offsetof(typeof(*node), base));
>>>        return &node->base;
>>>    }
>>>    
>>> @@ -212,6 +213,8 @@ int i915_active_ref(struct i915_active *ref,
>>>        struct i915_active_request *active;
>>>        int err;
>>>    
>>> +     GEM_BUG_ON(!timeline); /* reserved for idle-barrier */
>>> +
>>>        /* Prevent reaping in case we malloc/wait while building the tree */
>>>        err = i915_active_acquire(ref);
>>>        if (err)
>>> @@ -222,6 +225,7 @@ int i915_active_ref(struct i915_active *ref,
>>>                err = -ENOMEM;
>>>                goto out;
>>>        }
>>> +     GEM_BUG_ON(IS_ERR(active->request));
>>>    
>>>        if (!i915_active_request_isset(active))
>>>                atomic_inc(&ref->count);
>>> @@ -342,6 +346,34 @@ void i915_active_fini(struct i915_active *ref)
>>>    }
>>>    #endif
>>>    
>>> +static struct active_node *idle_barrier(struct i915_active *ref)
>>> +{
>>> +     struct active_node *idle = NULL;
>>> +     struct rb_node *rb;
>>> +
>>> +     if (RB_EMPTY_ROOT(&ref->tree))
>>> +             return NULL;
>>> +
>>> +     mutex_lock(&ref->mutex);
>>> +     for (rb = rb_first(&ref->tree); rb; rb = rb_next(rb)) {
>>> +             struct active_node *node;
>>> +
>>> +             node = rb_entry(rb, typeof(*node), node);
>>> +             if (node->timeline)
>>> +                     break;
>>> +
>>> +             if (!i915_active_request_isset(&node->base)) {
>>> +                     GEM_BUG_ON(!list_empty(&node->base.link));
>>> +                     rb_erase(rb, &ref->tree);
>>> +                     idle = node;
>>> +                     break;
>>> +             }
>>
>> Under what circumstances does the walk continue? There can be two idle
>> barriers (timeline == 0) in the tree?
> 
> Yes, there can be more than one (virtual engines). It should be the case
> that when i915_active becomes idle (all idle barriers are idle) the
> tree is reaped. But... if we overlap active phases, we will get multiple
> idle barriers, some idle, some active, which we want to reuse to avoid
> having a potentially unbounded allocation.

This feels comment-worthy as well.

Regards,

Tvrtko