diff mbox series

[09/11] drm/i915/gem: Consolidate ctx->engines[] release

Message ID 20200225082233.274530-9-chris@chris-wilson.co.uk (mailing list archive)
State New, archived
Headers show
Series [01/11] drm/i915: Drop assertion that active->fence is unchanged | expand

Commit Message

Chris Wilson Feb. 25, 2020, 8:22 a.m. UTC
Use the same engines_idle_release() routine for cleaning all old
ctx->engines[] state, closing any potential races with concurrent execbuf
submission.

Closes: https://gitlab.freedesktop.org/drm/intel/issues/1241
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
Reorder set-closed/engine_idle_release to avoid premature killing
Take a reference to prevent racing context free with engine cleanup
---
 drivers/gpu/drm/i915/gem/i915_gem_context.c | 199 +++++++++++---------
 drivers/gpu/drm/i915/gem/i915_gem_context.h |   1 -
 2 files changed, 108 insertions(+), 92 deletions(-)

Comments

Tvrtko Ursulin Feb. 26, 2020, 4:41 p.m. UTC | #1
On 25/02/2020 08:22, Chris Wilson wrote:
> Use the same engine_idle_release() routine for cleaning all old
> ctx->engine[] state, closing any potential races with concurrent execbuf
> submission.
> 
> Closes: https://gitlab.freedesktop.org/drm/intel/issues/1241
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
> Reorder set-closed/engine_idle_release to avoid premature killing
> Take a reference to prevent racing context free with engine cleanup
> ---
>   drivers/gpu/drm/i915/gem/i915_gem_context.c | 199 +++++++++++---------
>   drivers/gpu/drm/i915/gem/i915_gem_context.h |   1 -
>   2 files changed, 108 insertions(+), 92 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c
> index adcebf22a3d3..0862a77d81ed 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
> @@ -243,7 +243,6 @@ static void __free_engines(struct i915_gem_engines *e, unsigned int count)
>   		if (!e->engines[count])
>   			continue;
>   
> -		RCU_INIT_POINTER(e->engines[count]->gem_context, NULL);
>   		intel_context_put(e->engines[count]);
>   	}
>   	kfree(e);
> @@ -256,7 +255,11 @@ static void free_engines(struct i915_gem_engines *e)
>   
>   static void free_engines_rcu(struct rcu_head *rcu)
>   {
> -	free_engines(container_of(rcu, struct i915_gem_engines, rcu));
> +	struct i915_gem_engines *engines =
> +		container_of(rcu, struct i915_gem_engines, rcu);
> +
> +	i915_sw_fence_fini(&engines->fence);

This was missing so far?

> +	free_engines(engines);
>   }
>   
>   static struct i915_gem_engines *default_engines(struct i915_gem_context *ctx)
> @@ -270,8 +273,6 @@ static struct i915_gem_engines *default_engines(struct i915_gem_context *ctx)
>   	if (!e)
>   		return ERR_PTR(-ENOMEM);
>   
> -	e->ctx = ctx;
> -
>   	for_each_engine(engine, gt, id) {
>   		struct intel_context *ce;
>   
> @@ -305,7 +306,6 @@ static void i915_gem_context_free(struct i915_gem_context *ctx)
>   	list_del(&ctx->link);
>   	spin_unlock(&ctx->i915->gem.contexts.lock);
>   
> -	free_engines(rcu_access_pointer(ctx->engines));
>   	mutex_destroy(&ctx->engines_mutex);
>   
>   	if (ctx->timeline)
> @@ -492,30 +492,110 @@ static void kill_engines(struct i915_gem_engines *engines)
>   static void kill_stale_engines(struct i915_gem_context *ctx)
>   {
>   	struct i915_gem_engines *pos, *next;
> -	unsigned long flags;
>   
> -	spin_lock_irqsave(&ctx->stale.lock, flags);
> +	spin_lock_irq(&ctx->stale.lock);
> +	GEM_BUG_ON(!i915_gem_context_is_closed(ctx));
>   	list_for_each_entry_safe(pos, next, &ctx->stale.engines, link) {
> -		if (!i915_sw_fence_await(&pos->fence))
> +		if (!i915_sw_fence_await(&pos->fence)) {
> +			list_del_init(&pos->link);
>   			continue;
> +		}
>   
> -		spin_unlock_irqrestore(&ctx->stale.lock, flags);
> +		spin_unlock_irq(&ctx->stale.lock);
>   
>   		kill_engines(pos);
>   
> -		spin_lock_irqsave(&ctx->stale.lock, flags);
> +		spin_lock_irq(&ctx->stale.lock);
> +		GEM_BUG_ON(i915_sw_fence_signaled(&pos->fence));
>   		list_safe_reset_next(pos, next, link);
>   		list_del_init(&pos->link); /* decouple from FENCE_COMPLETE */
>   
>   		i915_sw_fence_complete(&pos->fence);
>   	}
> -	spin_unlock_irqrestore(&ctx->stale.lock, flags);
> +	spin_unlock_irq(&ctx->stale.lock);
>   }
>   
>   static void kill_context(struct i915_gem_context *ctx)
>   {
>   	kill_stale_engines(ctx);
> -	kill_engines(__context_engines_static(ctx));
> +}
> +
> +static int engines_notify(struct i915_sw_fence *fence,
> +			  enum i915_sw_fence_notify state)
> +{
> +	struct i915_gem_engines *engines =
> +		container_of(fence, typeof(*engines), fence);
> +
> +	switch (state) {
> +	case FENCE_COMPLETE:
> +		if (!list_empty(&engines->link)) {
> +			struct i915_gem_context *ctx = engines->ctx;
> +			unsigned long flags;
> +
> +			spin_lock_irqsave(&ctx->stale.lock, flags);
> +			list_del(&engines->link);
> +			spin_unlock_irqrestore(&ctx->stale.lock, flags);
> +		}
> +		break;
> +
> +	case FENCE_FREE:
> +		i915_gem_context_put(engines->ctx);

This put can go under FENCE_COMPLETE?

> +		init_rcu_head(&engines->rcu);
> +		call_rcu(&engines->rcu, free_engines_rcu);
> +		break;
> +	}
> +
> +	return NOTIFY_DONE;
> +}
> +
> +static void engines_idle_release(struct i915_gem_context *ctx,
> +				 struct i915_gem_engines *engines)
> +{
> +	struct i915_gem_engines_iter it;
> +	struct intel_context *ce;
> +
> +	i915_sw_fence_init(&engines->fence, engines_notify);
> +	INIT_LIST_HEAD(&engines->link);
> +
> +	engines->ctx = i915_gem_context_get(ctx);
> +
> +	for_each_gem_engine(ce, engines, it) {
> +		int err = 0;
> +
> +		RCU_INIT_POINTER(ce->gem_context, NULL);
> +
> +		if (!ce->timeline) { /* XXX serialisation with execbuf? */
> +			intel_context_set_banned(ce);

What is banned for?

> +			continue;
> +		}
> +
> +		mutex_lock(&ce->timeline->mutex);
> +		if (!list_empty(&ce->timeline->requests)) {
> +			struct i915_request *rq;
> +
> +			rq = list_last_entry(&ce->timeline->requests,
> +					     typeof(*rq),
> +					     link);

Why no more i915_active_fence_get?

> +
> +			err = i915_sw_fence_await_dma_fence(&engines->fence,
> +							    &rq->fence, 0,
> +							    GFP_KERNEL);
> +		}
> +		mutex_unlock(&ce->timeline->mutex);
> +		if (err < 0)
> +			goto kill;
> +	}
> +
> +	spin_lock_irq(&engines->ctx->stale.lock);
> +	if (!i915_gem_context_is_closed(engines->ctx))
> +		list_add_tail(&engines->link, &engines->ctx->stale.engines);
> +	spin_unlock_irq(&engines->ctx->stale.lock);
> +
> +kill:
> +	if (list_empty(&engines->link)) /* raced, already closed */
> +		kill_engines(engines);
> +
> +	i915_sw_fence_commit(&engines->fence);
>   }
>   
>   static void set_closed_name(struct i915_gem_context *ctx)
> @@ -539,11 +619,16 @@ static void context_close(struct i915_gem_context *ctx)
>   {
>   	struct i915_address_space *vm;
>   
> +	/* Flush any concurrent set_engines() */
> +	mutex_lock(&ctx->engines_mutex);
> +	engines_idle_release(ctx, rcu_replace_pointer(ctx->engines, NULL, 1));
>   	i915_gem_context_set_closed(ctx);
> -	set_closed_name(ctx);
> +	mutex_unlock(&ctx->engines_mutex);
>   
>   	mutex_lock(&ctx->mutex);
>   
> +	set_closed_name(ctx);
> +
>   	vm = i915_gem_context_vm(ctx);
>   	if (vm)
>   		i915_vm_close(vm);
> @@ -1562,77 +1647,6 @@ static const i915_user_extension_fn set_engines__extensions[] = {
>   	[I915_CONTEXT_ENGINES_EXT_BOND] = set_engines__bond,
>   };
>   
> -static int engines_notify(struct i915_sw_fence *fence,
> -			  enum i915_sw_fence_notify state)
> -{
> -	struct i915_gem_engines *engines =
> -		container_of(fence, typeof(*engines), fence);
> -
> -	switch (state) {
> -	case FENCE_COMPLETE:
> -		if (!list_empty(&engines->link)) {
> -			struct i915_gem_context *ctx = engines->ctx;
> -			unsigned long flags;
> -
> -			spin_lock_irqsave(&ctx->stale.lock, flags);
> -			list_del(&engines->link);
> -			spin_unlock_irqrestore(&ctx->stale.lock, flags);
> -		}
> -		break;
> -
> -	case FENCE_FREE:
> -		init_rcu_head(&engines->rcu);
> -		call_rcu(&engines->rcu, free_engines_rcu);
> -		break;
> -	}
> -
> -	return NOTIFY_DONE;
> -}
> -
> -static void engines_idle_release(struct i915_gem_engines *engines)
> -{
> -	struct i915_gem_engines_iter it;
> -	struct intel_context *ce;
> -	unsigned long flags;
> -
> -	GEM_BUG_ON(!engines);
> -	i915_sw_fence_init(&engines->fence, engines_notify);
> -
> -	INIT_LIST_HEAD(&engines->link);
> -	spin_lock_irqsave(&engines->ctx->stale.lock, flags);
> -	if (!i915_gem_context_is_closed(engines->ctx))
> -		list_add(&engines->link, &engines->ctx->stale.engines);
> -	spin_unlock_irqrestore(&engines->ctx->stale.lock, flags);
> -	if (list_empty(&engines->link)) /* raced, already closed */
> -		goto kill;
> -
> -	for_each_gem_engine(ce, engines, it) {
> -		struct dma_fence *fence;
> -		int err;
> -
> -		if (!ce->timeline)
> -			continue;
> -
> -		fence = i915_active_fence_get(&ce->timeline->last_request);
> -		if (!fence)
> -			continue;
> -
> -		err = i915_sw_fence_await_dma_fence(&engines->fence,
> -						    fence, 0,
> -						    GFP_KERNEL);
> -
> -		dma_fence_put(fence);
> -		if (err < 0)
> -			goto kill;
> -	}
> -	goto out;
> -
> -kill:
> -	kill_engines(engines);
> -out:
> -	i915_sw_fence_commit(&engines->fence);
> -}
> -
>   static int
>   set_engines(struct i915_gem_context *ctx,
>   	    const struct drm_i915_gem_context_param *args)
> @@ -1675,8 +1689,6 @@ set_engines(struct i915_gem_context *ctx,
>   	if (!set.engines)
>   		return -ENOMEM;
>   
> -	set.engines->ctx = ctx;
> -
>   	for (n = 0; n < num_engines; n++) {
>   		struct i915_engine_class_instance ci;
>   		struct intel_engine_cs *engine;
> @@ -1729,6 +1741,11 @@ set_engines(struct i915_gem_context *ctx,
>   
>   replace:
>   	mutex_lock(&ctx->engines_mutex);
> +	if (i915_gem_context_is_closed(ctx)) {
> +		mutex_unlock(&ctx->engines_mutex);
> +		free_engines(set.engines);
> +		return -ENOENT;
> +	}
>   	if (args->size)
>   		i915_gem_context_set_user_engines(ctx);
>   	else
> @@ -1737,7 +1754,7 @@ set_engines(struct i915_gem_context *ctx,
>   	mutex_unlock(&ctx->engines_mutex);
>   
>   	/* Keep track of old engine sets for kill_context() */
> -	engines_idle_release(set.engines);
> +	engines_idle_release(ctx, set.engines);
>   
>   	return 0;
>   }
> @@ -1995,8 +2012,6 @@ static int clone_engines(struct i915_gem_context *dst,
>   	if (!clone)
>   		goto err_unlock;
>   
> -	clone->ctx = dst;
> -
>   	for (n = 0; n < e->num_engines; n++) {
>   		struct intel_engine_cs *engine;
>   
> @@ -2033,8 +2048,7 @@ static int clone_engines(struct i915_gem_context *dst,
>   	i915_gem_context_unlock_engines(src);
>   
>   	/* Serialised by constructor */
> -	free_engines(__context_engines_static(dst));
> -	RCU_INIT_POINTER(dst->engines, clone);
> +	engines_idle_release(dst, rcu_replace_pointer(dst->engines, clone, 1));
>   	if (user_engines)
>   		i915_gem_context_set_user_engines(dst);
>   	else
> @@ -2461,6 +2475,9 @@ i915_gem_engines_iter_next(struct i915_gem_engines_iter *it)
>   	const struct i915_gem_engines *e = it->engines;
>   	struct intel_context *ctx;
>   
> +	if (unlikely(!e))
> +		return NULL;
> +
>   	do {
>   		if (it->idx >= e->num_engines)
>   			return NULL;
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.h b/drivers/gpu/drm/i915/gem/i915_gem_context.h
> index 3ae61a355d87..57b7ae2893e1 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_context.h
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.h
> @@ -207,7 +207,6 @@ static inline void
>   i915_gem_engines_iter_init(struct i915_gem_engines_iter *it,
>   			   struct i915_gem_engines *engines)
>   {
> -	GEM_BUG_ON(!engines);
>   	it->engines = engines;
>   	it->idx = 0;
>   }
> 

Regards,

Tvrtko
Chris Wilson Feb. 26, 2020, 5:06 p.m. UTC | #2
Quoting Tvrtko Ursulin (2020-02-26 16:41:03)
> 
> On 25/02/2020 08:22, Chris Wilson wrote:
> > Use the same engine_idle_release() routine for cleaning all old
> > ctx->engine[] state, closing any potential races with concurrent execbuf
> > submission.
> > 
> > Closes: https://gitlab.freedesktop.org/drm/intel/issues/1241
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > ---
> > Reorder set-closed/engine_idle_release to avoid premature killing
> > Take a reference to prevent racing context free with engine cleanup
> > ---
> >   drivers/gpu/drm/i915/gem/i915_gem_context.c | 199 +++++++++++---------
> >   drivers/gpu/drm/i915/gem/i915_gem_context.h |   1 -
> >   2 files changed, 108 insertions(+), 92 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c
> > index adcebf22a3d3..0862a77d81ed 100644
> > --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
> > +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
> > @@ -243,7 +243,6 @@ static void __free_engines(struct i915_gem_engines *e, unsigned int count)
> >               if (!e->engines[count])
> >                       continue;
> >   
> > -             RCU_INIT_POINTER(e->engines[count]->gem_context, NULL);
> >               intel_context_put(e->engines[count]);
> >       }
> >       kfree(e);
> > @@ -256,7 +255,11 @@ static void free_engines(struct i915_gem_engines *e)
> >   
> >   static void free_engines_rcu(struct rcu_head *rcu)
> >   {
> > -     free_engines(container_of(rcu, struct i915_gem_engines, rcu));
> > +     struct i915_gem_engines *engines =
> > +             container_of(rcu, struct i915_gem_engines, rcu);
> > +
> > +     i915_sw_fence_fini(&engines->fence);
> 
> This was missing so far?

Yes. Completely missed it until throwing it in a loop long enough for
kmalloc recycling to catch up. And having ODEBUG enabled helps!

> > +static int engines_notify(struct i915_sw_fence *fence,
> > +                       enum i915_sw_fence_notify state)
> > +{
> > +     struct i915_gem_engines *engines =
> > +             container_of(fence, typeof(*engines), fence);
> > +
> > +     switch (state) {
> > +     case FENCE_COMPLETE:
> > +             if (!list_empty(&engines->link)) {
> > +                     struct i915_gem_context *ctx = engines->ctx;
> > +                     unsigned long flags;
> > +
> > +                     spin_lock_irqsave(&ctx->stale.lock, flags);
> > +                     list_del(&engines->link);
> > +                     spin_unlock_irqrestore(&ctx->stale.lock, flags);
> > +             }
> > +             break;
> > +
> > +     case FENCE_FREE:
> > +             i915_gem_context_put(engines->ctx);
> 
> This put can go under FENCE_COMPLETE?

Yes. Either works, I thought it was more of a release operation. But if
you would rather FENCE_FREE == just call_rcu(free_engines_rcu), I can see
the elegance in that.

> > +             init_rcu_head(&engines->rcu);
> > +             call_rcu(&engines->rcu, free_engines_rcu);
> > +             break;
> > +     }
> > +
> > +     return NOTIFY_DONE;
> > +}
> > +
> > +static void engines_idle_release(struct i915_gem_context *ctx,
> > +                              struct i915_gem_engines *engines)
> > +{
> > +     struct i915_gem_engines_iter it;
> > +     struct intel_context *ce;
> > +
> > +     i915_sw_fence_init(&engines->fence, engines_notify);
> > +     INIT_LIST_HEAD(&engines->link);
> > +
> > +     engines->ctx = i915_gem_context_get(ctx);
> > +
> > +     for_each_gem_engine(ce, engines, it) {
> > +             int err = 0;
> > +
> > +             RCU_INIT_POINTER(ce->gem_context, NULL);
> > +
> > +             if (!ce->timeline) { /* XXX serialisation with execbuf? */
> > +                     intel_context_set_banned(ce);
> 
> What is banned for?

Banned is how we prevent further execution. The problem here is making
sure we catch concurrent execbuf allocating/pinning the context. This
does not, and leaves a window between the !ce->timeline check and
set_banned in which the other thread could sneak in with the hanging batch :|

On the other hand, we don't want to mark the context as banned too
early. So we unfortunately can't mark it unconditionally.

> > +                     continue;
> > +             }
> > +
> > +             mutex_lock(&ce->timeline->mutex);
> > +             if (!list_empty(&ce->timeline->requests)) {
> > +                     struct i915_request *rq;
> > +
> > +                     rq = list_last_entry(&ce->timeline->requests,
> > +                                          typeof(*rq),
> > +                                          link);
> 
> Why no more i915_active_fence_get?

I was looking for something concrete with which we can serialise with
execbuf, the timeline mutex is one and we can check for a late ban
inside execbuf.

But there's still the tiny window above.

Hmm. Actually the ce->pin_mutex might work^Whelp for execbuf serialisation.
Not by itself it won't though. But it should be able to close the
!ce->timeline hole...
-Chris
diff mbox series

Patch

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c
index adcebf22a3d3..0862a77d81ed 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
@@ -243,7 +243,6 @@  static void __free_engines(struct i915_gem_engines *e, unsigned int count)
 		if (!e->engines[count])
 			continue;
 
-		RCU_INIT_POINTER(e->engines[count]->gem_context, NULL);
 		intel_context_put(e->engines[count]);
 	}
 	kfree(e);
@@ -256,7 +255,11 @@  static void free_engines(struct i915_gem_engines *e)
 
 static void free_engines_rcu(struct rcu_head *rcu)
 {
-	free_engines(container_of(rcu, struct i915_gem_engines, rcu));
+	struct i915_gem_engines *engines =
+		container_of(rcu, struct i915_gem_engines, rcu);
+
+	i915_sw_fence_fini(&engines->fence);
+	free_engines(engines);
 }
 
 static struct i915_gem_engines *default_engines(struct i915_gem_context *ctx)
@@ -270,8 +273,6 @@  static struct i915_gem_engines *default_engines(struct i915_gem_context *ctx)
 	if (!e)
 		return ERR_PTR(-ENOMEM);
 
-	e->ctx = ctx;
-
 	for_each_engine(engine, gt, id) {
 		struct intel_context *ce;
 
@@ -305,7 +306,6 @@  static void i915_gem_context_free(struct i915_gem_context *ctx)
 	list_del(&ctx->link);
 	spin_unlock(&ctx->i915->gem.contexts.lock);
 
-	free_engines(rcu_access_pointer(ctx->engines));
 	mutex_destroy(&ctx->engines_mutex);
 
 	if (ctx->timeline)
@@ -492,30 +492,110 @@  static void kill_engines(struct i915_gem_engines *engines)
 static void kill_stale_engines(struct i915_gem_context *ctx)
 {
 	struct i915_gem_engines *pos, *next;
-	unsigned long flags;
 
-	spin_lock_irqsave(&ctx->stale.lock, flags);
+	spin_lock_irq(&ctx->stale.lock);
+	GEM_BUG_ON(!i915_gem_context_is_closed(ctx));
 	list_for_each_entry_safe(pos, next, &ctx->stale.engines, link) {
-		if (!i915_sw_fence_await(&pos->fence))
+		if (!i915_sw_fence_await(&pos->fence)) {
+			list_del_init(&pos->link);
 			continue;
+		}
 
-		spin_unlock_irqrestore(&ctx->stale.lock, flags);
+		spin_unlock_irq(&ctx->stale.lock);
 
 		kill_engines(pos);
 
-		spin_lock_irqsave(&ctx->stale.lock, flags);
+		spin_lock_irq(&ctx->stale.lock);
+		GEM_BUG_ON(i915_sw_fence_signaled(&pos->fence));
 		list_safe_reset_next(pos, next, link);
 		list_del_init(&pos->link); /* decouple from FENCE_COMPLETE */
 
 		i915_sw_fence_complete(&pos->fence);
 	}
-	spin_unlock_irqrestore(&ctx->stale.lock, flags);
+	spin_unlock_irq(&ctx->stale.lock);
 }
 
 static void kill_context(struct i915_gem_context *ctx)
 {
 	kill_stale_engines(ctx);
-	kill_engines(__context_engines_static(ctx));
+}
+
+static int engines_notify(struct i915_sw_fence *fence,
+			  enum i915_sw_fence_notify state)
+{
+	struct i915_gem_engines *engines =
+		container_of(fence, typeof(*engines), fence);
+
+	switch (state) {
+	case FENCE_COMPLETE:
+		if (!list_empty(&engines->link)) {
+			struct i915_gem_context *ctx = engines->ctx;
+			unsigned long flags;
+
+			spin_lock_irqsave(&ctx->stale.lock, flags);
+			list_del(&engines->link);
+			spin_unlock_irqrestore(&ctx->stale.lock, flags);
+		}
+		break;
+
+	case FENCE_FREE:
+		i915_gem_context_put(engines->ctx);
+		init_rcu_head(&engines->rcu);
+		call_rcu(&engines->rcu, free_engines_rcu);
+		break;
+	}
+
+	return NOTIFY_DONE;
+}
+
+static void engines_idle_release(struct i915_gem_context *ctx,
+				 struct i915_gem_engines *engines)
+{
+	struct i915_gem_engines_iter it;
+	struct intel_context *ce;
+
+	i915_sw_fence_init(&engines->fence, engines_notify);
+	INIT_LIST_HEAD(&engines->link);
+
+	engines->ctx = i915_gem_context_get(ctx);
+
+	for_each_gem_engine(ce, engines, it) {
+		int err = 0;
+
+		RCU_INIT_POINTER(ce->gem_context, NULL);
+
+		if (!ce->timeline) { /* XXX serialisation with execbuf? */
+			intel_context_set_banned(ce);
+			continue;
+		}
+
+		mutex_lock(&ce->timeline->mutex);
+		if (!list_empty(&ce->timeline->requests)) {
+			struct i915_request *rq;
+
+			rq = list_last_entry(&ce->timeline->requests,
+					     typeof(*rq),
+					     link);
+
+			err = i915_sw_fence_await_dma_fence(&engines->fence,
+							    &rq->fence, 0,
+							    GFP_KERNEL);
+		}
+		mutex_unlock(&ce->timeline->mutex);
+		if (err < 0)
+			goto kill;
+	}
+
+	spin_lock_irq(&engines->ctx->stale.lock);
+	if (!i915_gem_context_is_closed(engines->ctx))
+		list_add_tail(&engines->link, &engines->ctx->stale.engines);
+	spin_unlock_irq(&engines->ctx->stale.lock);
+
+kill:
+	if (list_empty(&engines->link)) /* raced, already closed */
+		kill_engines(engines);
+
+	i915_sw_fence_commit(&engines->fence);
 }
 
 static void set_closed_name(struct i915_gem_context *ctx)
@@ -539,11 +619,16 @@  static void context_close(struct i915_gem_context *ctx)
 {
 	struct i915_address_space *vm;
 
+	/* Flush any concurrent set_engines() */
+	mutex_lock(&ctx->engines_mutex);
+	engines_idle_release(ctx, rcu_replace_pointer(ctx->engines, NULL, 1));
 	i915_gem_context_set_closed(ctx);
-	set_closed_name(ctx);
+	mutex_unlock(&ctx->engines_mutex);
 
 	mutex_lock(&ctx->mutex);
 
+	set_closed_name(ctx);
+
 	vm = i915_gem_context_vm(ctx);
 	if (vm)
 		i915_vm_close(vm);
@@ -1562,77 +1647,6 @@  static const i915_user_extension_fn set_engines__extensions[] = {
 	[I915_CONTEXT_ENGINES_EXT_BOND] = set_engines__bond,
 };
 
-static int engines_notify(struct i915_sw_fence *fence,
-			  enum i915_sw_fence_notify state)
-{
-	struct i915_gem_engines *engines =
-		container_of(fence, typeof(*engines), fence);
-
-	switch (state) {
-	case FENCE_COMPLETE:
-		if (!list_empty(&engines->link)) {
-			struct i915_gem_context *ctx = engines->ctx;
-			unsigned long flags;
-
-			spin_lock_irqsave(&ctx->stale.lock, flags);
-			list_del(&engines->link);
-			spin_unlock_irqrestore(&ctx->stale.lock, flags);
-		}
-		break;
-
-	case FENCE_FREE:
-		init_rcu_head(&engines->rcu);
-		call_rcu(&engines->rcu, free_engines_rcu);
-		break;
-	}
-
-	return NOTIFY_DONE;
-}
-
-static void engines_idle_release(struct i915_gem_engines *engines)
-{
-	struct i915_gem_engines_iter it;
-	struct intel_context *ce;
-	unsigned long flags;
-
-	GEM_BUG_ON(!engines);
-	i915_sw_fence_init(&engines->fence, engines_notify);
-
-	INIT_LIST_HEAD(&engines->link);
-	spin_lock_irqsave(&engines->ctx->stale.lock, flags);
-	if (!i915_gem_context_is_closed(engines->ctx))
-		list_add(&engines->link, &engines->ctx->stale.engines);
-	spin_unlock_irqrestore(&engines->ctx->stale.lock, flags);
-	if (list_empty(&engines->link)) /* raced, already closed */
-		goto kill;
-
-	for_each_gem_engine(ce, engines, it) {
-		struct dma_fence *fence;
-		int err;
-
-		if (!ce->timeline)
-			continue;
-
-		fence = i915_active_fence_get(&ce->timeline->last_request);
-		if (!fence)
-			continue;
-
-		err = i915_sw_fence_await_dma_fence(&engines->fence,
-						    fence, 0,
-						    GFP_KERNEL);
-
-		dma_fence_put(fence);
-		if (err < 0)
-			goto kill;
-	}
-	goto out;
-
-kill:
-	kill_engines(engines);
-out:
-	i915_sw_fence_commit(&engines->fence);
-}
-
 static int
 set_engines(struct i915_gem_context *ctx,
 	    const struct drm_i915_gem_context_param *args)
@@ -1675,8 +1689,6 @@  set_engines(struct i915_gem_context *ctx,
 	if (!set.engines)
 		return -ENOMEM;
 
-	set.engines->ctx = ctx;
-
 	for (n = 0; n < num_engines; n++) {
 		struct i915_engine_class_instance ci;
 		struct intel_engine_cs *engine;
@@ -1729,6 +1741,11 @@  set_engines(struct i915_gem_context *ctx,
 
 replace:
 	mutex_lock(&ctx->engines_mutex);
+	if (i915_gem_context_is_closed(ctx)) {
+		mutex_unlock(&ctx->engines_mutex);
+		free_engines(set.engines);
+		return -ENOENT;
+	}
 	if (args->size)
 		i915_gem_context_set_user_engines(ctx);
 	else
@@ -1737,7 +1754,7 @@  set_engines(struct i915_gem_context *ctx,
 	mutex_unlock(&ctx->engines_mutex);
 
 	/* Keep track of old engine sets for kill_context() */
-	engines_idle_release(set.engines);
+	engines_idle_release(ctx, set.engines);
 
 	return 0;
 }
@@ -1995,8 +2012,6 @@  static int clone_engines(struct i915_gem_context *dst,
 	if (!clone)
 		goto err_unlock;
 
-	clone->ctx = dst;
-
 	for (n = 0; n < e->num_engines; n++) {
 		struct intel_engine_cs *engine;
 
@@ -2033,8 +2048,7 @@  static int clone_engines(struct i915_gem_context *dst,
 	i915_gem_context_unlock_engines(src);
 
 	/* Serialised by constructor */
-	free_engines(__context_engines_static(dst));
-	RCU_INIT_POINTER(dst->engines, clone);
+	engines_idle_release(dst, rcu_replace_pointer(dst->engines, clone, 1));
 	if (user_engines)
 		i915_gem_context_set_user_engines(dst);
 	else
@@ -2461,6 +2475,9 @@  i915_gem_engines_iter_next(struct i915_gem_engines_iter *it)
 	const struct i915_gem_engines *e = it->engines;
 	struct intel_context *ctx;
 
+	if (unlikely(!e))
+		return NULL;
+
 	do {
 		if (it->idx >= e->num_engines)
 			return NULL;
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.h b/drivers/gpu/drm/i915/gem/i915_gem_context.h
index 3ae61a355d87..57b7ae2893e1 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context.h
@@ -207,7 +207,6 @@  static inline void
 i915_gem_engines_iter_init(struct i915_gem_engines_iter *it,
 			   struct i915_gem_engines *engines)
 {
-	GEM_BUG_ON(!engines);
 	it->engines = engines;
 	it->idx = 0;
 }