[11/15] drm/i915/execlists: Cancel banned contexts on schedule-out
diff mbox series

Message ID 20191014090757.32111-11-chris@chris-wilson.co.uk
State New
Headers show
Series
  • [01/15] drm/i915/display: Squelch kerneldoc warnings
Related show

Commit Message

Chris Wilson Oct. 14, 2019, 9:07 a.m. UTC
On schedule-out (CS completion) of a banned context, scrub the context
image so that we do not replay the active payload. The intent is that we
skip banned payloads on request submission so that the timeline
advancement continues on in the background. However, if we are returning
to a preempted request, i915_request_skip() is ineffective and instead we
need to patch up the context image so that it continues from the start
of the next request.

v2: Fixup cancellation so that we only scrub the payload of the active
request and do not short-circuit the breadcrumbs (which might cause
other contexts to execute out of order).

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/gt/intel_lrc.c    | 129 ++++++++----
 drivers/gpu/drm/i915/gt/selftest_lrc.c | 273 +++++++++++++++++++++++++
 2 files changed, 361 insertions(+), 41 deletions(-)

Comments

Tvrtko Ursulin Oct. 14, 2019, noon UTC | #1
On 14/10/2019 10:07, Chris Wilson wrote:
> On schedule-out (CS completion) of a banned context, scrub the context
> image so that we do not replay the active payload. The intent is that we
> skip banned payloads on request submission so that the timeline
> advancement continues on in the background. However, if we are returning
> to a preempted request, i915_request_skip() is ineffective and instead we
> need to patch up the context image so that it continues from the start
> of the next request.
> 
> v2: Fixup cancellation so that we only scrub the payload of the active
> request and do not short-circuit the breadcrumbs (which might cause
> other contexts to execute out of order).
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> ---
>   drivers/gpu/drm/i915/gt/intel_lrc.c    | 129 ++++++++----
>   drivers/gpu/drm/i915/gt/selftest_lrc.c | 273 +++++++++++++++++++++++++
>   2 files changed, 361 insertions(+), 41 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
> index e16ede75412b..b76b35194114 100644
> --- a/drivers/gpu/drm/i915/gt/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
> @@ -234,6 +234,9 @@ static void execlists_init_reg_state(u32 *reg_state,
>   				     const struct intel_engine_cs *engine,
>   				     const struct intel_ring *ring,
>   				     bool close);
> +static void
> +__execlists_update_reg_state(const struct intel_context *ce,
> +			     const struct intel_engine_cs *engine);
>   
>   static void cancel_timer(struct timer_list *t)
>   {
> @@ -270,6 +273,31 @@ static void mark_eio(struct i915_request *rq)
>   	i915_request_mark_complete(rq);
>   }
>   
> +static struct i915_request *active_request(struct i915_request *rq)
> +{
> +	const struct intel_context * const ce = rq->hw_context;
> +	struct i915_request *active = NULL;
> +	struct list_head *list;
> +
> +	if (!i915_request_is_active(rq)) /* unwound, but incomplete! */
> +		return rq;
> +
> +	rcu_read_lock();
> +	list = &rcu_dereference(rq->timeline)->requests;
> +	list_for_each_entry_from_reverse(rq, list, link) {
> +		if (i915_request_completed(rq))
> +			break;
> +
> +		if (rq->hw_context != ce)
> +			break;
> +
> +		active = rq;
> +	}
> +	rcu_read_unlock();
> +
> +	return active;
> +}
> +
>   static inline u32 intel_hws_preempt_address(struct intel_engine_cs *engine)
>   {
>   	return (i915_ggtt_offset(engine->status_page.vma) +
> @@ -991,6 +1019,56 @@ static void kick_siblings(struct i915_request *rq, struct intel_context *ce)
>   		tasklet_schedule(&ve->base.execlists.tasklet);
>   }
>   
> +static void restore_default_state(struct intel_context *ce,
> +				  struct intel_engine_cs *engine)
> +{
> +	u32 *regs = ce->lrc_reg_state;
> +
> +	if (engine->pinned_default_state)
> +		memcpy(regs, /* skip restoring the vanilla PPHWSP */
> +		       engine->pinned_default_state + LRC_STATE_PN * PAGE_SIZE,
> +		       engine->context_size - PAGE_SIZE);
> +
> +	execlists_init_reg_state(regs, ce, engine, ce->ring, false);
> +}
> +
> +static void cancel_active(struct i915_request *rq,
> +			  struct intel_engine_cs *engine)
> +{
> +	struct intel_context * const ce = rq->hw_context;
> +
> +	/*
> +	 * The executing context has been cancelled. Fixup the context so that
> +	 * it will be marked as incomplete [-EIO] upon resubmission and not
> +	 * execute any user payloads. We preserve the breadcrumbs and
> +	 * semaphores of the incomplete requests so that inter-timeline
> +	 * dependencies (i.e other timelines) remain correctly ordered.
> +	 *
> +	 * See __i915_request_submit() for applying -EIO and removing the
> +	 * payload on resubmission.
> +	 */
> +	GEM_TRACE("%s(%s): { rq=%llx:%lld }\n",
> +		  __func__, engine->name, rq->fence.context, rq->fence.seqno);
> +	__context_pin_acquire(ce);
> +
> +	/* On resubmission of the active request, payload will be scrubbed */
> +	rq = active_request(rq);
> +	if (rq)
> +		ce->ring->head = intel_ring_wrap(ce->ring, rq->head);

Without this change, where would the head be pointing after 
schedule_out? Somewhere in the middle of the active request? And with 
this change it is rewound to the start of it?

Regards,

Tvrtko

> +	else
> +		ce->ring->head = ce->ring->tail;
> +	intel_ring_update_space(ce->ring);
> +
> +	/* Scrub the context image to prevent replaying the previous batch */
> +	restore_default_state(ce, engine);
> +	__execlists_update_reg_state(ce, engine);
> +
> +	/* We've switched away, so this should be a no-op, but intent matters */
> +	ce->lrc_desc |= CTX_DESC_FORCE_RESTORE;
> +
> +	__context_pin_release(ce);
> +}
> +
>   static inline void
>   __execlists_schedule_out(struct i915_request *rq,
>   			 struct intel_engine_cs * const engine)
> @@ -1001,6 +1079,9 @@ __execlists_schedule_out(struct i915_request *rq,
>   	execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_OUT);
>   	intel_gt_pm_put(engine->gt);
>   
> +	if (unlikely(i915_gem_context_is_banned(ce->gem_context)))
> +		cancel_active(rq, engine);
> +
>   	/*
>   	 * If this is part of a virtual engine, its next request may
>   	 * have been blocked waiting for access to the active context.
> @@ -1395,6 +1476,10 @@ static unsigned long active_preempt_timeout(struct intel_engine_cs *engine)
>   	if (!rq)
>   		return 0;
>   
> +	/* Force a fast reset for terminated contexts (ignoring sysfs!) */
> +	if (unlikely(i915_gem_context_is_banned(rq->gem_context)))
> +		return 1;
> +
>   	return READ_ONCE(engine->props.preempt_timeout);
>   }
>   
> @@ -2810,29 +2895,6 @@ static void reset_csb_pointers(struct intel_engine_cs *engine)
>   			       &execlists->csb_status[reset_value]);
>   }
>   
> -static struct i915_request *active_request(struct i915_request *rq)
> -{
> -	const struct intel_context * const ce = rq->hw_context;
> -	struct i915_request *active = NULL;
> -	struct list_head *list;
> -
> -	if (!i915_request_is_active(rq)) /* unwound, but incomplete! */
> -		return rq;
> -
> -	list = &i915_request_active_timeline(rq)->requests;
> -	list_for_each_entry_from_reverse(rq, list, link) {
> -		if (i915_request_completed(rq))
> -			break;
> -
> -		if (rq->hw_context != ce)
> -			break;
> -
> -		active = rq;
> -	}
> -
> -	return active;
> -}
> -
>   static void __execlists_reset_reg_state(const struct intel_context *ce,
>   					const struct intel_engine_cs *engine)
>   {
> @@ -2849,7 +2911,6 @@ static void __execlists_reset(struct intel_engine_cs *engine, bool stalled)
>   	struct intel_engine_execlists * const execlists = &engine->execlists;
>   	struct intel_context *ce;
>   	struct i915_request *rq;
> -	u32 *regs;
>   
>   	mb(); /* paranoia: read the CSB pointers from after the reset */
>   	clflush(execlists->csb_write);
> @@ -2928,13 +2989,7 @@ static void __execlists_reset(struct intel_engine_cs *engine, bool stalled)
>   	 * to recreate its own state.
>   	 */
>   	GEM_BUG_ON(!intel_context_is_pinned(ce));
> -	regs = ce->lrc_reg_state;
> -	if (engine->pinned_default_state) {
> -		memcpy(regs, /* skip restoring the vanilla PPHWSP */
> -		       engine->pinned_default_state + LRC_STATE_PN * PAGE_SIZE,
> -		       engine->context_size - PAGE_SIZE);
> -	}
> -	execlists_init_reg_state(regs, ce, engine, ce->ring, false);
> +	restore_default_state(ce, engine);
>   
>   out_replay:
>   	GEM_TRACE("%s replay {head:%04x, tail:%04x\n",
> @@ -4571,16 +4626,8 @@ void intel_lr_context_reset(struct intel_engine_cs *engine,
>   	 * future request will be after userspace has had the opportunity
>   	 * to recreate its own state.
>   	 */
> -	if (scrub) {
> -		u32 *regs = ce->lrc_reg_state;
> -
> -		if (engine->pinned_default_state) {
> -			memcpy(regs, /* skip restoring the vanilla PPHWSP */
> -			       engine->pinned_default_state + LRC_STATE_PN * PAGE_SIZE,
> -			       engine->context_size - PAGE_SIZE);
> -		}
> -		execlists_init_reg_state(regs, ce, engine, ce->ring, false);
> -	}
> +	if (scrub)
> +		restore_default_state(ce, engine);
>   
>   	/* Rerun the request; its payload has been neutered (if guilty). */
>   	ce->ring->head = head;
> diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c
> index e7a86f60cf82..dd3e7b47b76d 100644
> --- a/drivers/gpu/drm/i915/gt/selftest_lrc.c
> +++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c
> @@ -7,6 +7,7 @@
>   #include <linux/prime_numbers.h>
>   
>   #include "gem/i915_gem_pm.h"
> +#include "gt/intel_engine_heartbeat.h"
>   #include "gt/intel_reset.h"
>   
>   #include "i915_selftest.h"
> @@ -1016,6 +1017,277 @@ static int live_nopreempt(void *arg)
>   	goto err_client_b;
>   }
>   
> +struct live_preempt_cancel {
> +	struct intel_engine_cs *engine;
> +	struct preempt_client a, b;
> +};
> +
> +static int __cancel_active0(struct live_preempt_cancel *arg)
> +{
> +	struct i915_request *rq;
> +	struct igt_live_test t;
> +	int err;
> +
> +	/* Preempt cancel of ELSP0 */
> +	GEM_TRACE("%s(%s)\n", __func__, arg->engine->name);
> +
> +	if (igt_live_test_begin(&t, arg->engine->i915,
> +				__func__, arg->engine->name))
> +		return -EIO;
> +
> +	clear_bit(CONTEXT_BANNED, &arg->a.ctx->flags);
> +	rq = spinner_create_request(&arg->a.spin,
> +				    arg->a.ctx, arg->engine,
> +				    MI_ARB_CHECK);
> +	if (IS_ERR(rq))
> +		return PTR_ERR(rq);
> +
> +	i915_request_get(rq);
> +	i915_request_add(rq);
> +	if (!igt_wait_for_spinner(&arg->a.spin, rq)) {
> +		err = -EIO;
> +		goto out;
> +	}
> +
> +	i915_gem_context_set_banned(arg->a.ctx);
> +	err = intel_engine_pulse(arg->engine);
> +	if (err)
> +		goto out;
> +
> +	if (i915_request_wait(rq, 0, HZ / 5) < 0) {
> +		err = -EIO;
> +		goto out;
> +	}
> +
> +	if (rq->fence.error != -EIO) {
> +		pr_err("Cancelled inflight0 request did not report -EIO\n");
> +		err = -EINVAL;
> +		goto out;
> +	}
> +
> +out:
> +	i915_request_put(rq);
> +	if (igt_live_test_end(&t))
> +		err = -EIO;
> +	return err;
> +}
> +
> +static int __cancel_active1(struct live_preempt_cancel *arg)
> +{
> +	struct i915_request *rq[2] = {};
> +	struct igt_live_test t;
> +	int err;
> +
> +	/* Preempt cancel of ELSP1 */
> +	GEM_TRACE("%s(%s)\n", __func__, arg->engine->name);
> +
> +	if (igt_live_test_begin(&t, arg->engine->i915,
> +				__func__, arg->engine->name))
> +		return -EIO;
> +
> +	clear_bit(CONTEXT_BANNED, &arg->a.ctx->flags);
> +	rq[0] = spinner_create_request(&arg->a.spin,
> +				       arg->a.ctx, arg->engine,
> +				       MI_NOOP); /* no preemption */
> +	if (IS_ERR(rq[0]))
> +		return PTR_ERR(rq[0]);
> +
> +	i915_request_get(rq[0]);
> +	i915_request_add(rq[0]);
> +	if (!igt_wait_for_spinner(&arg->a.spin, rq[0])) {
> +		err = -EIO;
> +		goto out;
> +	}
> +
> +	clear_bit(CONTEXT_BANNED, &arg->b.ctx->flags);
> +	rq[1] = spinner_create_request(&arg->b.spin,
> +				       arg->b.ctx, arg->engine,
> +				       MI_ARB_CHECK);
> +	if (IS_ERR(rq[1])) {
> +		err = PTR_ERR(rq[1]);
> +		goto out;
> +	}
> +
> +	i915_request_get(rq[1]);
> +	err = i915_request_await_dma_fence(rq[1], &rq[0]->fence);
> +	i915_request_add(rq[1]);
> +	if (err)
> +		goto out;
> +
> +	i915_gem_context_set_banned(arg->b.ctx);
> +	err = intel_engine_pulse(arg->engine);
> +	if (err)
> +		goto out;
> +
> +	igt_spinner_end(&arg->a.spin);
> +	if (i915_request_wait(rq[1], 0, HZ / 5) < 0) {
> +		err = -EIO;
> +		goto out;
> +	}
> +
> +	if (rq[0]->fence.error != 0) {
> +		pr_err("Normal inflight0 request did not complete\n");
> +		err = -EINVAL;
> +		goto out;
> +	}
> +
> +	if (rq[1]->fence.error != -EIO) {
> +		pr_err("Cancelled inflight1 request did not report -EIO\n");
> +		err = -EINVAL;
> +		goto out;
> +	}
> +
> +out:
> +	i915_request_put(rq[1]);
> +	i915_request_put(rq[0]);
> +	if (igt_live_test_end(&t))
> +		err = -EIO;
> +	return err;
> +}
> +
> +static int __cancel_queued(struct live_preempt_cancel *arg)
> +{
> +	struct i915_request *rq[3] = {};
> +	struct igt_live_test t;
> +	int err;
> +
> +	/* Full ELSP and one in the wings */
> +	GEM_TRACE("%s(%s)\n", __func__, arg->engine->name);
> +
> +	if (igt_live_test_begin(&t, arg->engine->i915,
> +				__func__, arg->engine->name))
> +		return -EIO;
> +
> +	clear_bit(CONTEXT_BANNED, &arg->a.ctx->flags);
> +	rq[0] = spinner_create_request(&arg->a.spin,
> +				       arg->a.ctx, arg->engine,
> +				       MI_ARB_CHECK);
> +	if (IS_ERR(rq[0]))
> +		return PTR_ERR(rq[0]);
> +
> +	i915_request_get(rq[0]);
> +	i915_request_add(rq[0]);
> +	if (!igt_wait_for_spinner(&arg->a.spin, rq[0])) {
> +		err = -EIO;
> +		goto out;
> +	}
> +
> +	clear_bit(CONTEXT_BANNED, &arg->b.ctx->flags);
> +	rq[1] = igt_request_alloc(arg->b.ctx, arg->engine);
> +	if (IS_ERR(rq[1])) {
> +		err = PTR_ERR(rq[1]);
> +		goto out;
> +	}
> +
> +	i915_request_get(rq[1]);
> +	err = i915_request_await_dma_fence(rq[1], &rq[0]->fence);
> +	i915_request_add(rq[1]);
> +	if (err)
> +		goto out;
> +
> +	rq[2] = spinner_create_request(&arg->b.spin,
> +				       arg->a.ctx, arg->engine,
> +				       MI_ARB_CHECK);
> +	if (IS_ERR(rq[2])) {
> +		err = PTR_ERR(rq[2]);
> +		goto out;
> +	}
> +
> +	i915_request_get(rq[2]);
> +	err = i915_request_await_dma_fence(rq[2], &rq[1]->fence);
> +	i915_request_add(rq[2]);
> +	if (err)
> +		goto out;
> +
> +	i915_gem_context_set_banned(arg->a.ctx);
> +	err = intel_engine_pulse(arg->engine);
> +	if (err)
> +		goto out;
> +
> +	if (i915_request_wait(rq[2], 0, HZ / 5) < 0) {
> +		err = -EIO;
> +		goto out;
> +	}
> +
> +	if (rq[0]->fence.error != -EIO) {
> +		pr_err("Cancelled inflight0 request did not report -EIO\n");
> +		err = -EINVAL;
> +		goto out;
> +	}
> +
> +	if (rq[1]->fence.error != 0) {
> +		pr_err("Normal inflight1 request did not complete\n");
> +		err = -EINVAL;
> +		goto out;
> +	}
> +
> +	if (rq[2]->fence.error != -EIO) {
> +		pr_err("Cancelled queued request did not report -EIO\n");
> +		err = -EINVAL;
> +		goto out;
> +	}
> +
> +out:
> +	i915_request_put(rq[2]);
> +	i915_request_put(rq[1]);
> +	i915_request_put(rq[0]);
> +	if (igt_live_test_end(&t))
> +		err = -EIO;
> +	return err;
> +}
> +
> +static int live_preempt_cancel(void *arg)
> +{
> +	struct drm_i915_private *i915 = arg;
> +	struct live_preempt_cancel data;
> +	enum intel_engine_id id;
> +	int err = -ENOMEM;
> +
> +	/*
> +	 * To cancel an inflight context, we need to first remove it from the
> +	 * GPU. That sounds like preemption! Plus a little bit of bookkeeping.
> +	 */
> +
> +	if (!HAS_LOGICAL_RING_PREEMPTION(i915))
> +		return 0;
> +
> +	if (preempt_client_init(i915, &data.a))
> +		return -ENOMEM;
> +	if (preempt_client_init(i915, &data.b))
> +		goto err_client_a;
> +
> +	for_each_engine(data.engine, i915, id) {
> +		if (!intel_engine_has_preemption(data.engine))
> +			continue;
> +
> +		err = __cancel_active0(&data);
> +		if (err)
> +			goto err_wedged;
> +
> +		err = __cancel_active1(&data);
> +		if (err)
> +			goto err_wedged;
> +
> +		err = __cancel_queued(&data);
> +		if (err)
> +			goto err_wedged;
> +	}
> +
> +	err = 0;
> +err_client_b:
> +	preempt_client_fini(&data.b);
> +err_client_a:
> +	preempt_client_fini(&data.a);
> +	return err;
> +
> +err_wedged:
> +	GEM_TRACE_DUMP();
> +	igt_spinner_end(&data.b.spin);
> +	igt_spinner_end(&data.a.spin);
> +	intel_gt_set_wedged(&i915->gt);
> +	goto err_client_b;
> +}
> +
>   static int live_suppress_self_preempt(void *arg)
>   {
>   	struct drm_i915_private *i915 = arg;
> @@ -2549,6 +2821,7 @@ int intel_execlists_live_selftests(struct drm_i915_private *i915)
>   		SUBTEST(live_preempt),
>   		SUBTEST(live_late_preempt),
>   		SUBTEST(live_nopreempt),
> +		SUBTEST(live_preempt_cancel),
>   		SUBTEST(live_suppress_self_preempt),
>   		SUBTEST(live_suppress_wait_preempt),
>   		SUBTEST(live_chain_preempt),
>
Chris Wilson Oct. 14, 2019, 12:06 p.m. UTC | #2
Quoting Tvrtko Ursulin (2019-10-14 13:00:01)
> 
> On 14/10/2019 10:07, Chris Wilson wrote:
> > On schedule-out (CS completion) of a banned context, scrub the context
> > image so that we do not replay the active payload. The intent is that we
> > skip banned payloads on request submission so that the timeline
> > advancement continues on in the background. However, if we are returning
> > to a preempted request, i915_request_skip() is ineffective and instead we
> > need to patch up the context image so that it continues from the start
> > of the next request.
> > 
> > v2: Fixup cancellation so that we only scrub the payload of the active
> > request and do not short-circuit the breadcrumbs (which might cause
> > other contexts to execute out of order).
> > 
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> > ---
> >   drivers/gpu/drm/i915/gt/intel_lrc.c    | 129 ++++++++----
> >   drivers/gpu/drm/i915/gt/selftest_lrc.c | 273 +++++++++++++++++++++++++
> >   2 files changed, 361 insertions(+), 41 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
> > index e16ede75412b..b76b35194114 100644
> > --- a/drivers/gpu/drm/i915/gt/intel_lrc.c
> > +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
> > @@ -234,6 +234,9 @@ static void execlists_init_reg_state(u32 *reg_state,
> >                                    const struct intel_engine_cs *engine,
> >                                    const struct intel_ring *ring,
> >                                    bool close);
> > +static void
> > +__execlists_update_reg_state(const struct intel_context *ce,
> > +                          const struct intel_engine_cs *engine);
> >   
> >   static void cancel_timer(struct timer_list *t)
> >   {
> > @@ -270,6 +273,31 @@ static void mark_eio(struct i915_request *rq)
> >       i915_request_mark_complete(rq);
> >   }
> >   
> > +static struct i915_request *active_request(struct i915_request *rq)
> > +{
> > +     const struct intel_context * const ce = rq->hw_context;
> > +     struct i915_request *active = NULL;
> > +     struct list_head *list;
> > +
> > +     if (!i915_request_is_active(rq)) /* unwound, but incomplete! */
> > +             return rq;
> > +
> > +     rcu_read_lock();
> > +     list = &rcu_dereference(rq->timeline)->requests;
> > +     list_for_each_entry_from_reverse(rq, list, link) {
> > +             if (i915_request_completed(rq))
> > +                     break;
> > +
> > +             if (rq->hw_context != ce)
> > +                     break;
> > +
> > +             active = rq;
> > +     }
> > +     rcu_read_unlock();
> > +
> > +     return active;
> > +}
> > +
> >   static inline u32 intel_hws_preempt_address(struct intel_engine_cs *engine)
> >   {
> >       return (i915_ggtt_offset(engine->status_page.vma) +
> > @@ -991,6 +1019,56 @@ static void kick_siblings(struct i915_request *rq, struct intel_context *ce)
> >               tasklet_schedule(&ve->base.execlists.tasklet);
> >   }
> >   
> > +static void restore_default_state(struct intel_context *ce,
> > +                               struct intel_engine_cs *engine)
> > +{
> > +     u32 *regs = ce->lrc_reg_state;
> > +
> > +     if (engine->pinned_default_state)
> > +             memcpy(regs, /* skip restoring the vanilla PPHWSP */
> > +                    engine->pinned_default_state + LRC_STATE_PN * PAGE_SIZE,
> > +                    engine->context_size - PAGE_SIZE);
> > +
> > +     execlists_init_reg_state(regs, ce, engine, ce->ring, false);
> > +}
> > +
> > +static void cancel_active(struct i915_request *rq,
> > +                       struct intel_engine_cs *engine)
> > +{
> > +     struct intel_context * const ce = rq->hw_context;
> > +
> > +     /*
> > +      * The executing context has been cancelled. Fixup the context so that
> > +      * it will be marked as incomplete [-EIO] upon resubmission and not
> > +      * execute any user payloads. We preserve the breadcrumbs and
> > +      * semaphores of the incomplete requests so that inter-timeline
> > +      * dependencies (i.e other timelines) remain correctly ordered.
> > +      *
> > +      * See __i915_request_submit() for applying -EIO and removing the
> > +      * payload on resubmission.
> > +      */
> > +     GEM_TRACE("%s(%s): { rq=%llx:%lld }\n",
> > +               __func__, engine->name, rq->fence.context, rq->fence.seqno);
> > +     __context_pin_acquire(ce);
> > +
> > +     /* On resubmission of the active request, payload will be scrubbed */
> > +     rq = active_request(rq);
> > +     if (rq)
> > +             ce->ring->head = intel_ring_wrap(ce->ring, rq->head);
> 
> Without this change, where would the head be pointing after 
> schedule_out? Somewhere in the middle of the active request? And with 
> this change it is rewound to the start of it?

RING_HEAD could be anywhere between rq->head and rq->tail. We could just
leave it be, but it would be better if we reset it if it was past
rq->postfix (as we rewrite those instructions). It's just simpler if we
reset it back to the start, and scrub the request.
-Chris
Tvrtko Ursulin Oct. 14, 2019, 12:25 p.m. UTC | #3
On 14/10/2019 13:06, Chris Wilson wrote:
> Quoting Tvrtko Ursulin (2019-10-14 13:00:01)
>>
>> On 14/10/2019 10:07, Chris Wilson wrote:
>>> On schedule-out (CS completion) of a banned context, scrub the context
>>> image so that we do not replay the active payload. The intent is that we
>>> skip banned payloads on request submission so that the timeline
>>> advancement continues on in the background. However, if we are returning
>>> to a preempted request, i915_request_skip() is ineffective and instead we
>>> need to patch up the context image so that it continues from the start
>>> of the next request.
>>>
>>> v2: Fixup cancellation so that we only scrub the payload of the active
>>> request and do not short-circuit the breadcrumbs (which might cause
>>> other contexts to execute out of order).
>>>
>>> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
>>> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>>> ---
>>>    drivers/gpu/drm/i915/gt/intel_lrc.c    | 129 ++++++++----
>>>    drivers/gpu/drm/i915/gt/selftest_lrc.c | 273 +++++++++++++++++++++++++
>>>    2 files changed, 361 insertions(+), 41 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
>>> index e16ede75412b..b76b35194114 100644
>>> --- a/drivers/gpu/drm/i915/gt/intel_lrc.c
>>> +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
>>> @@ -234,6 +234,9 @@ static void execlists_init_reg_state(u32 *reg_state,
>>>                                     const struct intel_engine_cs *engine,
>>>                                     const struct intel_ring *ring,
>>>                                     bool close);
>>> +static void
>>> +__execlists_update_reg_state(const struct intel_context *ce,
>>> +                          const struct intel_engine_cs *engine);
>>>    
>>>    static void cancel_timer(struct timer_list *t)
>>>    {
>>> @@ -270,6 +273,31 @@ static void mark_eio(struct i915_request *rq)
>>>        i915_request_mark_complete(rq);
>>>    }
>>>    
>>> +static struct i915_request *active_request(struct i915_request *rq)
>>> +{
>>> +     const struct intel_context * const ce = rq->hw_context;
>>> +     struct i915_request *active = NULL;
>>> +     struct list_head *list;
>>> +
>>> +     if (!i915_request_is_active(rq)) /* unwound, but incomplete! */
>>> +             return rq;
>>> +
>>> +     rcu_read_lock();
>>> +     list = &rcu_dereference(rq->timeline)->requests;
>>> +     list_for_each_entry_from_reverse(rq, list, link) {
>>> +             if (i915_request_completed(rq))
>>> +                     break;
>>> +
>>> +             if (rq->hw_context != ce)
>>> +                     break;
>>> +
>>> +             active = rq;
>>> +     }
>>> +     rcu_read_unlock();
>>> +
>>> +     return active;
>>> +}
>>> +
>>>    static inline u32 intel_hws_preempt_address(struct intel_engine_cs *engine)
>>>    {
>>>        return (i915_ggtt_offset(engine->status_page.vma) +
>>> @@ -991,6 +1019,56 @@ static void kick_siblings(struct i915_request *rq, struct intel_context *ce)
>>>                tasklet_schedule(&ve->base.execlists.tasklet);
>>>    }
>>>    
>>> +static void restore_default_state(struct intel_context *ce,
>>> +                               struct intel_engine_cs *engine)
>>> +{
>>> +     u32 *regs = ce->lrc_reg_state;
>>> +
>>> +     if (engine->pinned_default_state)
>>> +             memcpy(regs, /* skip restoring the vanilla PPHWSP */
>>> +                    engine->pinned_default_state + LRC_STATE_PN * PAGE_SIZE,
>>> +                    engine->context_size - PAGE_SIZE);
>>> +
>>> +     execlists_init_reg_state(regs, ce, engine, ce->ring, false);
>>> +}
>>> +
>>> +static void cancel_active(struct i915_request *rq,
>>> +                       struct intel_engine_cs *engine)
>>> +{
>>> +     struct intel_context * const ce = rq->hw_context;
>>> +
>>> +     /*
>>> +      * The executing context has been cancelled. Fixup the context so that
>>> +      * it will be marked as incomplete [-EIO] upon resubmission and not

(read below first)

... and not misleadingly say "Fixup the context so that it will be 
marked as incomplete" because there is nothing in this function which 
does that. It mostly happens by the virtual of context already being 
marked as banned somewhere else. This comment should just explain the 
decision to rewind the ring->head for more determinism. It can still 
mention canceling of user payload and -EIO. Just needs to be clear of 
the single extra thing achieved here by the head rewind and context edit.

>>> +      * execute any user payloads. We preserve the breadcrumbs and
>>> +      * semaphores of the incomplete requests so that inter-timeline
>>> +      * dependencies (i.e other timelines) remain correctly ordered.
>>> +      *
>>> +      * See __i915_request_submit() for applying -EIO and removing the
>>> +      * payload on resubmission.
>>> +      */
>>> +     GEM_TRACE("%s(%s): { rq=%llx:%lld }\n",
>>> +               __func__, engine->name, rq->fence.context, rq->fence.seqno);
>>> +     __context_pin_acquire(ce);
>>> +
>>> +     /* On resubmission of the active request, payload will be scrubbed */
>>> +     rq = active_request(rq);
>>> +     if (rq)
>>> +             ce->ring->head = intel_ring_wrap(ce->ring, rq->head);
>>
>> Without this change, where would the head be pointing after
>> schedule_out? Somewhere in the middle of the active request? And with
>> this change it is rewound to the start of it?
> 
> RING_HEAD could be anywhere between rq->head and rq->tail. We could just
> leave it be, but it would be better if we reset it if it was past
> rq->postfix (as we rewrite those instructions). It's just simpler if we
> reset it back to the start, and scrub the request.

Okay, now I follow. I suggest going back up ^^^ and making the comment 
clear...

Regards,

Tvrtko
Chris Wilson Oct. 14, 2019, 12:34 p.m. UTC | #4
Quoting Tvrtko Ursulin (2019-10-14 13:25:58)
> 
> On 14/10/2019 13:06, Chris Wilson wrote:
> > Quoting Tvrtko Ursulin (2019-10-14 13:00:01)
> >>
> >> On 14/10/2019 10:07, Chris Wilson wrote:
> >>> +static void cancel_active(struct i915_request *rq,
> >>> +                       struct intel_engine_cs *engine)
> >>> +{
> >>> +     struct intel_context * const ce = rq->hw_context;
> >>> +
> >>> +     /*
> >>> +      * The executing context has been cancelled. Fixup the context so that
> >>> +      * it will be marked as incomplete [-EIO] upon resubmission and not
> 
> (read below first)
> 
> ... and not misleadingly say "Fixup the context so that it will be 
> marked as incomplete" because there is nothing in this function which 
> does that. It mostly happens by the virtual of context already being 
> marked as banned somewhere else. This comment should just explain the 
> decision to rewind the ring->head for more determinism. It can still 
> mention canceling of user payload and -EIO. Just needs to be clear of 
> the single extra thing achieved here by the head rewind and context edit.

I thought I was clear: "upon resubmission". So use a more active voice to
reduce ambiguity, gotcha.
-Chris
Chris Wilson Oct. 14, 2019, 1:13 p.m. UTC | #5
Quoting Chris Wilson (2019-10-14 13:34:35)
> Quoting Tvrtko Ursulin (2019-10-14 13:25:58)
> > 
> > On 14/10/2019 13:06, Chris Wilson wrote:
> > > Quoting Tvrtko Ursulin (2019-10-14 13:00:01)
> > >>
> > >> On 14/10/2019 10:07, Chris Wilson wrote:
> > >>> +static void cancel_active(struct i915_request *rq,
> > >>> +                       struct intel_engine_cs *engine)
> > >>> +{
> > >>> +     struct intel_context * const ce = rq->hw_context;
> > >>> +
> > >>> +     /*
> > >>> +      * The executing context has been cancelled. Fixup the context so that
> > >>> +      * it will be marked as incomplete [-EIO] upon resubmission and not
> > 
> > (read below first)
> > 
> > ... and not misleadingly say "Fixup the context so that it will be 
> > marked as incomplete" because there is nothing in this function which 
> > does that. It mostly happens by the virtual of context already being 
> > marked as banned somewhere else. This comment should just explain the 
> > decision to rewind the ring->head for more determinism. It can still 
> > mention canceling of user payload and -EIO. Just needs to be clear of 
> > the single extra thing achieved here by the head rewind and context edit.
> 
> I thought I was clear: "upon resubmission". So use a more active voice to
> reduce ambiguity, gotcha.

        /*
         * The executing context has been cancelled. We want to prevent
         * further execution along this context and propagate the error on
         * to anything depending on its results.
         *
         * In __i915_request_submit(), we apply the -EIO and remove the
         * requests payload for any banned requests. But first, we must
         * rewind the context back to the start of the incomplete request so
         * that we don't jump back into the middle of the batch.
         *
         * We preserve the breadcrumbs and semaphores of the incomplete
         * requests so that inter-timeline dependencies (i.e other timelines)
         * remain correctly ordered.
         */

-Chris
Tvrtko Ursulin Oct. 14, 2019, 1:19 p.m. UTC | #6
On 14/10/2019 14:13, Chris Wilson wrote:
> Quoting Chris Wilson (2019-10-14 13:34:35)
>> Quoting Tvrtko Ursulin (2019-10-14 13:25:58)
>>>
>>> On 14/10/2019 13:06, Chris Wilson wrote:
>>>> Quoting Tvrtko Ursulin (2019-10-14 13:00:01)
>>>>>
>>>>> On 14/10/2019 10:07, Chris Wilson wrote:
>>>>>> +static void cancel_active(struct i915_request *rq,
>>>>>> +                       struct intel_engine_cs *engine)
>>>>>> +{
>>>>>> +     struct intel_context * const ce = rq->hw_context;
>>>>>> +
>>>>>> +     /*
>>>>>> +      * The executing context has been cancelled. Fixup the context so that
>>>>>> +      * it will be marked as incomplete [-EIO] upon resubmission and not
>>>
>>> (read below first)
>>>
>>> ... and not misleadingly say "Fixup the context so that it will be
>>> marked as incomplete" because there is nothing in this function which
>>> does that. It mostly happens by the virtual of context already being
>>> marked as banned somewhere else. This comment should just explain the
>>> decision to rewind the ring->head for more determinism. It can still
>>> mention canceling of user payload and -EIO. Just needs to be clear of
>>> the single extra thing achieved here by the head rewind and context edit.
>>
>> I thought I was clear: "upon resubmission". So use a more active voice to
>> reduce ambiguity, gotcha.
> 
>          /*
>           * The executing context has been cancelled. We want to prevent
>           * further execution along this context and propagate the error on
>           * to anything depending on its results.
>           *
>           * In __i915_request_submit(), we apply the -EIO and remove the
>           * requests payload for any banned requests. But first, we must
>           * rewind the context back to the start of the incomplete request so
>           * that we don't jump back into the middle of the batch.
>           *
>           * We preserve the breadcrumbs and semaphores of the incomplete
>           * requests so that inter-timeline dependencies (i.e other timelines)
>           * remain correctly ordered.
>           */
> 

Sounds good.

Btw.. does this work? :) If context was preempted in the middle of a 
batch buffer there must be some other state saved (other than RING_HEAD) 
which on context restore enables it to continue from the right place 
*within* the batch. Is this code zapping that state as well so GPU will 
fully forget it was inside the batch?

Regards,

Tvrtko
Chris Wilson Oct. 14, 2019, 1:23 p.m. UTC | #7
Quoting Tvrtko Ursulin (2019-10-14 14:19:19)
> 
> On 14/10/2019 14:13, Chris Wilson wrote:
> > Quoting Chris Wilson (2019-10-14 13:34:35)
> >> Quoting Tvrtko Ursulin (2019-10-14 13:25:58)
> >>>
> >>> On 14/10/2019 13:06, Chris Wilson wrote:
> >>>> Quoting Tvrtko Ursulin (2019-10-14 13:00:01)
> >>>>>
> >>>>> On 14/10/2019 10:07, Chris Wilson wrote:
> >>>>>> +static void cancel_active(struct i915_request *rq,
> >>>>>> +                       struct intel_engine_cs *engine)
> >>>>>> +{
> >>>>>> +     struct intel_context * const ce = rq->hw_context;
> >>>>>> +
> >>>>>> +     /*
> >>>>>> +      * The executing context has been cancelled. Fixup the context so that
> >>>>>> +      * it will be marked as incomplete [-EIO] upon resubmission and not
> >>>
> >>> (read below first)
> >>>
> >>> ... and not misleadingly say "Fixup the context so that it will be
> >>> marked as incomplete" because there is nothing in this function which
> >>> does that. It mostly happens by the virtual of context already being
> >>> marked as banned somewhere else. This comment should just explain the
> >>> decision to rewind the ring->head for more determinism. It can still
> >>> mention canceling of user payload and -EIO. Just needs to be clear of
> >>> the single extra thing achieved here by the head rewind and context edit.
> >>
> >> I thought I was clear: "upon resubmission". So use a more active voice to
> >> reduce ambiguity, gotcha.
> > 
> >          /*
> >           * The executing context has been cancelled. We want to prevent
> >           * further execution along this context and propagate the error on
> >           * to anything depending on its results.
> >           *
> >           * In __i915_request_submit(), we apply the -EIO and remove the
> >           * requests payload for any banned requests. But first, we must
> >           * rewind the context back to the start of the incomplete request so
> >           * that we don't jump back into the middle of the batch.
> >           *
> >           * We preserve the breadcrumbs and semaphores of the incomplete
> >           * requests so that inter-timeline dependencies (i.e other timelines)
> >           * remain correctly ordered.
> >           */
> > 
> 
> Sounds good.
> 
> Btw.. does this work? :) If context was preempted in the middle of a 
> batch buffer there must be some other state saved (other than RING_HEAD) 
> which on context restore enables it to continue from the right place 
> *within* the batch. Is this code zapping that state as well so GPU will 
> fully forget it was inside the batch?

Yes. We are resetting the context image back to vanilla, and then
restore the ring registers to restart from this request. The selftests
are using spinning batches to simulate the banned context.
-Chris
Tvrtko Ursulin Oct. 14, 2019, 1:38 p.m. UTC | #8
On 14/10/2019 14:23, Chris Wilson wrote:
> Quoting Tvrtko Ursulin (2019-10-14 14:19:19)
>>
>> On 14/10/2019 14:13, Chris Wilson wrote:
>>> Quoting Chris Wilson (2019-10-14 13:34:35)
>>>> Quoting Tvrtko Ursulin (2019-10-14 13:25:58)
>>>>>
>>>>> On 14/10/2019 13:06, Chris Wilson wrote:
>>>>>> Quoting Tvrtko Ursulin (2019-10-14 13:00:01)
>>>>>>>
>>>>>>> On 14/10/2019 10:07, Chris Wilson wrote:
>>>>>>>> +static void cancel_active(struct i915_request *rq,
>>>>>>>> +                       struct intel_engine_cs *engine)
>>>>>>>> +{
>>>>>>>> +     struct intel_context * const ce = rq->hw_context;
>>>>>>>> +
>>>>>>>> +     /*
>>>>>>>> +      * The executing context has been cancelled. Fixup the context so that
>>>>>>>> +      * it will be marked as incomplete [-EIO] upon resubmission and not
>>>>>
>>>>> (read below first)
>>>>>
>>>>> ... and not misleadingly say "Fixup the context so that it will be
>>>>> marked as incomplete" because there is nothing in this function which
>>>>> does that. It mostly happens by the virtual of context already being
>>>>> marked as banned somewhere else. This comment should just explain the
>>>>> decision to rewind the ring->head for more determinism. It can still
>>>>> mention canceling of user payload and -EIO. Just needs to be clear of
>>>>> the single extra thing achieved here by the head rewind and context edit.
>>>>
>>>> I thought I was clear: "upon resubmission". So use a more active voice to
>>>> reduce ambiguity, gotcha.
>>>
>>>           /*
>>>            * The executing context has been cancelled. We want to prevent
>>>            * further execution along this context and propagate the error on
>>>            * to anything depending on its results.
>>>            *
>>>            * In __i915_request_submit(), we apply the -EIO and remove the
>>>            * requests payload for any banned requests. But first, we must
>>>            * rewind the context back to the start of the incomplete request so
>>>            * that we don't jump back into the middle of the batch.
>>>            *
>>>            * We preserve the breadcrumbs and semaphores of the incomplete
>>>            * requests so that inter-timeline dependencies (i.e other timelines)
>>>            * remain correctly ordered.
>>>            */
>>>
>>
>> Sounds good.
>>
>> Btw.. does this work? :) If context was preempted in the middle of a
>> batch buffer there must be some other state saved (other than RING_HEAD)
>> which on context restore enables it to continue from the right place
>> *within* the batch. Is this code zapping that state as well so GPU will
>> fully forget it was inside the batch?
> 
> Yes. We are resetting the context image back to vanilla, and then
> restore the ring registers to restart from this request. The selftests
> are using spinning batches to simulate the banned context.

Okay, in that case:

Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Regards,

Tvrtko

Patch
diff mbox series

diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index e16ede75412b..b76b35194114 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -234,6 +234,9 @@  static void execlists_init_reg_state(u32 *reg_state,
 				     const struct intel_engine_cs *engine,
 				     const struct intel_ring *ring,
 				     bool close);
+static void
+__execlists_update_reg_state(const struct intel_context *ce,
+			     const struct intel_engine_cs *engine);
 
 static void cancel_timer(struct timer_list *t)
 {
@@ -270,6 +273,31 @@  static void mark_eio(struct i915_request *rq)
 	i915_request_mark_complete(rq);
 }
 
+static struct i915_request *active_request(struct i915_request *rq)
+{
+	const struct intel_context * const ce = rq->hw_context;
+	struct i915_request *active = NULL;
+	struct list_head *list;
+
+	if (!i915_request_is_active(rq)) /* unwound, but incomplete! */
+		return rq;
+
+	rcu_read_lock();
+	list = &rcu_dereference(rq->timeline)->requests;
+	list_for_each_entry_from_reverse(rq, list, link) {
+		if (i915_request_completed(rq))
+			break;
+
+		if (rq->hw_context != ce)
+			break;
+
+		active = rq;
+	}
+	rcu_read_unlock();
+
+	return active;
+}
+
 static inline u32 intel_hws_preempt_address(struct intel_engine_cs *engine)
 {
 	return (i915_ggtt_offset(engine->status_page.vma) +
@@ -991,6 +1019,56 @@  static void kick_siblings(struct i915_request *rq, struct intel_context *ce)
 		tasklet_schedule(&ve->base.execlists.tasklet);
 }
 
+static void restore_default_state(struct intel_context *ce,
+				  struct intel_engine_cs *engine)
+{
+	u32 *regs = ce->lrc_reg_state;
+
+	if (engine->pinned_default_state)
+		memcpy(regs, /* skip restoring the vanilla PPHWSP */
+		       engine->pinned_default_state + LRC_STATE_PN * PAGE_SIZE,
+		       engine->context_size - PAGE_SIZE);
+
+	execlists_init_reg_state(regs, ce, engine, ce->ring, false);
+}
+
+static void cancel_active(struct i915_request *rq,
+			  struct intel_engine_cs *engine)
+{
+	struct intel_context * const ce = rq->hw_context;
+
+	/*
+	 * The executing context has been cancelled. Fixup the context so that
+	 * it will be marked as incomplete [-EIO] upon resubmission and not
+	 * execute any user payloads. We preserve the breadcrumbs and
+	 * semaphores of the incomplete requests so that inter-timeline
+	 * dependencies (i.e other timelines) remain correctly ordered.
+	 *
+	 * See __i915_request_submit() for applying -EIO and removing the
+	 * payload on resubmission.
+	 */
+	GEM_TRACE("%s(%s): { rq=%llx:%lld }\n",
+		  __func__, engine->name, rq->fence.context, rq->fence.seqno);
+	__context_pin_acquire(ce);
+
+	/* On resubmission of the active request, payload will be scrubbed */
+	rq = active_request(rq);
+	if (rq)
+		ce->ring->head = intel_ring_wrap(ce->ring, rq->head);
+	else
+		ce->ring->head = ce->ring->tail;
+	intel_ring_update_space(ce->ring);
+
+	/* Scrub the context image to prevent replaying the previous batch */
+	restore_default_state(ce, engine);
+	__execlists_update_reg_state(ce, engine);
+
+	/* We've switched away, so this should be a no-op, but intent matters */
+	ce->lrc_desc |= CTX_DESC_FORCE_RESTORE;
+
+	__context_pin_release(ce);
+}
+
 static inline void
 __execlists_schedule_out(struct i915_request *rq,
 			 struct intel_engine_cs * const engine)
@@ -1001,6 +1079,9 @@  __execlists_schedule_out(struct i915_request *rq,
 	execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_OUT);
 	intel_gt_pm_put(engine->gt);
 
+	if (unlikely(i915_gem_context_is_banned(ce->gem_context)))
+		cancel_active(rq, engine);
+
 	/*
 	 * If this is part of a virtual engine, its next request may
 	 * have been blocked waiting for access to the active context.
@@ -1395,6 +1476,10 @@  static unsigned long active_preempt_timeout(struct intel_engine_cs *engine)
 	if (!rq)
 		return 0;
 
+	/* Force a fast reset for terminated contexts (ignoring sysfs!) */
+	if (unlikely(i915_gem_context_is_banned(rq->gem_context)))
+		return 1;
+
 	return READ_ONCE(engine->props.preempt_timeout);
 }
 
@@ -2810,29 +2895,6 @@  static void reset_csb_pointers(struct intel_engine_cs *engine)
 			       &execlists->csb_status[reset_value]);
 }
 
-static struct i915_request *active_request(struct i915_request *rq)
-{
-	const struct intel_context * const ce = rq->hw_context;
-	struct i915_request *active = NULL;
-	struct list_head *list;
-
-	if (!i915_request_is_active(rq)) /* unwound, but incomplete! */
-		return rq;
-
-	list = &i915_request_active_timeline(rq)->requests;
-	list_for_each_entry_from_reverse(rq, list, link) {
-		if (i915_request_completed(rq))
-			break;
-
-		if (rq->hw_context != ce)
-			break;
-
-		active = rq;
-	}
-
-	return active;
-}
-
 static void __execlists_reset_reg_state(const struct intel_context *ce,
 					const struct intel_engine_cs *engine)
 {
@@ -2849,7 +2911,6 @@  static void __execlists_reset(struct intel_engine_cs *engine, bool stalled)
 	struct intel_engine_execlists * const execlists = &engine->execlists;
 	struct intel_context *ce;
 	struct i915_request *rq;
-	u32 *regs;
 
 	mb(); /* paranoia: read the CSB pointers from after the reset */
 	clflush(execlists->csb_write);
@@ -2928,13 +2989,7 @@  static void __execlists_reset(struct intel_engine_cs *engine, bool stalled)
 	 * to recreate its own state.
 	 */
 	GEM_BUG_ON(!intel_context_is_pinned(ce));
-	regs = ce->lrc_reg_state;
-	if (engine->pinned_default_state) {
-		memcpy(regs, /* skip restoring the vanilla PPHWSP */
-		       engine->pinned_default_state + LRC_STATE_PN * PAGE_SIZE,
-		       engine->context_size - PAGE_SIZE);
-	}
-	execlists_init_reg_state(regs, ce, engine, ce->ring, false);
+	restore_default_state(ce, engine);
 
 out_replay:
 	GEM_TRACE("%s replay {head:%04x, tail:%04x\n",
@@ -4571,16 +4626,8 @@  void intel_lr_context_reset(struct intel_engine_cs *engine,
 	 * future request will be after userspace has had the opportunity
 	 * to recreate its own state.
 	 */
-	if (scrub) {
-		u32 *regs = ce->lrc_reg_state;
-
-		if (engine->pinned_default_state) {
-			memcpy(regs, /* skip restoring the vanilla PPHWSP */
-			       engine->pinned_default_state + LRC_STATE_PN * PAGE_SIZE,
-			       engine->context_size - PAGE_SIZE);
-		}
-		execlists_init_reg_state(regs, ce, engine, ce->ring, false);
-	}
+	if (scrub)
+		restore_default_state(ce, engine);
 
 	/* Rerun the request; its payload has been neutered (if guilty). */
 	ce->ring->head = head;
diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c
index e7a86f60cf82..dd3e7b47b76d 100644
--- a/drivers/gpu/drm/i915/gt/selftest_lrc.c
+++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c
@@ -7,6 +7,7 @@ 
 #include <linux/prime_numbers.h>
 
 #include "gem/i915_gem_pm.h"
+#include "gt/intel_engine_heartbeat.h"
 #include "gt/intel_reset.h"
 
 #include "i915_selftest.h"
@@ -1016,6 +1017,277 @@  static int live_nopreempt(void *arg)
 	goto err_client_b;
 }
 
+struct live_preempt_cancel {
+	struct intel_engine_cs *engine;
+	struct preempt_client a, b;
+};
+
+static int __cancel_active0(struct live_preempt_cancel *arg)
+{
+	struct i915_request *rq;
+	struct igt_live_test t;
+	int err;
+
+	/* Preempt cancel of ELSP0 */
+	GEM_TRACE("%s(%s)\n", __func__, arg->engine->name);
+
+	if (igt_live_test_begin(&t, arg->engine->i915,
+				__func__, arg->engine->name))
+		return -EIO;
+
+	clear_bit(CONTEXT_BANNED, &arg->a.ctx->flags);
+	rq = spinner_create_request(&arg->a.spin,
+				    arg->a.ctx, arg->engine,
+				    MI_ARB_CHECK);
+	if (IS_ERR(rq))
+		return PTR_ERR(rq);
+
+	i915_request_get(rq);
+	i915_request_add(rq);
+	if (!igt_wait_for_spinner(&arg->a.spin, rq)) {
+		err = -EIO;
+		goto out;
+	}
+
+	i915_gem_context_set_banned(arg->a.ctx);
+	err = intel_engine_pulse(arg->engine);
+	if (err)
+		goto out;
+
+	if (i915_request_wait(rq, 0, HZ / 5) < 0) {
+		err = -EIO;
+		goto out;
+	}
+
+	if (rq->fence.error != -EIO) {
+		pr_err("Cancelled inflight0 request did not report -EIO\n");
+		err = -EINVAL;
+		goto out;
+	}
+
+out:
+	i915_request_put(rq);
+	if (igt_live_test_end(&t))
+		err = -EIO;
+	return err;
+}
+
+static int __cancel_active1(struct live_preempt_cancel *arg)
+{
+	struct i915_request *rq[2] = {};
+	struct igt_live_test t;
+	int err;
+
+	/* Preempt cancel of ELSP1 */
+	GEM_TRACE("%s(%s)\n", __func__, arg->engine->name);
+
+	if (igt_live_test_begin(&t, arg->engine->i915,
+				__func__, arg->engine->name))
+		return -EIO;
+
+	clear_bit(CONTEXT_BANNED, &arg->a.ctx->flags);
+	rq[0] = spinner_create_request(&arg->a.spin,
+				       arg->a.ctx, arg->engine,
+				       MI_NOOP); /* no preemption */
+	if (IS_ERR(rq[0]))
+		return PTR_ERR(rq[0]);
+
+	i915_request_get(rq[0]);
+	i915_request_add(rq[0]);
+	if (!igt_wait_for_spinner(&arg->a.spin, rq[0])) {
+		err = -EIO;
+		goto out;
+	}
+
+	clear_bit(CONTEXT_BANNED, &arg->b.ctx->flags);
+	rq[1] = spinner_create_request(&arg->b.spin,
+				       arg->b.ctx, arg->engine,
+				       MI_ARB_CHECK);
+	if (IS_ERR(rq[1])) {
+		err = PTR_ERR(rq[1]);
+		goto out;
+	}
+
+	i915_request_get(rq[1]);
+	err = i915_request_await_dma_fence(rq[1], &rq[0]->fence);
+	i915_request_add(rq[1]);
+	if (err)
+		goto out;
+
+	i915_gem_context_set_banned(arg->b.ctx);
+	err = intel_engine_pulse(arg->engine);
+	if (err)
+		goto out;
+
+	igt_spinner_end(&arg->a.spin);
+	if (i915_request_wait(rq[1], 0, HZ / 5) < 0) {
+		err = -EIO;
+		goto out;
+	}
+
+	if (rq[0]->fence.error != 0) {
+		pr_err("Normal inflight0 request did not complete\n");
+		err = -EINVAL;
+		goto out;
+	}
+
+	if (rq[1]->fence.error != -EIO) {
+		pr_err("Cancelled inflight1 request did not report -EIO\n");
+		err = -EINVAL;
+		goto out;
+	}
+
+out:
+	i915_request_put(rq[1]);
+	i915_request_put(rq[0]);
+	if (igt_live_test_end(&t))
+		err = -EIO;
+	return err;
+}
+
+static int __cancel_queued(struct live_preempt_cancel *arg)
+{
+	struct i915_request *rq[3] = {};
+	struct igt_live_test t;
+	int err;
+
+	/* Full ELSP and one in the wings */
+	GEM_TRACE("%s(%s)\n", __func__, arg->engine->name);
+
+	if (igt_live_test_begin(&t, arg->engine->i915,
+				__func__, arg->engine->name))
+		return -EIO;
+
+	clear_bit(CONTEXT_BANNED, &arg->a.ctx->flags);
+	rq[0] = spinner_create_request(&arg->a.spin,
+				       arg->a.ctx, arg->engine,
+				       MI_ARB_CHECK);
+	if (IS_ERR(rq[0]))
+		return PTR_ERR(rq[0]);
+
+	i915_request_get(rq[0]);
+	i915_request_add(rq[0]);
+	if (!igt_wait_for_spinner(&arg->a.spin, rq[0])) {
+		err = -EIO;
+		goto out;
+	}
+
+	clear_bit(CONTEXT_BANNED, &arg->b.ctx->flags);
+	rq[1] = igt_request_alloc(arg->b.ctx, arg->engine);
+	if (IS_ERR(rq[1])) {
+		err = PTR_ERR(rq[1]);
+		goto out;
+	}
+
+	i915_request_get(rq[1]);
+	err = i915_request_await_dma_fence(rq[1], &rq[0]->fence);
+	i915_request_add(rq[1]);
+	if (err)
+		goto out;
+
+	rq[2] = spinner_create_request(&arg->b.spin,
+				       arg->a.ctx, arg->engine,
+				       MI_ARB_CHECK);
+	if (IS_ERR(rq[2])) {
+		err = PTR_ERR(rq[2]);
+		goto out;
+	}
+
+	i915_request_get(rq[2]);
+	err = i915_request_await_dma_fence(rq[2], &rq[1]->fence);
+	i915_request_add(rq[2]);
+	if (err)
+		goto out;
+
+	i915_gem_context_set_banned(arg->a.ctx);
+	err = intel_engine_pulse(arg->engine);
+	if (err)
+		goto out;
+
+	if (i915_request_wait(rq[2], 0, HZ / 5) < 0) {
+		err = -EIO;
+		goto out;
+	}
+
+	if (rq[0]->fence.error != -EIO) {
+		pr_err("Cancelled inflight0 request did not report -EIO\n");
+		err = -EINVAL;
+		goto out;
+	}
+
+	if (rq[1]->fence.error != 0) {
+		pr_err("Normal inflight1 request did not complete\n");
+		err = -EINVAL;
+		goto out;
+	}
+
+	if (rq[2]->fence.error != -EIO) {
+		pr_err("Cancelled queued request did not report -EIO\n");
+		err = -EINVAL;
+		goto out;
+	}
+
+out:
+	i915_request_put(rq[2]);
+	i915_request_put(rq[1]);
+	i915_request_put(rq[0]);
+	if (igt_live_test_end(&t))
+		err = -EIO;
+	return err;
+}
+
+static int live_preempt_cancel(void *arg)
+{
+	struct drm_i915_private *i915 = arg;
+	struct live_preempt_cancel data;
+	enum intel_engine_id id;
+	int err = -ENOMEM;
+
+	/*
+	 * To cancel an inflight context, we need to first remove it from the
+	 * GPU. That sounds like preemption! Plus a little bit of bookkeeping.
+	 */
+
+	if (!HAS_LOGICAL_RING_PREEMPTION(i915))
+		return 0;
+
+	if (preempt_client_init(i915, &data.a))
+		return -ENOMEM;
+	if (preempt_client_init(i915, &data.b))
+		goto err_client_a;
+
+	for_each_engine(data.engine, i915, id) {
+		if (!intel_engine_has_preemption(data.engine))
+			continue;
+
+		err = __cancel_active0(&data);
+		if (err)
+			goto err_wedged;
+
+		err = __cancel_active1(&data);
+		if (err)
+			goto err_wedged;
+
+		err = __cancel_queued(&data);
+		if (err)
+			goto err_wedged;
+	}
+
+	err = 0;
+err_client_b:
+	preempt_client_fini(&data.b);
+err_client_a:
+	preempt_client_fini(&data.a);
+	return err;
+
+err_wedged:
+	GEM_TRACE_DUMP();
+	igt_spinner_end(&data.b.spin);
+	igt_spinner_end(&data.a.spin);
+	intel_gt_set_wedged(&i915->gt);
+	goto err_client_b;
+}
+
 static int live_suppress_self_preempt(void *arg)
 {
 	struct drm_i915_private *i915 = arg;
@@ -2549,6 +2821,7 @@  int intel_execlists_live_selftests(struct drm_i915_private *i915)
 		SUBTEST(live_preempt),
 		SUBTEST(live_late_preempt),
 		SUBTEST(live_nopreempt),
+		SUBTEST(live_preempt_cancel),
 		SUBTEST(live_suppress_self_preempt),
 		SUBTEST(live_suppress_wait_preempt),
 		SUBTEST(live_chain_preempt),