[v3] drm/i915/execlists: Reclaim the hanging virtual request
diff mbox series

Message ID 20200121130411.267092-1-chris@chris-wilson.co.uk
State New
Headers show
Series
  • [v3] drm/i915/execlists: Reclaim the hanging virtual request
Related show

Commit Message

Chris Wilson Jan. 21, 2020, 1:04 p.m. UTC
If we encounter a hang on a virtual engine, as we process the hang the
request may already have been moved back to the virtual engine (we are
processing the hang on the physical engine). We need to reclaim the
request from the virtual engine so that the locking is consistent and
local to the real engine on which we will hold the request for error
state capturing.

v2: Pull the reclamation into execlists_hold() and assert that cannot be
called from outside of the reset (i.e. with the tasklet disabled).
v3: Added selftest

Fixes: 748317386afb ("drm/i915/execlists: Offline error capture")
Testcase: igt/gem_exec_balancer/hang
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/gt/intel_lrc.c    |  29 +++++
 drivers/gpu/drm/i915/gt/selftest_lrc.c | 158 ++++++++++++++++++++++++-
 2 files changed, 186 insertions(+), 1 deletion(-)

Comments

Tvrtko Ursulin Jan. 21, 2020, 1:55 p.m. UTC | #1
On 21/01/2020 13:04, Chris Wilson wrote:
> If we encounter a hang on a virtual engine, as we process the hang the
> request may already have been moved back to the virtual engine (we are
> processing the hang on the physical engine). We need to reclaim the
> request from the virtual engine so that the locking is consistent and
> local to the real engine on which we will hold the request for error
> state capturing.
> 
> v2: Pull the reclamation into execlists_hold() and assert that cannot be
> called from outside of the reset (i.e. with the tasklet disabled).
> v3: Added selftest
> 
> Fixes: 748317386afb ("drm/i915/execlists: Offline error capture")
> Testcase: igt/gem_exec_balancer/hang
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> ---
>   drivers/gpu/drm/i915/gt/intel_lrc.c    |  29 +++++
>   drivers/gpu/drm/i915/gt/selftest_lrc.c | 158 ++++++++++++++++++++++++-
>   2 files changed, 186 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
> index 3a30767ff0c4..11dfee4fe9ea 100644
> --- a/drivers/gpu/drm/i915/gt/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
> @@ -2396,6 +2396,35 @@ static void __execlists_hold(struct i915_request *rq)
>   static void execlists_hold(struct intel_engine_cs *engine,
>   			   struct i915_request *rq)
>   {
> +	if (rq->engine != engine) { /* preempted virtual engine */
> +		struct virtual_engine *ve = to_virtual_engine(rq->engine);
> +		unsigned long flags;
> +
> +		/*
> +		 * intel_context_inflight() is only protected by virtue
> +		 * of process_csb() being called only by the tasklet (or
> +		 * directly from inside reset while the tasklet is suspended).
> +		 * Assert that neither of those are allowed to run while we
> +		 * poke at the request queues.
> +		 */

I was thinking or execlists_dequeue, but I forgot there is no direct 
submission any more.

> +		GEM_BUG_ON(!reset_in_progress(&engine->execlists));
> +
> +		/*
> +		 * An unsubmitted request along a virtual engine will
> +		 * remain on the active (this) engine until we are able
> +		 * to process the context switch away (and so mark the
> +		 * context as no longer in flight). That cannot have happened
> +		 * yet, otherwise we would not be hanging!
> +		 */
> +		spin_lock_irqsave(&ve->base.active.lock, flags);
> +		GEM_BUG_ON(intel_context_inflight(rq->context) != engine);
> +		GEM_BUG_ON(ve->request != rq);
> +		ve->request = NULL;
> +		spin_unlock_irqrestore(&ve->base.active.lock, flags);
> +
> +		rq->engine = engine;

Lets see I understand this... tasklet has been disabled and ring paused. 
But we find an uncompleted request in the ELSP context, with rq->engine 
== virtual engine. Therefore this cannot be the first request on this 
timeline but has to be later. One which has been put on the runqueue but 
not yet submitted to hw. (Because one at a time.) Or it has been 
unsubmitted by __unwind_incomplete_request already. In the former case 
why move it to the physical engine? Also in the latter actually, it 
would overwrite rq->engine with the physical one.

> +	}
> +
>   	spin_lock_irq(&engine->active.lock);
>   
>   	/*
> diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c
> index b208c2176bbd..1beb47c3ba9e 100644
> --- a/drivers/gpu/drm/i915/gt/selftest_lrc.c
> +++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c
> @@ -335,7 +335,6 @@ static int live_hold_reset(void *arg)
>   
>   		if (test_and_set_bit(I915_RESET_ENGINE + id,
>   				     &gt->reset.flags)) {
> -			spin_unlock_irq(&engine->active.lock);
>   			intel_gt_set_wedged(gt);
>   			err = -EBUSY;
>   			goto out;
> @@ -3411,6 +3410,162 @@ static int live_virtual_bond(void *arg)
>   	return 0;
>   }
>   
> +static int reset_virtual_engine(struct intel_gt *gt,
> +				struct intel_engine_cs **siblings,
> +				unsigned int nsibling)
> +{
> +	struct intel_engine_cs *engine;
> +	struct intel_context *ve;
> +	unsigned long *heartbeat;
> +	struct igt_spinner spin;
> +	struct i915_request *rq;
> +	unsigned int n;
> +	int err = 0;
> +
> +	/*
> +	 * In order to support offline error capture for fast preempt reset,
> +	 * we need to decouple the guilty request and ensure that it and its
> +	 * descendents are not executed while the capture is in progress.
> +	 */
> +
> +	if (!intel_has_reset_engine(gt))
> +		return 0;
> +
> +	heartbeat = kmalloc_array(nsibling, sizeof(*heartbeat), GFP_KERNEL);
> +	if (!heartbeat)
> +		return -ENOMEM;
> +
> +	if (igt_spinner_init(&spin, gt)) {
> +		err = -ENOMEM;
> +		goto out_heartbeat;
> +	}
> +
> +	ve = intel_execlists_create_virtual(siblings, nsibling);
> +	if (IS_ERR(ve)) {
> +		err = PTR_ERR(ve);
> +		goto out_spin;
> +	}
> +
> +	for (n = 0; n < nsibling; n++)
> +		engine_heartbeat_disable(siblings[n], &heartbeat[n]);
> +
> +	rq = igt_spinner_create_request(&spin, ve, MI_ARB_CHECK);
> +	if (IS_ERR(rq)) {
> +		err = PTR_ERR(rq);
> +		goto out;
> +	}
> +	i915_request_add(rq);
> +
> +	if (!igt_wait_for_spinner(&spin, rq)) {
> +		intel_gt_set_wedged(gt);
> +		err = -ETIME;
> +		goto out;
> +	}
> +
> +	engine = rq->engine;
> +	GEM_BUG_ON(engine == ve->engine);
> +
> +	/* Take ownership of the reset and tasklet */
> +	if (test_and_set_bit(I915_RESET_ENGINE + engine->id,
> +			     &gt->reset.flags)) {
> +		intel_gt_set_wedged(gt);
> +		err = -EBUSY;
> +		goto out;
> +	}
> +	tasklet_disable(&engine->execlists.tasklet);
> +
> +	engine->execlists.tasklet.func(engine->execlists.tasklet.data);
> +	GEM_BUG_ON(execlists_active(&engine->execlists) != rq);
> +
> +	/* Fake a preemption event; failed of course */
> +	spin_lock_irq(&engine->active.lock);
> +	__unwind_incomplete_requests(engine);
> +	spin_unlock_irq(&engine->active.lock);
> +
> +	/* Reset the engine while keeping our active request on hold */
> +	execlists_hold(engine, rq);
> +	GEM_BUG_ON(!i915_request_on_hold(rq));
> +
> +	intel_engine_reset(engine, NULL);
> +	GEM_BUG_ON(rq->fence.error != -EIO);
> +
> +	/* Release our grasp on the engine, letting CS flow again */
> +	tasklet_enable(&engine->execlists.tasklet);
> +	clear_and_wake_up_bit(I915_RESET_ENGINE + engine->id, &gt->reset.flags);
> +
> +	/* Check that we do not resubmit the held request */
> +	i915_request_get(rq);
> +	if (!i915_request_wait(rq, 0, HZ / 5)) {
> +		pr_err("%s: on hold request completed!\n",
> +		       engine->name);
> +		i915_request_put(rq);
> +		err = -EIO;
> +		goto out;
> +	}
> +	GEM_BUG_ON(!i915_request_on_hold(rq));
> +
> +	/* But is resubmitted on release */
> +	execlists_unhold(engine, rq);
> +	if (i915_request_wait(rq, 0, HZ / 5) < 0) {
> +		pr_err("%s: held request did not complete!\n",
> +		       engine->name);
> +		intel_gt_set_wedged(gt);
> +		err = -ETIME;
> +	}
> +	i915_request_put(rq);
> +
> +out:
> +	for (n = 0; n < nsibling; n++)
> +		engine_heartbeat_enable(siblings[n], heartbeat[n]);
> +
> +	intel_context_put(ve);
> +out_spin:
> +	igt_spinner_fini(&spin);
> +out_heartbeat:
> +	kfree(heartbeat);
> +	return err;
> +}
> +
> +static int live_virtual_reset(void *arg)
> +{
> +	struct intel_gt *gt = arg;
> +	struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
> +	unsigned int class, inst;
> +
> +	/*
> +	 * Check that the context image retains non-privileged (user) registers
> +	 * from one engine to the next. For this we check that the CS_GPR
> +	 * are preserved.
> +	 */

Zap.

> +
> +	if (USES_GUC_SUBMISSION(gt->i915))
> +		return 0;

Who will remember to turn these on hm...

> +
> +	/* As we use CS_GPR we cannot run before they existed on all engines. */

Zap.

> +	if (INTEL_GEN(gt->i915) < 9)
> +		return 0;
> +
> +	for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
> +		int nsibling, err;
> +
> +		nsibling = 0;
> +		for (inst = 0; inst <= MAX_ENGINE_INSTANCE; inst++) {
> +			if (!gt->engine_class[class][inst])
> +				continue;
> +
> +			siblings[nsibling++] = gt->engine_class[class][inst];
> +		}
> +		if (nsibling < 2)
> +			continue;
> +
> +		err = reset_virtual_engine(gt, siblings, nsibling);
> +		if (err)
> +			return err;
> +	}
> +
> +	return 0;
> +}
> +
>   int intel_execlists_live_selftests(struct drm_i915_private *i915)
>   {
>   	static const struct i915_subtest tests[] = {
> @@ -3436,6 +3591,7 @@ int intel_execlists_live_selftests(struct drm_i915_private *i915)
>   		SUBTEST(live_virtual_mask),
>   		SUBTEST(live_virtual_preserved),
>   		SUBTEST(live_virtual_bond),
> +		SUBTEST(live_virtual_reset),
>   	};
>   
>   	if (!HAS_EXECLISTS(i915))
>
Regards,

Tvrtko
Chris Wilson Jan. 21, 2020, 2:07 p.m. UTC | #2
Quoting Tvrtko Ursulin (2020-01-21 13:55:29)
> 
> 
> On 21/01/2020 13:04, Chris Wilson wrote:
> > +             GEM_BUG_ON(!reset_in_progress(&engine->execlists));
> > +
> > +             /*
> > +              * An unsubmitted request along a virtual engine will
> > +              * remain on the active (this) engine until we are able
> > +              * to process the context switch away (and so mark the
> > +              * context as no longer in flight). That cannot have happened
> > +              * yet, otherwise we would not be hanging!
> > +              */
> > +             spin_lock_irqsave(&ve->base.active.lock, flags);
> > +             GEM_BUG_ON(intel_context_inflight(rq->context) != engine);
> > +             GEM_BUG_ON(ve->request != rq);
> > +             ve->request = NULL;
> > +             spin_unlock_irqrestore(&ve->base.active.lock, flags);
> > +
> > +             rq->engine = engine;
> 
> Lets see I understand this... tasklet has been disabled and ring paused. 
> But we find an uncompleted request in the ELSP context, with rq->engine 
> == virtual engine. Therefore this cannot be the first request on this 
> timeline but has to be later.

Not quite.

engine->execlists.active[] tracks the HW, it get's updated only upon
receiving HW acks (or we reset).

So if execlists_active()->engine == virtual, it can only mean that the
inflight _hanging_ request has already been unsubmitted by an earlier
preemption in execlists_dequeue(), but that preemption has not yet been
processed by the HW. (Hence the preemption-reset underway.)

Now while we coalesce the requests for a context into a single ELSP[]
slot, and only record the last request submitted for a context, we have
to walk back along that context's timeline to find the earliest
incomplete request and blame the hang upon it.

For a virtual engine, it's much simpler as there is only ever one
request in flight, but I don't think that has any impact here other
than that we only need to repair the single unsubmitted request that was
returned to the virtual engine.

> One which has been put on the runqueue but 
> not yet submitted to hw. (Because one at a time.) Or it has been 
> unsubmitted by __unwind_incomplete_request already. In the former case 
> why move it to the physical engine? Also in the latter actually, it 
> would overwrite rq->engine with the physical one.

Yes. For incomplete preemption event, the request is *still* on this
engine and has not been released (rq->context->inflight == engine, so it
cannot be submitted to any other engine, until after we acknowledge the
context has been saved and is no longer being accessed by HW.) It is
legal for us to process the hanging request along this engine; we have a
suboptimal decision to return the request to the same engine after the
reset, but since we have replaced the hanging payload, the request is a
mere signaling placeholder (and I do not think will overly burden the
system and negatively impact other virtual engines).
-Chris
Tvrtko Ursulin Jan. 21, 2020, 5:19 p.m. UTC | #3
On 21/01/2020 14:07, Chris Wilson wrote:
> Quoting Tvrtko Ursulin (2020-01-21 13:55:29)
>>
>>
>> On 21/01/2020 13:04, Chris Wilson wrote:
>>> +             GEM_BUG_ON(!reset_in_progress(&engine->execlists));
>>> +
>>> +             /*
>>> +              * An unsubmitted request along a virtual engine will
>>> +              * remain on the active (this) engine until we are able
>>> +              * to process the context switch away (and so mark the
>>> +              * context as no longer in flight). That cannot have happened
>>> +              * yet, otherwise we would not be hanging!
>>> +              */
>>> +             spin_lock_irqsave(&ve->base.active.lock, flags);
>>> +             GEM_BUG_ON(intel_context_inflight(rq->context) != engine);
>>> +             GEM_BUG_ON(ve->request != rq);
>>> +             ve->request = NULL;
>>> +             spin_unlock_irqrestore(&ve->base.active.lock, flags);
>>> +
>>> +             rq->engine = engine;
>>
>> Lets see I understand this... tasklet has been disabled and ring paused.
>> But we find an uncompleted request in the ELSP context, with rq->engine
>> == virtual engine. Therefore this cannot be the first request on this
>> timeline but has to be later.
> 
> Not quite.
> 
> engine->execlists.active[] tracks the HW, it get's updated only upon
> receiving HW acks (or we reset).
> 
> So if execlists_active()->engine == virtual, it can only mean that the
> inflight _hanging_ request has already been unsubmitted by an earlier
> preemption in execlists_dequeue(), but that preemption has not yet been
> processed by the HW. (Hence the preemption-reset underway.)
> 
> Now while we coalesce the requests for a context into a single ELSP[]
> slot, and only record the last request submitted for a context, we have
> to walk back along that context's timeline to find the earliest
> incomplete request and blame the hang upon it.
> 
> For a virtual engine, it's much simpler as there is only ever one
> request in flight, but I don't think that has any impact here other
> than that we only need to repair the single unsubmitted request that was
> returned to the virtual engine.
> 
>> One which has been put on the runqueue but
>> not yet submitted to hw. (Because one at a time.) Or it has been
>> unsubmitted by __unwind_incomplete_request already. In the former case
>> why move it to the physical engine? Also in the latter actually, it
>> would overwrite rq->engine with the physical one.
> 
> Yes. For incomplete preemption event, the request is *still* on this
> engine and has not been released (rq->context->inflight == engine, so it
> cannot be submitted to any other engine, until after we acknowledge the
> context has been saved and is no longer being accessed by HW.) It is
> legal for us to process the hanging request along this engine; we have a
> suboptimal decision to return the request to the same engine after the
> reset, but since we have replaced the hanging payload, the request is a
> mere signaling placeholder (and I do not think will overly burden the
> system and negatively impact other virtual engines).

What if the request in elsp actually completed in the meantime eg. 
preemption timeout was a false positive?

In execlists_capture we do:

	cap->rq = execlists_active(&engine->execlists);

This gets a completed request, then we do:

	cap->rq = active_request(cap->rq->context->timeline, cap->rq);

This walks along the virtual timeline and finds a next virtual request. 
It then binds this request to a physical engine and sets ve->request to 
NULL.

Then on unhold ve->submit_notify is called which sets ve->request to 
this request but the rq->engine points to the physical engine.

Regards,

Tvrtko
Chris Wilson Jan. 21, 2020, 5:32 p.m. UTC | #4
Quoting Tvrtko Ursulin (2020-01-21 17:19:52)
> 
> On 21/01/2020 14:07, Chris Wilson wrote:
> > Quoting Tvrtko Ursulin (2020-01-21 13:55:29)
> >>
> >>
> >> On 21/01/2020 13:04, Chris Wilson wrote:
> >>> +             GEM_BUG_ON(!reset_in_progress(&engine->execlists));
> >>> +
> >>> +             /*
> >>> +              * An unsubmitted request along a virtual engine will
> >>> +              * remain on the active (this) engine until we are able
> >>> +              * to process the context switch away (and so mark the
> >>> +              * context as no longer in flight). That cannot have happened
> >>> +              * yet, otherwise we would not be hanging!
> >>> +              */
> >>> +             spin_lock_irqsave(&ve->base.active.lock, flags);
> >>> +             GEM_BUG_ON(intel_context_inflight(rq->context) != engine);
> >>> +             GEM_BUG_ON(ve->request != rq);
> >>> +             ve->request = NULL;
> >>> +             spin_unlock_irqrestore(&ve->base.active.lock, flags);
> >>> +
> >>> +             rq->engine = engine;
> >>
> >> Lets see I understand this... tasklet has been disabled and ring paused.
> >> But we find an uncompleted request in the ELSP context, with rq->engine
> >> == virtual engine. Therefore this cannot be the first request on this
> >> timeline but has to be later.
> > 
> > Not quite.
> > 
> > engine->execlists.active[] tracks the HW, it get's updated only upon
> > receiving HW acks (or we reset).
> > 
> > So if execlists_active()->engine == virtual, it can only mean that the
> > inflight _hanging_ request has already been unsubmitted by an earlier
> > preemption in execlists_dequeue(), but that preemption has not yet been
> > processed by the HW. (Hence the preemption-reset underway.)
> > 
> > Now while we coalesce the requests for a context into a single ELSP[]
> > slot, and only record the last request submitted for a context, we have
> > to walk back along that context's timeline to find the earliest
> > incomplete request and blame the hang upon it.
> > 
> > For a virtual engine, it's much simpler as there is only ever one
> > request in flight, but I don't think that has any impact here other
> > than that we only need to repair the single unsubmitted request that was
> > returned to the virtual engine.
> > 
> >> One which has been put on the runqueue but
> >> not yet submitted to hw. (Because one at a time.) Or it has been
> >> unsubmitted by __unwind_incomplete_request already. In the former case
> >> why move it to the physical engine? Also in the latter actually, it
> >> would overwrite rq->engine with the physical one.
> > 
> > Yes. For incomplete preemption event, the request is *still* on this
> > engine and has not been released (rq->context->inflight == engine, so it
> > cannot be submitted to any other engine, until after we acknowledge the
> > context has been saved and is no longer being accessed by HW.) It is
> > legal for us to process the hanging request along this engine; we have a
> > suboptimal decision to return the request to the same engine after the
> > reset, but since we have replaced the hanging payload, the request is a
> > mere signaling placeholder (and I do not think will overly burden the
> > system and negatively impact other virtual engines).
> 
> What if the request in elsp actually completed in the meantime eg. 
> preemption timeout was a false positive?
> 
> In execlists_capture we do:
> 
>         cap->rq = execlists_active(&engine->execlists);
> 
> This gets a completed request, then we do:
> 
>         cap->rq = active_request(cap->rq->context->timeline, cap->rq);
> 
> This walks along the virtual timeline and finds a next virtual request. 
> It then binds this request to a physical engine and sets ve->request to 
> NULL.

If we miss the completion event, then active_request() returns the
original request and we blame it for a having a 650ms preemption-off
shader with a 640ms preemption timeout.

> Then on unhold ve->submit_notify is called which sets ve->request to 
> this request but the rq->engine points to the physical engine.

We don't call ve->submit_notify() on unhold, we put it back into our
local priority queue. Keeping ownership of the request on the local
engine seemed to the easiest way to keep track of the locking, and
re-submitting the guilty request on the same engine should not be an
issue.
-Chris
Tvrtko Ursulin Jan. 21, 2020, 5:43 p.m. UTC | #5
On 21/01/2020 17:32, Chris Wilson wrote:
> Quoting Tvrtko Ursulin (2020-01-21 17:19:52)
>>
>> On 21/01/2020 14:07, Chris Wilson wrote:
>>> Quoting Tvrtko Ursulin (2020-01-21 13:55:29)
>>>>
>>>>
>>>> On 21/01/2020 13:04, Chris Wilson wrote:
>>>>> +             GEM_BUG_ON(!reset_in_progress(&engine->execlists));
>>>>> +
>>>>> +             /*
>>>>> +              * An unsubmitted request along a virtual engine will
>>>>> +              * remain on the active (this) engine until we are able
>>>>> +              * to process the context switch away (and so mark the
>>>>> +              * context as no longer in flight). That cannot have happened
>>>>> +              * yet, otherwise we would not be hanging!
>>>>> +              */
>>>>> +             spin_lock_irqsave(&ve->base.active.lock, flags);
>>>>> +             GEM_BUG_ON(intel_context_inflight(rq->context) != engine);
>>>>> +             GEM_BUG_ON(ve->request != rq);
>>>>> +             ve->request = NULL;
>>>>> +             spin_unlock_irqrestore(&ve->base.active.lock, flags);
>>>>> +
>>>>> +             rq->engine = engine;
>>>>
>>>> Lets see I understand this... tasklet has been disabled and ring paused.
>>>> But we find an uncompleted request in the ELSP context, with rq->engine
>>>> == virtual engine. Therefore this cannot be the first request on this
>>>> timeline but has to be later.
>>>
>>> Not quite.
>>>
>>> engine->execlists.active[] tracks the HW, it get's updated only upon
>>> receiving HW acks (or we reset).
>>>
>>> So if execlists_active()->engine == virtual, it can only mean that the
>>> inflight _hanging_ request has already been unsubmitted by an earlier
>>> preemption in execlists_dequeue(), but that preemption has not yet been
>>> processed by the HW. (Hence the preemption-reset underway.)
>>>
>>> Now while we coalesce the requests for a context into a single ELSP[]
>>> slot, and only record the last request submitted for a context, we have
>>> to walk back along that context's timeline to find the earliest
>>> incomplete request and blame the hang upon it.
>>>
>>> For a virtual engine, it's much simpler as there is only ever one
>>> request in flight, but I don't think that has any impact here other
>>> than that we only need to repair the single unsubmitted request that was
>>> returned to the virtual engine.
>>>
>>>> One which has been put on the runqueue but
>>>> not yet submitted to hw. (Because one at a time.) Or it has been
>>>> unsubmitted by __unwind_incomplete_request already. In the former case
>>>> why move it to the physical engine? Also in the latter actually, it
>>>> would overwrite rq->engine with the physical one.
>>>
>>> Yes. For incomplete preemption event, the request is *still* on this
>>> engine and has not been released (rq->context->inflight == engine, so it
>>> cannot be submitted to any other engine, until after we acknowledge the
>>> context has been saved and is no longer being accessed by HW.) It is
>>> legal for us to process the hanging request along this engine; we have a
>>> suboptimal decision to return the request to the same engine after the
>>> reset, but since we have replaced the hanging payload, the request is a
>>> mere signaling placeholder (and I do not think will overly burden the
>>> system and negatively impact other virtual engines).
>>
>> What if the request in elsp actually completed in the meantime eg.
>> preemption timeout was a false positive?
>>
>> In execlists_capture we do:
>>
>>          cap->rq = execlists_active(&engine->execlists);
>>
>> This gets a completed request, then we do:
>>
>>          cap->rq = active_request(cap->rq->context->timeline, cap->rq);
>>
>> This walks along the virtual timeline and finds a next virtual request.
>> It then binds this request to a physical engine and sets ve->request to
>> NULL.
> 
> If we miss the completion event, then active_request() returns the
> original request and we blame it for a having a 650ms preemption-off
> shader with a 640ms preemption timeout.

I am thinking of this sequence of interleaved events:

	preempt_timeout
				tasklet_disable
				ring_pause
				execlist_active
	seqno write visible
				active_request - walks the tl to next
				execlist_hold
				schedule_worker
				tasklet_enable
	process_csb completed

This is not possible? Seqno write happening needs only to be roughly 
there since as long as tasklet has been disabled execlist->active 
remains fixed.

>> Then on unhold ve->submit_notify is called which sets ve->request to
>> this request but the rq->engine points to the physical engine.
> 
> We don't call ve->submit_notify() on unhold, we put it back into our
> local priority queue. Keeping ownership of the request on the local
> engine seemed to the easiest way to keep track of the locking, and
> re-submitting the guilty request on the same engine should not be an
> issue.

True, I am jumping between different things and have confused this bit.

Regards,

Tvrtko
Chris Wilson Jan. 21, 2020, 5:57 p.m. UTC | #6
Quoting Tvrtko Ursulin (2020-01-21 17:43:37)
> 
> On 21/01/2020 17:32, Chris Wilson wrote:
> > Quoting Tvrtko Ursulin (2020-01-21 17:19:52)
> >>
> >> On 21/01/2020 14:07, Chris Wilson wrote:
> >>> Quoting Tvrtko Ursulin (2020-01-21 13:55:29)
> >>>>
> >>>>
> >>>> On 21/01/2020 13:04, Chris Wilson wrote:
> >>>>> +             GEM_BUG_ON(!reset_in_progress(&engine->execlists));
> >>>>> +
> >>>>> +             /*
> >>>>> +              * An unsubmitted request along a virtual engine will
> >>>>> +              * remain on the active (this) engine until we are able
> >>>>> +              * to process the context switch away (and so mark the
> >>>>> +              * context as no longer in flight). That cannot have happened
> >>>>> +              * yet, otherwise we would not be hanging!
> >>>>> +              */
> >>>>> +             spin_lock_irqsave(&ve->base.active.lock, flags);
> >>>>> +             GEM_BUG_ON(intel_context_inflight(rq->context) != engine);
> >>>>> +             GEM_BUG_ON(ve->request != rq);
> >>>>> +             ve->request = NULL;
> >>>>> +             spin_unlock_irqrestore(&ve->base.active.lock, flags);
> >>>>> +
> >>>>> +             rq->engine = engine;
> >>>>
> >>>> Lets see I understand this... tasklet has been disabled and ring paused.
> >>>> But we find an uncompleted request in the ELSP context, with rq->engine
> >>>> == virtual engine. Therefore this cannot be the first request on this
> >>>> timeline but has to be later.
> >>>
> >>> Not quite.
> >>>
> >>> engine->execlists.active[] tracks the HW, it get's updated only upon
> >>> receiving HW acks (or we reset).
> >>>
> >>> So if execlists_active()->engine == virtual, it can only mean that the
> >>> inflight _hanging_ request has already been unsubmitted by an earlier
> >>> preemption in execlists_dequeue(), but that preemption has not yet been
> >>> processed by the HW. (Hence the preemption-reset underway.)
> >>>
> >>> Now while we coalesce the requests for a context into a single ELSP[]
> >>> slot, and only record the last request submitted for a context, we have
> >>> to walk back along that context's timeline to find the earliest
> >>> incomplete request and blame the hang upon it.
> >>>
> >>> For a virtual engine, it's much simpler as there is only ever one
> >>> request in flight, but I don't think that has any impact here other
> >>> than that we only need to repair the single unsubmitted request that was
> >>> returned to the virtual engine.
> >>>
> >>>> One which has been put on the runqueue but
> >>>> not yet submitted to hw. (Because one at a time.) Or it has been
> >>>> unsubmitted by __unwind_incomplete_request already. In the former case
> >>>> why move it to the physical engine? Also in the latter actually, it
> >>>> would overwrite rq->engine with the physical one.
> >>>
> >>> Yes. For incomplete preemption event, the request is *still* on this
> >>> engine and has not been released (rq->context->inflight == engine, so it
> >>> cannot be submitted to any other engine, until after we acknowledge the
> >>> context has been saved and is no longer being accessed by HW.) It is
> >>> legal for us to process the hanging request along this engine; we have a
> >>> suboptimal decision to return the request to the same engine after the
> >>> reset, but since we have replaced the hanging payload, the request is a
> >>> mere signaling placeholder (and I do not think will overly burden the
> >>> system and negatively impact other virtual engines).
> >>
> >> What if the request in elsp actually completed in the meantime eg.
> >> preemption timeout was a false positive?
> >>
> >> In execlists_capture we do:
> >>
> >>          cap->rq = execlists_active(&engine->execlists);
> >>
> >> This gets a completed request, then we do:
> >>
> >>          cap->rq = active_request(cap->rq->context->timeline, cap->rq);
> >>
> >> This walks along the virtual timeline and finds a next virtual request.
> >> It then binds this request to a physical engine and sets ve->request to
> >> NULL.
> > 
> > If we miss the completion event, then active_request() returns the
> > original request and we blame it for a having a 650ms preemption-off
> > shader with a 640ms preemption timeout.
> 
> I am thinking of this sequence of interleaved events:
> 
>         preempt_timeout
>                                 tasklet_disable
>                                 ring_pause
>                                 execlist_active
>         seqno write visible
>                                 active_request - walks the tl to next

... tries to walk to next, sees no incomplete request, returns original
request.

static struct i915_request *
active_request(const struct intel_timeline * const tl, struct i915_request *rq)
{
        struct i915_request *active = rq;
	^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ this sneaky line

        rcu_read_lock();
        list_for_each_entry_continue_reverse(rq, &tl->requests, link) {
                if (i915_request_completed(rq))
                        break;

                active = rq;
		^^^^^^^^^^^^ these too may complete at any moment after
		our inspection


        }
        rcu_read_unlock();

        return active;
}

>                                 execlist_hold
>                                 schedule_worker
>                                 tasklet_enable
>         process_csb completed
> 
> This is not possible? Seqno write happening needs only to be roughly 
> there since as long as tasklet has been disabled execlist->active 
> remains fixed.

It's certainly possible, the requests do keep going on the HW up until
the next semaphore (which is after the seqno write). That is taken into
account in that we may end up trying to reset a completed request, which
should be avoided in execlists_reset() [after the HW has processed the
reset request], but we capture the request anyway and put it back for
execution (which is avoided in execlists_dequeue). Isn't preempt-to-busy
fun?
-Chris
Tvrtko Ursulin Jan. 22, 2020, 1:32 p.m. UTC | #7
On 21/01/2020 17:57, Chris Wilson wrote:
> Quoting Tvrtko Ursulin (2020-01-21 17:43:37)
>>
>> On 21/01/2020 17:32, Chris Wilson wrote:
>>> Quoting Tvrtko Ursulin (2020-01-21 17:19:52)
>>>>
>>>> On 21/01/2020 14:07, Chris Wilson wrote:
>>>>> Quoting Tvrtko Ursulin (2020-01-21 13:55:29)
>>>>>>
>>>>>>
>>>>>> On 21/01/2020 13:04, Chris Wilson wrote:
>>>>>>> +             GEM_BUG_ON(!reset_in_progress(&engine->execlists));
>>>>>>> +
>>>>>>> +             /*
>>>>>>> +              * An unsubmitted request along a virtual engine will
>>>>>>> +              * remain on the active (this) engine until we are able
>>>>>>> +              * to process the context switch away (and so mark the
>>>>>>> +              * context as no longer in flight). That cannot have happened
>>>>>>> +              * yet, otherwise we would not be hanging!
>>>>>>> +              */
>>>>>>> +             spin_lock_irqsave(&ve->base.active.lock, flags);
>>>>>>> +             GEM_BUG_ON(intel_context_inflight(rq->context) != engine);
>>>>>>> +             GEM_BUG_ON(ve->request != rq);
>>>>>>> +             ve->request = NULL;
>>>>>>> +             spin_unlock_irqrestore(&ve->base.active.lock, flags);
>>>>>>> +
>>>>>>> +             rq->engine = engine;
>>>>>>
>>>>>> Lets see I understand this... tasklet has been disabled and ring paused.
>>>>>> But we find an uncompleted request in the ELSP context, with rq->engine
>>>>>> == virtual engine. Therefore this cannot be the first request on this
>>>>>> timeline but has to be later.
>>>>>
>>>>> Not quite.
>>>>>
>>>>> engine->execlists.active[] tracks the HW, it get's updated only upon
>>>>> receiving HW acks (or we reset).
>>>>>
>>>>> So if execlists_active()->engine == virtual, it can only mean that the
>>>>> inflight _hanging_ request has already been unsubmitted by an earlier
>>>>> preemption in execlists_dequeue(), but that preemption has not yet been
>>>>> processed by the HW. (Hence the preemption-reset underway.)
>>>>>
>>>>> Now while we coalesce the requests for a context into a single ELSP[]
>>>>> slot, and only record the last request submitted for a context, we have
>>>>> to walk back along that context's timeline to find the earliest
>>>>> incomplete request and blame the hang upon it.
>>>>>
>>>>> For a virtual engine, it's much simpler as there is only ever one
>>>>> request in flight, but I don't think that has any impact here other
>>>>> than that we only need to repair the single unsubmitted request that was
>>>>> returned to the virtual engine.
>>>>>
>>>>>> One which has been put on the runqueue but
>>>>>> not yet submitted to hw. (Because one at a time.) Or it has been
>>>>>> unsubmitted by __unwind_incomplete_request already. In the former case
>>>>>> why move it to the physical engine? Also in the latter actually, it
>>>>>> would overwrite rq->engine with the physical one.
>>>>>
>>>>> Yes. For incomplete preemption event, the request is *still* on this
>>>>> engine and has not been released (rq->context->inflight == engine, so it
>>>>> cannot be submitted to any other engine, until after we acknowledge the
>>>>> context has been saved and is no longer being accessed by HW.) It is
>>>>> legal for us to process the hanging request along this engine; we have a
>>>>> suboptimal decision to return the request to the same engine after the
>>>>> reset, but since we have replaced the hanging payload, the request is a
>>>>> mere signaling placeholder (and I do not think will overly burden the
>>>>> system and negatively impact other virtual engines).
>>>>
>>>> What if the request in elsp actually completed in the meantime eg.
>>>> preemption timeout was a false positive?
>>>>
>>>> In execlists_capture we do:
>>>>
>>>>           cap->rq = execlists_active(&engine->execlists);
>>>>
>>>> This gets a completed request, then we do:
>>>>
>>>>           cap->rq = active_request(cap->rq->context->timeline, cap->rq);
>>>>
>>>> This walks along the virtual timeline and finds a next virtual request.
>>>> It then binds this request to a physical engine and sets ve->request to
>>>> NULL.
>>>
>>> If we miss the completion event, then active_request() returns the
>>> original request and we blame it for a having a 650ms preemption-off
>>> shader with a 640ms preemption timeout.
>>
>> I am thinking of this sequence of interleaved events:
>>
>>          preempt_timeout
>>                                  tasklet_disable
>>                                  ring_pause
>>                                  execlist_active
>>          seqno write visible
>>                                  active_request - walks the tl to next
> 
> ... tries to walk to next, sees no incomplete request, returns original
> request.
> 
> static struct i915_request *
> active_request(const struct intel_timeline * const tl, struct i915_request *rq)
> {
>          struct i915_request *active = rq;
> 	^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ this sneaky line
> 
>          rcu_read_lock();
>          list_for_each_entry_continue_reverse(rq, &tl->requests, link) {
>                  if (i915_request_completed(rq))
>                          break;
> 
>                  active = rq;
> 		^^^^^^^^^^^^ these too may complete at any moment after
> 		our inspection
> 
> 
>          }
>          rcu_read_unlock();
> 
>          return active;
> }

Brain fart on my part, sorry. I was confused.

Regards,

Tvrtko

>>                                  execlist_hold
>>                                  schedule_worker
>>                                  tasklet_enable
>>          process_csb completed
>>
>> This is not possible? Seqno write happening needs only to be roughly
>> there since as long as tasklet has been disabled execlist->active
>> remains fixed.
> 
> It's certainly possible, the requests do keep going on the HW up until
> the next semaphore (which is after the seqno write). That is taken into
> account in that we may end up trying to reset a completed request, which
> should be avoided in execlists_reset() [after the HW has processed the
> reset request], but we capture the request anyway and put it back for
> execution (which is avoided in execlists_dequeue). Isn't preempt-to-busy
> fun?
> -Chris
>

Patch
diff mbox series

diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index 3a30767ff0c4..11dfee4fe9ea 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -2396,6 +2396,35 @@  static void __execlists_hold(struct i915_request *rq)
 static void execlists_hold(struct intel_engine_cs *engine,
 			   struct i915_request *rq)
 {
+	if (rq->engine != engine) { /* preempted virtual engine */
+		struct virtual_engine *ve = to_virtual_engine(rq->engine);
+		unsigned long flags;
+
+		/*
+		 * intel_context_inflight() is only protected by virtue
+		 * of process_csb() being called only by the tasklet (or
+		 * directly from inside reset while the tasklet is suspended).
+		 * Assert that neither of those are allowed to run while we
+		 * poke at the request queues.
+		 */
+		GEM_BUG_ON(!reset_in_progress(&engine->execlists));
+
+		/*
+		 * An unsubmitted request along a virtual engine will
+		 * remain on the active (this) engine until we are able
+		 * to process the context switch away (and so mark the
+		 * context as no longer in flight). That cannot have happened
+		 * yet, otherwise we would not be hanging!
+		 */
+		spin_lock_irqsave(&ve->base.active.lock, flags);
+		GEM_BUG_ON(intel_context_inflight(rq->context) != engine);
+		GEM_BUG_ON(ve->request != rq);
+		ve->request = NULL;
+		spin_unlock_irqrestore(&ve->base.active.lock, flags);
+
+		rq->engine = engine;
+	}
+
 	spin_lock_irq(&engine->active.lock);
 
 	/*
diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c
index b208c2176bbd..1beb47c3ba9e 100644
--- a/drivers/gpu/drm/i915/gt/selftest_lrc.c
+++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c
@@ -335,7 +335,6 @@  static int live_hold_reset(void *arg)
 
 		if (test_and_set_bit(I915_RESET_ENGINE + id,
 				     &gt->reset.flags)) {
-			spin_unlock_irq(&engine->active.lock);
 			intel_gt_set_wedged(gt);
 			err = -EBUSY;
 			goto out;
@@ -3411,6 +3410,162 @@  static int live_virtual_bond(void *arg)
 	return 0;
 }
 
+static int reset_virtual_engine(struct intel_gt *gt,
+				struct intel_engine_cs **siblings,
+				unsigned int nsibling)
+{
+	struct intel_engine_cs *engine;
+	struct intel_context *ve;
+	unsigned long *heartbeat;
+	struct igt_spinner spin;
+	struct i915_request *rq;
+	unsigned int n;
+	int err = 0;
+
+	/*
+	 * In order to support offline error capture for fast preempt reset,
+	 * we need to decouple the guilty request and ensure that it and its
+	 * descendents are not executed while the capture is in progress.
+	 */
+
+	if (!intel_has_reset_engine(gt))
+		return 0;
+
+	heartbeat = kmalloc_array(nsibling, sizeof(*heartbeat), GFP_KERNEL);
+	if (!heartbeat)
+		return -ENOMEM;
+
+	if (igt_spinner_init(&spin, gt)) {
+		err = -ENOMEM;
+		goto out_heartbeat;
+	}
+
+	ve = intel_execlists_create_virtual(siblings, nsibling);
+	if (IS_ERR(ve)) {
+		err = PTR_ERR(ve);
+		goto out_spin;
+	}
+
+	for (n = 0; n < nsibling; n++)
+		engine_heartbeat_disable(siblings[n], &heartbeat[n]);
+
+	rq = igt_spinner_create_request(&spin, ve, MI_ARB_CHECK);
+	if (IS_ERR(rq)) {
+		err = PTR_ERR(rq);
+		goto out;
+	}
+	i915_request_add(rq);
+
+	if (!igt_wait_for_spinner(&spin, rq)) {
+		intel_gt_set_wedged(gt);
+		err = -ETIME;
+		goto out;
+	}
+
+	engine = rq->engine;
+	GEM_BUG_ON(engine == ve->engine);
+
+	/* Take ownership of the reset and tasklet */
+	if (test_and_set_bit(I915_RESET_ENGINE + engine->id,
+			     &gt->reset.flags)) {
+		intel_gt_set_wedged(gt);
+		err = -EBUSY;
+		goto out;
+	}
+	tasklet_disable(&engine->execlists.tasklet);
+
+	engine->execlists.tasklet.func(engine->execlists.tasklet.data);
+	GEM_BUG_ON(execlists_active(&engine->execlists) != rq);
+
+	/* Fake a preemption event; failed of course */
+	spin_lock_irq(&engine->active.lock);
+	__unwind_incomplete_requests(engine);
+	spin_unlock_irq(&engine->active.lock);
+
+	/* Reset the engine while keeping our active request on hold */
+	execlists_hold(engine, rq);
+	GEM_BUG_ON(!i915_request_on_hold(rq));
+
+	intel_engine_reset(engine, NULL);
+	GEM_BUG_ON(rq->fence.error != -EIO);
+
+	/* Release our grasp on the engine, letting CS flow again */
+	tasklet_enable(&engine->execlists.tasklet);
+	clear_and_wake_up_bit(I915_RESET_ENGINE + engine->id, &gt->reset.flags);
+
+	/* Check that we do not resubmit the held request */
+	i915_request_get(rq);
+	if (!i915_request_wait(rq, 0, HZ / 5)) {
+		pr_err("%s: on hold request completed!\n",
+		       engine->name);
+		i915_request_put(rq);
+		err = -EIO;
+		goto out;
+	}
+	GEM_BUG_ON(!i915_request_on_hold(rq));
+
+	/* But is resubmitted on release */
+	execlists_unhold(engine, rq);
+	if (i915_request_wait(rq, 0, HZ / 5) < 0) {
+		pr_err("%s: held request did not complete!\n",
+		       engine->name);
+		intel_gt_set_wedged(gt);
+		err = -ETIME;
+	}
+	i915_request_put(rq);
+
+out:
+	for (n = 0; n < nsibling; n++)
+		engine_heartbeat_enable(siblings[n], heartbeat[n]);
+
+	intel_context_put(ve);
+out_spin:
+	igt_spinner_fini(&spin);
+out_heartbeat:
+	kfree(heartbeat);
+	return err;
+}
+
+static int live_virtual_reset(void *arg)
+{
+	struct intel_gt *gt = arg;
+	struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
+	unsigned int class, inst;
+
+	/*
+	 * Check that the context image retains non-privileged (user) registers
+	 * from one engine to the next. For this we check that the CS_GPR
+	 * are preserved.
+	 */
+
+	if (USES_GUC_SUBMISSION(gt->i915))
+		return 0;
+
+	/* As we use CS_GPR we cannot run before they existed on all engines. */
+	if (INTEL_GEN(gt->i915) < 9)
+		return 0;
+
+	for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
+		int nsibling, err;
+
+		nsibling = 0;
+		for (inst = 0; inst <= MAX_ENGINE_INSTANCE; inst++) {
+			if (!gt->engine_class[class][inst])
+				continue;
+
+			siblings[nsibling++] = gt->engine_class[class][inst];
+		}
+		if (nsibling < 2)
+			continue;
+
+		err = reset_virtual_engine(gt, siblings, nsibling);
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
+
 int intel_execlists_live_selftests(struct drm_i915_private *i915)
 {
 	static const struct i915_subtest tests[] = {
@@ -3436,6 +3591,7 @@  int intel_execlists_live_selftests(struct drm_i915_private *i915)
 		SUBTEST(live_virtual_mask),
 		SUBTEST(live_virtual_preserved),
 		SUBTEST(live_virtual_bond),
+		SUBTEST(live_virtual_reset),
 	};
 
 	if (!HAS_EXECLISTS(i915))