Message ID | 20190921095542.23205-2-chris@chris-wilson.co.uk (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | [1/5] drm/i915/execlists: Refactor -EIO markup of hung requests | expand |
On 21/09/2019 10:55, Chris Wilson wrote: > If we are asked to submit a completed request, just move it onto the > active-list without modifying it's payload. If we try to emit the > modified payload of a completed request, we risk racing with the > ring->head update during retirement which may advance the head past our > breadcrumb and so we generate a warning for the emission being behind > the RING_HEAD. > > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> > --- > drivers/gpu/drm/i915/gt/intel_lrc.c | 45 +++++++++++++---------------- > drivers/gpu/drm/i915/i915_request.c | 28 ++++++++++-------- > drivers/gpu/drm/i915/i915_request.h | 2 +- > 3 files changed, 37 insertions(+), 38 deletions(-) > > diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c > index 53e823d36b28..53bc4308793c 100644 > --- a/drivers/gpu/drm/i915/gt/intel_lrc.c > +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c > @@ -806,6 +806,9 @@ static bool can_merge_rq(const struct i915_request *prev, > GEM_BUG_ON(prev == next); > GEM_BUG_ON(!assert_priority_queue(prev, next)); > > + if (i915_request_completed(next)) > + return true; > + This reads very un-intuitive. Why would it always be okay to merge possibly unrelated requests? From which it follows it must be a hack of some kind - from which it follows it needs a comment to explain it. 
> if (!can_merge_ctx(prev->hw_context, next->hw_context)) > return false; > > @@ -1188,21 +1191,6 @@ static void execlists_dequeue(struct intel_engine_cs *engine) > continue; > } > > - if (i915_request_completed(rq)) { > - ve->request = NULL; > - ve->base.execlists.queue_priority_hint = INT_MIN; > - rb_erase_cached(rb, &execlists->virtual); > - RB_CLEAR_NODE(rb); > - > - rq->engine = engine; > - __i915_request_submit(rq); > - > - spin_unlock(&ve->base.active.lock); > - > - rb = rb_first_cached(&execlists->virtual); > - continue; > - } > - > if (last && !can_merge_rq(last, rq)) { > spin_unlock(&ve->base.active.lock); > return; /* leave this for another */ > @@ -1256,11 +1244,16 @@ static void execlists_dequeue(struct intel_engine_cs *engine) > GEM_BUG_ON(ve->siblings[0] != engine); > } > > - __i915_request_submit(rq); > - if (!i915_request_completed(rq)) { > + if (__i915_request_submit(rq)) { > submit = true; > last = rq; > } > + > + if (!submit) { > + spin_unlock(&ve->base.active.lock); > + rb = rb_first_cached(&execlists->virtual); > + continue; > + } This block also needs a comment I think. It's about skipping until first incomplete request in the queue? > } > > spin_unlock(&ve->base.active.lock); > @@ -1273,8 +1266,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine) > int i; > > priolist_for_each_request_consume(rq, rn, p, i) { > - if (i915_request_completed(rq)) > - goto skip; > + bool merge = true; > > /* > * Can we combine this request with the current port? 
> @@ -1315,14 +1307,17 @@ static void execlists_dequeue(struct intel_engine_cs *engine) > ctx_single_port_submission(rq->hw_context)) > goto done; > > - *port = execlists_schedule_in(last, port - execlists->pending); > - port++; > + merge = false; > } > > - last = rq; > - submit = true; > -skip: > - __i915_request_submit(rq); > + if (__i915_request_submit(rq)) { > + if (!merge) { > + *port = execlists_schedule_in(last, port - execlists->pending); > + port++; > + } > + submit = true; > + last = rq; > + } > } > > rb_erase_cached(&p->node, &execlists->queue); > diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c > index 9bd8538b1907..db1a0048a753 100644 > --- a/drivers/gpu/drm/i915/i915_request.c > +++ b/drivers/gpu/drm/i915/i915_request.c > @@ -377,9 +377,10 @@ __i915_request_await_execution(struct i915_request *rq, > return 0; > } > > -void __i915_request_submit(struct i915_request *request) > +bool __i915_request_submit(struct i915_request *request) > { > struct intel_engine_cs *engine = request->engine; > + bool result = false; > > GEM_TRACE("%s fence %llx:%lld, current %d\n", > engine->name, > @@ -389,6 +390,9 @@ void __i915_request_submit(struct i915_request *request) > GEM_BUG_ON(!irqs_disabled()); > lockdep_assert_held(&engine->active.lock); > > + if (i915_request_completed(request)) > + goto xfer; A comment here as well I think to explain what happens next with completed requests put on the active list. They just get removed during retire? Do they need to even be put on that list? 
> + > if (i915_gem_context_is_banned(request->gem_context)) > i915_request_skip(request, -EIO); > > @@ -412,13 +416,18 @@ void __i915_request_submit(struct i915_request *request) > i915_sw_fence_signaled(&request->semaphore)) > engine->saturated |= request->sched.semaphores; > > - /* We may be recursing from the signal callback of another i915 fence */ > - spin_lock_nested(&request->lock, SINGLE_DEPTH_NESTING); > + engine->emit_fini_breadcrumb(request, > + request->ring->vaddr + request->postfix); > > - list_move_tail(&request->sched.link, &engine->active.requests); > + trace_i915_request_execute(request); > + engine->serial++; > + result = true; > > - GEM_BUG_ON(test_bit(I915_FENCE_FLAG_ACTIVE, &request->fence.flags)); > - set_bit(I915_FENCE_FLAG_ACTIVE, &request->fence.flags); > +xfer: /* We may be recursing from the signal callback of another i915 fence */ > + spin_lock_nested(&request->lock, SINGLE_DEPTH_NESTING); > + > + if (!test_and_set_bit(I915_FENCE_FLAG_ACTIVE, &request->fence.flags)) > + list_move_tail(&request->sched.link, &engine->active.requests); > > if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &request->fence.flags) && > !test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &request->fence.flags) && > @@ -429,12 +438,7 @@ void __i915_request_submit(struct i915_request *request) > > spin_unlock(&request->lock); > > - engine->emit_fini_breadcrumb(request, > - request->ring->vaddr + request->postfix); > - > - engine->serial++; > - > - trace_i915_request_execute(request); > + return result; > } > > void i915_request_submit(struct i915_request *request) > diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h > index b18f49528ded..ec5bb4c2e5ae 100644 > --- a/drivers/gpu/drm/i915/i915_request.h > +++ b/drivers/gpu/drm/i915/i915_request.h > @@ -292,7 +292,7 @@ int i915_request_await_execution(struct i915_request *rq, > > void i915_request_add(struct i915_request *rq); > > -void __i915_request_submit(struct i915_request *request); > +bool 
__i915_request_submit(struct i915_request *request); > void i915_request_submit(struct i915_request *request); > > void i915_request_skip(struct i915_request *request, int error); > Regards, Tvrtko
Quoting Tvrtko Ursulin (2019-09-23 10:51:48) > > On 21/09/2019 10:55, Chris Wilson wrote: > > If we are asked to submit a completed request, just move it onto the > > active-list without modifying it's payload. If we try to emit the > > modified payload of a completed request, we risk racing with the > > ring->head update during retirement which may advance the head past our > > breadcrumb and so we generate a warning for the emission being behind > > the RING_HEAD. > > > > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> > > --- > > drivers/gpu/drm/i915/gt/intel_lrc.c | 45 +++++++++++++---------------- > > drivers/gpu/drm/i915/i915_request.c | 28 ++++++++++-------- > > drivers/gpu/drm/i915/i915_request.h | 2 +- > > 3 files changed, 37 insertions(+), 38 deletions(-) > > > > diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c > > index 53e823d36b28..53bc4308793c 100644 > > --- a/drivers/gpu/drm/i915/gt/intel_lrc.c > > +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c > > @@ -806,6 +806,9 @@ static bool can_merge_rq(const struct i915_request *prev, > > GEM_BUG_ON(prev == next); > > GEM_BUG_ON(!assert_priority_queue(prev, next)); > > > > + if (i915_request_completed(next)) > > + return true; > > + > > This reads very un-intuitive. Why would it always be okay to merge > possibly unrelated requests? From which it follows it must be a hack of > some kind - from which it follows it needs a comment to explain it. We don't submit a known completed request, hence there is no context switch. 
> > @@ -1256,11 +1244,16 @@ static void execlists_dequeue(struct intel_engine_cs *engine) > > GEM_BUG_ON(ve->siblings[0] != engine); > > } > > > > - __i915_request_submit(rq); > > - if (!i915_request_completed(rq)) { > > + if (__i915_request_submit(rq)) { > > submit = true; > > last = rq; > > } > > + > > + if (!submit) { > > + spin_unlock(&ve->base.active.lock); > > + rb = rb_first_cached(&execlists->virtual); > > + continue; > > + } > > This block also needs a comment I think. It's about skipping until first > incomplete request in the queue? It's the same logic as before. It's just saying having detected that we have a bunch of veng requests, keep searching that tree in decreasing priority order until it is no longer relevant. > > diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c > > index 9bd8538b1907..db1a0048a753 100644 > > --- a/drivers/gpu/drm/i915/i915_request.c > > +++ b/drivers/gpu/drm/i915/i915_request.c > > @@ -377,9 +377,10 @@ __i915_request_await_execution(struct i915_request *rq, > > return 0; > > } > > > > -void __i915_request_submit(struct i915_request *request) > > +bool __i915_request_submit(struct i915_request *request) > > { > > struct intel_engine_cs *engine = request->engine; > > + bool result = false; > > > > GEM_TRACE("%s fence %llx:%lld, current %d\n", > > engine->name, > > @@ -389,6 +390,9 @@ void __i915_request_submit(struct i915_request *request) > > GEM_BUG_ON(!irqs_disabled()); > > lockdep_assert_held(&engine->active.lock); > > > > + if (i915_request_completed(request)) > > + goto xfer; > > A comment here as well I think to explain what happens next with > completed requests put on the active list. They just get removed during > retire? Do they need to even be put on that list? They get removed during retire. They must be removed from the priority queue; and if they had been retired already they would not be in the priority queue, ergo at this point they are completed and not retired. -Chris
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index 53e823d36b28..53bc4308793c 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -806,6 +806,9 @@ static bool can_merge_rq(const struct i915_request *prev, GEM_BUG_ON(prev == next); GEM_BUG_ON(!assert_priority_queue(prev, next)); + if (i915_request_completed(next)) + return true; + if (!can_merge_ctx(prev->hw_context, next->hw_context)) return false; @@ -1188,21 +1191,6 @@ static void execlists_dequeue(struct intel_engine_cs *engine) continue; } - if (i915_request_completed(rq)) { - ve->request = NULL; - ve->base.execlists.queue_priority_hint = INT_MIN; - rb_erase_cached(rb, &execlists->virtual); - RB_CLEAR_NODE(rb); - - rq->engine = engine; - __i915_request_submit(rq); - - spin_unlock(&ve->base.active.lock); - - rb = rb_first_cached(&execlists->virtual); - continue; - } - if (last && !can_merge_rq(last, rq)) { spin_unlock(&ve->base.active.lock); return; /* leave this for another */ @@ -1256,11 +1244,16 @@ static void execlists_dequeue(struct intel_engine_cs *engine) GEM_BUG_ON(ve->siblings[0] != engine); } - __i915_request_submit(rq); - if (!i915_request_completed(rq)) { + if (__i915_request_submit(rq)) { submit = true; last = rq; } + + if (!submit) { + spin_unlock(&ve->base.active.lock); + rb = rb_first_cached(&execlists->virtual); + continue; + } } spin_unlock(&ve->base.active.lock); @@ -1273,8 +1266,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine) int i; priolist_for_each_request_consume(rq, rn, p, i) { - if (i915_request_completed(rq)) - goto skip; + bool merge = true; /* * Can we combine this request with the current port? 
@@ -1315,14 +1307,17 @@ static void execlists_dequeue(struct intel_engine_cs *engine) ctx_single_port_submission(rq->hw_context)) goto done; - *port = execlists_schedule_in(last, port - execlists->pending); - port++; + merge = false; } - last = rq; - submit = true; -skip: - __i915_request_submit(rq); + if (__i915_request_submit(rq)) { + if (!merge) { + *port = execlists_schedule_in(last, port - execlists->pending); + port++; + } + submit = true; + last = rq; + } } rb_erase_cached(&p->node, &execlists->queue); diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index 9bd8538b1907..db1a0048a753 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -377,9 +377,10 @@ __i915_request_await_execution(struct i915_request *rq, return 0; } -void __i915_request_submit(struct i915_request *request) +bool __i915_request_submit(struct i915_request *request) { struct intel_engine_cs *engine = request->engine; + bool result = false; GEM_TRACE("%s fence %llx:%lld, current %d\n", engine->name, @@ -389,6 +390,9 @@ void __i915_request_submit(struct i915_request *request) GEM_BUG_ON(!irqs_disabled()); lockdep_assert_held(&engine->active.lock); + if (i915_request_completed(request)) + goto xfer; + if (i915_gem_context_is_banned(request->gem_context)) i915_request_skip(request, -EIO); @@ -412,13 +416,18 @@ void __i915_request_submit(struct i915_request *request) i915_sw_fence_signaled(&request->semaphore)) engine->saturated |= request->sched.semaphores; - /* We may be recursing from the signal callback of another i915 fence */ - spin_lock_nested(&request->lock, SINGLE_DEPTH_NESTING); + engine->emit_fini_breadcrumb(request, + request->ring->vaddr + request->postfix); - list_move_tail(&request->sched.link, &engine->active.requests); + trace_i915_request_execute(request); + engine->serial++; + result = true; - GEM_BUG_ON(test_bit(I915_FENCE_FLAG_ACTIVE, &request->fence.flags)); - set_bit(I915_FENCE_FLAG_ACTIVE, 
&request->fence.flags); +xfer: /* We may be recursing from the signal callback of another i915 fence */ + spin_lock_nested(&request->lock, SINGLE_DEPTH_NESTING); + + if (!test_and_set_bit(I915_FENCE_FLAG_ACTIVE, &request->fence.flags)) + list_move_tail(&request->sched.link, &engine->active.requests); if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &request->fence.flags) && !test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &request->fence.flags) && @@ -429,12 +438,7 @@ void __i915_request_submit(struct i915_request *request) spin_unlock(&request->lock); - engine->emit_fini_breadcrumb(request, - request->ring->vaddr + request->postfix); - - engine->serial++; - - trace_i915_request_execute(request); + return result; } void i915_request_submit(struct i915_request *request) diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h index b18f49528ded..ec5bb4c2e5ae 100644 --- a/drivers/gpu/drm/i915/i915_request.h +++ b/drivers/gpu/drm/i915/i915_request.h @@ -292,7 +292,7 @@ int i915_request_await_execution(struct i915_request *rq, void i915_request_add(struct i915_request *rq); -void __i915_request_submit(struct i915_request *request); +bool __i915_request_submit(struct i915_request *request); void i915_request_submit(struct i915_request *request); void i915_request_skip(struct i915_request *request, int error);
If we are asked to submit a completed request, just move it onto the active-list without modifying its payload. If we try to emit the modified payload of a completed request, we risk racing with the ring->head update during retirement which may advance the head past our breadcrumb and so we generate a warning for the emission being behind the RING_HEAD. Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> --- drivers/gpu/drm/i915/gt/intel_lrc.c | 45 +++++++++++++---------------- drivers/gpu/drm/i915/i915_request.c | 28 ++++++++++-------- drivers/gpu/drm/i915/i915_request.h | 2 +- 3 files changed, 37 insertions(+), 38 deletions(-)