[10/16] drm/i915/gt: Use virtual_engine during execlists_dequeue

Message ID	20201124114219.29020-10-chris@chris-wilson.co.uk (mailing list archive)
State	New, archived
Headers	show Return-Path: <SRS0=tBxe=E6=lists.freedesktop.org=intel-gfx-bounces@kernel.org> DMARC-Filter: OpenDMARC Filter v1.3.2 mail.kernel.org BD0A8206D8 From: Chris Wilson <chris@chris-wilson.co.uk> To: intel-gfx@lists.freedesktop.org Date: Tue, 24 Nov 2020 11:42:13 +0000 Message-Id: <20201124114219.29020-10-chris@chris-wilson.co.uk> In-Reply-To: <20201124114219.29020-1-chris@chris-wilson.co.uk> References: <20201124114219.29020-1-chris@chris-wilson.co.uk> MIME-Version: 1.0 Subject: [Intel-gfx] [PATCH 10/16] drm/i915/gt: Use virtual_engine during execlists_dequeue Precedence: list Cc: Chris Wilson <chris@chris-wilson.co.uk> Content-Type: text/plain; charset="us-ascii" Content-Transfer-Encoding: 7bit Errors-To: intel-gfx-bounces@lists.freedesktop.org Sender: "Intel-gfx" <intel-gfx-bounces@lists.freedesktop.org>
Series	[01/16] drm/i915/gem: Drop free_work for GEM contexts \| expand [01/16] drm/i915/gem: Drop free_work for GEM contexts [02/16] drm/i915/gt: Track the overall awake/busy time [03/16] drm/i915/gt: Protect context lifetime with RCU [04/16] drm/i915/gt: Split the breadcrumb spinlock between global and contexts [05/16] drm/i915/gt: Move the breadcrumb to the signaler if completed upon cancel [06/16] drm/i915/gt: Decouple completed requests on unwind [07/16] drm/i915/gt: Check for a completed last request once [08/16] drm/i915/gt: Replace direct submit with direct call to tasklet [09/16] drm/i915/gt: ce->inflight updates are now serialised [10/16] drm/i915/gt: Use virtual_engine during execlists_dequeue [11/16] drm/i915/gt: Decouple inflight virtual engines [12/16] drm/i915/gt: Defer schedule_out until after the next dequeue [13/16] drm/i915/gt: Remove virtual breadcrumb before transfer [14/16] drm/i915/gt: Shrink the critical section for irq signaling [15/16] drm/i915/gt: Resubmit the virtual engine on schedule-out [16/16] drm/i915/gt: Simplify virtual engine handling for execlists_hold()

diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index 3a6a9539b54e..a4f6e446f188 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -454,9 +454,15 @@ static int queue_prio(const struct intel_engine_execlists *execlists) return ((p->priority + 1) << I915_USER_PRIORITY_SHIFT) - ffs(p->used); } +static int virtual_prio(const struct intel_engine_execlists *el) +{ + struct rb_node *rb = rb_first_cached(&el->virtual); + + return rb ? rb_entry(rb, struct ve_node, rb)->prio : INT_MIN; +} + static inline bool need_preempt(const struct intel_engine_cs *engine, - const struct i915_request *rq, - struct rb_node *rb) + const struct i915_request *rq) { int last_prio; @@ -493,25 +499,6 @@ static inline bool need_preempt(const struct intel_engine_cs *engine, rq_prio(list_next_entry(rq, sched.link)) > last_prio) return true; - if (rb) { - struct virtual_engine *ve = - rb_entry(rb, typeof(*ve), nodes[engine->id].rb); - bool preempt = false; - - if (engine == ve->siblings[0]) { /* only preempt one sibling */ - struct i915_request *next; - - rcu_read_lock(); - next = READ_ONCE(ve->request); - if (next) - preempt = rq_prio(next) > last_prio; - rcu_read_unlock(); - } - - if (preempt) - return preempt; - } - /* * If the inflight context did not trigger the preemption, then maybe * it was the set of queued requests? Pick the highest priority in @@ -522,7 +509,8 @@ static inline bool need_preempt(const struct intel_engine_cs *engine, * ELSP[0] or ELSP[1] as, thanks again to PI, if it was the same * context, it's priority would not exceed ELSP[0] aka last_prio. */ - return queue_prio(&engine->execlists) > last_prio; + return max(virtual_prio(&engine->execlists), + queue_prio(&engine->execlists)) > last_prio; } __maybe_unused static inline bool @@ -1808,6 +1796,35 @@ static bool virtual_matches(const struct virtual_engine *ve, return true; } +static struct virtual_engine * +first_virtual_engine(struct intel_engine_cs *engine) +{ + struct intel_engine_execlists *el = &engine->execlists; + struct rb_node *rb = rb_first_cached(&el->virtual); + + while (rb) { + struct virtual_engine *ve = + rb_entry(rb, typeof(*ve), nodes[engine->id].rb); + struct i915_request *rq = READ_ONCE(ve->request); + + /* lazily cleanup after another engine handled rq */ + if (!rq) { + rb_erase_cached(rb, &el->virtual); + RB_CLEAR_NODE(rb); + rb = rb_first_cached(&el->virtual); + continue; + } + + if (!virtual_matches(ve, rq, engine)) { + rb = rb_next(rb); + continue; + } + return ve; + } + + return NULL; +} + static void virtual_xfer_context(struct virtual_engine *ve, struct intel_engine_cs *engine) { @@ -1896,32 +1913,15 @@ static void defer_active(struct intel_engine_cs *engine) static bool need_timeslice(const struct intel_engine_cs *engine, - const struct i915_request *rq, - const struct rb_node *rb) + const struct i915_request *rq) { int hint; if (!intel_engine_has_timeslices(engine)) return false; - hint = engine->execlists.queue_priority_hint; - - if (rb) { - const struct virtual_engine *ve = - rb_entry(rb, typeof(*ve), nodes[engine->id].rb); - const struct intel_engine_cs *inflight = - intel_context_inflight(&ve->context); - - if (!inflight || inflight == engine) { - struct i915_request *next; - - rcu_read_lock(); - next = READ_ONCE(ve->request); - if (next) - hint = max(hint, rq_prio(next)); - rcu_read_unlock(); - } - } + hint = max(engine->execlists.queue_priority_hint, + virtual_prio(&engine->execlists)); if (!list_is_last(&rq->sched.link, &engine->active.requests)) hint = max(hint, rq_prio(list_next_entry(rq, sched.link))); @@ -2068,6 +2068,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine) struct i915_request **port = execlists->pending; struct i915_request ** const last_port = port + execlists->port_mask; struct i915_request *last = *execlists->active; + struct virtual_engine *ve; struct rb_node *rb; bool submit = false; @@ -2095,26 +2096,6 @@ static void execlists_dequeue(struct intel_engine_cs *engine) spin_lock(&engine->active.lock); - for (rb = rb_first_cached(&execlists->virtual); rb; ) { - struct virtual_engine *ve = - rb_entry(rb, typeof(*ve), nodes[engine->id].rb); - struct i915_request *rq = READ_ONCE(ve->request); - - if (!rq) { /* lazily cleanup after another engine handled rq */ - rb_erase_cached(rb, &execlists->virtual); - RB_CLEAR_NODE(rb); - rb = rb_first_cached(&execlists->virtual); - continue; - } - - if (!virtual_matches(ve, rq, engine)) { - rb = rb_next(rb); - continue; - } - - break; - } - /* * If the queue is higher priority than the last * request in the currently active context, submit afresh. @@ -2137,7 +2118,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine) if (last) { if (i915_request_completed(last)) { goto check_secondary; - } else if (need_preempt(engine, last, rb)) { + } else if (need_preempt(engine, last)) { ENGINE_TRACE(engine, "preempting last=%llx:%lld, prio=%d, hint=%d\n", last->fence.context, @@ -2163,7 +2144,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine) __unwind_incomplete_requests(engine); last = NULL; - } else if (need_timeslice(engine, last, rb) && + } else if (need_timeslice(engine, last) && timeslice_expired(execlists, last)) { ENGINE_TRACE(engine, "expired last=%llx:%lld, prio=%d, hint=%d, yield?=%s\n", @@ -2214,96 +2195,86 @@ static void execlists_dequeue(struct intel_engine_cs *engine) } } - while (rb) { /* XXX virtual is always taking precedence */ - struct virtual_engine *ve = - rb_entry(rb, typeof(*ve), nodes[engine->id].rb); + /* XXX virtual is always taking precedence */ + while ((ve = first_virtual_engine(engine))) { struct i915_request *rq; spin_lock(&ve->base.active.lock); rq = ve->request; - if (unlikely(!rq)) { /* lost the race to a sibling */ - spin_unlock(&ve->base.active.lock); - rb_erase_cached(rb, &execlists->virtual); - RB_CLEAR_NODE(rb); - rb = rb_first_cached(&execlists->virtual); - continue; - } + if (unlikely(!rq)) /* lost the race to a sibling */ + goto unlock; - GEM_BUG_ON(rq != ve->request); GEM_BUG_ON(rq->engine != &ve->base); GEM_BUG_ON(rq->context != &ve->context); - if (rq_prio(rq) >= queue_prio(execlists)) { - if (!virtual_matches(ve, rq, engine)) { - spin_unlock(&ve->base.active.lock); - rb = rb_next(rb); - continue; - } + if (unlikely(rq_prio(rq) < queue_prio(execlists))) { + spin_unlock(&ve->base.active.lock); + break; + } - if (last && !can_merge_rq(last, rq)) { - spin_unlock(&ve->base.active.lock); - spin_unlock(&engine->active.lock); - start_timeslice(engine, rq_prio(rq)); - return; /* leave this for another sibling */ - } + GEM_BUG_ON(!virtual_matches(ve, rq, engine)); - ENGINE_TRACE(engine, - "virtual rq=%llx:%lld%s, new engine? %s\n", - rq->fence.context, - rq->fence.seqno, - i915_request_completed(rq) ? "!" : - i915_request_started(rq) ? "*" : - "", - yesno(engine != ve->siblings[0])); - - WRITE_ONCE(ve->request, NULL); - WRITE_ONCE(ve->base.execlists.queue_priority_hint, - INT_MIN); - rb_erase_cached(rb, &execlists->virtual); - RB_CLEAR_NODE(rb); + if (last && !can_merge_rq(last, rq)) { + spin_unlock(&ve->base.active.lock); + spin_unlock(&engine->active.lock); + start_timeslice(engine, rq_prio(rq)); + return; /* leave this for another sibling */ + } - GEM_BUG_ON(!(rq->execution_mask & engine->mask)); - WRITE_ONCE(rq->engine, engine); + ENGINE_TRACE(engine, + "virtual rq=%llx:%lld%s, new engine? %s\n", + rq->fence.context, + rq->fence.seqno, + i915_request_completed(rq) ? "!" : + i915_request_started(rq) ? "*" : + "", + yesno(engine != ve->siblings[0])); - if (__i915_request_submit(rq)) { - /* - * Only after we confirm that we will submit - * this request (i.e. it has not already - * completed), do we want to update the context. - * - * This serves two purposes. It avoids - * unnecessary work if we are resubmitting an - * already completed request after timeslicing. - * But more importantly, it prevents us altering - * ve->siblings[] on an idle context, where - * we may be using ve->siblings[] in - * virtual_context_enter / virtual_context_exit. - */ - virtual_xfer_context(ve, engine); - GEM_BUG_ON(ve->siblings[0] != engine); + WRITE_ONCE(ve->request, NULL); + WRITE_ONCE(ve->base.execlists.queue_priority_hint, INT_MIN); - submit = true; - last = rq; - } - i915_request_put(rq); + rb = &ve->nodes[engine->id].rb; + rb_erase_cached(rb, &execlists->virtual); + RB_CLEAR_NODE(rb); + + GEM_BUG_ON(!(rq->execution_mask & engine->mask)); + WRITE_ONCE(rq->engine, engine); + if (__i915_request_submit(rq)) { /* - * Hmm, we have a bunch of virtual engine requests, - * but the first one was already completed (thanks - * preempt-to-busy!). Keep looking at the veng queue - * until we have no more relevant requests (i.e. - * the normal submit queue has higher priority). + * Only after we confirm that we will submit + * this request (i.e. it has not already + * completed), do we want to update the context. + * + * This serves two purposes. It avoids + * unnecessary work if we are resubmitting an + * already completed request after timeslicing. + * But more importantly, it prevents us altering + * ve->siblings[] on an idle context, where + * we may be using ve->siblings[] in + * virtual_context_enter / virtual_context_exit. */ - if (!submit) { - spin_unlock(&ve->base.active.lock); - rb = rb_first_cached(&execlists->virtual); - continue; - } + virtual_xfer_context(ve, engine); + GEM_BUG_ON(ve->siblings[0] != engine); + + submit = true; + last = rq; } + i915_request_put(rq); +unlock: spin_unlock(&ve->base.active.lock); - break; + + /* + * Hmm, we have a bunch of virtual engine requests, + * but the first one was already completed (thanks + * preempt-to-busy!). Keep looking at the veng queue + * until we have no more relevant requests (i.e. + * the normal submit queue has higher priority). + */ + if (submit) + break; } while ((rb = rb_first_cached(&execlists->queue))) {

[10/16] drm/i915/gt: Use virtual_engine during execlists_dequeue

Commit Message

Patch