[26/50] drm/i915: Apply an execution_mask to the virtual_engine

Message ID 20190412085410.10392-27-chris@chris-wilson.co.uk
State New, archived
Series [01/50] drm/i915: Introduce struct class_instance for engines across the uAPI

Commit Message

Chris Wilson April 12, 2019, 8:53 a.m. UTC
Allow the user to direct which physical engines of the virtual engine
they wish to execute on, as sometimes it is necessary to override the
load balancing algorithm.
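
For example, a request can be restricted to a single physical engine
within the virtual set by narrowing its execution_mask before
submission. A minimal sketch, mirroring the selftest below (assuming
ve is the pinned virtual context and sibling one of its physical
engines):

	rq = i915_request_create(ve);
	if (IS_ERR(rq))
		return PTR_ERR(rq);

	/* Allow execution only on this one physical engine */
	rq->execution_mask = sibling->mask;
	i915_request_add(rq);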

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/gt/intel_lrc.c    |  58 +++++++++++
 drivers/gpu/drm/i915/gt/selftest_lrc.c | 131 +++++++++++++++++++++++++
 drivers/gpu/drm/i915/i915_request.c    |   1 +
 drivers/gpu/drm/i915/i915_request.h    |   3 +
 4 files changed, 193 insertions(+)

Patch

diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index 43894c2cc5ee..3833c8d2c28c 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -552,6 +552,18 @@  execlists_context_schedule_out(struct i915_request *rq, unsigned long status)
 	intel_engine_context_out(rq->engine);
 	execlists_context_status_change(rq, status);
 	trace_i915_request_out(rq);
+
+	/*
+	 * If this is part of a virtual engine, its next request may have
+	 * been blocked waiting for access to the active context. We have
+	 * to kick all the siblings again in case we need to switch (e.g.
+	 * the next request is not runnable on this engine). Hopefully,
+	 * we will already have submitted the next request before the
+	 * tasklet runs and do not need to rebuild each virtual tree
+	 * and kick everyone again.
+	 */
+	if (rq->engine != rq->hw_context->engine)
+		tasklet_schedule(&rq->hw_context->engine->execlists.tasklet);
 }
 
 static u64 execlists_update_context(struct i915_request *rq)
@@ -779,6 +791,9 @@  static bool virtual_matches(const struct virtual_engine *ve,
 {
 	const struct intel_engine_cs *active;
 
+	if (!(rq->execution_mask & engine->mask)) /* We peeked too soon! */
+		return false;
+
 	/*
 	 * We track when the HW has completed saving the context image
 	 * (i.e. when we have seen the final CS event switching out of
@@ -3138,12 +3153,44 @@  static const struct intel_context_ops virtual_context_ops = {
 	.destroy = virtual_context_destroy,
 };
 
+static intel_engine_mask_t virtual_submission_mask(struct virtual_engine *ve)
+{
+	struct i915_request *rq;
+	intel_engine_mask_t mask;
+
+	rq = READ_ONCE(ve->request);
+	if (!rq)
+		return 0;
+
+	/* The rq is ready for submission; rq->execution_mask is now stable. */
+	mask = rq->execution_mask;
+	if (unlikely(!mask)) {
+		/* Invalid selection, submit to a random engine in error */
+		i915_request_skip(rq, -ENODEV);
+		mask = ve->siblings[0]->mask;
+	}
+
+	GEM_TRACE("%s: rq=%llx:%lld, mask=%x, prio=%d\n",
+		  ve->base.name,
+		  rq->fence.context, rq->fence.seqno,
+		  mask, ve->base.execlists.queue_priority_hint);
+
+	return mask;
+}
+
 static void virtual_submission_tasklet(unsigned long data)
 {
 	struct virtual_engine * const ve = (struct virtual_engine *)data;
 	const int prio = ve->base.execlists.queue_priority_hint;
+	intel_engine_mask_t mask;
 	unsigned int n;
 
+	rcu_read_lock();
+	mask = virtual_submission_mask(ve);
+	rcu_read_unlock();
+	if (unlikely(!mask))
+		return;
+
 	local_irq_disable();
 	for (n = 0; READ_ONCE(ve->request) && n < ve->num_siblings; n++) {
 		struct intel_engine_cs *sibling = ve->siblings[n];
@@ -3151,6 +3198,17 @@  static void virtual_submission_tasklet(unsigned long data)
 		struct rb_node **parent, *rb;
 		bool first;
 
+		if (unlikely(!(mask & sibling->mask))) {
+			if (!RB_EMPTY_NODE(&node->rb)) {
+				spin_lock(&sibling->timeline.lock);
+				rb_erase_cached(&node->rb,
+						&sibling->execlists.virtual);
+				RB_CLEAR_NODE(&node->rb);
+				spin_unlock(&sibling->timeline.lock);
+			}
+			continue;
+		}
+
 		spin_lock(&sibling->timeline.lock);
 
 		if (!RB_EMPTY_NODE(&node->rb)) {
diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c
index f34aa9e042a3..209e51ef13e6 100644
--- a/drivers/gpu/drm/i915/gt/selftest_lrc.c
+++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c
@@ -1480,6 +1480,136 @@  static int live_virtual_engine(void *arg)
 	return err;
 }
 
+static int mask_virtual_engine(struct drm_i915_private *i915,
+			       struct intel_engine_cs **siblings,
+			       unsigned int nsibling)
+{
+	struct i915_request *request[MAX_ENGINE_INSTANCE + 1];
+	struct i915_gem_context *ctx;
+	struct intel_context *ve;
+	struct igt_live_test t;
+	unsigned int n;
+	int err;
+
+	/*
+	 * Check that by setting the execution mask on a request, we can
+	 * restrict it to our desired engine within the virtual engine.
+	 */
+
+	ctx = kernel_context(i915);
+	if (!ctx)
+		return -ENOMEM;
+
+	ve = intel_execlists_create_virtual(ctx, siblings, nsibling);
+	if (IS_ERR(ve)) {
+		err = PTR_ERR(ve);
+		goto out_close;
+	}
+
+	err = intel_context_pin(ve);
+	if (err)
+		goto out_put;
+
+	err = igt_live_test_begin(&t, i915, __func__, ve->engine->name);
+	if (err)
+		goto out_unpin;
+
+	for (n = 0; n < nsibling; n++) {
+		request[n] = i915_request_create(ve);
+		if (IS_ERR(request[n])) {
+			err = PTR_ERR(request[n]);
+			nsibling = n;
+			goto out;
+		}
+
+		/* Reverse order as it's more likely to be unnatural */
+		request[n]->execution_mask = siblings[nsibling - n - 1]->mask;
+
+		i915_request_get(request[n]);
+		i915_request_add(request[n]);
+	}
+
+	for (n = 0; n < nsibling; n++) {
+		if (i915_request_wait(request[n], I915_WAIT_LOCKED, HZ / 10) < 0) {
+			pr_err("%s(%s): wait for %llx:%lld timed out\n",
+			       __func__, ve->engine->name,
+			       request[n]->fence.context,
+			       request[n]->fence.seqno);
+
+			GEM_TRACE("%s(%s) failed at request %llx:%lld\n",
+				  __func__, ve->engine->name,
+				  request[n]->fence.context,
+				  request[n]->fence.seqno);
+			GEM_TRACE_DUMP();
+			i915_gem_set_wedged(i915);
+			err = -EIO;
+			goto out;
+		}
+
+		if (request[n]->engine != siblings[nsibling - n - 1]) {
+			pr_err("Executed on wrong sibling '%s', expected '%s'\n",
+			       request[n]->engine->name,
+			       siblings[nsibling - n - 1]->name);
+			err = -EINVAL;
+			goto out;
+		}
+	}
+
+	err = igt_live_test_end(&t);
+	if (err)
+		goto out;
+
+out:
+	if (igt_flush_test(i915, I915_WAIT_LOCKED))
+		err = -EIO;
+
+	for (n = 0; n < nsibling; n++)
+		i915_request_put(request[n]);
+
+out_unpin:
+	intel_context_unpin(ve);
+out_put:
+	intel_context_put(ve);
+out_close:
+	kernel_context_close(ctx);
+	return err;
+}
+
+static int live_virtual_mask(void *arg)
+{
+	struct drm_i915_private *i915 = arg;
+	struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
+	unsigned int class, inst;
+	int err = 0;
+
+	if (USES_GUC_SUBMISSION(i915))
+		return 0;
+
+	mutex_lock(&i915->drm.struct_mutex);
+
+	for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
+		unsigned int nsibling;
+
+		nsibling = 0;
+		for (inst = 0; inst <= MAX_ENGINE_INSTANCE; inst++) {
+			if (!i915->engine_class[class][inst])
+				break;
+
+			siblings[nsibling++] = i915->engine_class[class][inst];
+		}
+		if (nsibling < 2)
+			continue;
+
+		err = mask_virtual_engine(i915, siblings, nsibling);
+		if (err)
+			goto out_unlock;
+	}
+
+out_unlock:
+	mutex_unlock(&i915->drm.struct_mutex);
+	return err;
+}
+
 int intel_execlists_live_selftests(struct drm_i915_private *i915)
 {
 	static const struct i915_subtest tests[] = {
@@ -1493,6 +1623,7 @@  int intel_execlists_live_selftests(struct drm_i915_private *i915)
 		SUBTEST(live_preempt_hang),
 		SUBTEST(live_preempt_smoke),
 		SUBTEST(live_virtual_engine),
+		SUBTEST(live_virtual_mask),
 	};
 
 	if (!HAS_EXECLISTS(i915))
diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index 891e31d54307..9352406d5504 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -687,6 +687,7 @@  __i915_request_create(struct intel_context *ce, gfp_t gfp)
 	rq->batch = NULL;
 	rq->capture_list = NULL;
 	rq->waitboost = false;
+	rq->execution_mask = ALL_ENGINES;
 
 	INIT_LIST_HEAD(&rq->active_list);
 	INIT_LIST_HEAD(&rq->execute_cb);
diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h
index 8025a89b5999..d7f9b2194568 100644
--- a/drivers/gpu/drm/i915/i915_request.h
+++ b/drivers/gpu/drm/i915/i915_request.h
@@ -28,6 +28,8 @@ 
 #include <linux/dma-fence.h>
 #include <linux/lockdep.h>
 
+#include "gt/intel_engine_types.h"
+
 #include "i915_gem.h"
 #include "i915_scheduler.h"
 #include "i915_selftest.h"
@@ -156,6 +158,7 @@  struct i915_request {
 	 */
 	struct i915_sched_node sched;
 	struct i915_dependency dep;
+	intel_engine_mask_t execution_mask;
 
 	/*
 	 * A convenience pointer to the current breadcrumb value stored in