
[38/38] drm/i915: Load balancing across a virtual engine

Message ID 20190118140109.25261-39-chris@chris-wilson.co.uk (mailing list archive)
State New, archived
Series [01/38] drm/i915/execlists: Store the highest priority context

Commit Message

Chris Wilson Jan. 18, 2019, 2:01 p.m. UTC
Having allowed the user to define the set of engines they wish to use, we
go one step further and allow them to bind those engines into a single
virtual instance. Submitting a batch to the virtual engine then forwards
it to any one of the set, chosen so as to best distribute the load. The
virtual engine has a single timeline across all engines (it operates as a
single queue), so it cannot by itself run batches concurrently across
multiple engines; that is left to the user, who may submit multiple
concurrent batches to multiple queues. Multiple users will be load
balanced across the system.

The mechanism used for load balancing in this patch is a late greedy
balancer. When a request is ready for execution, it is added to each
engine's queue, and when an engine is ready for its next request it
claims it from the virtual engine. The first engine to do so wins, i.e.
the request is executed at the earliest opportunity (idle moment) in the
system.
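
(Purely as an illustration, and not part of this patch: a minimal
userspace sketch of the greedy claim, with two threads standing in for
engines and an atomic exchange standing in for the per-engine dequeue;
all names here are invented for the demo.)

/* Illustrative analogue of the late greedy balancer: the "virtual engine"
 * holds at most one pending request, and whichever "engine" thread is
 * idle first claims it with an atomic exchange.
 * Build with: cc -pthread demo.c
 */
#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>
#include <unistd.h>

static _Atomic int pending;	/* the single virtual-engine slot, 0 == empty */
static _Atomic int done;

static void *engine(void *arg)
{
	int id = *(int *)arg;

	while (!atomic_load(&done)) {
		int rq = atomic_exchange(&pending, 0);	/* first to claim wins */

		if (rq)
			printf("engine %d executed request %d\n", id, rq);
		usleep(1000);	/* stand-in for an idle moment */
	}
	return NULL;
}

int main(void)
{
	pthread_t thread[2];
	int id[2] = { 0, 1 };
	int i;

	for (i = 0; i < 2; i++)
		pthread_create(&thread[i], NULL, engine, &id[i]);

	for (i = 1; i <= 8; i++) {	/* submissions to the virtual engine */
		atomic_store(&pending, i);
		usleep(5000);
	}

	atomic_store(&done, 1);
	for (i = 0; i < 2; i++)
		pthread_join(thread[i], NULL);
	return 0;
}

In the patch itself the same race is resolved under the sibling's
execution_lock in execlists_dequeue(), which clears ve->request once a
physical engine has taken it.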

As not all HW is created equal, the user is still able to skip the
virtual engine and execute the batch on a specific engine, all within the
same queue. It will then be executed in order on that engine, with
execution on the other virtual engines moved away by the load detection.

A couple of areas are left for potential improvement:

- The virtual engine always takes priority over equal-priority tasks.
This is mostly broken up by applying FQ_CODEL rules for prioritising new
clients, and hopefully the virtual and real engines are then not both
congested (i.e. all work is via virtual engines, or all work is to the
real engine).

- We require the breadcrumb irq around every virtual engine request. For
normal engines, we eliminate the need for the slow round trip via
interrupt by using the submit fence and queueing in order. For virtual
engines, we have to allow any job to transfer to a new ring, and cannot
coalesce the submissions, so we require the completion fence instead,
forcing the persistent use of interrupts.

Other areas of improvement are more general, such as reducing lock
contention, reducing dispatch overhead, and looking at direct submission
rather than bouncing around tasklets.
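
(For completeness, a hedged sketch of how userspace might opt in. The
struct layouts mirror the uapi addition at the end of this patch; the
context setparam plumbing that carries i915_context_param_engines, and
the exact encoding of its engines[] slots, belong to the wider series and
are only indicated here in comments.)

/* Illustration only: filling in the proposed load-balance extension.
 * How the filled i915_context_param_engines is handed to the kernel is
 * assumed, not shown in this excerpt.
 */
#include <stdint.h>
#include <string.h>

struct i915_user_extension {
	uint64_t next_extension;	/* 0 terminates the chain */
	uint32_t name;
	uint32_t flags;
	uint32_t rsvd[4];
};

#define I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE 0

struct i915_context_engines_load_balance {
	struct i915_user_extension base;
	uint64_t flags;			/* all undefined flags must be zero */
	uint64_t engines_mask;		/* selection mask of engines[] */
	uint64_t mbz[4];		/* must be zero */
};

struct i915_context_param_engines {
	uint64_t extensions;		/* chain of extension blocks */
	struct {
		uint16_t engine_class;	/* enum drm_i915_gem_engine_class */
		uint16_t engine_instance;
	} engines[2];
};

int main(void)
{
	struct i915_context_engines_load_balance balance;
	struct i915_context_param_engines map;

	memset(&balance, 0, sizeof(balance));
	balance.base.name = I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE;
	balance.engines_mask = ~0ull;	/* balance over the whole set */

	memset(&map, 0, sizeof(map));
	map.extensions = (uintptr_t)&balance;
	/*
	 * map.engines[] would list the physical engines by class/instance;
	 * the encoding rules, including how the default slot [0] that the
	 * extension turns into the virtual engine is specified, are defined
	 * elsewhere in the series and not reproduced here.
	 */
	(void)map;
	return 0;
}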

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/Makefile                 |   1 +
 drivers/gpu/drm/i915/i915_gem.h               |   5 +
 drivers/gpu/drm/i915/i915_gem_context.c       |  95 +++-
 drivers/gpu/drm/i915/i915_gem_context.h       |  14 +
 drivers/gpu/drm/i915/i915_request.c           |   2 +-
 drivers/gpu/drm/i915/i915_scheduler.c         |   1 +
 drivers/gpu/drm/i915/intel_lrc.c              | 453 +++++++++++++++++-
 drivers/gpu/drm/i915/intel_lrc.h              |   6 +
 drivers/gpu/drm/i915/intel_ringbuffer.h       |   8 +
 .../gpu/drm/i915/selftests/i915_gem_context.c | 102 +---
 drivers/gpu/drm/i915/selftests/i915_request.c |  85 +---
 .../gpu/drm/i915/selftests/igt_live_test.c    |  78 +++
 .../gpu/drm/i915/selftests/igt_live_test.h    |  29 ++
 drivers/gpu/drm/i915/selftests/intel_lrc.c    | 209 ++++++++
 drivers/gpu/drm/i915/selftests/mock_context.c |  10 +-
 include/uapi/drm/i915_drm.h                   |  27 ++
 16 files changed, 938 insertions(+), 187 deletions(-)
 create mode 100644 drivers/gpu/drm/i915/selftests/igt_live_test.c
 create mode 100644 drivers/gpu/drm/i915/selftests/igt_live_test.h

Patch

diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index f206fbc85cee..ebc690897009 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -168,6 +168,7 @@  i915-$(CONFIG_DRM_I915_SELFTEST) += \
 	selftests/i915_random.o \
 	selftests/i915_selftest.o \
 	selftests/igt_flush_test.o \
+	selftests/igt_live_test.o \
 	selftests/igt_reset.o \
 	selftests/igt_spinner.o
 
diff --git a/drivers/gpu/drm/i915/i915_gem.h b/drivers/gpu/drm/i915/i915_gem.h
index b0e4b976880c..9905fcdd33c8 100644
--- a/drivers/gpu/drm/i915/i915_gem.h
+++ b/drivers/gpu/drm/i915/i915_gem.h
@@ -89,4 +89,9 @@  static inline bool __tasklet_is_enabled(const struct tasklet_struct *t)
 	return !atomic_read(&t->count);
 }
 
+static inline bool __tasklet_is_scheduled(struct tasklet_struct *t)
+{
+	return test_bit(TASKLET_STATE_SCHED, &t->state);
+}
+
 #endif /* __I915_GEM_H__ */
diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index 701af326120d..dbd5c4299246 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -92,6 +92,7 @@ 
 #include "i915_drv.h"
 #include "i915_trace.h"
 #include "i915_user_extensions.h"
+#include "intel_lrc.h"
 #include "intel_workarounds.h"
 
 #define ALL_L3_SLICES(dev) (1 << NUM_L3_SLICES(dev)) - 1
@@ -225,7 +226,10 @@  static void i915_gem_context_free(struct i915_gem_context *ctx)
 			ce->ops->destroy(ce);
 	}
 
-	kfree(ctx->engines);
+	if (ctx->engines) {
+		intel_virtual_engine_put(ctx->engines[0]);
+		kfree(ctx->engines);
+	}
 
 	if (ctx->timeline)
 		i915_timeline_put(ctx->timeline);
@@ -348,14 +352,8 @@  __create_hw_context(struct drm_i915_private *dev_priv,
 	ctx->i915 = dev_priv;
 	ctx->sched.priority = I915_USER_PRIORITY(I915_PRIORITY_NORMAL);
 
-	for (n = 0; n < ARRAY_SIZE(ctx->__engine); n++) {
-		struct intel_context *ce = &ctx->__engine[n];
-
-		ce->gem_context = ctx;
-		INIT_LIST_HEAD(&ce->active_link);
-		INIT_LIST_HEAD(&ce->signal_link);
-		INIT_LIST_HEAD(&ce->signals);
-	}
+	for (n = 0; n < ARRAY_SIZE(ctx->__engine); n++)
+		intel_context_init(&ctx->__engine[n], ctx, dev_priv->engine[n]);
 
 	INIT_RADIX_TREE(&ctx->handles_vma, GFP_KERNEL);
 	INIT_LIST_HEAD(&ctx->handles_list);
@@ -764,7 +762,8 @@  last_request_on_engine(struct i915_timeline *timeline,
 
 	rq = i915_gem_active_raw(&timeline->last_request,
 				 &engine->i915->drm.struct_mutex);
-	if (rq && rq->engine == engine) {
+	if (rq &&
+	    (rq->engine == engine || intel_engine_is_virtual(rq->engine))) {
 		GEM_TRACE("last request for %s on engine %s: %llx:%llu\n",
 			  timeline->name, engine->name,
 			  rq->fence.context, rq->fence.seqno);
@@ -909,13 +908,83 @@  static int set_ppgtt(struct i915_gem_context *ctx, u32 id)
 	return err;
 }
 
+static int check_user_mbz64(u64 __user *user)
+{
+	u64 mbz;
+
+	if (get_user(mbz, user))
+		return -EFAULT;
+
+	return mbz ? -EINVAL : 0;
+}
+
 struct set_engines {
 	struct i915_gem_context *ctx;
 	struct intel_engine_cs **engines;
 	unsigned int nengine;
 };
 
+static int set_engines__load_balance(struct i915_user_extension __user *base,
+				     void *data)
+
+{
+	struct i915_context_engines_load_balance __user *ext =
+		container_of(base, typeof(*ext) __user, base);
+	const struct set_engines *set = data;
+	struct intel_engine_cs *ve;
+	unsigned int n;
+	u64 mask;
+	int err;
+
+	if (set->engines[0])
+		return -EEXIST;
+
+	if (!HAS_EXECLISTS(set->ctx->i915))
+		return -ENODEV;
+
+	if (USES_GUC_SUBMISSION(set->ctx->i915))
+		return -ENODEV;
+
+	err = check_user_mbz64(&ext->flags);
+	if (err)
+		return err;
+
+	for (n = 0; n < ARRAY_SIZE(ext->mbz); n++) {
+		err = check_user_mbz64(&ext->mbz[n]);
+		if (err)
+			return err;
+	}
+
+	if (get_user(mask, &ext->engines_mask))
+		return -EFAULT;
+
+	if (mask == ~0ull) {
+		ve = intel_execlists_create_virtual(set->ctx,
+						    set->engines + 1,
+						    set->nengine - 1);
+	} else {
+		struct intel_engine_cs *stack[64];
+		int bit;
+
+		n = 0;
+		for_each_set_bit(bit, (unsigned long *)&mask, set->nengine)
+			stack[n++] = set->engines[bit];
+
+		ve = intel_execlists_create_virtual(set->ctx, stack, n);
+	}
+	if (IS_ERR(ve))
+		return PTR_ERR(ve);
+
+	if (cmpxchg(&set->engines[0], NULL, ve)) {
+		intel_virtual_engine_put(ve);
+		return -EEXIST;
+	}
+
+	return 0;
+}
+
 static const i915_user_extension_fn set_engines__extensions[] = {
+	[I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE] = set_engines__load_balance,
 };
 
 static int set_engines(struct i915_gem_context *ctx,
@@ -979,13 +1048,17 @@  static int set_engines(struct i915_gem_context *ctx,
 					   ARRAY_SIZE(set_engines__extensions),
 					   &set);
 	if (err) {
+		intel_virtual_engine_put(set.engines[0]);
 		kfree(set.engines);
 		return err;
 	}
 
 out:
 	mutex_lock(&ctx->i915->drm.struct_mutex);
-	kfree(ctx->engines);
+	if (ctx->engines) {
+		intel_virtual_engine_put(ctx->engines[0]);
+		kfree(ctx->engines);
+	}
 	ctx->engines = set.engines;
 	ctx->nengine = set.nengine;
 	mutex_unlock(&ctx->i915->drm.struct_mutex);
diff --git a/drivers/gpu/drm/i915/i915_gem_context.h b/drivers/gpu/drm/i915/i915_gem_context.h
index 635f693994c4..2aeca60674ad 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.h
+++ b/drivers/gpu/drm/i915/i915_gem_context.h
@@ -171,6 +171,7 @@  struct i915_gem_context {
 	struct intel_context {
 		struct i915_gem_context *gem_context;
 		struct intel_engine_cs *active;
+		struct intel_engine_cs *owner;
 		struct list_head active_link;
 		struct list_head signal_link;
 		struct list_head signals;
@@ -386,4 +387,17 @@  static inline void i915_gem_context_put(struct i915_gem_context *ctx)
 	kref_put(&ctx->ref, i915_gem_context_release);
 }
 
+static inline void
+intel_context_init(struct intel_context *ce,
+		   struct i915_gem_context *ctx,
+		   struct intel_engine_cs *engine)
+{
+	ce->gem_context = ctx;
+	ce->owner = engine;
+
+	INIT_LIST_HEAD(&ce->active_link);
+	INIT_LIST_HEAD(&ce->signal_link);
+	INIT_LIST_HEAD(&ce->signals);
+}
+
 #endif /* !__I915_GEM_CONTEXT_H__ */
diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index 22ac02f02b33..6637e9b93a6a 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -860,7 +860,7 @@  void i915_request_add(struct i915_request *request)
 	prev = i915_gem_active_raw(&timeline->last_request,
 				   &request->i915->drm.struct_mutex);
 	if (prev && !i915_request_completed(prev)) {
-		if (prev->engine == engine)
+		if (prev->engine == engine && !intel_engine_is_virtual(engine))
 			i915_sw_fence_await_sw_fence(&request->submit,
 						     &prev->submit,
 						     &request->submitq);
diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c
index 53e76a91ad75..c1d119753063 100644
--- a/drivers/gpu/drm/i915/i915_scheduler.c
+++ b/drivers/gpu/drm/i915/i915_scheduler.c
@@ -347,6 +347,7 @@  static void __i915_schedule(struct i915_request *rq,
 
 		node->attr.priority = prio;
 		if (!list_empty(&node->link)) {
+			GEM_BUG_ON(intel_engine_is_virtual(engine));
 			if (last != engine) {
 				pl = i915_sched_lookup_priolist(engine, prio);
 				last = engine;
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index e365c12c08eb..2d2803b76a12 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -164,6 +164,29 @@ 
 #define WA_TAIL_DWORDS 2
 #define WA_TAIL_BYTES (sizeof(u32) * WA_TAIL_DWORDS)
 
+struct virtual_engine {
+	struct intel_engine_cs base;
+
+	struct intel_context context;
+	struct kref kref;
+
+	struct intel_engine_cs *bound;
+
+	struct i915_request *request;
+	struct ve_node {
+		struct rb_node rb;
+		int prio;
+	} nodes[I915_NUM_ENGINES];
+
+	unsigned int count;
+	struct intel_engine_cs *siblings[0];
+};
+
+static struct virtual_engine *to_virtual_engine(struct intel_engine_cs *engine)
+{
+	return container_of(engine, struct virtual_engine, base);
+}
+
 static int execlists_context_deferred_alloc(struct i915_gem_context *ctx,
 					    struct intel_engine_cs *engine,
 					    struct intel_context *ce);
@@ -283,6 +306,8 @@  __unwind_incomplete_requests(struct intel_engine_cs *engine)
 	lockdep_assert_held(&engine->execution_lock);
 
 	list_for_each_entry_safe_reverse(rq, rn, &engine->requests, link) {
+		struct intel_engine_cs *owner;
+
 		if (i915_request_completed(rq))
 			break;
 
@@ -291,14 +316,20 @@  __unwind_incomplete_requests(struct intel_engine_cs *engine)
 
 		GEM_BUG_ON(rq->hw_context->active);
 
-		GEM_BUG_ON(rq_prio(rq) == I915_PRIORITY_INVALID);
-		if (rq_prio(rq) != prio) {
-			prio = rq_prio(rq);
-			pl = i915_sched_lookup_priolist(engine, prio);
-		}
-		GEM_BUG_ON(RB_EMPTY_ROOT(&engine->execlists.queue.rb_root));
+		owner = rq->hw_context->owner;
+		if (likely(owner == engine)) {
+			GEM_BUG_ON(rq_prio(rq) == I915_PRIORITY_INVALID);
+			if (rq_prio(rq) != prio) {
+				prio = rq_prio(rq);
+				pl = i915_sched_lookup_priolist(engine, prio);
+			}
+			GEM_BUG_ON(RB_EMPTY_ROOT(&engine->execlists.queue.rb_root));
 
-		list_add(&rq->sched.link, pl);
+			list_add(&rq->sched.link, pl);
+		} else {
+			rq->engine = owner;
+			owner->submit_request(rq);
+		}
 
 		active = rq;
 	}
@@ -308,7 +339,8 @@  __unwind_incomplete_requests(struct intel_engine_cs *engine)
 	 * stream, so give it the equivalent small priority bump to prevent
 	 * it being gazumped a second time by another peer.
 	 */
-	if (!(prio & I915_PRIORITY_NEWCLIENT)) {
+	if (!(prio & I915_PRIORITY_NEWCLIENT) &&
+	    active->hw_context->owner == engine) {
 		prio |= I915_PRIORITY_NEWCLIENT;
 		list_move_tail(&active->sched.link,
 			       i915_sched_lookup_priolist(engine, prio));
@@ -544,6 +576,50 @@  static void complete_preempt_context(struct intel_engine_execlists *execlists)
 						  execlists));
 }
 
+static void virtual_update_register_offsets(u32 *regs,
+					    struct intel_engine_cs *engine)
+{
+	u32 base = engine->mmio_base;
+
+	regs[CTX_CONTEXT_CONTROL] =
+		i915_mmio_reg_offset(RING_CONTEXT_CONTROL(engine));
+	regs[CTX_RING_HEAD] = i915_mmio_reg_offset(RING_HEAD(base));
+	regs[CTX_RING_TAIL] = i915_mmio_reg_offset(RING_TAIL(base));
+	regs[CTX_RING_BUFFER_START] = i915_mmio_reg_offset(RING_START(base));
+	regs[CTX_RING_BUFFER_CONTROL] = i915_mmio_reg_offset(RING_CTL(base));
+
+	regs[CTX_BB_HEAD_U] = i915_mmio_reg_offset(RING_BBADDR_UDW(base));
+	regs[CTX_BB_HEAD_L] = i915_mmio_reg_offset(RING_BBADDR(base));
+	regs[CTX_BB_STATE] = i915_mmio_reg_offset(RING_BBSTATE(base));
+	regs[CTX_SECOND_BB_HEAD_U] =
+		i915_mmio_reg_offset(RING_SBBADDR_UDW(base));
+	regs[CTX_SECOND_BB_HEAD_L] = i915_mmio_reg_offset(RING_SBBADDR(base));
+	regs[CTX_SECOND_BB_STATE] = i915_mmio_reg_offset(RING_SBBSTATE(base));
+
+	regs[CTX_CTX_TIMESTAMP] =
+		i915_mmio_reg_offset(RING_CTX_TIMESTAMP(base));
+	regs[CTX_PDP3_UDW] = i915_mmio_reg_offset(GEN8_RING_PDP_UDW(engine, 3));
+	regs[CTX_PDP3_LDW] = i915_mmio_reg_offset(GEN8_RING_PDP_LDW(engine, 3));
+	regs[CTX_PDP2_UDW] = i915_mmio_reg_offset(GEN8_RING_PDP_UDW(engine, 2));
+	regs[CTX_PDP2_LDW] = i915_mmio_reg_offset(GEN8_RING_PDP_LDW(engine, 2));
+	regs[CTX_PDP1_UDW] = i915_mmio_reg_offset(GEN8_RING_PDP_UDW(engine, 1));
+	regs[CTX_PDP1_LDW] = i915_mmio_reg_offset(GEN8_RING_PDP_LDW(engine, 1));
+	regs[CTX_PDP0_UDW] = i915_mmio_reg_offset(GEN8_RING_PDP_UDW(engine, 0));
+	regs[CTX_PDP0_LDW] = i915_mmio_reg_offset(GEN8_RING_PDP_LDW(engine, 0));
+
+	if (engine->class == RENDER_CLASS) {
+		regs[CTX_RCS_INDIRECT_CTX] =
+			i915_mmio_reg_offset(RING_INDIRECT_CTX(base));
+		regs[CTX_RCS_INDIRECT_CTX_OFFSET] =
+			i915_mmio_reg_offset(RING_INDIRECT_CTX_OFFSET(base));
+		regs[CTX_BB_PER_CTX_PTR] =
+			i915_mmio_reg_offset(RING_BB_PER_CTX_PTR(base));
+
+		regs[CTX_R_PWR_CLK_STATE] =
+			i915_mmio_reg_offset(GEN8_R_PWR_CLK_STATE);
+	}
+}
+
 static void execlists_dequeue(struct intel_engine_cs *engine)
 {
 	struct intel_engine_execlists * const execlists = &engine->execlists;
@@ -553,6 +629,8 @@  static void execlists_dequeue(struct intel_engine_cs *engine)
 	struct i915_request *last = port_request(port);
 	struct rb_node *rb;
 	bool submit = false;
+	const struct intel_context *q_context;
+	int q_priority;
 
 	/*
 	 * Hardware submission is through 2 ports. Conceptually each port
@@ -576,6 +654,35 @@  static void execlists_dequeue(struct intel_engine_cs *engine)
 	 * and context switches) submission.
 	 */
 
+restart_virtual_engine:
+	q_context = execlists->queue_context;
+	q_priority = execlists->queue_priority;
+	for (rb = rb_first_cached(&execlists->virtual); rb; ) {
+		struct virtual_engine *ve =
+			rb_entry(rb, typeof(*ve), nodes[engine->id].rb);
+		struct i915_request *rq = READ_ONCE(ve->request);
+		struct intel_engine_cs *active;
+
+		if (!rq) {
+			rb_erase_cached(rb, &execlists->virtual);
+			RB_CLEAR_NODE(rb);
+			rb = rb_first_cached(&execlists->virtual);
+			continue;
+		}
+
+		active = READ_ONCE(ve->context.active);
+		if (active && active != engine) {
+			rb = rb_next(rb);
+			continue;
+		}
+
+		if (q_priority >= rq_prio(rq)) {
+			q_priority = rq_prio(rq);
+			q_context = rq->hw_context;
+		}
+		break;
+	}
+
 	if (last) {
 		/*
 		 * Don't resubmit or switch until all outstanding
@@ -597,10 +704,8 @@  static void execlists_dequeue(struct intel_engine_cs *engine)
 		if (!execlists_is_active(execlists, EXECLISTS_ACTIVE_HWACK))
 			return;
 
-		if (need_preempt(engine, last,
-				 execlists->queue_priority,
-				 execlists->queue_context)) {
-			GEM_BUG_ON(execlists->queue_priority == INT_MIN);
+		if (need_preempt(engine, last, q_priority, q_context)) {
+			GEM_BUG_ON(q_priority == INT_MIN);
 			inject_preempt_context(engine);
 			return;
 		}
@@ -640,6 +745,67 @@  static void execlists_dequeue(struct intel_engine_cs *engine)
 		last->tail = last->wa_tail;
 	}
 
+	if (rb) { /* XXX virtual is always taking precedence */
+		struct virtual_engine *ve =
+			rb_entry(rb, typeof(*ve), nodes[engine->id].rb);
+		struct i915_request *rq;
+
+		spin_lock(&ve->base.execution_lock);
+
+		rq = ve->request;
+		if (unlikely(!rq)) { /* lost the race to a sibling */
+			spin_unlock(&ve->base.execution_lock);
+			goto restart_virtual_engine;
+		}
+
+		if (rq_prio(rq) >= q_priority) {
+			if (last && !can_merge_rq(rq, last)) {
+				spin_unlock(&ve->base.execution_lock);
+				return;
+			}
+
+			GEM_BUG_ON(rq->engine != &ve->base);
+			ve->request = NULL;
+			ve->base.execlists.queue_priority = INT_MIN;
+			rb_erase_cached(rb, &execlists->virtual);
+			RB_CLEAR_NODE(rb);
+
+			GEM_BUG_ON(rq->hw_context != &ve->context);
+			rq->engine = engine;
+
+			if (engine != ve->bound) {
+				u32 *regs = ve->context.lrc_reg_state;
+				unsigned int n;
+
+				GEM_BUG_ON(READ_ONCE(ve->context.active));
+				virtual_update_register_offsets(regs, engine);
+				ve->bound = engine;
+
+				/*
+				 * Move the bound engine to the top of the list
+				 * for future execution. We then kick this
+				 * tasklet first before checking others, so that
+				 * we preferentially reuse this set of bound
+				 * registers.
+				 */
+				for (n = 1; n < ve->count; n++) {
+					if (ve->siblings[n] == engine) {
+						swap(ve->siblings[n],
+						     ve->siblings[0]);
+						break;
+					}
+				}
+			}
+
+			__i915_request_submit(rq);
+			trace_i915_request_in(rq, port_index(port, execlists));
+			submit = true;
+			last = rq;
+		}
+
+		spin_unlock(&ve->base.execution_lock);
+	}
+
 	while ((rb = rb_first_cached(&execlists->queue))) {
 		struct i915_priolist *p = to_priolist(rb);
 		struct i915_request *rq, *rn;
@@ -2723,6 +2889,246 @@  void intel_lr_context_resume(struct drm_i915_private *i915)
 	}
 }
 
+static void virtual_engine_free(struct kref *kref)
+{
+	struct virtual_engine *ve = container_of(kref, typeof(*ve), kref);
+	unsigned int n;
+
+	GEM_BUG_ON(ve->request);
+	GEM_BUG_ON(ve->context.active);
+
+	for (n = 0; n < ve->count; n++) {
+		struct intel_engine_cs *sibling = ve->siblings[n];
+		struct rb_node *node = &ve->nodes[sibling->id].rb;
+
+		if (RB_EMPTY_NODE(node))
+			continue;
+
+		spin_lock_irq(&sibling->execution_lock);
+
+		if (!RB_EMPTY_NODE(node))
+			rb_erase_cached(node, &sibling->execlists.virtual);
+
+		spin_unlock_irq(&sibling->execution_lock);
+	}
+	GEM_BUG_ON(__tasklet_is_scheduled(&ve->base.execlists.tasklet));
+
+	if (ve->context.state)
+		execlists_context_destroy(&ve->context);
+
+	kfree(ve);
+}
+
+static void virtual_context_unpin(struct intel_context *ce)
+{
+	struct virtual_engine *ve = container_of(ce, typeof(*ve), context);
+
+	execlists_context_unpin(ce);
+
+	kref_put(&ve->kref, virtual_engine_free);
+}
+
+static const struct intel_context_ops virtual_context_ops = {
+	.unpin = virtual_context_unpin,
+};
+
+static struct intel_context *
+virtual_context_pin(struct intel_engine_cs *engine,
+		    struct i915_gem_context *ctx)
+{
+	struct virtual_engine *ve = to_virtual_engine(engine);
+	struct intel_context *ce = &ve->context;
+
+	lockdep_assert_held(&ctx->i915->drm.struct_mutex);
+
+	if (likely(ce->pin_count++))
+		return ce;
+	GEM_BUG_ON(!ce->pin_count); /* no overflow please! */
+
+	kref_get(&ve->kref);
+	ce->ops = &virtual_context_ops;
+
+	if (!ve->bound)
+		ve->bound = ve->siblings[0];
+
+	/* Note: we must use a real engine class for setting up reg state */
+	return __execlists_context_pin(ve->bound, ctx, ce);
+}
+
+static void virtual_submission_tasklet(unsigned long data)
+{
+	struct virtual_engine * const ve = (struct virtual_engine *)data;
+	unsigned int n;
+	int prio;
+
+	prio = READ_ONCE(ve->base.execlists.queue_priority);
+	if (prio == INT_MIN)
+		return;
+
+	local_irq_disable();
+	for (n = 0; READ_ONCE(ve->request) && n < ve->count; n++) {
+		struct intel_engine_cs *sibling = ve->siblings[n];
+		struct ve_node * const node = &ve->nodes[sibling->id];
+		struct rb_node **parent, *rb;
+		bool first;
+
+		spin_lock(&sibling->execution_lock);
+
+		if (!RB_EMPTY_NODE(&node->rb)) {
+			first = rb_first_cached(&sibling->execlists.virtual) == &node->rb;
+			if (prio == node->prio || (prio > node->prio && first))
+				goto submit_engine;
+
+			rb_erase_cached(&node->rb, &sibling->execlists.virtual);
+		}
+
+		rb = NULL;
+		first = true;
+		parent = &sibling->execlists.virtual.rb_root.rb_node;
+		while (*parent) {
+			struct ve_node *other;
+
+			rb = *parent;
+			other = rb_entry(rb, typeof(*other), rb);
+			if (prio > other->prio) {
+				parent = &rb->rb_left;
+			} else {
+				parent = &rb->rb_right;
+				first = false;
+			}
+		}
+
+		rb_link_node(&node->rb, rb, parent);
+		rb_insert_color_cached(&node->rb,
+				       &sibling->execlists.virtual,
+				       first);
+
+submit_engine:
+		GEM_BUG_ON(RB_EMPTY_NODE(&node->rb));
+		node->prio = prio;
+		if (first && prio > sibling->execlists.queue_priority)
+			tasklet_hi_schedule(&sibling->execlists.tasklet);
+
+		spin_unlock(&sibling->execution_lock);
+	}
+	local_irq_enable();
+}
+
+static void virtual_submit_request(struct i915_request *request)
+{
+	struct virtual_engine *ve = to_virtual_engine(request->engine);
+
+	GEM_BUG_ON(ve->base.submit_request != virtual_submit_request);
+
+	GEM_BUG_ON(ve->request);
+	ve->base.execlists.queue_priority = rq_prio(request);
+	WRITE_ONCE(ve->request, request);
+
+	tasklet_schedule(&ve->base.execlists.tasklet);
+}
+
+struct intel_engine_cs *
+intel_execlists_create_virtual(struct i915_gem_context *ctx,
+			       struct intel_engine_cs **siblings,
+			       unsigned int count)
+{
+	struct virtual_engine *ve;
+	unsigned int n;
+	int err;
+
+	if (!count)
+		return ERR_PTR(-EINVAL);
+
+	ve = kzalloc(sizeof(*ve) + count * sizeof(*ve->siblings), GFP_KERNEL);
+	if (!ve)
+		return ERR_PTR(-ENOMEM);
+
+	kref_init(&ve->kref);
+	ve->base.i915 = ctx->i915;
+	ve->base.id = -1;
+	ve->base.class = OTHER_CLASS;
+	ve->base.uabi_class = I915_ENGINE_CLASS_INVALID;
+	ve->base.instance = I915_ENGINE_CLASS_INVALID_VIRTUAL;
+	ve->base.flags = I915_ENGINE_IS_VIRTUAL;
+
+	snprintf(ve->base.name, sizeof(ve->base.name), "virtual");
+
+	INIT_LIST_HEAD(&ve->base.requests);
+	spin_lock_init(&ve->base.execution_lock);
+	lockdep_set_subclass(&ve->base.execution_lock, EXECUTION_VIRTUAL);
+
+	intel_context_init(&ve->context, ctx, &ve->base);
+
+	ve->base.context_pin = virtual_context_pin;
+	ve->base.request_alloc = execlists_request_alloc;
+
+	ve->base.schedule = i915_schedule;
+	ve->base.submit_request = virtual_submit_request;
+
+	ve->base.execlists.queue_priority = INT_MIN;
+	tasklet_init(&ve->base.execlists.tasklet,
+		     virtual_submission_tasklet,
+		     (unsigned long)ve);
+
+	ve->count = count;
+	for (n = 0; n < count; n++) {
+		struct intel_engine_cs *sibling = siblings[n];
+
+		ve->siblings[n] = sibling;
+		ve->base.mask |= sibling->mask;
+
+		if (sibling->execlists.tasklet.func != execlists_submission_tasklet) {
+			err = -ENODEV;
+			ve->count = n;
+			goto err_put;
+		}
+
+		if (RB_EMPTY_NODE(&ve->nodes[sibling->id].rb)) {
+			err = -EINVAL;
+			ve->count = n;
+			goto err_put;
+		}
+
+		RB_CLEAR_NODE(&ve->nodes[sibling->id].rb);
+
+		if (ve->base.class != OTHER_CLASS) {
+			if (ve->base.class != sibling->class) {
+				err = -EINVAL;
+				ve->count = n;
+				goto err_put;
+			}
+			continue;
+		}
+
+		ve->base.class = sibling->class;
+		snprintf(ve->base.name, sizeof(ve->base.name),
+			 "v%dx%d", ve->base.class, count);
+		ve->base.context_size = sibling->context_size;
+
+		ve->base.emit_bb_start = sibling->emit_bb_start;
+		ve->base.emit_flush = sibling->emit_flush;
+		ve->base.emit_breadcrumb = sibling->emit_breadcrumb;
+		ve->base.emit_breadcrumb_sz = sibling->emit_breadcrumb_sz;
+	}
+
+	return &ve->base;
+
+err_put:
+	virtual_engine_free(&ve->kref);
+	return ERR_PTR(err);
+}
+
+void intel_virtual_engine_put(struct intel_engine_cs *engine)
+{
+	if (!engine)
+		return;
+
+	if (engine->id != -1)
+		return;
+
+	kref_put(&to_virtual_engine(engine)->kref, virtual_engine_free);
+}
+
 void intel_execlists_show_requests(struct intel_engine_cs *engine,
 				   struct drm_printer *m,
 				   void (*show_request)(struct drm_printer *m,
@@ -2780,6 +3186,29 @@  void intel_execlists_show_requests(struct intel_engine_cs *engine,
 		show_request(m, last, "\t\tQ ");
 	}
 
+	last = NULL;
+	count = 0;
+	for (rb = rb_first_cached(&execlists->virtual); rb; rb = rb_next(rb)) {
+		struct virtual_engine *ve =
+			rb_entry(rb, typeof(*ve), nodes[engine->id].rb);
+		struct i915_request *rq = READ_ONCE(ve->request);
+
+		if (rq) {
+			if (count++ < max - 1)
+				show_request(m, rq, "\t\tV ");
+			else
+				last = rq;
+		}
+	}
+	if (last) {
+		if (count > max) {
+			drm_printf(m,
+				   "\t\t...skipping %d virtual requests...\n",
+				   count - max);
+		}
+		show_request(m, last, "\t\tV ");
+	}
+
 	spin_unlock_irqrestore(&engine->execution_lock, flags);
 }
 
diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h
index 3d86c27c6b32..c2a5ede8c126 100644
--- a/drivers/gpu/drm/i915/intel_lrc.h
+++ b/drivers/gpu/drm/i915/intel_lrc.h
@@ -112,4 +112,10 @@  void intel_execlists_show_requests(struct intel_engine_cs *engine,
 							const char *prefix),
 				   unsigned int max);
 
+struct intel_engine_cs *
+intel_execlists_create_virtual(struct i915_gem_context *ctx,
+			       struct intel_engine_cs **siblings,
+			       unsigned int count);
+void intel_virtual_engine_put(struct intel_engine_cs *engine);
+
 #endif /* _INTEL_LRC_H_ */
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index c1afe77260d0..b1497b2ada2a 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -317,6 +317,7 @@  struct intel_engine_execlists {
 	 * @queue: queue of requests, in priority lists
 	 */
 	struct rb_root_cached queue;
+	struct rb_root_cached virtual;
 
 	/**
 	 * @csb_write: control register for Context Switch buffer
@@ -508,6 +509,7 @@  struct intel_engine_cs {
 #define I915_ENGINE_NEEDS_CMD_PARSER BIT(0)
 #define I915_ENGINE_SUPPORTS_STATS   BIT(1)
 #define I915_ENGINE_HAS_PREEMPTION   BIT(2)
+#define I915_ENGINE_IS_VIRTUAL       BIT(3)
 	unsigned int flags;
 
 	/*
@@ -590,6 +592,12 @@  static inline bool __execlists_need_preempt(int prio, int last)
 	return prio > max(0, last);
 }
 
+static inline bool
+intel_engine_is_virtual(const struct intel_engine_cs *engine)
+{
+	return engine->flags & I915_ENGINE_IS_VIRTUAL;
+}
+
 static inline void
 execlists_set_active(struct intel_engine_execlists *execlists,
 		     unsigned int bit)
diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/selftests/i915_gem_context.c
index 3e68c2888b9c..22beeb674b17 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/selftests/i915_gem_context.c
@@ -27,6 +27,7 @@ 
 #include "../i915_selftest.h"
 #include "i915_random.h"
 #include "igt_flush_test.h"
+#include "igt_live_test.h"
 
 #include "mock_drm.h"
 #include "mock_gem_device.h"
@@ -34,77 +35,6 @@ 
 
 #define DW_PER_PAGE (PAGE_SIZE / sizeof(u32))
 
-struct live_test {
-	struct drm_i915_private *i915;
-	const char *func;
-	const char *name;
-
-	unsigned int reset_global;
-	unsigned int reset_engine[I915_NUM_ENGINES];
-};
-
-static int begin_live_test(struct live_test *t,
-			   struct drm_i915_private *i915,
-			   const char *func,
-			   const char *name)
-{
-	struct intel_engine_cs *engine;
-	enum intel_engine_id id;
-	int err;
-
-	t->i915 = i915;
-	t->func = func;
-	t->name = name;
-
-	err = i915_gem_wait_for_idle(i915,
-				     I915_WAIT_LOCKED,
-				     MAX_SCHEDULE_TIMEOUT);
-	if (err) {
-		pr_err("%s(%s): failed to idle before, with err=%d!",
-		       func, name, err);
-		return err;
-	}
-
-	t->reset_global = i915_reset_count(&i915->gpu_error);
-
-	for_each_engine(engine, i915, id)
-		t->reset_engine[id] =
-			i915_reset_engine_count(&i915->gpu_error, engine);
-
-	return 0;
-}
-
-static int end_live_test(struct live_test *t)
-{
-	struct drm_i915_private *i915 = t->i915;
-	struct intel_engine_cs *engine;
-	enum intel_engine_id id;
-
-	if (igt_flush_test(i915, I915_WAIT_LOCKED))
-		return -EIO;
-
-	if (t->reset_global != i915_reset_count(&i915->gpu_error)) {
-		pr_err("%s(%s): GPU was reset %d times!\n",
-		       t->func, t->name,
-		       i915_reset_count(&i915->gpu_error) - t->reset_global);
-		return -EIO;
-	}
-
-	for_each_engine(engine, i915, id) {
-		if (t->reset_engine[id] ==
-		    i915_reset_engine_count(&i915->gpu_error, engine))
-			continue;
-
-		pr_err("%s(%s): engine '%s' was reset %d times!\n",
-		       t->func, t->name, engine->name,
-		       i915_reset_engine_count(&i915->gpu_error, engine) -
-		       t->reset_engine[id]);
-		return -EIO;
-	}
-
-	return 0;
-}
-
 static int live_nop_switch(void *arg)
 {
 	const unsigned int nctx = 1024;
@@ -114,7 +44,7 @@  static int live_nop_switch(void *arg)
 	enum intel_engine_id id;
 	intel_wakeref_t wakeref;
 	struct drm_file *file;
-	struct live_test t;
+	struct igt_live_test t;
 	unsigned long n;
 	int err = -ENODEV;
 
@@ -178,7 +108,7 @@  static int live_nop_switch(void *arg)
 		pr_info("Populated %d contexts on %s in %lluns\n",
 			nctx, engine->name, ktime_to_ns(times[1] - times[0]));
 
-		err = begin_live_test(&t, i915, __func__, engine->name);
+		err = igt_live_test_begin(&t, i915, __func__, engine->name);
 		if (err)
 			goto out_unlock;
 
@@ -226,7 +156,7 @@  static int live_nop_switch(void *arg)
 				break;
 		}
 
-		err = end_live_test(&t);
+		err = igt_live_test_end(&t);
 		if (err)
 			goto out_unlock;
 
@@ -576,7 +506,7 @@  static int igt_ctx_exec(void *arg)
 		struct drm_file *file;
 		IGT_TIMEOUT(end_time);
 		LIST_HEAD(objects);
-		struct live_test t;
+		struct igt_live_test t;
 
 		if (!intel_engine_can_store_dword(engine))
 			continue;
@@ -590,7 +520,7 @@  static int igt_ctx_exec(void *arg)
 
 		mutex_lock(&i915->drm.struct_mutex);
 
-		err = begin_live_test(&t, i915, __func__, engine->name);
+		err = igt_live_test_begin(&t, i915, __func__, engine->name);
 		if (err)
 			goto out_unlock;
 
@@ -651,7 +581,7 @@  static int igt_ctx_exec(void *arg)
 		}
 
 out_unlock:
-		if (end_live_test(&t))
+		if (igt_live_test_end(&t))
 			err = -EIO;
 		mutex_unlock(&i915->drm.struct_mutex);
 
@@ -684,7 +614,7 @@  static int igt_shared_ctx_exec(void *arg)
 		struct drm_file *file;
 		IGT_TIMEOUT(end_time);
 		LIST_HEAD(objects);
-		struct live_test t;
+		struct igt_live_test t;
 
 		if (!intel_engine_can_store_dword(engine))
 			continue;
@@ -695,7 +625,7 @@  static int igt_shared_ctx_exec(void *arg)
 
 		mutex_lock(&i915->drm.struct_mutex);
 
-		err = begin_live_test(&t, i915, __func__, engine->name);
+		err = igt_live_test_begin(&t, i915, __func__, engine->name);
 		if (err)
 			goto out_unlock;
 
@@ -774,7 +704,7 @@  static int igt_shared_ctx_exec(void *arg)
 		}
 
 out_unlock:
-		if (end_live_test(&t))
+		if (igt_live_test_end(&t))
 			err = -EIO;
 		mutex_unlock(&i915->drm.struct_mutex);
 
@@ -797,7 +727,7 @@  static int igt_ctx_readonly(void *arg)
 	I915_RND_STATE(prng);
 	IGT_TIMEOUT(end_time);
 	LIST_HEAD(objects);
-	struct live_test t;
+	struct igt_live_test t;
 	int err = -ENODEV;
 
 	/*
@@ -812,7 +742,7 @@  static int igt_ctx_readonly(void *arg)
 
 	mutex_lock(&i915->drm.struct_mutex);
 
-	err = begin_live_test(&t, i915, __func__, "");
+	err = igt_live_test_begin(&t, i915, __func__, "");
 	if (err)
 		goto out_unlock;
 
@@ -891,7 +821,7 @@  static int igt_ctx_readonly(void *arg)
 	}
 
 out_unlock:
-	if (end_live_test(&t))
+	if (igt_live_test_end(&t))
 		err = -EIO;
 	mutex_unlock(&i915->drm.struct_mutex);
 
@@ -1119,7 +1049,7 @@  static int igt_vm_isolation(void *arg)
 	struct drm_file *file;
 	I915_RND_STATE(prng);
 	unsigned long count;
-	struct live_test t;
+	struct igt_live_test t;
 	unsigned int id;
 	u64 vm_total;
 	int err;
@@ -1138,7 +1068,7 @@  static int igt_vm_isolation(void *arg)
 
 	mutex_lock(&i915->drm.struct_mutex);
 
-	err = begin_live_test(&t, i915, __func__, "");
+	err = igt_live_test_begin(&t, i915, __func__, "");
 	if (err)
 		goto out_unlock;
 
@@ -1209,7 +1139,7 @@  static int igt_vm_isolation(void *arg)
 out_rpm:
 	intel_runtime_pm_put(i915, wakeref);
 out_unlock:
-	if (end_live_test(&t))
+	if (igt_live_test_end(&t))
 		err = -EIO;
 	mutex_unlock(&i915->drm.struct_mutex);
 
diff --git a/drivers/gpu/drm/i915/selftests/i915_request.c b/drivers/gpu/drm/i915/selftests/i915_request.c
index fa079c6d9c65..f60ba9b3990c 100644
--- a/drivers/gpu/drm/i915/selftests/i915_request.c
+++ b/drivers/gpu/drm/i915/selftests/i915_request.c
@@ -26,6 +26,7 @@ 
 
 #include "../i915_selftest.h"
 #include "i915_random.h"
+#include "igt_live_test.h"
 #include "lib_sw_fence.h"
 
 #include "mock_context.h"
@@ -506,66 +507,12 @@  int i915_request_mock_selftests(void)
 	return err;
 }
 
-struct live_test {
-	struct drm_i915_private *i915;
-	const char *func;
-	const char *name;
-
-	unsigned int reset_count;
-};
-
-static int begin_live_test(struct live_test *t,
-			   struct drm_i915_private *i915,
-			   const char *func,
-			   const char *name)
-{
-	int err;
-
-	t->i915 = i915;
-	t->func = func;
-	t->name = name;
-
-	err = i915_gem_wait_for_idle(i915,
-				     I915_WAIT_LOCKED,
-				     MAX_SCHEDULE_TIMEOUT);
-	if (err) {
-		pr_err("%s(%s): failed to idle before, with err=%d!",
-		       func, name, err);
-		return err;
-	}
-
-	t->reset_count = i915_reset_count(&i915->gpu_error);
-
-	return 0;
-}
-
-static int end_live_test(struct live_test *t)
-{
-	struct drm_i915_private *i915 = t->i915;
-
-	i915_retire_requests(i915);
-
-	if (wait_for(intel_engines_are_idle(i915), 10)) {
-		pr_err("%s(%s): GPU not idle\n", t->func, t->name);
-		return -EIO;
-	}
-
-	if (t->reset_count != i915_reset_count(&i915->gpu_error)) {
-		pr_err("%s(%s): GPU was reset %d times!\n",
-		       t->func, t->name,
-		       i915_reset_count(&i915->gpu_error) - t->reset_count);
-		return -EIO;
-	}
-
-	return 0;
-}
-
 static int live_nop_request(void *arg)
 {
 	struct drm_i915_private *i915 = arg;
 	struct intel_engine_cs *engine;
 	intel_wakeref_t wakeref;
-	struct live_test t;
+	struct igt_live_test t;
 	unsigned int id;
 	int err = -ENODEV;
 
@@ -583,7 +530,7 @@  static int live_nop_request(void *arg)
 		IGT_TIMEOUT(end_time);
 		ktime_t times[2] = {};
 
-		err = begin_live_test(&t, i915, __func__, engine->name);
+		err = igt_live_test_begin(&t, i915, __func__, engine->name);
 		if (err)
 			goto out_unlock;
 
@@ -625,7 +572,7 @@  static int live_nop_request(void *arg)
 				break;
 		}
 
-		err = end_live_test(&t);
+		err = igt_live_test_end(&t);
 		if (err)
 			goto out_unlock;
 
@@ -713,7 +660,7 @@  static int live_empty_request(void *arg)
 	struct intel_engine_cs *engine;
 	intel_wakeref_t wakeref;
 	struct i915_vma *batch;
-	struct live_test t;
+	struct igt_live_test t;
 	unsigned int id;
 	int err = 0;
 
@@ -737,7 +684,7 @@  static int live_empty_request(void *arg)
 		unsigned long n, prime;
 		ktime_t times[2] = {};
 
-		err = begin_live_test(&t, i915, __func__, engine->name);
+		err = igt_live_test_begin(&t, i915, __func__, engine->name);
 		if (err)
 			goto out_batch;
 
@@ -773,7 +720,7 @@  static int live_empty_request(void *arg)
 				break;
 		}
 
-		err = end_live_test(&t);
+		err = igt_live_test_end(&t);
 		if (err)
 			goto out_batch;
 
@@ -873,7 +820,7 @@  static int live_all_engines(void *arg)
 	struct i915_request *request[I915_NUM_ENGINES];
 	intel_wakeref_t wakeref;
 	struct i915_vma *batch;
-	struct live_test t;
+	struct igt_live_test t;
 	unsigned int id;
 	int err;
 
@@ -885,7 +832,7 @@  static int live_all_engines(void *arg)
 	mutex_lock(&i915->drm.struct_mutex);
 	wakeref = intel_runtime_pm_get(i915);
 
-	err = begin_live_test(&t, i915, __func__, "");
+	err = igt_live_test_begin(&t, i915, __func__, "");
 	if (err)
 		goto out_unlock;
 
@@ -957,7 +904,7 @@  static int live_all_engines(void *arg)
 		request[id] = NULL;
 	}
 
-	err = end_live_test(&t);
+	err = igt_live_test_end(&t);
 
 out_request:
 	for_each_engine(engine, i915, id)
@@ -978,7 +925,7 @@  static int live_sequential_engines(void *arg)
 	struct i915_request *prev = NULL;
 	struct intel_engine_cs *engine;
 	intel_wakeref_t wakeref;
-	struct live_test t;
+	struct igt_live_test t;
 	unsigned int id;
 	int err;
 
@@ -991,7 +938,7 @@  static int live_sequential_engines(void *arg)
 	mutex_lock(&i915->drm.struct_mutex);
 	wakeref = intel_runtime_pm_get(i915);
 
-	err = begin_live_test(&t, i915, __func__, "");
+	err = igt_live_test_begin(&t, i915, __func__, "");
 	if (err)
 		goto out_unlock;
 
@@ -1074,7 +1021,7 @@  static int live_sequential_engines(void *arg)
 		GEM_BUG_ON(!i915_request_completed(request[id]));
 	}
 
-	err = end_live_test(&t);
+	err = igt_live_test_end(&t);
 
 out_request:
 	for_each_engine(engine, i915, id) {
@@ -1152,7 +1099,7 @@  static int live_breadcrumbs_smoketest(void *arg)
 	enum intel_engine_id id;
 	intel_wakeref_t wakeref;
 	struct drm_file *file;
-	struct live_test live;
+	struct igt_live_test live;
 	unsigned int n;
 	int ret = 0;
 
@@ -1190,7 +1137,7 @@  static int live_breadcrumbs_smoketest(void *arg)
 		}
 	}
 
-	ret = begin_live_test(&live, i915, __func__, "");
+	ret = igt_live_test_begin(&live, i915, __func__, "");
 	if (ret)
 		goto out_contexts;
 
@@ -1248,7 +1195,7 @@  static int live_breadcrumbs_smoketest(void *arg)
 		num_waits, num_fences, RUNTIME_INFO(i915)->num_rings, ncpus);
 
 	mutex_lock(&i915->drm.struct_mutex);
-	ret = end_live_test(&live) ?: ret;
+	ret = igt_live_test_end(&live) ?: ret;
 out_contexts:
 	mutex_unlock(&i915->drm.struct_mutex);
 	kfree(t[0].contexts);
diff --git a/drivers/gpu/drm/i915/selftests/igt_live_test.c b/drivers/gpu/drm/i915/selftests/igt_live_test.c
new file mode 100644
index 000000000000..3e902761cd16
--- /dev/null
+++ b/drivers/gpu/drm/i915/selftests/igt_live_test.c
@@ -0,0 +1,78 @@ 
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2018 Intel Corporation
+ */
+
+#include "../i915_drv.h"
+
+#include "../i915_selftest.h"
+#include "igt_flush_test.h"
+#include "igt_live_test.h"
+
+int igt_live_test_begin(struct igt_live_test *t,
+			struct drm_i915_private *i915,
+			const char *func,
+			const char *name)
+{
+	struct intel_engine_cs *engine;
+	enum intel_engine_id id;
+	int err;
+
+	lockdep_assert_held(&i915->drm.struct_mutex);
+
+	t->i915 = i915;
+	t->func = func;
+	t->name = name;
+
+	err = i915_gem_wait_for_idle(i915,
+				     I915_WAIT_INTERRUPTIBLE |
+				     I915_WAIT_LOCKED,
+				     MAX_SCHEDULE_TIMEOUT);
+	if (err) {
+		pr_err("%s(%s): failed to idle before, with err=%d!",
+		       func, name, err);
+		return err;
+	}
+
+	t->reset_global = i915_reset_count(&i915->gpu_error);
+
+	for_each_engine(engine, i915, id)
+		t->reset_engine[id] =
+			i915_reset_engine_count(&i915->gpu_error, engine);
+
+	return 0;
+}
+
+int igt_live_test_end(struct igt_live_test *t)
+{
+	struct drm_i915_private *i915 = t->i915;
+	struct intel_engine_cs *engine;
+	enum intel_engine_id id;
+
+	lockdep_assert_held(&i915->drm.struct_mutex);
+
+	if (igt_flush_test(i915, I915_WAIT_LOCKED))
+		return -EIO;
+
+	if (t->reset_global != i915_reset_count(&i915->gpu_error)) {
+		pr_err("%s(%s): GPU was reset %d times!\n",
+		       t->func, t->name,
+		       i915_reset_count(&i915->gpu_error) - t->reset_global);
+		return -EIO;
+	}
+
+	for_each_engine(engine, i915, id) {
+		if (t->reset_engine[id] ==
+		    i915_reset_engine_count(&i915->gpu_error, engine))
+			continue;
+
+		pr_err("%s(%s): engine '%s' was reset %d times!\n",
+		       t->func, t->name, engine->name,
+		       i915_reset_engine_count(&i915->gpu_error, engine) -
+		       t->reset_engine[id]);
+		return -EIO;
+	}
+
+	return 0;
+}
diff --git a/drivers/gpu/drm/i915/selftests/igt_live_test.h b/drivers/gpu/drm/i915/selftests/igt_live_test.h
new file mode 100644
index 000000000000..94570044675e
--- /dev/null
+++ b/drivers/gpu/drm/i915/selftests/igt_live_test.h
@@ -0,0 +1,29 @@ 
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2019 Intel Corporation
+ */
+
+#ifndef IGT_LIVE_TEST_H
+#define IGT_LIVE_TEST_H
+
+#include "../i915_gem.h"
+
+struct drm_i915_private;
+
+struct igt_live_test {
+	struct drm_i915_private *i915;
+	const char *func;
+	const char *name;
+
+	unsigned int reset_global;
+	unsigned int reset_engine[I915_NUM_ENGINES];
+};
+
+int igt_live_test_begin(struct igt_live_test *t,
+			struct drm_i915_private *i915,
+			const char *func,
+			const char *name);
+int igt_live_test_end(struct igt_live_test *t);
+
+#endif /* IGT_LIVE_TEST_H */
diff --git a/drivers/gpu/drm/i915/selftests/intel_lrc.c b/drivers/gpu/drm/i915/selftests/intel_lrc.c
index 607c969ea605..bb0ea918a9ef 100644
--- a/drivers/gpu/drm/i915/selftests/intel_lrc.c
+++ b/drivers/gpu/drm/i915/selftests/intel_lrc.c
@@ -4,6 +4,8 @@ 
  * Copyright © 2018 Intel Corporation
  */
 
+#include <linux/prime_numbers.h>
+
 #include "../i915_reset.h"
 
 #include "../i915_selftest.h"
@@ -641,6 +643,212 @@  static int live_preempt_smoke(void *arg)
 	return err;
 }
 
+struct live_test {
+	struct drm_i915_private *i915;
+	const char *func;
+	const char *name;
+
+	unsigned int reset_count;
+	bool wedge;
+};
+
+static int begin_live_test(struct live_test *t,
+			   struct drm_i915_private *i915,
+			   const char *func,
+			   const char *name)
+{
+	t->i915 = i915;
+	t->func = func;
+	t->name = name;
+
+	if (igt_flush_test(i915, I915_WAIT_LOCKED))
+		return -EIO;
+
+	t->reset_count = i915_reset_count(&i915->gpu_error);
+
+	return 0;
+}
+
+static int end_live_test(struct live_test *t)
+{
+	struct drm_i915_private *i915 = t->i915;
+
+	if (igt_flush_test(i915, I915_WAIT_LOCKED))
+		return -EIO;
+
+	if (t->reset_count != i915_reset_count(&i915->gpu_error)) {
+		pr_err("%s(%s): GPU was reset %d times!\n",
+		       t->func, t->name,
+		       i915_reset_count(&i915->gpu_error) - t->reset_count);
+		return -EIO;
+	}
+
+	return 0;
+}
+
+static int nop_virtual_engine(struct drm_i915_private *i915,
+			      struct intel_engine_cs **siblings,
+			      unsigned int nsibling,
+			      unsigned int nctx,
+			      unsigned int flags)
+#define CHAIN BIT(0)
+{
+	IGT_TIMEOUT(end_time);
+	struct i915_request *request[16];
+	struct i915_gem_context *ctx[16];
+	struct intel_engine_cs *ve[16];
+	unsigned long n, prime, nc;
+	ktime_t times[2] = {};
+	struct live_test t;
+	int err;
+
+	GEM_BUG_ON(!nctx || nctx > ARRAY_SIZE(ctx));
+
+	for (n = 0; n < nctx; n++) {
+		ctx[n] = kernel_context(i915);
+		if (!ctx[n])
+			return -ENOMEM;
+
+		ve[n] = intel_execlists_create_virtual(ctx[n],
+						       siblings, nsibling);
+		if (IS_ERR(ve[n]))
+			return PTR_ERR(ve[n]);
+	}
+
+	err = begin_live_test(&t, i915, __func__, ve[0]->name);
+	if (err)
+		goto out;
+
+	for_each_prime_number_from(prime, 1, 8192) {
+		times[1] = ktime_get_raw();
+
+		if (flags & CHAIN) {
+			for (nc = 0; nc < nctx; nc++) {
+				for (n = 0; n < prime; n++) {
+					request[nc] =
+						i915_request_alloc(ve[nc], ctx[nc]);
+					if (IS_ERR(request[nc])) {
+						err = PTR_ERR(request[nc]);
+						goto out;
+					}
+
+					i915_request_add(request[nc]);
+				}
+			}
+		} else {
+			for (n = 0; n < prime; n++) {
+				for (nc = 0; nc < nctx; nc++) {
+					request[nc] =
+						i915_request_alloc(ve[nc], ctx[nc]);
+					if (IS_ERR(request[nc])) {
+						err = PTR_ERR(request[nc]);
+						goto out;
+					}
+
+					i915_request_add(request[nc]);
+				}
+			}
+		}
+
+		for (nc = 0; nc < nctx; nc++) {
+			if (i915_request_wait(request[nc],
+					      I915_WAIT_LOCKED,
+					      HZ / 10) < 0) {
+				pr_err("%s(%s): wait for %llx:%lld timed out\n",
+				       __func__, ve[0]->name,
+				       request[nc]->fence.context,
+				       request[nc]->fence.seqno);
+
+				GEM_TRACE("%s(%s) failed at request %llx:%lld\n",
+					  __func__, ve[0]->name,
+					  request[nc]->fence.context,
+					  request[nc]->fence.seqno);
+				GEM_TRACE_DUMP();
+				i915_gem_set_wedged(i915);
+				break;
+			}
+		}
+
+		times[1] = ktime_sub(ktime_get_raw(), times[1]);
+		if (prime == 1)
+			times[0] = times[1];
+
+		if (__igt_timeout(end_time, NULL))
+			break;
+	}
+
+	err = end_live_test(&t);
+	if (err)
+		goto out;
+
+	pr_info("Requestx%d latencies on %s: 1 = %lluns, %lu = %lluns\n",
+		nctx, ve[0]->name, ktime_to_ns(times[0]),
+		prime, div64_u64(ktime_to_ns(times[1]), prime));
+
+out:
+	if (igt_flush_test(i915, I915_WAIT_LOCKED))
+		err = -EIO;
+
+	for (nc = 0; nc < nctx; nc++) {
+		intel_virtual_engine_put(ve[nc]);
+		kernel_context_close(ctx[nc]);
+	}
+	return err;
+}
+
+static int live_virtual_engine(void *arg)
+{
+	struct drm_i915_private *i915 = arg;
+	struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
+	struct intel_engine_cs *engine;
+	enum intel_engine_id id;
+	unsigned int class, inst;
+	int err = -ENODEV;
+
+	if (USES_GUC_SUBMISSION(i915))
+		return 0;
+
+	mutex_lock(&i915->drm.struct_mutex);
+
+	for_each_engine(engine, i915, id) {
+		err = nop_virtual_engine(i915, &engine, 1, 1, 0);
+		if (err) {
+			pr_err("Failed to wrap engine %s: err=%d\n",
+			       engine->name, err);
+			goto out_unlock;
+		}
+	}
+
+	for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
+		int nsibling, n;
+
+		nsibling = 0;
+		for (inst = 0; inst <= MAX_ENGINE_INSTANCE; inst++) {
+			if (!i915->engine_class[class][inst])
+				break;
+
+			siblings[nsibling++] = i915->engine_class[class][inst];
+		}
+		if (nsibling < 2)
+			continue;
+
+		for (n = 1; n <= nsibling + 1; n++) {
+			err = nop_virtual_engine(i915, siblings, nsibling,
+						 n, 0);
+			if (err)
+				goto out_unlock;
+		}
+
+		err = nop_virtual_engine(i915, siblings, nsibling, n, CHAIN);
+		if (err)
+			goto out_unlock;
+	}
+
+out_unlock:
+	mutex_unlock(&i915->drm.struct_mutex);
+	return err;
+}
+
 int intel_execlists_live_selftests(struct drm_i915_private *i915)
 {
 	static const struct i915_subtest tests[] = {
@@ -649,6 +857,7 @@  int intel_execlists_live_selftests(struct drm_i915_private *i915)
 		SUBTEST(live_late_preempt),
 		SUBTEST(live_preempt_hang),
 		SUBTEST(live_preempt_smoke),
+		SUBTEST(live_virtual_engine),
 	};
 
 	if (!HAS_EXECLISTS(i915))
diff --git a/drivers/gpu/drm/i915/selftests/mock_context.c b/drivers/gpu/drm/i915/selftests/mock_context.c
index f13f9c726034..47ed6de311bc 100644
--- a/drivers/gpu/drm/i915/selftests/mock_context.c
+++ b/drivers/gpu/drm/i915/selftests/mock_context.c
@@ -45,14 +45,8 @@  mock_context(struct drm_i915_private *i915,
 	INIT_LIST_HEAD(&ctx->handles_list);
 	INIT_LIST_HEAD(&ctx->hw_id_link);
 
-	for (n = 0; n < ARRAY_SIZE(ctx->__engine); n++) {
-		struct intel_context *ce = &ctx->__engine[n];
-
-		ce->gem_context = ctx;
-		INIT_LIST_HEAD(&ce->active_link);
-		INIT_LIST_HEAD(&ce->signal_link);
-		INIT_LIST_HEAD(&ce->signals);
-	}
+	for (n = 0; n < ARRAY_SIZE(ctx->__engine); n++)
+		intel_context_init(&ctx->__engine[n], ctx, i915->engine[n]);
 
 	ret = i915_gem_context_pin_hw_id(ctx);
 	if (ret < 0)
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 84f42317de92..40a758dbe18d 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -123,6 +123,7 @@  enum drm_i915_gem_engine_class {
 };
 
 #define I915_ENGINE_CLASS_INVALID_NONE -1
+#define I915_ENGINE_CLASS_INVALID_VIRTUAL 0
 
 /**
  * DOC: perf_events exposed by i915 through /sys/bus/event_sources/drivers/i915
@@ -1488,8 +1489,34 @@  struct drm_i915_gem_context_param {
 	__u64 value;
 };
 
+/*
+ * i915_context_engines_load_balance:
+ *
+ * Enable load balancing across this set of engines.
+ *
+ * Into the I915_EXEC_DEFAULT slot [0], a virtual engine is created that when
+ * used will proxy the execbuffer request onto one of the set of engines
+ * in such a way as to distribute the load evenly across the set.
+ *
+ * The set of engines must be compatible (e.g. the same HW class) as they
+ * will share the same logical GPU context and ring.
+ *
+ * To intermix rendering with the virtual engine and direct rendering onto
+ * the backing engines (bypassing the load balancing proxy), the context must
+ * be defined to use a single timeline for all engines.
+ */
+struct i915_context_engines_load_balance {
+	struct i915_user_extension base;
+
+	__u64 flags; /* all undefined flags must be zero */
+	__u64 engines_mask; /* selection mask of engines[] */
+
+	__u64 mbz[4]; /* reserved for future use; must be zero */
+};
+
 struct i915_context_param_engines {
 	__u64 extensions; /* linked chain of extension blocks, 0 terminates */
+#define I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE 0
 
 	struct {
 		__u16 engine_class; /* see enum drm_i915_gem_engine_class */