[RFC,3/5] drm/i915: split out virtual engine code

Message ID 20191211211244.7831-4-daniele.ceraolospurio@intel.com (mailing list archive)
State New, archived
Series Split up intel_lrc.c

Commit Message

Daniele Ceraolo Spurio Dec. 11, 2019, 9:12 p.m. UTC
Having the virtual engine handling in its own file will make it easier
to call it from, or modify it for, the GuC implementation without
leaking the changes into the context management or execlists submission
paths.
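
After the split the layering is roughly:

  intel_virtual_engine.c - veng creation/cloning, bonding and the
                           virtual context ops
  intel_lrc.c            - execlists submission, plus the LR context
                           helpers (intel_lr_context_{alloc,pin,unpin,fini})
                           that it now exports for the veng code to reuse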

Signed-off-by: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
Cc: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
Cc: Matthew Brost <matthew.brost@intel.com>
---
 drivers/gpu/drm/i915/Makefile                 |   1 +
 drivers/gpu/drm/i915/gem/i915_gem_context.c   |   5 +-
 drivers/gpu/drm/i915/gt/intel_engine_pool.c   |   1 +
 drivers/gpu/drm/i915/gt/intel_lrc.c           | 403 ++----------------
 drivers/gpu/drm/i915/gt/intel_lrc.h           |  29 +-
 .../gpu/drm/i915/gt/intel_virtual_engine.c    | 359 ++++++++++++++++
 .../gpu/drm/i915/gt/intel_virtual_engine.h    |  48 +++
 drivers/gpu/drm/i915/gt/selftest_lrc.c        |  13 +-
 8 files changed, 457 insertions(+), 402 deletions(-)
 create mode 100644 drivers/gpu/drm/i915/gt/intel_virtual_engine.c
 create mode 100644 drivers/gpu/drm/i915/gt/intel_virtual_engine.h

Comments

Chris Wilson Dec. 11, 2019, 9:22 p.m. UTC | #1
Quoting Daniele Ceraolo Spurio (2019-12-11 21:12:42)
> Having the virtual engine handling in its own file will make it easier
> to call it from, or modify it for, the GuC implementation without
> leaking the changes into the context management or execlists
> submission paths.

No. The virtual engine is tightly coupled into the execlists; it is not
the starting point for a general veng.
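
The coupling is enforced at creation time: every sibling is rejected
unless it is driven by the execlists submission tasklet (the check this
series rewraps as intel_engine_in_execlists_submission_mode()):

	if (sibling->execlists.tasklet.func !=
	    execlists_submission_tasklet) {
		err = -ENODEV;
		goto err_put;
	}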
-Chris
Daniele Ceraolo Spurio Dec. 11, 2019, 9:34 p.m. UTC | #2
On 12/11/19 1:22 PM, Chris Wilson wrote:
> Quoting Daniele Ceraolo Spurio (2019-12-11 21:12:42)
>> Having the virtual engine handling in its own file will make it easier
>> to call it from, or modify it for, the GuC implementation without
>> leaking the changes into the context management or execlists
>> submission paths.
> 
> No. The virtual engine is tightly coupled into the execlists; it is not
> the starting point for a general veng.
> -Chris
> 

What's the issue from your POV? We've been using it with minimal changes 
for GuC submission and IMO it flows relatively well, mainly just using a 
different tasklet and slightly different cops (we need to call into the 
GuC for pin/unpin).

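As a rough sketch (the guc_* names below are illustrative only, not
code from an actual patch), the GuC flavour keeps the common helpers
and only wraps pin/unpin:

	static int guc_virtual_context_pin(struct intel_context *ce)
	{
		struct intel_virtual_engine *ve =
			container_of(ce, typeof(*ve), context);
		int err;

		/* same LR context setup as virtual_context_pin() */
		err = intel_lr_context_pin(ce, ve->siblings[0]);
		if (err)
			return err;

		/* hypothetical: register the context with the GuC */
		return guc_virtual_context_register(ce);
	}

	static const struct intel_context_ops guc_virtual_context_ops = {
		.pin = guc_virtual_context_pin,
		.unpin = guc_virtual_context_unpin, /* unpin + GuC unregister */
		.enter = virtual_context_enter,
		.exit = virtual_context_exit,
		.destroy = virtual_context_destroy,
	};
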
Daniele
Matthew Brost Dec. 11, 2019, 11:09 p.m. UTC | #3
On Wed, Dec 11, 2019 at 01:34:20PM -0800, Daniele Ceraolo Spurio wrote:
>
>
>On 12/11/19 1:22 PM, Chris Wilson wrote:
>>Quoting Daniele Ceraolo Spurio (2019-12-11 21:12:42)
>>>Having the virtual engine handling in its own file will make it easier
>>>to call it from, or modify it for, the GuC implementation without
>>>leaking the changes into the context management or execlists
>>>submission paths.
>>
>>No. The virtual engine is tightly coupled into the execlists; it is not
>>the starting point for a general veng.
>>-Chris
>>
>
>What's the issue from your POV? We've been using it with minimal 
>changes for GuC submission and IMO it flows relatively well, mainly 
>just using a different tasklet and slightly different cops (we need 
>to call into the GuC for pin/unpin).
>
>Daniele

I agree with Daniele's approach here. The new GuC code can reuse
intel_virtual_engine_create() with a couple of GuC-specific branches in
the function. The new GuC code also reuses virtual_context_enter /
virtual_context_exit in the virtual GuC context operations. To me it
makes more sense to have this virtual engine code in its own file than
to pollute an execlists-specific file with references to the GuC.

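Concretely, the branch in intel_virtual_engine_create() could be as
small as this (intel_engine_uses_guc() and the guc_* symbols are
illustrative names, not code from this series):

	if (intel_engine_uses_guc(siblings[0])) {
		ve->base.cops = &guc_virtual_context_ops;
		intel_guc_virtual_submission_init(ve);
	} else {
		ve->base.cops = &virtual_context_ops;
		intel_execlists_virtual_submission_init(ve);
	}

Everything else in the creation path stays common.
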
Matt

Patch

diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index e0fd10c0cfb8..79f5ef5acd4c 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -98,6 +98,7 @@  gt-y += \
 	gt/intel_rps.o \
 	gt/intel_sseu.o \
 	gt/intel_timeline.o \
+	gt/intel_virtual_engine.o \
 	gt/intel_workarounds.o
 # autogenerated null render state
 gt-y += \
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c
index 46b4d1d643f8..6461370223b8 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
@@ -74,6 +74,7 @@ 
 #include "gt/intel_engine_user.h"
 #include "gt/intel_lrc_reg.h"
 #include "gt/intel_ring.h"
+#include "gt/intel_virtual_engine.h"
 
 #include "i915_gem_context.h"
 #include "i915_globals.h"
@@ -1536,7 +1537,7 @@  set_engines__load_balance(struct i915_user_extension __user *base, void *data)
 		}
 	}
 
-	ce = intel_execlists_create_virtual(set->ctx, siblings, n);
+	ce = intel_virtual_engine_create(set->ctx, siblings, n);
 	if (IS_ERR(ce)) {
 		err = PTR_ERR(ce);
 		goto out_siblings;
@@ -1999,7 +2000,7 @@  static int clone_engines(struct i915_gem_context *dst,
 		 */
 		if (intel_engine_is_virtual(engine))
 			clone->engines[n] =
-				intel_execlists_clone_virtual(dst, engine);
+				intel_virtual_engine_clone(dst, engine);
 		else
 			clone->engines[n] = intel_context_create(dst, engine);
 		if (IS_ERR_OR_NULL(clone->engines[n])) {
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pool.c b/drivers/gpu/drm/i915/gt/intel_engine_pool.c
index 397186818305..33ab0e5bfa41 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_pool.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_pool.c
@@ -9,6 +9,7 @@ 
 #include "i915_drv.h"
 #include "intel_engine_pm.h"
 #include "intel_engine_pool.h"
+#include "intel_virtual_engine.h"
 
 static struct intel_engine_cs *to_engine(struct intel_engine_pool *pool)
 {
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index e6dea2d3a5c0..3afae9a44911 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -147,7 +147,7 @@ 
 #include "intel_mocs.h"
 #include "intel_reset.h"
 #include "intel_ring.h"
-#include "intel_virtual_engine_types.h"
+#include "intel_virtual_engine.h"
 #include "intel_workarounds.h"
 
 #define RING_EXECLIST_QFULL		(1 << 0x2)
@@ -181,16 +181,6 @@ 
 #define WA_TAIL_DWORDS 2
 #define WA_TAIL_BYTES (sizeof(u32) * WA_TAIL_DWORDS)
 
-static struct intel_virtual_engine *
-to_virtual_engine(struct intel_engine_cs *engine)
-{
-	GEM_BUG_ON(!intel_engine_is_virtual(engine));
-	return container_of(engine, struct intel_virtual_engine, base);
-}
-
-static int lr_context_alloc(struct intel_context *ce,
-			    struct intel_engine_cs *engine);
-
 static void lr_context_init_reg_state(u32 *reg_state,
 				      const struct intel_context *ce,
 				      const struct intel_engine_cs *engine,
@@ -805,6 +795,12 @@  static const u8 *reg_offsets(const struct intel_engine_cs *engine)
 	}
 }
 
+u32 *intel_lr_context_set_register_offsets(u32 *regs,
+					   const struct intel_engine_cs *engine)
+{
+	return set_offsets(regs, reg_offsets(engine), engine);
+}
+
 static struct i915_request *
 __unwind_incomplete_requests(struct intel_engine_cs *engine)
 {
@@ -1403,12 +1399,6 @@  static bool can_merge_rq(const struct i915_request *prev,
 	return true;
 }
 
-static void virtual_update_register_offsets(u32 *regs,
-					    struct intel_engine_cs *engine)
-{
-	set_offsets(regs, reg_offsets(engine), engine);
-}
-
 static bool virtual_matches(const struct intel_virtual_engine *ve,
 			    const struct i915_request *rq,
 			    const struct intel_engine_cs *engine)
@@ -1802,8 +1792,8 @@  static void execlists_dequeue(struct intel_engine_cs *engine)
 				GEM_BUG_ON(READ_ONCE(ve->context.inflight));
 
 				if (!intel_engine_has_relative_mmio(engine))
-					virtual_update_register_offsets(regs,
-									engine);
+					intel_lr_context_set_register_offsets(regs,
+									      engine);
 
 				if (!list_empty(&ve->context.signals))
 					virtual_xfer_breadcrumbs(ve, engine);
@@ -2339,12 +2329,6 @@  static void execlists_submit_request(struct i915_request *request)
 	spin_unlock_irqrestore(&engine->active.lock, flags);
 }
 
-static void lr_context_fini(struct intel_context *ce)
-{
-	intel_ring_put(ce->ring);
-	i915_vma_put(ce->state);
-}
-
 static void execlists_context_destroy(struct kref *kref)
 {
 	struct intel_context *ce = container_of(kref, typeof(*ce), ref);
@@ -2353,7 +2337,7 @@  static void execlists_context_destroy(struct kref *kref)
 	GEM_BUG_ON(intel_context_is_pinned(ce));
 
 	if (ce->state)
-		lr_context_fini(ce);
+		intel_lr_context_fini(ce);
 
 	intel_context_fini(ce);
 	intel_context_free(ce);
@@ -2384,7 +2368,7 @@  check_redzone(const void *vaddr, const struct intel_engine_cs *engine)
 			     engine->name);
 }
 
-static void intel_lr_context_unpin(struct intel_context *ce)
+void intel_lr_context_unpin(struct intel_context *ce)
 {
 	check_redzone((void *)ce->lrc_reg_state - LRC_STATE_PN * PAGE_SIZE,
 		      ce->engine);
@@ -2416,8 +2400,9 @@  lr_context_update_reg_state(const struct intel_context *ce,
 	}
 }
 
-static int
-lr_context_pin(struct intel_context *ce, struct intel_engine_cs *engine)
+int
+intel_lr_context_pin(struct intel_context *ce,
+		     struct intel_engine_cs *engine)
 {
 	void *vaddr;
 	int ret;
@@ -2451,12 +2436,12 @@  lr_context_pin(struct intel_context *ce, struct intel_engine_cs *engine)
 
 static int execlists_context_pin(struct intel_context *ce)
 {
-	return lr_context_pin(ce, ce->engine);
+	return intel_lr_context_pin(ce, ce->engine);
 }
 
 static int execlists_context_alloc(struct intel_context *ce)
 {
-	return lr_context_alloc(ce, ce->engine);
+	return intel_lr_context_alloc(ce, ce->engine);
 }
 
 static void execlists_context_reset(struct intel_context *ce)
@@ -4030,7 +4015,7 @@  static void lr_context_init_reg_state(u32 *regs,
 	 *
 	 * Must keep consistent with virtual_update_register_offsets().
 	 */
-	u32 *bbe = set_offsets(regs, reg_offsets(engine), engine);
+	u32 *bbe = intel_lr_context_set_register_offsets(regs, engine);
 
 	if (close) { /* Close the batch; used mainly by live_lrc_layout() */
 		*bbe = MI_BATCH_BUFFER_END;
@@ -4098,8 +4083,8 @@  populate_lr_context(struct intel_context *ce,
 	return ret;
 }
 
-static int lr_context_alloc(struct intel_context *ce,
-			    struct intel_engine_cs *engine)
+int intel_lr_context_alloc(struct intel_context *ce,
+			   struct intel_engine_cs *engine)
 {
 	struct drm_i915_gem_object *ctx_obj;
 	struct intel_ring *ring;
@@ -4159,123 +4144,12 @@  static int lr_context_alloc(struct intel_context *ce,
 	return ret;
 }
 
-static struct list_head *virtual_queue(struct intel_virtual_engine *ve)
+void intel_lr_context_fini(struct intel_context *ce)
 {
-	return &ve->base.execlists.default_priolist.requests[0];
-}
-
-static void virtual_context_destroy(struct kref *kref)
-{
-	struct intel_virtual_engine *ve =
-		container_of(kref, typeof(*ve), context.ref);
-	unsigned int n;
-
-	GEM_BUG_ON(!list_empty(virtual_queue(ve)));
-	GEM_BUG_ON(ve->request);
-	GEM_BUG_ON(ve->context.inflight);
-
-	for (n = 0; n < ve->num_siblings; n++) {
-		struct intel_engine_cs *sibling = ve->siblings[n];
-		struct rb_node *node = &ve->nodes[sibling->id].rb;
-		unsigned long flags;
-
-		if (RB_EMPTY_NODE(node))
-			continue;
-
-		spin_lock_irqsave(&sibling->active.lock, flags);
-
-		/* Detachment is lazily performed in the execlists tasklet */
-		if (!RB_EMPTY_NODE(node))
-			rb_erase_cached(node, &sibling->execlists.virtual);
-
-		spin_unlock_irqrestore(&sibling->active.lock, flags);
-	}
-	GEM_BUG_ON(__tasklet_is_scheduled(&ve->base.execlists.tasklet));
-
-	if (ve->context.state)
-		lr_context_fini(&ve->context);
-	intel_context_fini(&ve->context);
-
-	kfree(ve->bonds);
-	kfree(ve);
-}
-
-static void virtual_engine_initial_hint(struct intel_virtual_engine *ve)
-{
-	int swp;
-
-	/*
-	 * Pick a random sibling on starting to help spread the load around.
-	 *
-	 * New contexts are typically created with exactly the same order
-	 * of siblings, and often started in batches. Due to the way we iterate
-	 * the array of sibling when submitting requests, sibling[0] is
-	 * prioritised for dequeuing. If we make sure that sibling[0] is fairly
-	 * randomised across the system, we also help spread the load by the
-	 * first engine we inspect being different each time.
-	 *
-	 * NB This does not force us to execute on this engine, it will just
-	 * typically be the first we inspect for submission.
-	 */
-	swp = prandom_u32_max(ve->num_siblings);
-	if (!swp)
-		return;
-
-	swap(ve->siblings[swp], ve->siblings[0]);
-	if (!intel_engine_has_relative_mmio(ve->siblings[0]))
-		virtual_update_register_offsets(ve->context.lrc_reg_state,
-						ve->siblings[0]);
-}
-
-static int virtual_context_pin(struct intel_context *ce)
-{
-	struct intel_virtual_engine *ve =
-		container_of(ce, typeof(*ve), context);
-	int err;
-
-	/* Note: we must use a real engine class for setting up reg state */
-	err = lr_context_pin(ce, ve->siblings[0]);
-	if (err)
-		return err;
-
-	virtual_engine_initial_hint(ve);
-	return 0;
-}
-
-static void virtual_context_enter(struct intel_context *ce)
-{
-	struct intel_virtual_engine *ve =
-		container_of(ce, typeof(*ve), context);
-	unsigned int n;
-
-	for (n = 0; n < ve->num_siblings; n++)
-		intel_engine_pm_get(ve->siblings[n]);
-
-	intel_timeline_enter(ce->timeline);
-}
-
-static void virtual_context_exit(struct intel_context *ce)
-{
-	struct intel_virtual_engine *ve =
-		container_of(ce, typeof(*ve), context);
-	unsigned int n;
-
-	intel_timeline_exit(ce->timeline);
-
-	for (n = 0; n < ve->num_siblings; n++)
-		intel_engine_pm_put(ve->siblings[n]);
+	intel_ring_put(ce->ring);
+	i915_vma_put(ce->state);
 }
 
-static const struct intel_context_ops virtual_context_ops = {
-	.pin = virtual_context_pin,
-	.unpin = intel_lr_context_unpin,
-
-	.enter = virtual_context_enter,
-	.exit = virtual_context_exit,
-
-	.destroy = virtual_context_destroy,
-};
-
 static intel_engine_mask_t
 virtual_submission_mask(struct intel_virtual_engine *ve)
 {
@@ -4414,8 +4288,8 @@  static void virtual_submit_request(struct i915_request *rq)
 		ve->base.execlists.queue_priority_hint = rq_prio(rq);
 		ve->request = i915_request_get(rq);
 
-		GEM_BUG_ON(!list_empty(virtual_queue(ve)));
-		list_move_tail(&rq->sched.link, virtual_queue(ve));
+		GEM_BUG_ON(!list_empty(intel_virtual_engine_queue(ve)));
+		list_move_tail(&rq->sched.link, intel_virtual_engine_queue(ve));
 
 		tasklet_schedule(&ve->base.execlists.tasklet);
 	}
@@ -4423,20 +4297,6 @@  static void virtual_submit_request(struct i915_request *rq)
 	spin_unlock_irqrestore(&ve->base.active.lock, flags);
 }
 
-static struct ve_bond *
-virtual_find_bond(struct intel_virtual_engine *ve,
-		  const struct intel_engine_cs *master)
-{
-	int i;
-
-	for (i = 0; i < ve->num_bonds; i++) {
-		if (ve->bonds[i].master == master)
-			return &ve->bonds[i];
-	}
-
-	return NULL;
-}
-
 static void
 virtual_bond_execute(struct i915_request *rq, struct dma_fence *signal)
 {
@@ -4446,7 +4306,7 @@  virtual_bond_execute(struct i915_request *rq, struct dma_fence *signal)
 
 	allowed = ~to_request(signal)->engine->mask;
 
-	bond = virtual_find_bond(ve, to_request(signal)->engine);
+	bond = intel_virtual_engine_find_bond(ve, to_request(signal)->engine);
 	if (bond)
 		allowed &= bond->sibling_mask;
 
@@ -4459,225 +4319,14 @@  virtual_bond_execute(struct i915_request *rq, struct dma_fence *signal)
 	to_request(signal)->execution_mask &= ~allowed;
 }
 
-struct intel_context *
-intel_execlists_create_virtual(struct i915_gem_context *ctx,
-			       struct intel_engine_cs **siblings,
-			       unsigned int count)
+void intel_execlists_virtual_submission_init(struct intel_virtual_engine *ve)
 {
-	struct intel_virtual_engine *ve;
-	unsigned int n;
-	int err;
-
-	if (count == 0)
-		return ERR_PTR(-EINVAL);
-
-	if (count == 1)
-		return intel_context_create(ctx, siblings[0]);
-
-	ve = kzalloc(struct_size(ve, siblings, count), GFP_KERNEL);
-	if (!ve)
-		return ERR_PTR(-ENOMEM);
-
-	ve->base.i915 = ctx->i915;
-	ve->base.gt = siblings[0]->gt;
-	ve->base.uncore = siblings[0]->uncore;
-	ve->base.id = -1;
-	ve->base.class = OTHER_CLASS;
-	ve->base.uabi_class = I915_ENGINE_CLASS_INVALID;
-	ve->base.instance = I915_ENGINE_CLASS_INVALID_VIRTUAL;
-
-	/*
-	 * The decision on whether to submit a request using semaphores
-	 * depends on the saturated state of the engine. We only compute
-	 * this during HW submission of the request, and we need for this
-	 * state to be globally applied to all requests being submitted
-	 * to this engine. Virtual engines encompass more than one physical
-	 * engine and so we cannot accurately tell in advance if one of those
-	 * engines is already saturated and so cannot afford to use a semaphore
-	 * and be pessimized in priority for doing so -- if we are the only
-	 * context using semaphores after all other clients have stopped, we
-	 * will be starved on the saturated system. Such a global switch for
-	 * semaphores is less than ideal, but alas is the current compromise.
-	 */
-	ve->base.saturated = ALL_ENGINES;
-
-	snprintf(ve->base.name, sizeof(ve->base.name), "virtual");
-
-	intel_engine_init_active(&ve->base, ENGINE_VIRTUAL);
-	intel_engine_init_breadcrumbs(&ve->base);
-
-	intel_engine_init_execlists(&ve->base);
-
-	ve->base.cops = &virtual_context_ops;
 	ve->base.request_alloc = execlists_request_alloc;
-
-	ve->base.schedule = i915_schedule;
 	ve->base.submit_request = virtual_submit_request;
 	ve->base.bond_execute = virtual_bond_execute;
-
-	INIT_LIST_HEAD(virtual_queue(ve));
-	ve->base.execlists.queue_priority_hint = INT_MIN;
 	tasklet_init(&ve->base.execlists.tasklet,
 		     virtual_submission_tasklet,
 		     (unsigned long)ve);
-
-	intel_context_init(&ve->context, ctx, &ve->base);
-
-	for (n = 0; n < count; n++) {
-		struct intel_engine_cs *sibling = siblings[n];
-
-		GEM_BUG_ON(!is_power_of_2(sibling->mask));
-		if (sibling->mask & ve->base.mask) {
-			DRM_DEBUG("duplicate %s entry in load balancer\n",
-				  sibling->name);
-			err = -EINVAL;
-			goto err_put;
-		}
-
-		/*
-		 * The virtual engine implementation is tightly coupled to
-		 * the execlists backend -- we push out request directly
-		 * into a tree inside each physical engine. We could support
-		 * layering if we handle cloning of the requests and
-		 * submitting a copy into each backend.
-		 */
-		if (sibling->execlists.tasklet.func !=
-		    execlists_submission_tasklet) {
-			err = -ENODEV;
-			goto err_put;
-		}
-
-		GEM_BUG_ON(RB_EMPTY_NODE(&ve->nodes[sibling->id].rb));
-		RB_CLEAR_NODE(&ve->nodes[sibling->id].rb);
-
-		ve->siblings[ve->num_siblings++] = sibling;
-		ve->base.mask |= sibling->mask;
-
-		/*
-		 * All physical engines must be compatible for their emission
-		 * functions (as we build the instructions during request
-		 * construction and do not alter them before submission
-		 * on the physical engine). We use the engine class as a guide
-		 * here, although that could be refined.
-		 */
-		if (ve->base.class != OTHER_CLASS) {
-			if (ve->base.class != sibling->class) {
-				DRM_DEBUG("invalid mixing of engine class, sibling %d, already %d\n",
-					  sibling->class, ve->base.class);
-				err = -EINVAL;
-				goto err_put;
-			}
-			continue;
-		}
-
-		ve->base.class = sibling->class;
-		ve->base.uabi_class = sibling->uabi_class;
-		snprintf(ve->base.name, sizeof(ve->base.name),
-			 "v%dx%d", ve->base.class, count);
-		ve->base.context_size = sibling->context_size;
-
-		ve->base.emit_bb_start = sibling->emit_bb_start;
-		ve->base.emit_flush = sibling->emit_flush;
-		ve->base.emit_init_breadcrumb = sibling->emit_init_breadcrumb;
-		ve->base.emit_fini_breadcrumb = sibling->emit_fini_breadcrumb;
-		ve->base.emit_fini_breadcrumb_dw =
-			sibling->emit_fini_breadcrumb_dw;
-
-		ve->base.flags = sibling->flags;
-	}
-
-	ve->base.flags |= I915_ENGINE_IS_VIRTUAL;
-
-	err = lr_context_alloc(&ve->context, siblings[0]);
-	if (err)
-		goto err_put;
-
-	__set_bit(CONTEXT_ALLOC_BIT, &ve->context.flags);
-
-	return &ve->context;
-
-err_put:
-	intel_context_put(&ve->context);
-	return ERR_PTR(err);
-}
-
-struct intel_context *
-intel_execlists_clone_virtual(struct i915_gem_context *ctx,
-			      struct intel_engine_cs *src)
-{
-	struct intel_virtual_engine *se = to_virtual_engine(src);
-	struct intel_context *dst;
-
-	dst = intel_execlists_create_virtual(ctx,
-					     se->siblings,
-					     se->num_siblings);
-	if (IS_ERR(dst))
-		return dst;
-
-	if (se->num_bonds) {
-		struct intel_virtual_engine *de =
-			to_virtual_engine(dst->engine);
-
-		de->bonds = kmemdup(se->bonds,
-				    sizeof(*se->bonds) * se->num_bonds,
-				    GFP_KERNEL);
-		if (!de->bonds) {
-			intel_context_put(dst);
-			return ERR_PTR(-ENOMEM);
-		}
-
-		de->num_bonds = se->num_bonds;
-	}
-
-	return dst;
-}
-
-int intel_virtual_engine_attach_bond(struct intel_engine_cs *engine,
-				     const struct intel_engine_cs *master,
-				     const struct intel_engine_cs *sibling)
-{
-	struct intel_virtual_engine *ve = to_virtual_engine(engine);
-	struct ve_bond *bond;
-	int n;
-
-	/* Sanity check the sibling is part of the virtual engine */
-	for (n = 0; n < ve->num_siblings; n++)
-		if (sibling == ve->siblings[n])
-			break;
-	if (n == ve->num_siblings)
-		return -EINVAL;
-
-	bond = virtual_find_bond(ve, master);
-	if (bond) {
-		bond->sibling_mask |= sibling->mask;
-		return 0;
-	}
-
-	bond = krealloc(ve->bonds,
-			sizeof(*bond) * (ve->num_bonds + 1),
-			GFP_KERNEL);
-	if (!bond)
-		return -ENOMEM;
-
-	bond[ve->num_bonds].master = master;
-	bond[ve->num_bonds].sibling_mask = sibling->mask;
-
-	ve->bonds = bond;
-	ve->num_bonds++;
-
-	return 0;
-}
-
-struct intel_engine_cs *
-intel_virtual_engine_get_sibling(struct intel_engine_cs *engine,
-				 unsigned int sibling)
-{
-	struct intel_virtual_engine *ve = to_virtual_engine(engine);
-
-	if (sibling >= ve->num_siblings)
-		return NULL;
-
-	return ve->siblings[sibling];
 }
 
 void intel_execlists_show_requests(struct intel_engine_cs *engine,
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.h b/drivers/gpu/drm/i915/gt/intel_lrc.h
index 04511d8ebdc1..93f30b2deb7f 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.h
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.h
@@ -33,6 +33,7 @@  struct i915_gem_context;
 struct i915_request;
 struct intel_context;
 struct intel_engine_cs;
+struct intel_virtual_engine;
 
 /* Execlists regs */
 #define RING_ELSP(base)				_MMIO((base) + 0x230)
@@ -98,11 +99,22 @@  int intel_execlists_submission_init(struct intel_engine_cs *engine);
 
 void intel_execlists_set_default_submission(struct intel_engine_cs *engine);
 
+int intel_lr_context_alloc(struct intel_context *ce,
+			   struct intel_engine_cs *engine);
+void intel_lr_context_fini(struct intel_context *ce);
+
+u32 *intel_lr_context_set_register_offsets(u32 *regs,
+					   const struct intel_engine_cs *engine);
+
 void intel_lr_context_reset(struct intel_engine_cs *engine,
 			    struct intel_context *ce,
 			    u32 head,
 			    bool scrub);
 
+int intel_lr_context_pin(struct intel_context *ce,
+			 struct intel_engine_cs *engine);
+void intel_lr_context_unpin(struct intel_context *ce);
+
 void intel_execlists_show_requests(struct intel_engine_cs *engine,
 				   struct drm_printer *m,
 				   void (*show_request)(struct drm_printer *m,
@@ -110,22 +122,7 @@  void intel_execlists_show_requests(struct intel_engine_cs *engine,
 							const char *prefix),
 				   unsigned int max);
 
-struct intel_context *
-intel_execlists_create_virtual(struct i915_gem_context *ctx,
-			       struct intel_engine_cs **siblings,
-			       unsigned int count);
-
-struct intel_context *
-intel_execlists_clone_virtual(struct i915_gem_context *ctx,
-			      struct intel_engine_cs *src);
-
-int intel_virtual_engine_attach_bond(struct intel_engine_cs *engine,
-				     const struct intel_engine_cs *master,
-				     const struct intel_engine_cs *sibling);
-
-struct intel_engine_cs *
-intel_virtual_engine_get_sibling(struct intel_engine_cs *engine,
-				 unsigned int sibling);
+void intel_execlists_virtual_submission_init(struct intel_virtual_engine *ve);
 
 bool
 intel_engine_in_execlists_submission_mode(const struct intel_engine_cs *engine);
diff --git a/drivers/gpu/drm/i915/gt/intel_virtual_engine.c b/drivers/gpu/drm/i915/gt/intel_virtual_engine.c
new file mode 100644
index 000000000000..6ec3752132bc
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/intel_virtual_engine.c
@@ -0,0 +1,359 @@ 
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2019 Intel Corporation
+ */
+
+#include <drm/drm_print.h>
+#include <linux/slab.h>
+
+#include "gem/i915_gem_context.h"
+
+#include "i915_gem.h"
+#include "intel_context.h"
+#include "intel_engine.h"
+#include "intel_engine_pm.h"
+#include "intel_lrc.h"
+#include "intel_timeline.h"
+#include "intel_virtual_engine.h"
+
+static void virtual_context_destroy(struct kref *kref)
+{
+	struct intel_virtual_engine *ve =
+		container_of(kref, typeof(*ve), context.ref);
+	unsigned int n;
+
+	GEM_BUG_ON(!list_empty(intel_virtual_engine_queue(ve)));
+	GEM_BUG_ON(ve->request);
+	GEM_BUG_ON(ve->context.inflight);
+
+	for (n = 0; n < ve->num_siblings; n++) {
+		struct intel_engine_cs *sibling = ve->siblings[n];
+		struct rb_node *node = &ve->nodes[sibling->id].rb;
+		unsigned long flags;
+
+		if (RB_EMPTY_NODE(node))
+			continue;
+
+		spin_lock_irqsave(&sibling->active.lock, flags);
+
+		/* Detachment is lazily performed in the execlists tasklet */
+		if (!RB_EMPTY_NODE(node))
+			rb_erase_cached(node, &sibling->execlists.virtual);
+
+		spin_unlock_irqrestore(&sibling->active.lock, flags);
+	}
+	GEM_BUG_ON(__tasklet_is_scheduled(&ve->base.execlists.tasklet));
+
+	if (ve->context.state)
+		intel_lr_context_fini(&ve->context);
+	intel_context_fini(&ve->context);
+
+	kfree(ve->bonds);
+	kfree(ve);
+}
+
+static void virtual_engine_initial_hint(struct intel_virtual_engine *ve)
+{
+	int swp;
+
+	/*
+	 * Pick a random sibling on starting to help spread the load around.
+	 *
+	 * New contexts are typically created with exactly the same order
+	 * of siblings, and often started in batches. Due to the way we iterate
+	 * the array of siblings when submitting requests, sibling[0] is
+	 * prioritised for dequeuing. If we make sure that sibling[0] is fairly
+	 * randomised across the system, we also help spread the load by the
+	 * first engine we inspect being different each time.
+	 *
+	 * NB This does not force us to execute on this engine, it will just
+	 * typically be the first we inspect for submission.
+	 */
+	swp = prandom_u32_max(ve->num_siblings);
+	if (!swp)
+		return;
+
+	swap(ve->siblings[swp], ve->siblings[0]);
+	if (!intel_engine_has_relative_mmio(ve->siblings[0]))
+		intel_lr_context_set_register_offsets(ve->context.lrc_reg_state,
+						      ve->siblings[0]);
+}
+
+static int virtual_context_pin(struct intel_context *ce)
+{
+	struct intel_virtual_engine *ve =
+		container_of(ce, typeof(*ve), context);
+	int err;
+
+	/* Note: we must use a real engine class for setting up reg state */
+	err = intel_lr_context_pin(ce, ve->siblings[0]);
+	if (err)
+		return err;
+
+	virtual_engine_initial_hint(ve);
+	return 0;
+}
+
+static void virtual_context_enter(struct intel_context *ce)
+{
+	struct intel_virtual_engine *ve =
+		container_of(ce, typeof(*ve), context);
+	unsigned int n;
+
+	for (n = 0; n < ve->num_siblings; n++)
+		intel_engine_pm_get(ve->siblings[n]);
+
+	intel_timeline_enter(ce->timeline);
+}
+
+static void virtual_context_exit(struct intel_context *ce)
+{
+	struct intel_virtual_engine *ve =
+		container_of(ce, typeof(*ve), context);
+	unsigned int n;
+
+	intel_timeline_exit(ce->timeline);
+
+	for (n = 0; n < ve->num_siblings; n++)
+		intel_engine_pm_put(ve->siblings[n]);
+}
+
+static const struct intel_context_ops virtual_context_ops = {
+	.pin = virtual_context_pin,
+	.unpin = intel_lr_context_unpin,
+
+	.enter = virtual_context_enter,
+	.exit = virtual_context_exit,
+
+	.destroy = virtual_context_destroy,
+};
+
+struct intel_context *
+intel_virtual_engine_create(struct i915_gem_context *ctx,
+			    struct intel_engine_cs **siblings,
+			    unsigned int count)
+{
+	struct intel_virtual_engine *ve;
+	unsigned int n;
+	int err;
+
+	if (count == 0)
+		return ERR_PTR(-EINVAL);
+
+	if (count == 1)
+		return intel_context_create(ctx, siblings[0]);
+
+	ve = kzalloc(struct_size(ve, siblings, count), GFP_KERNEL);
+	if (!ve)
+		return ERR_PTR(-ENOMEM);
+
+	ve->base.i915 = ctx->i915;
+	ve->base.gt = siblings[0]->gt;
+	ve->base.uncore = siblings[0]->uncore;
+	ve->base.id = -1;
+	ve->base.class = OTHER_CLASS;
+	ve->base.uabi_class = I915_ENGINE_CLASS_INVALID;
+	ve->base.instance = I915_ENGINE_CLASS_INVALID_VIRTUAL;
+
+	/*
+	 * The decision on whether to submit a request using semaphores
+	 * depends on the saturated state of the engine. We only compute
+	 * this during HW submission of the request, and we need this
+	 * state to be globally applied to all requests being submitted
+	 * to this engine. Virtual engines encompass more than one physical
+	 * engine and so we cannot accurately tell in advance if one of those
+	 * engines is already saturated and so cannot afford to use a semaphore
+	 * and be pessimized in priority for doing so -- if we are the only
+	 * context using semaphores after all other clients have stopped, we
+	 * will be starved on the saturated system. Such a global switch for
+	 * semaphores is less than ideal, but alas is the current compromise.
+	 */
+	ve->base.saturated = ALL_ENGINES;
+
+	snprintf(ve->base.name, sizeof(ve->base.name), "virtual");
+
+	intel_engine_init_active(&ve->base, ENGINE_VIRTUAL);
+	intel_engine_init_breadcrumbs(&ve->base);
+
+	intel_engine_init_execlists(&ve->base);
+
+	ve->base.cops = &virtual_context_ops;
+
+	intel_execlists_virtual_submission_init(ve);
+
+	ve->base.schedule = i915_schedule;
+
+	INIT_LIST_HEAD(intel_virtual_engine_queue(ve));
+	ve->base.execlists.queue_priority_hint = INT_MIN;
+
+	intel_context_init(&ve->context, ctx, &ve->base);
+
+	for (n = 0; n < count; n++) {
+		struct intel_engine_cs *sibling = siblings[n];
+
+		GEM_BUG_ON(!is_power_of_2(sibling->mask));
+		if (sibling->mask & ve->base.mask) {
+			DRM_DEBUG("duplicate %s entry in load balancer\n",
+				  sibling->name);
+			err = -EINVAL;
+			goto err_put;
+		}
+
+		/*
+		 * The virtual engine implementation is tightly coupled to
+		 * the execlists backend -- we push out requests directly
+		 * into a tree inside each physical engine. We could support
+		 * layering if we handle cloning of the requests and
+		 * submitting a copy into each backend.
+		 */
+		if (!intel_engine_in_execlists_submission_mode(sibling)) {
+			err = -ENODEV;
+			goto err_put;
+		}
+
+		GEM_BUG_ON(RB_EMPTY_NODE(&ve->nodes[sibling->id].rb));
+		RB_CLEAR_NODE(&ve->nodes[sibling->id].rb);
+
+		ve->siblings[ve->num_siblings++] = sibling;
+		ve->base.mask |= sibling->mask;
+
+		/*
+		 * All physical engines must be compatible for their emission
+		 * functions (as we build the instructions during request
+		 * construction and do not alter them before submission
+		 * on the physical engine). We use the engine class as a guide
+		 * here, although that could be refined.
+		 */
+		if (ve->base.class != OTHER_CLASS) {
+			if (ve->base.class != sibling->class) {
+				DRM_DEBUG("invalid mixing of engine class, sibling %d, already %d\n",
+					  sibling->class, ve->base.class);
+				err = -EINVAL;
+				goto err_put;
+			}
+			continue;
+		}
+
+		ve->base.class = sibling->class;
+		ve->base.uabi_class = sibling->uabi_class;
+		snprintf(ve->base.name, sizeof(ve->base.name),
+			 "v%dx%d", ve->base.class, count);
+		ve->base.context_size = sibling->context_size;
+
+		ve->base.emit_bb_start = sibling->emit_bb_start;
+		ve->base.emit_flush = sibling->emit_flush;
+		ve->base.emit_init_breadcrumb = sibling->emit_init_breadcrumb;
+		ve->base.emit_fini_breadcrumb = sibling->emit_fini_breadcrumb;
+		ve->base.emit_fini_breadcrumb_dw =
+			sibling->emit_fini_breadcrumb_dw;
+
+		ve->base.flags = sibling->flags;
+	}
+
+	ve->base.flags |= I915_ENGINE_IS_VIRTUAL;
+
+	err = intel_lr_context_alloc(&ve->context, siblings[0]);
+	if (err)
+		goto err_put;
+
+	__set_bit(CONTEXT_ALLOC_BIT, &ve->context.flags);
+
+	return &ve->context;
+
+err_put:
+	intel_context_put(&ve->context);
+	return ERR_PTR(err);
+}
+
+struct intel_context *
+intel_virtual_engine_clone(struct i915_gem_context *ctx,
+			   struct intel_engine_cs *src)
+{
+	struct intel_virtual_engine *se = to_virtual_engine(src);
+	struct intel_context *dst;
+
+	dst = intel_virtual_engine_create(ctx, se->siblings, se->num_siblings);
+	if (IS_ERR(dst))
+		return dst;
+
+	if (se->num_bonds) {
+		struct intel_virtual_engine *de =
+			to_virtual_engine(dst->engine);
+
+		de->bonds = kmemdup(se->bonds,
+				    sizeof(*se->bonds) * se->num_bonds,
+				    GFP_KERNEL);
+		if (!de->bonds) {
+			intel_context_put(dst);
+			return ERR_PTR(-ENOMEM);
+		}
+
+		de->num_bonds = se->num_bonds;
+	}
+
+	return dst;
+}
+
+struct ve_bond *
+intel_virtual_engine_find_bond(struct intel_virtual_engine *ve,
+			       const struct intel_engine_cs *master)
+{
+	int i;
+
+	for (i = 0; i < ve->num_bonds; i++) {
+		if (ve->bonds[i].master == master)
+			return &ve->bonds[i];
+	}
+
+	return NULL;
+}
+
+int intel_virtual_engine_attach_bond(struct intel_engine_cs *engine,
+				     const struct intel_engine_cs *master,
+				     const struct intel_engine_cs *sibling)
+{
+	struct intel_virtual_engine *ve = to_virtual_engine(engine);
+	struct ve_bond *bond;
+	int n;
+
+	/* Sanity check the sibling is part of the virtual engine */
+	for (n = 0; n < ve->num_siblings; n++)
+		if (sibling == ve->siblings[n])
+			break;
+	if (n == ve->num_siblings)
+		return -EINVAL;
+
+	bond = intel_virtual_engine_find_bond(ve, master);
+	if (bond) {
+		bond->sibling_mask |= sibling->mask;
+		return 0;
+	}
+
+	bond = krealloc(ve->bonds,
+			sizeof(*bond) * (ve->num_bonds + 1),
+			GFP_KERNEL);
+	if (!bond)
+		return -ENOMEM;
+
+	bond[ve->num_bonds].master = master;
+	bond[ve->num_bonds].sibling_mask = sibling->mask;
+
+	ve->bonds = bond;
+	ve->num_bonds++;
+
+	return 0;
+}
+
+struct intel_engine_cs *
+intel_virtual_engine_get_sibling(struct intel_engine_cs *engine,
+				 unsigned int sibling)
+{
+	struct intel_virtual_engine *ve = to_virtual_engine(engine);
+
+	if (sibling >= ve->num_siblings)
+		return NULL;
+
+	return ve->siblings[sibling];
+}
+
diff --git a/drivers/gpu/drm/i915/gt/intel_virtual_engine.h b/drivers/gpu/drm/i915/gt/intel_virtual_engine.h
new file mode 100644
index 000000000000..acda89ab3f99
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/intel_virtual_engine.h
@@ -0,0 +1,48 @@ 
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2019 Intel Corporation
+ */
+
+#ifndef __INTEL_VIRTUAL_ENGINE__
+#define __INTEL_VIRTUAL_ENGINE__
+
+#include "i915_gem.h"
+#include "intel_virtual_engine_types.h"
+
+static inline struct intel_virtual_engine *
+to_virtual_engine(struct intel_engine_cs *engine)
+{
+	GEM_BUG_ON(!intel_engine_is_virtual(engine));
+	return container_of(engine, struct intel_virtual_engine, base);
+}
+
+static inline struct list_head *
+intel_virtual_engine_queue(struct intel_virtual_engine *ve)
+{
+	return &ve->base.execlists.default_priolist.requests[0];
+}
+
+struct intel_context *
+intel_virtual_engine_create(struct i915_gem_context *ctx,
+			    struct intel_engine_cs **siblings,
+			    unsigned int count);
+
+struct intel_context *
+intel_virtual_engine_clone(struct i915_gem_context *ctx,
+			   struct intel_engine_cs *src);
+
+
+struct ve_bond *
+intel_virtual_engine_find_bond(struct intel_virtual_engine *ve,
+			       const struct intel_engine_cs *master);
+
+int intel_virtual_engine_attach_bond(struct intel_engine_cs *engine,
+				     const struct intel_engine_cs *master,
+				     const struct intel_engine_cs *sibling);
+
+struct intel_engine_cs *
+intel_virtual_engine_get_sibling(struct intel_engine_cs *engine,
+				 unsigned int sibling);
+
+#endif /* __INTEL_VIRTUAL_ENGINE__ */
diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c
index b4537497c3be..570c7891c62f 100644
--- a/drivers/gpu/drm/i915/gt/selftest_lrc.c
+++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c
@@ -2635,8 +2635,7 @@  static int nop_virtual_engine(struct intel_gt *gt,
 			goto out;
 		}
 
-		ve[n] = intel_execlists_create_virtual(ctx[n],
-						       siblings, nsibling);
+		ve[n] = intel_virtual_engine_create(ctx[n], siblings, nsibling);
 		if (IS_ERR(ve[n])) {
 			kernel_context_close(ctx[n]);
 			err = PTR_ERR(ve[n]);
@@ -2816,7 +2815,7 @@  static int mask_virtual_engine(struct intel_gt *gt,
 	if (!ctx)
 		return -ENOMEM;
 
-	ve = intel_execlists_create_virtual(ctx, siblings, nsibling);
+	ve = intel_virtual_engine_create(ctx, siblings, nsibling);
 	if (IS_ERR(ve)) {
 		err = PTR_ERR(ve);
 		goto out_close;
@@ -2942,7 +2941,7 @@  static int preserved_virtual_engine(struct intel_gt *gt,
 		goto out_close;
 	}
 
-	ve = intel_execlists_create_virtual(ctx, siblings, nsibling);
+	ve = intel_virtual_engine_create(ctx, siblings, nsibling);
 	if (IS_ERR(ve)) {
 		err = PTR_ERR(ve);
 		goto out_scratch;
@@ -3172,9 +3171,9 @@  static int bond_virtual_engine(struct intel_gt *gt,
 		for (n = 0; n < nsibling; n++) {
 			struct intel_context *ve;
 
-			ve = intel_execlists_create_virtual(ctx,
-							    siblings,
-							    nsibling);
+			ve = intel_virtual_engine_create(ctx,
+							 siblings,
+							 nsibling);
 			if (IS_ERR(ve)) {
 				err = PTR_ERR(ve);
 				onstack_fence_fini(&fence);