[RFC,3/5] drm/i915: split out virtual engine code

Message ID 20191211211244.7831-4-daniele.ceraolospurio@intel.com (mailing list archive)
State New, archived
Series Split up intel_lrc.c

Commit Message

Daniele Ceraolo Spurio Dec. 11, 2019, 9:12 p.m. UTC
Having the virtual engine handling in its own file will make it easier
to call it from, or modify it for, the GuC implementation without
leaking the changes into the context management or execlists submission
paths.
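
After the split the layering is roughly:

  intel_virtual_engine.c - veng creation/cloning, bonding and the
                           virtual context ops
  intel_lrc.c            - execlists submission, plus the LR context
                           helpers (intel_lr_context_{alloc,pin,unpin,fini})
                           that it now exports for the veng code to reuse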

Signed-off-by: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
Cc: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
Cc: Matthew Brost <matthew.brost@intel.com>
---
 drivers/gpu/drm/i915/Makefile                 |   1 +
 drivers/gpu/drm/i915/gem/i915_gem_context.c   |   5 +-
 drivers/gpu/drm/i915/gt/intel_engine_pool.c   |   1 +
 drivers/gpu/drm/i915/gt/intel_lrc.c           | 403 ++----------------
 drivers/gpu/drm/i915/gt/intel_lrc.h           |  29 +-
 .../gpu/drm/i915/gt/intel_virtual_engine.c    | 359 ++++++++++++++++
 .../gpu/drm/i915/gt/intel_virtual_engine.h    |  48 +++
 drivers/gpu/drm/i915/gt/selftest_lrc.c        |  13 +-
 8 files changed, 457 insertions(+), 402 deletions(-)
 create mode 100644 drivers/gpu/drm/i915/gt/intel_virtual_engine.c
 create mode 100644 drivers/gpu/drm/i915/gt/intel_virtual_engine.h

Comments

Chris Wilson Dec. 11, 2019, 9:22 p.m. UTC | #1
Quoting Daniele Ceraolo Spurio (2019-12-11 21:12:42)
> Having the virtual engine handling in its own file will make it easier
> to call it from, or modify it for, the GuC implementation without
> leaking the changes into the context management or execlists
> submission paths.

No. The virtual engine is tightly coupled into the execlists; it is not
the starting point for a general veng.
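
The coupling is enforced at creation time: every sibling is rejected
unless it is driven by the execlists submission tasklet (the check this
series rewraps as intel_engine_in_execlists_submission_mode()):

	if (sibling->execlists.tasklet.func !=
	    execlists_submission_tasklet) {
		err = -ENODEV;
		goto err_put;
	}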
-Chris
Daniele Ceraolo Spurio Dec. 11, 2019, 9:34 p.m. UTC | #2
On 12/11/19 1:22 PM, Chris Wilson wrote:
> Quoting Daniele Ceraolo Spurio (2019-12-11 21:12:42)
>> Having the virtual engine handling in its own file will make it easier
>> to call it from, or modify it for, the GuC implementation without
>> leaking the changes into the context management or execlists
>> submission paths.
> 
> No. The virtual engine is tightly coupled into the execlists; it is not
> the starting point for a general veng.
> -Chris
> 

What's the issue from your POV? We've been using it with minimal changes 
for GuC submission and IMO it flows relatively well, mainly just using a 
different tasklet and slightly different cops (we need to call into the 
GuC for pin/unpin).

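As a rough sketch (the guc_* names below are illustrative only, not
code from an actual patch), the GuC flavour keeps the common helpers
and only wraps pin/unpin:

	static int guc_virtual_context_pin(struct intel_context *ce)
	{
		struct intel_virtual_engine *ve =
			container_of(ce, typeof(*ve), context);
		int err;

		/* same LR context setup as virtual_context_pin() */
		err = intel_lr_context_pin(ce, ve->siblings[0]);
		if (err)
			return err;

		/* hypothetical: register the context with the GuC */
		return guc_virtual_context_register(ce);
	}

	static const struct intel_context_ops guc_virtual_context_ops = {
		.pin = guc_virtual_context_pin,
		.unpin = guc_virtual_context_unpin, /* unpin + GuC unregister */
		.enter = virtual_context_enter,
		.exit = virtual_context_exit,
		.destroy = virtual_context_destroy,
	};
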
Daniele
Matthew Brost Dec. 11, 2019, 11:09 p.m. UTC | #3
On Wed, Dec 11, 2019 at 01:34:20PM -0800, Daniele Ceraolo Spurio wrote:
>
>
>On 12/11/19 1:22 PM, Chris Wilson wrote:
>>Quoting Daniele Ceraolo Spurio (2019-12-11 21:12:42)
>>>Having the virtual engine handling in its own file will make it easier
>>>to call it from, or modify it for, the GuC implementation without
>>>leaking the changes into the context management or execlists
>>>submission paths.
>>
>>No. The virtual engine is tightly coupled into the execlists; it is not
>>the starting point for a general veng.
>>-Chris
>>
>
>What's the issue from your POV? We've been using it with minimal 
>changes for GuC submission and IMO it flows relatively well, mainly 
>just using a different tasklet and slightly different cops (we need 
>to call into the GuC for pin/unpin).
>
>Daniele

I agree with Daniele's approach here. The new GuC code can reuse
intel_virtual_engine_create() with a couple of GuC-specific branches in
the function. The new GuC code also reuses virtual_context_enter /
virtual_context_exit in the virtual GuC context operations. To me it
makes more sense to have this virtual engine code in its own file than
to pollute an execlists-specific file with references to the GuC.

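Concretely, the branch in intel_virtual_engine_create() could be as
small as this (intel_engine_uses_guc() and the guc_* symbols are
illustrative names, not code from this series):

	if (intel_engine_uses_guc(siblings[0])) {
		ve->base.cops = &guc_virtual_context_ops;
		intel_guc_virtual_submission_init(ve);
	} else {
		ve->base.cops = &virtual_context_ops;
		intel_execlists_virtual_submission_init(ve);
	}

Everything else in the creation path stays common.
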
Matt

Patch

diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index e0fd10c0cfb8..79f5ef5acd4c 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -98,6 +98,7 @@  gt-y += \
 	gt/intel_rps.o \
 	gt/intel_sseu.o \
 	gt/intel_timeline.o \
+	gt/intel_virtual_engine.o \
 	gt/intel_workarounds.o
 # autogenerated null render state
 gt-y += \
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c
index 46b4d1d643f8..6461370223b8 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
@@ -74,6 +74,7 @@ 
 #include "gt/intel_engine_user.h"
 #include "gt/intel_lrc_reg.h"
 #include "gt/intel_ring.h"
+#include "gt/intel_virtual_engine.h"
 
 #include "i915_gem_context.h"
 #include "i915_globals.h"
@@ -1536,7 +1537,7 @@  set_engines__load_balance(struct i915_user_extension __user *base, void *data)
 		}
 	}
 
-	ce = intel_execlists_create_virtual(set->ctx, siblings, n);
+	ce = intel_virtual_engine_create(set->ctx, siblings, n);
 	if (IS_ERR(ce)) {
 		err = PTR_ERR(ce);
 		goto out_siblings;
@@ -1999,7 +2000,7 @@  static int clone_engines(struct i915_gem_context *dst,
 		 */
 		if (intel_engine_is_virtual(engine))
 			clone->engines[n] =
-				intel_execlists_clone_virtual(dst, engine);
+				intel_virtual_engine_clone(dst, engine);
 		else
 			clone->engines[n] = intel_context_create(dst, engine);
 		if (IS_ERR_OR_NULL(clone->engines[n])) {
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pool.c b/drivers/gpu/drm/i915/gt/intel_engine_pool.c
index 397186818305..33ab0e5bfa41 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_pool.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_pool.c
@@ -9,6 +9,7 @@ 
 #include "i915_drv.h"
 #include "intel_engine_pm.h"
 #include "intel_engine_pool.h"
+#include "intel_virtual_engine.h"
 
 static struct intel_engine_cs *to_engine(struct intel_engine_pool *pool)
 {
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index e6dea2d3a5c0..3afae9a44911 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -147,7 +147,7 @@ 
 #include "intel_mocs.h"
 #include "intel_reset.h"
 #include "intel_ring.h"
-#include "intel_virtual_engine_types.h"
+#include "intel_virtual_engine.h"
 #include "intel_workarounds.h"
 
 #define RING_EXECLIST_QFULL		(1 << 0x2)
@@ -181,16 +181,6 @@ 
 #define WA_TAIL_DWORDS 2
 #define WA_TAIL_BYTES (sizeof(u32) * WA_TAIL_DWORDS)
 
-static struct intel_virtual_engine *
-to_virtual_engine(struct intel_engine_cs *engine)
-{
-	GEM_BUG_ON(!intel_engine_is_virtual(engine));
-	return container_of(engine, struct intel_virtual_engine, base);
-}
-
-static int lr_context_alloc(struct intel_context *ce,
-			    struct intel_engine_cs *engine);
-
 static void lr_context_init_reg_state(u32 *reg_state,
 				      const struct intel_context *ce,
 				      const struct intel_engine_cs *engine,
@@ -805,6 +795,12 @@  static const u8 *reg_offsets(const struct intel_engine_cs *engine)
 	}
 }
 
+u32 *intel_lr_context_set_register_offsets(u32 *regs,
+					   const struct intel_engine_cs *engine)
+{
+	return set_offsets(regs, reg_offsets(engine), engine);
+}
+
 static struct i915_request *
 __unwind_incomplete_requests(struct intel_engine_cs *engine)
 {
@@ -1403,12 +1399,6 @@  static bool can_merge_rq(const struct i915_request *prev,
 	return true;
 }
 
-static void virtual_update_register_offsets(u32 *regs,
-					    struct intel_engine_cs *engine)
-{
-	set_offsets(regs, reg_offsets(engine), engine);
-}
-
 static bool virtual_matches(const struct intel_virtual_engine *ve,
 			    const struct i915_request *rq,
 			    const struct intel_engine_cs *engine)
@@ -1802,8 +1792,8 @@  static void execlists_dequeue(struct intel_engine_cs *engine)
 				GEM_BUG_ON(READ_ONCE(ve->context.inflight));
 
 				if (!intel_engine_has_relative_mmio(engine))
-					virtual_update_register_offsets(regs,
-									engine);
+					intel_lr_context_set_register_offsets(regs,
+									      engine);
 
 				if (!list_empty(&ve->context.signals))
 					virtual_xfer_breadcrumbs(ve, engine);
@@ -2339,12 +2329,6 @@  static void execlists_submit_request(struct i915_request *request)
 	spin_unlock_irqrestore(&engine->active.lock, flags);
 }
 
-static void lr_context_fini(struct intel_context *ce)
-{
-	intel_ring_put(ce->ring);
-	i915_vma_put(ce->state);
-}
-
 static void execlists_context_destroy(struct kref *kref)
 {
 	struct intel_context *ce = container_of(kref, typeof(*ce), ref);
@@ -2353,7 +2337,7 @@  static void execlists_context_destroy(struct kref *kref)
 	GEM_BUG_ON(intel_context_is_pinned(ce));
 
 	if (ce->state)
-		lr_context_fini(ce);
+		intel_lr_context_fini(ce);
 
 	intel_context_fini(ce);
 	intel_context_free(ce);
@@ -2384,7 +2368,7 @@  check_redzone(const void *vaddr, const struct intel_engine_cs *engine)
 			     engine->name);
 }
 
-static void intel_lr_context_unpin(struct intel_context *ce)
+void intel_lr_context_unpin(struct intel_context *ce)
 {
 	check_redzone((void *)ce->lrc_reg_state - LRC_STATE_PN * PAGE_SIZE,
 		      ce->engine);
@@ -2416,8 +2400,9 @@  lr_context_update_reg_state(const struct intel_context *ce,
 	}
 }
 
-static int
-lr_context_pin(struct intel_context *ce, struct intel_engine_cs *engine)
+int
+intel_lr_context_pin(struct intel_context *ce,
+		     struct intel_engine_cs *engine)
 {
 	void *vaddr;
 	int ret;
@@ -2451,12 +2436,12 @@  lr_context_pin(struct intel_context *ce, struct intel_engine_cs *engine)
 
 static int execlists_context_pin(struct intel_context *ce)
 {
-	return lr_context_pin(ce, ce->engine);
+	return intel_lr_context_pin(ce, ce->engine);
 }
 
 static int execlists_context_alloc(struct intel_context *ce)
 {
-	return lr_context_alloc(ce, ce->engine);
+	return intel_lr_context_alloc(ce, ce->engine);
 }
 
 static void execlists_context_reset(struct intel_context *ce)
@@ -4030,7 +4015,7 @@  static void lr_context_init_reg_state(u32 *regs,
 	 *
 	 * Must keep consistent with virtual_update_register_offsets().
 	 */
-	u32 *bbe = set_offsets(regs, reg_offsets(engine), engine);
+	u32 *bbe = intel_lr_context_set_register_offsets(regs, engine);
 
 	if (close) { /* Close the batch; used mainly by live_lrc_layout() */
 		*bbe = MI_BATCH_BUFFER_END;
@@ -4098,8 +4083,8 @@  populate_lr_context(struct intel_context *ce,
 	return ret;
 }
 
-static int lr_context_alloc(struct intel_context *ce,
-			    struct intel_engine_cs *engine)
+int intel_lr_context_alloc(struct intel_context *ce,
+			   struct intel_engine_cs *engine)
 {
 	struct drm_i915_gem_object *ctx_obj;
 	struct intel_ring *ring;
@@ -4159,123 +4144,12 @@  static int lr_context_alloc(struct intel_context *ce,
 	return ret;
 }
 
-static struct list_head *virtual_queue(struct intel_virtual_engine *ve)
+void intel_lr_context_fini(struct intel_context *ce)
 {
-	return &ve->base.execlists.default_priolist.requests[0];
-}
-
-static void virtual_context_destroy(struct kref *kref)
-{
-	struct intel_virtual_engine *ve =
-		container_of(kref, typeof(*ve), context.ref);
-	unsigned int n;
-
-	GEM_BUG_ON(!list_empty(virtual_queue(ve)));
-	GEM_BUG_ON(ve->request);
-	GEM_BUG_ON(ve->context.inflight);
-
-	for (n = 0; n < ve->num_siblings; n++) {
-		struct intel_engine_cs *sibling = ve->siblings[n];
-		struct rb_node *node = &ve->nodes[sibling->id].rb;
-		unsigned long flags;
-
-		if (RB_EMPTY_NODE(node))
-			continue;
-
-		spin_lock_irqsave(&sibling->active.lock, flags);
-
-		/* Detachment is lazily performed in the execlists tasklet */
-		if (!RB_EMPTY_NODE(node))
-			rb_erase_cached(node, &sibling->execlists.virtual);
-
-		spin_unlock_irqrestore(&sibling->active.lock, flags);
-	}
-	GEM_BUG_ON(__tasklet_is_scheduled(&ve->base.execlists.tasklet));
-
-	if (ve->context.state)
-		lr_context_fini(&ve->context);
-	intel_context_fini(&ve->context);
-
-	kfree(ve->bonds);
-	kfree(ve);
-}
-
-static void virtual_engine_initial_hint(struct intel_virtual_engine *ve)
-{
-	int swp;
-
-	/*
-	 * Pick a random sibling on starting to help spread the load around.
-	 *
-	 * New contexts are typically created with exactly the same order
-	 * of siblings, and often started in batches. Due to the way we iterate
-	 * the array of sibling when submitting requests, sibling[0] is
-	 * prioritised for dequeuing. If we make sure that sibling[0] is fairly
-	 * randomised across the system, we also help spread the load by the
-	 * first engine we inspect being different each time.
-	 *
-	 * NB This does not force us to execute on this engine, it will just
-	 * typically be the first we inspect for submission.
-	 */
-	swp = prandom_u32_max(ve->num_siblings);
-	if (!swp)
-		return;
-
-	swap(ve->siblings[swp], ve->siblings[0]);
-	if (!intel_engine_has_relative_mmio(ve->siblings[0]))
-		virtual_update_register_offsets(ve->context.lrc_reg_state,
-						ve->siblings[0]);
-}
-
-static int virtual_context_pin(struct intel_context *ce)
-{
-	struct intel_virtual_engine *ve =
-		container_of(ce, typeof(*ve), context);
-	int err;
-
-	/* Note: we must use a real engine class for setting up reg state */
-	err = lr_context_pin(ce, ve->siblings[0]);
-	if (err)
-		return err;
-
-	virtual_engine_initial_hint(ve);
-	return 0;
-}
-
-static void virtual_context_enter(struct intel_context *ce)
-{
-	struct intel_virtual_engine *ve =
-		container_of(ce, typeof(*ve), context);
-	unsigned int n;
-
-	for (n = 0; n < ve->num_siblings; n++)
-		intel_engine_pm_get(ve->siblings[n]);
-
-	intel_timeline_enter(ce->timeline);
-}
-
-static void virtual_context_exit(struct intel_context *ce)
-{
-	struct intel_virtual_engine *ve =
-		container_of(ce, typeof(*ve), context);
-	unsigned int n;
-
-	intel_timeline_exit(ce->timeline);
-
-	for (n = 0; n < ve->num_siblings; n++)
-		intel_engine_pm_put(ve->siblings[n]);
+	intel_ring_put(ce->ring);
+	i915_vma_put(ce->state);
 }
 
-static const struct intel_context_ops virtual_context_ops = {
-	.pin = virtual_context_pin,
-	.unpin = intel_lr_context_unpin,
-
-	.enter = virtual_context_enter,
-	.exit = virtual_context_exit,
-
-	.destroy = virtual_context_destroy,
-};
-
 static intel_engine_mask_t
 virtual_submission_mask(struct intel_virtual_engine *ve)
 {
@@ -4414,8 +4288,8 @@  static void virtual_submit_request(struct i915_request *rq)
 		ve->base.execlists.queue_priority_hint = rq_prio(rq);
 		ve->request = i915_request_get(rq);
 
-		GEM_BUG_ON(!list_empty(virtual_queue(ve)));
-		list_move_tail(&rq->sched.link, virtual_queue(ve));
+		GEM_BUG_ON(!list_empty(intel_virtual_engine_queue(ve)));
+		list_move_tail(&rq->sched.link, intel_virtual_engine_queue(ve));
 
 		tasklet_schedule(&ve->base.execlists.tasklet);
 	}
@@ -4423,20 +4297,6 @@  static void virtual_submit_request(struct i915_request *rq)
 	spin_unlock_irqrestore(&ve->base.active.lock, flags);
 }
 
-static struct ve_bond *
-virtual_find_bond(struct intel_virtual_engine *ve,
-		  const struct intel_engine_cs *master)
-{
-	int i;
-
-	for (i = 0; i < ve->num_bonds; i++) {
-		if (ve->bonds[i].master == master)
-			return &ve->bonds[i];
-	}
-
-	return NULL;
-}
-
 static void
 virtual_bond_execute(struct i915_request *rq, struct dma_fence *signal)
 {
@@ -4446,7 +4306,7 @@  virtual_bond_execute(struct i915_request *rq, struct dma_fence *signal)
 
 	allowed = ~to_request(signal)->engine->mask;
 
-	bond = virtual_find_bond(ve, to_request(signal)->engine);
+	bond = intel_virtual_engine_find_bond(ve, to_request(signal)->engine);
 	if (bond)
 		allowed &= bond->sibling_mask;
 
@@ -4459,225 +4319,14 @@  virtual_bond_execute(struct i915_request *rq, struct dma_fence *signal)
 	to_request(signal)->execution_mask &= ~allowed;
 }
 
-struct intel_context *
-intel_execlists_create_virtual(struct i915_gem_context *ctx,
-			       struct intel_engine_cs **siblings,
-			       unsigned int count)
+void intel_execlists_virtual_submission_init(struct intel_virtual_engine *ve)
 {
-	struct intel_virtual_engine *ve;
-	unsigned int n;
-	int err;
-
-	if (count == 0)
-		return ERR_PTR(-EINVAL);
-
-	if (count == 1)
-		return intel_context_create(ctx, siblings[0]);
-
-	ve = kzalloc(struct_size(ve, siblings, count), GFP_KERNEL);
-	if (!ve)
-		return ERR_PTR(-ENOMEM);
-
-	ve->base.i915 = ctx->i915;
-	ve->base.gt = siblings[0]->gt;
-	ve->base.uncore = siblings[0]->uncore;
-	ve->base.id = -1;
-	ve->base.class = OTHER_CLASS;
-	ve->base.uabi_class = I915_ENGINE_CLASS_INVALID;
-	ve->base.instance = I915_ENGINE_CLASS_INVALID_VIRTUAL;
-
-	/*
-	 * The decision on whether to submit a request using semaphores
-	 * depends on the saturated state of the engine. We only compute
-	 * this during HW submission of the request, and we need for this
-	 * state to be globally applied to all requests being submitted
-	 * to this engine. Virtual engines encompass more than one physical
-	 * engine and so we cannot accurately tell in advance if one of those
-	 * engines is already saturated and so cannot afford to use a semaphore
-	 * and be pessimized in priority for doing so -- if we are the only
-	 * context using semaphores after all other clients have stopped, we
-	 * will be starved on the saturated system. Such a global switch for
-	 * semaphores is less than ideal, but alas is the current compromise.
-	 */
-	ve->base.saturated = ALL_ENGINES;
-
-	snprintf(ve->base.name, sizeof(ve->base.name), "virtual");
-
-	intel_engine_init_active(&ve->base, ENGINE_VIRTUAL);
-	intel_engine_init_breadcrumbs(&ve->base);
-
-	intel_engine_init_execlists(&ve->base);
-
-	ve->base.cops = &virtual_context_ops;
 	ve->base.request_alloc = execlists_request_alloc;
-
-	ve->base.schedule = i915_schedule;
 	ve->base.submit_request = virtual_submit_request;
 	ve->base.bond_execute = virtual_bond_execute;
-
-	INIT_LIST_HEAD(virtual_queue(ve));
-	ve->base.execlists.queue_priority_hint = INT_MIN;
 	tasklet_init(&ve->base.execlists.tasklet,
 		     virtual_submission_tasklet,
 		     (unsigned long)ve);
-
-	intel_context_init(&ve->context, ctx, &ve->base);
-
-	for (n = 0; n < count; n++) {
-		struct intel_engine_cs *sibling = siblings[n];
-
-		GEM_BUG_ON(!is_power_of_2(sibling->mask));
-		if (sibling->mask & ve->base.mask) {
-			DRM_DEBUG("duplicate %s entry in load balancer\n",
-				  sibling->name);
-			err = -EINVAL;
-			goto err_put;
-		}
-
-		/*
-		 * The virtual engine implementation is tightly coupled to
-		 * the execlists backend -- we push out request directly
-		 * into a tree inside each physical engine. We could support
-		 * layering if we handle cloning of the requests and
-		 * submitting a copy into each backend.
-		 */
-		if (sibling->execlists.tasklet.func !=
-		    execlists_submission_tasklet) {
-			err = -ENODEV;
-			goto err_put;
-		}
-
-		GEM_BUG_ON(RB_EMPTY_NODE(&ve->nodes[sibling->id].rb));
-		RB_CLEAR_NODE(&ve->nodes[sibling->id].rb);
-
-		ve->siblings[ve->num_siblings++] = sibling;
-		ve->base.mask |= sibling->mask;
-
-		/*
-		 * All physical engines must be compatible for their emission
-		 * functions (as we build the instructions during request
-		 * construction and do not alter them before submission
-		 * on the physical engine). We use the engine class as a guide
-		 * here, although that could be refined.
-		 */
-		if (ve->base.class != OTHER_CLASS) {
-			if (ve->base.class != sibling->class) {
-				DRM_DEBUG("invalid mixing of engine class, sibling %d, already %d\n",
-					  sibling->class, ve->base.class);
-				err = -EINVAL;
-				goto err_put;
-			}
-			continue;
-		}
-
-		ve->base.class = sibling->class;
-		ve->base.uabi_class = sibling->uabi_class;
-		snprintf(ve->base.name, sizeof(ve->base.name),
-			 "v%dx%d", ve->base.class, count);
-		ve->base.context_size = sibling->context_size;
-
-		ve->base.emit_bb_start = sibling->emit_bb_start;
-		ve->base.emit_flush = sibling->emit_flush;
-		ve->base.emit_init_breadcrumb = sibling->emit_init_breadcrumb;
-		ve->base.emit_fini_breadcrumb = sibling->emit_fini_breadcrumb;
-		ve->base.emit_fini_breadcrumb_dw =
-			sibling->emit_fini_breadcrumb_dw;
-
-		ve->base.flags = sibling->flags;
-	}
-
-	ve->base.flags |= I915_ENGINE_IS_VIRTUAL;
-
-	err = lr_context_alloc(&ve->context, siblings[0]);
-	if (err)
-		goto err_put;
-
-	__set_bit(CONTEXT_ALLOC_BIT, &ve->context.flags);
-
-	return &ve->context;
-
-err_put:
-	intel_context_put(&ve->context);
-	return ERR_PTR(err);
-}
-
-struct intel_context *
-intel_execlists_clone_virtual(struct i915_gem_context *ctx,
-			      struct intel_engine_cs *src)
-{
-	struct intel_virtual_engine *se = to_virtual_engine(src);
-	struct intel_context *dst;
-
-	dst = intel_execlists_create_virtual(ctx,
-					     se->siblings,
-					     se->num_siblings);
-	if (IS_ERR(dst))
-		return dst;
-
-	if (se->num_bonds) {
-		struct intel_virtual_engine *de =
-			to_virtual_engine(dst->engine);
-
-		de->bonds = kmemdup(se->bonds,
-				    sizeof(*se->bonds) * se->num_bonds,
-				    GFP_KERNEL);
-		if (!de->bonds) {
-			intel_context_put(dst);
-			return ERR_PTR(-ENOMEM);
-		}
-
-		de->num_bonds = se->num_bonds;
-	}
-
-	return dst;
-}
-
-int intel_virtual_engine_attach_bond(struct intel_engine_cs *engine,
-				     const struct intel_engine_cs *master,
-				     const struct intel_engine_cs *sibling)
-{
-	struct intel_virtual_engine *ve = to_virtual_engine(engine);
-	struct ve_bond *bond;
-	int n;
-
-	/* Sanity check the sibling is part of the virtual engine */
-	for (n = 0; n < ve->num_siblings; n++)
-		if (sibling == ve->siblings[n])
-			break;
-	if (n == ve->num_siblings)
-		return -EINVAL;
-
-	bond = virtual_find_bond(ve, master);
-	if (bond) {
-		bond->sibling_mask |= sibling->mask;
-		return 0;
-	}
-
-	bond = krealloc(ve->bonds,
-			sizeof(*bond) * (ve->num_bonds + 1),
-			GFP_KERNEL);
-	if (!bond)
-		return -ENOMEM;
-
-	bond[ve->num_bonds].master = master;
-	bond[ve->num_bonds].sibling_mask = sibling->mask;
-
-	ve->bonds = bond;
-	ve->num_bonds++;
-
-	return 0;
-}
-
-struct intel_engine_cs *
-intel_virtual_engine_get_sibling(struct intel_engine_cs *engine,
-				 unsigned int sibling)
-{
-	struct intel_virtual_engine *ve = to_virtual_engine(engine);
-
-	if (sibling >= ve->num_siblings)
-		return NULL;
-
-	return ve->siblings[sibling];
 }
 
 void intel_execlists_show_requests(struct intel_engine_cs *engine,
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.h b/drivers/gpu/drm/i915/gt/intel_lrc.h
index 04511d8ebdc1..93f30b2deb7f 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.h
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.h
@@ -33,6 +33,7 @@  struct i915_gem_context;
 struct i915_request;
 struct intel_context;
 struct intel_engine_cs;
+struct intel_virtual_engine;
 
 /* Execlists regs */
 #define RING_ELSP(base)				_MMIO((base) + 0x230)
@@ -98,11 +99,22 @@  int intel_execlists_submission_init(struct intel_engine_cs *engine);
 
 void intel_execlists_set_default_submission(struct intel_engine_cs *engine);
 
+int intel_lr_context_alloc(struct intel_context *ce,
+			   struct intel_engine_cs *engine);
+void intel_lr_context_fini(struct intel_context *ce);
+
+u32 *intel_lr_context_set_register_offsets(u32 *regs,
+					   const struct intel_engine_cs *engine);
+
 void intel_lr_context_reset(struct intel_engine_cs *engine,
 			    struct intel_context *ce,
 			    u32 head,
 			    bool scrub);
 
+int intel_lr_context_pin(struct intel_context *ce,
+			 struct intel_engine_cs *engine);
+void intel_lr_context_unpin(struct intel_context *ce);
+
 void intel_execlists_show_requests(struct intel_engine_cs *engine,
 				   struct drm_printer *m,
 				   void (*show_request)(struct drm_printer *m,
@@ -110,22 +122,7 @@  void intel_execlists_show_requests(struct intel_engine_cs *engine,
 							const char *prefix),
 				   unsigned int max);
 
-struct intel_context *
-intel_execlists_create_virtual(struct i915_gem_context *ctx,
-			       struct intel_engine_cs **siblings,
-			       unsigned int count);
-
-struct intel_context *
-intel_execlists_clone_virtual(struct i915_gem_context *ctx,
-			      struct intel_engine_cs *src);
-
-int intel_virtual_engine_attach_bond(struct intel_engine_cs *engine,
-				     const struct intel_engine_cs *master,
-				     const struct intel_engine_cs *sibling);
-
-struct intel_engine_cs *
-intel_virtual_engine_get_sibling(struct intel_engine_cs *engine,
-				 unsigned int sibling);
+void intel_execlists_virtual_submission_init(struct intel_virtual_engine *ve);
 
 bool
 intel_engine_in_execlists_submission_mode(const struct intel_engine_cs *engine);
diff --git a/drivers/gpu/drm/i915/gt/intel_virtual_engine.c b/drivers/gpu/drm/i915/gt/intel_virtual_engine.c
new file mode 100644
index 000000000000..6ec3752132bc
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/intel_virtual_engine.c
@@ -0,0 +1,359 @@ 
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2019 Intel Corporation
+ */
+
+#include <drm/drm_print.h>
+#include <linux/slab.h>
+
+#include "gem/i915_gem_context.h"
+
+#include "i915_gem.h"
+#include "intel_context.h"
+#include "intel_engine.h"
+#include "intel_engine_pm.h"
+#include "intel_lrc.h"
+#include "intel_timeline.h"
+#include "intel_virtual_engine.h"
+
+static void virtual_context_destroy(struct kref *kref)
+{
+	struct intel_virtual_engine *ve =
+		container_of(kref, typeof(*ve), context.ref);
+	unsigned int n;
+
+	GEM_BUG_ON(!list_empty(intel_virtual_engine_queue(ve)));
+	GEM_BUG_ON(ve->request);
+	GEM_BUG_ON(ve->context.inflight);
+
+	for (n = 0; n < ve->num_siblings; n++) {
+		struct intel_engine_cs *sibling = ve->siblings[n];
+		struct rb_node *node = &ve->nodes[sibling->id].rb;
+		unsigned long flags;
+
+		if (RB_EMPTY_NODE(node))
+			continue;
+
+		spin_lock_irqsave(&sibling->active.lock, flags);
+
+		/* Detachment is lazily performed in the execlists tasklet */
+		if (!RB_EMPTY_NODE(node))
+			rb_erase_cached(node, &sibling->execlists.virtual);
+
+		spin_unlock_irqrestore(&sibling->active.lock, flags);
+	}
+	GEM_BUG_ON(__tasklet_is_scheduled(&ve->base.execlists.tasklet));
+
+	if (ve->context.state)
+		intel_lr_context_fini(&ve->context);
+	intel_context_fini(&ve->context);
+
+	kfree(ve->bonds);
+	kfree(ve);
+}
+
+static void virtual_engine_initial_hint(struct intel_virtual_engine *ve)
+{
+	int swp;
+
+	/*
+	 * Pick a random sibling on starting to help spread the load around.
+	 *
+	 * New contexts are typically created with exactly the same order
+	 * of siblings, and often started in batches. Due to the way we iterate
+	 * the array of siblings when submitting requests, sibling[0] is
+	 * prioritised for dequeuing. If we make sure that sibling[0] is fairly
+	 * randomised across the system, we also help spread the load by the
+	 * first engine we inspect being different each time.
+	 *
+	 * NB This does not force us to execute on this engine, it will just
+	 * typically be the first we inspect for submission.
+	 */
+	swp = prandom_u32_max(ve->num_siblings);
+	if (!swp)
+		return;
+
+	swap(ve->siblings[swp], ve->siblings[0]);
+	if (!intel_engine_has_relative_mmio(ve->siblings[0]))
+		intel_lr_context_set_register_offsets(ve->context.lrc_reg_state,
+						      ve->siblings[0]);
+}
+
+static int virtual_context_pin(struct intel_context *ce)
+{
+	struct intel_virtual_engine *ve =
+		container_of(ce, typeof(*ve), context);
+	int err;
+
+	/* Note: we must use a real engine class for setting up reg state */
+	err = intel_lr_context_pin(ce, ve->siblings[0]);
+	if (err)
+		return err;
+
+	virtual_engine_initial_hint(ve);
+	return 0;
+}
+
+static void virtual_context_enter(struct intel_context *ce)
+{
+	struct intel_virtual_engine *ve =
+		container_of(ce, typeof(*ve), context);
+	unsigned int n;
+
+	for (n = 0; n < ve->num_siblings; n++)
+		intel_engine_pm_get(ve->siblings[n]);
+
+	intel_timeline_enter(ce->timeline);
+}
+
+static void virtual_context_exit(struct intel_context *ce)
+{
+	struct intel_virtual_engine *ve =
+		container_of(ce, typeof(*ve), context);
+	unsigned int n;
+
+	intel_timeline_exit(ce->timeline);
+
+	for (n = 0; n < ve->num_siblings; n++)
+		intel_engine_pm_put(ve->siblings[n]);
+}
+
+static const struct intel_context_ops virtual_context_ops = {
+	.pin = virtual_context_pin,
+	.unpin = intel_lr_context_unpin,
+
+	.enter = virtual_context_enter,
+	.exit = virtual_context_exit,
+
+	.destroy = virtual_context_destroy,
+};
+
+struct intel_context *
+intel_virtual_engine_create(struct i915_gem_context *ctx,
+			    struct intel_engine_cs **siblings,
+			    unsigned int count)
+{
+	struct intel_virtual_engine *ve;
+	unsigned int n;
+	int err;
+
+	if (count == 0)
+		return ERR_PTR(-EINVAL);
+
+	if (count == 1)
+		return intel_context_create(ctx, siblings[0]);
+
+	ve = kzalloc(struct_size(ve, siblings, count), GFP_KERNEL);
+	if (!ve)
+		return ERR_PTR(-ENOMEM);
+
+	ve->base.i915 = ctx->i915;
+	ve->base.gt = siblings[0]->gt;
+	ve->base.uncore = siblings[0]->uncore;
+	ve->base.id = -1;
+	ve->base.class = OTHER_CLASS;
+	ve->base.uabi_class = I915_ENGINE_CLASS_INVALID;
+	ve->base.instance = I915_ENGINE_CLASS_INVALID_VIRTUAL;
+
+	/*
+	 * The decision on whether to submit a request using semaphores
+	 * depends on the saturated state of the engine. We only compute
+	 * this during HW submission of the request, and we need this
+	 * state to be globally applied to all requests being submitted
+	 * to this engine. Virtual engines encompass more than one physical
+	 * engine and so we cannot accurately tell in advance if one of those
+	 * engines is already saturated and so cannot afford to use a semaphore
+	 * and be pessimized in priority for doing so -- if we are the only
+	 * context using semaphores after all other clients have stopped, we
+	 * will be starved on the saturated system. Such a global switch for
+	 * semaphores is less than ideal, but alas is the current compromise.
+	 */
+	ve->base.saturated = ALL_ENGINES;
+
+	snprintf(ve->base.name, sizeof(ve->base.name), "virtual");
+
+	intel_engine_init_active(&ve->base, ENGINE_VIRTUAL);
+	intel_engine_init_breadcrumbs(&ve->base);
+
+	intel_engine_init_execlists(&ve->base);
+
+	ve->base.cops = &virtual_context_ops;
+
+	intel_execlists_virtual_submission_init(ve);
+
+	ve->base.schedule = i915_schedule;
+
+	INIT_LIST_HEAD(intel_virtual_engine_queue(ve));
+	ve->base.execlists.queue_priority_hint = INT_MIN;
+
+	intel_context_init(&ve->context, ctx, &ve->base);
+
+	for (n = 0; n < count; n++) {
+		struct intel_engine_cs *sibling = siblings[n];
+
+		GEM_BUG_ON(!is_power_of_2(sibling->mask));
+		if (sibling->mask & ve->base.mask) {
+			DRM_DEBUG("duplicate %s entry in load balancer\n",
+				  sibling->name);
+			err = -EINVAL;
+			goto err_put;
+		}
+
+		/*
+		 * The virtual engine implementation is tightly coupled to
+		 * the execlists backend -- we push out requests directly
+		 * into a tree inside each physical engine. We could support
+		 * layering if we handle cloning of the requests and
+		 * submitting a copy into each backend.
+		 */
+		if (!intel_engine_in_execlists_submission_mode(sibling)) {
+			err = -ENODEV;
+			goto err_put;
+		}
+
+		GEM_BUG_ON(RB_EMPTY_NODE(&ve->nodes[sibling->id].rb));
+		RB_CLEAR_NODE(&ve->nodes[sibling->id].rb);
+
+		ve->siblings[ve->num_siblings++] = sibling;
+		ve->base.mask |= sibling->mask;
+
+		/*
+		 * All physical engines must be compatible for their emission
+		 * functions (as we build the instructions during request
+		 * construction and do not alter them before submission
+		 * on the physical engine). We use the engine class as a guide
+		 * here, although that could be refined.
+		 */
+		if (ve->base.class != OTHER_CLASS) {
+			if (ve->base.class != sibling->class) {
+				DRM_DEBUG("invalid mixing of engine class, sibling %d, already %d\n",
+					  sibling->class, ve->base.class);
+				err = -EINVAL;
+				goto err_put;
+			}
+			continue;
+		}
+
+		ve->base.class = sibling->class;
+		ve->base.uabi_class = sibling->uabi_class;
+		snprintf(ve->base.name, sizeof(ve->base.name),
+			 "v%dx%d", ve->base.class, count);
+		ve->base.context_size = sibling->context_size;
+
+		ve->base.emit_bb_start = sibling->emit_bb_start;
+		ve->base.emit_flush = sibling->emit_flush;
+		ve->base.emit_init_breadcrumb = sibling->emit_init_breadcrumb;
+		ve->base.emit_fini_breadcrumb = sibling->emit_fini_breadcrumb;
+		ve->base.emit_fini_breadcrumb_dw =
+			sibling->emit_fini_breadcrumb_dw;
+
+		ve->base.flags = sibling->flags;
+	}
+
+	ve->base.flags |= I915_ENGINE_IS_VIRTUAL;
+
+	err = intel_lr_context_alloc(&ve->context, siblings[0]);
+	if (err)
+		goto err_put;
+
+	__set_bit(CONTEXT_ALLOC_BIT, &ve->context.flags);
+
+	return &ve->context;
+
+err_put:
+	intel_context_put(&ve->context);
+	return ERR_PTR(err);
+}
+
+struct intel_context *
+intel_virtual_engine_clone(struct i915_gem_context *ctx,
+			   struct intel_engine_cs *src)
+{
+	struct intel_virtual_engine *se = to_virtual_engine(src);
+	struct intel_context *dst;
+
+	dst = intel_virtual_engine_create(ctx, se->siblings, se->num_siblings);
+	if (IS_ERR(dst))
+		return dst;
+
+	if (se->num_bonds) {
+		struct intel_virtual_engine *de =
+			to_virtual_engine(dst->engine);
+
+		de->bonds = kmemdup(se->bonds,
+				    sizeof(*se->bonds) * se->num_bonds,
+				    GFP_KERNEL);
+		if (!de->bonds) {
+			intel_context_put(dst);
+			return ERR_PTR(-ENOMEM);
+		}
+
+		de->num_bonds = se->num_bonds;
+	}
+
+	return dst;
+}
+
+struct ve_bond *
+intel_virtual_engine_find_bond(struct intel_virtual_engine *ve,
+			       const struct intel_engine_cs *master)
+{
+	int i;
+
+	for (i = 0; i < ve->num_bonds; i++) {
+		if (ve->bonds[i].master == master)
+			return &ve->bonds[i];
+	}
+
+	return NULL;
+}
+
+int intel_virtual_engine_attach_bond(struct intel_engine_cs *engine,
+				     const struct intel_engine_cs *master,
+				     const struct intel_engine_cs *sibling)
+{
+	struct intel_virtual_engine *ve = to_virtual_engine(engine);
+	struct ve_bond *bond;
+	int n;
+
+	/* Sanity check the sibling is part of the virtual engine */
+	for (n = 0; n < ve->num_siblings; n++)
+		if (sibling == ve->siblings[n])
+			break;
+	if (n == ve->num_siblings)
+		return -EINVAL;
+
+	bond = intel_virtual_engine_find_bond(ve, master);
+	if (bond) {
+		bond->sibling_mask |= sibling->mask;
+		return 0;
+	}
+
+	bond = krealloc(ve->bonds,
+			sizeof(*bond) * (ve->num_bonds + 1),
+			GFP_KERNEL);
+	if (!bond)
+		return -ENOMEM;
+
+	bond[ve->num_bonds].master = master;
+	bond[ve->num_bonds].sibling_mask = sibling->mask;
+
+	ve->bonds = bond;
+	ve->num_bonds++;
+
+	return 0;
+}
+
+struct intel_engine_cs *
+intel_virtual_engine_get_sibling(struct intel_engine_cs *engine,
+				 unsigned int sibling)
+{
+	struct intel_virtual_engine *ve = to_virtual_engine(engine);
+
+	if (sibling >= ve->num_siblings)
+		return NULL;
+
+	return ve->siblings[sibling];
+}
+
diff --git a/drivers/gpu/drm/i915/gt/intel_virtual_engine.h b/drivers/gpu/drm/i915/gt/intel_virtual_engine.h
new file mode 100644
index 000000000000..acda89ab3f99
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/intel_virtual_engine.h
@@ -0,0 +1,48 @@ 
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2019 Intel Corporation
+ */
+
+#ifndef __INTEL_VIRTUAL_ENGINE__
+#define __INTEL_VIRTUAL_ENGINE__
+
+#include "i915_gem.h"
+#include "intel_virtual_engine_types.h"
+
+static inline struct intel_virtual_engine *
+to_virtual_engine(struct intel_engine_cs *engine)
+{
+	GEM_BUG_ON(!intel_engine_is_virtual(engine));
+	return container_of(engine, struct intel_virtual_engine, base);
+}
+
+static inline struct list_head *
+intel_virtual_engine_queue(struct intel_virtual_engine *ve)
+{
+	return &ve->base.execlists.default_priolist.requests[0];
+}
+
+struct intel_context *
+intel_virtual_engine_create(struct i915_gem_context *ctx,
+			    struct intel_engine_cs **siblings,
+			    unsigned int count);
+
+struct intel_context *
+intel_virtual_engine_clone(struct i915_gem_context *ctx,
+			   struct intel_engine_cs *src);
+
+
+struct ve_bond *
+intel_virtual_engine_find_bond(struct intel_virtual_engine *ve,
+			       const struct intel_engine_cs *master);
+
+int intel_virtual_engine_attach_bond(struct intel_engine_cs *engine,
+				     const struct intel_engine_cs *master,
+				     const struct intel_engine_cs *sibling);
+
+struct intel_engine_cs *
+intel_virtual_engine_get_sibling(struct intel_engine_cs *engine,
+				 unsigned int sibling);
+
+#endif /* __INTEL_VIRTUAL_ENGINE__ */
diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c
index b4537497c3be..570c7891c62f 100644
--- a/drivers/gpu/drm/i915/gt/selftest_lrc.c
+++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c
@@ -2635,8 +2635,7 @@  static int nop_virtual_engine(struct intel_gt *gt,
 			goto out;
 		}
 
-		ve[n] = intel_execlists_create_virtual(ctx[n],
-						       siblings, nsibling);
+		ve[n] = intel_virtual_engine_create(ctx[n], siblings, nsibling);
 		if (IS_ERR(ve[n])) {
 			kernel_context_close(ctx[n]);
 			err = PTR_ERR(ve[n]);
@@ -2816,7 +2815,7 @@  static int mask_virtual_engine(struct intel_gt *gt,
 	if (!ctx)
 		return -ENOMEM;
 
-	ve = intel_execlists_create_virtual(ctx, siblings, nsibling);
+	ve = intel_virtual_engine_create(ctx, siblings, nsibling);
 	if (IS_ERR(ve)) {
 		err = PTR_ERR(ve);
 		goto out_close;
@@ -2942,7 +2941,7 @@  static int preserved_virtual_engine(struct intel_gt *gt,
 		goto out_close;
 	}
 
-	ve = intel_execlists_create_virtual(ctx, siblings, nsibling);
+	ve = intel_virtual_engine_create(ctx, siblings, nsibling);
 	if (IS_ERR(ve)) {
 		err = PTR_ERR(ve);
 		goto out_scratch;
@@ -3172,9 +3171,9 @@  static int bond_virtual_engine(struct intel_gt *gt,
 		for (n = 0; n < nsibling; n++) {
 			struct intel_context *ve;
 
-			ve = intel_execlists_create_virtual(ctx,
-							    siblings,
-							    nsibling);
+			ve = intel_virtual_engine_create(ctx,
+							 siblings,
+							 nsibling);
 			if (IS_ERR(ve)) {
 				err = PTR_ERR(ve);
 				onstack_fence_fini(&fence);