[v4,4/8] drm/i915: vgpu context submission pv optimization

Message ID 1553866364-111114-5-git-send-email-xiaolin.zhang@intel.com (mailing list archive)
State New, archived
Series i915 vgpu PV to improve vgpu performance

Commit Message

Xiaolin Zhang March 29, 2019, 1:32 p.m. UTC
It is a performance optimization to override the actual submission backend
in order to eliminate execlists CSB processing and reduce the number of MMIO
traps for workload submission, without a context switch interrupt, by talking
to GVT via a PV submission notification mechanism between the guest and GVT.

Use PV_SUBMISSION to control this level of pv optimization.

v0: RFC
v1: rebase
v2: added pv ops for pv context submission. to maximize code reuse,
introduced 2 more ops (submit_ports & preempt_context) instead of 1 op
(set_default_submission) in the engine structure. pv versions of
submit_ports and preempt_context implemented.
v3:
1. to reduce code duplication further, refactored the code and replaced the
2 ops "submit_ports & preempt_context" from v2 with 1 op "write_desc"
in the engine structure. pv version of write_desc implemented.
2. added VGT_G2V_ELSP_SUBMIT for g2v pv notification.
v4: implemented a pv elsp submission tasklet as the backend workload
submission path by talking to GVT with the PV notification mechanism, and
renamed VGT_G2V_ELSP_SUBMIT to VGT_G2V_PV_SUBMISSION.

Signed-off-by: Xiaolin Zhang <xiaolin.zhang@intel.com>
---
 drivers/gpu/drm/i915/i915_irq.c    |   2 +
 drivers/gpu/drm/i915/i915_pvinfo.h |   1 +
 drivers/gpu/drm/i915/i915_vgpu.c   | 158 ++++++++++++++++++++++++++++++++++++-
 drivers/gpu/drm/i915/i915_vgpu.h   |  10 +++
 drivers/gpu/drm/i915/intel_lrc.c   |   3 +
 5 files changed, 173 insertions(+), 1 deletion(-)

Comments

Chris Wilson March 29, 2019, 7:15 a.m. UTC | #1
Quoting Xiaolin Zhang (2019-03-29 13:32:40)
> It is a performance optimization to override the actual submission backend
> in order to eliminate execlists CSB processing and reduce the number of MMIO
> traps for workload submission, without a context switch interrupt, by talking
> to GVT via a PV submission notification mechanism between the guest and GVT.

> Use PV_SUBMISSION to control this level of pv optimization.
> 
> v0: RFC
> v1: rebase
> v2: added pv ops for pv context submission. to maximize code reuse,
> introduced 2 more ops (submit_ports & preempt_context) instead of 1 op
> (set_default_submission) in the engine structure. pv versions of
> submit_ports and preempt_context implemented.
> v3:
> 1. to reduce code duplication further, refactored the code and replaced the
> 2 ops "submit_ports & preempt_context" from v2 with 1 op "write_desc"
> in the engine structure. pv version of write_desc implemented.
> 2. added VGT_G2V_ELSP_SUBMIT for g2v pv notification.
> v4: implemented a pv elsp submission tasklet as the backend workload
> submission path by talking to GVT with the PV notification mechanism, and
> renamed VGT_G2V_ELSP_SUBMIT to VGT_G2V_PV_SUBMISSION.
> 
> Signed-off-by: Xiaolin Zhang <xiaolin.zhang@intel.com>
> ---
>  drivers/gpu/drm/i915/i915_irq.c    |   2 +
>  drivers/gpu/drm/i915/i915_pvinfo.h |   1 +
>  drivers/gpu/drm/i915/i915_vgpu.c   | 158 ++++++++++++++++++++++++++++++++++++-
>  drivers/gpu/drm/i915/i915_vgpu.h   |  10 +++
>  drivers/gpu/drm/i915/intel_lrc.c   |   3 +
>  5 files changed, 173 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
> index 2f78829..28e8ee0 100644
> --- a/drivers/gpu/drm/i915/i915_irq.c
> +++ b/drivers/gpu/drm/i915/i915_irq.c
> @@ -37,6 +37,7 @@
>  #include "i915_drv.h"
>  #include "i915_trace.h"
>  #include "intel_drv.h"
> +#include "i915_vgpu.h"
>  
>  /**
>   * DOC: interrupt handling
> @@ -1470,6 +1471,7 @@ gen8_cs_irq_handler(struct intel_engine_cs *engine, u32 iir)
>         if (iir & GT_RENDER_USER_INTERRUPT) {
>                 intel_engine_breadcrumbs_irq(engine);
>                 tasklet |= USES_GUC_SUBMISSION(engine->i915);
> +               tasklet |= USES_PV_SUBMISSION(engine->i915);

You call this an optimisation!
-Chris
Chris Wilson March 29, 2019, 3:40 p.m. UTC | #2
Quoting Xiaolin Zhang (2019-03-29 13:32:40)
> diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
> index 2f78829..28e8ee0 100644
> --- a/drivers/gpu/drm/i915/i915_irq.c
> +++ b/drivers/gpu/drm/i915/i915_irq.c
> @@ -37,6 +37,7 @@
>  #include "i915_drv.h"
>  #include "i915_trace.h"
>  #include "intel_drv.h"
> +#include "i915_vgpu.h"
>  
>  /**
>   * DOC: interrupt handling
> @@ -1470,6 +1471,7 @@ gen8_cs_irq_handler(struct intel_engine_cs *engine, u32 iir)
>         if (iir & GT_RENDER_USER_INTERRUPT) {
>                 intel_engine_breadcrumbs_irq(engine);
>                 tasklet |= USES_GUC_SUBMISSION(engine->i915);
> +               tasklet |= USES_PV_SUBMISSION(engine->i915);

We should move this to an engine->flag.

>         }
>  
>         if (tasklet)
> +static void vgpu_pv_set_default_submission(struct intel_engine_cs *engine)
> +{
> +       /*
> +        * We inherit a bunch of functions from execlists that we'd like
> +        * to keep using:
> +        *
> +        *    engine->submit_request = execlists_submit_request;
> +        *    engine->cancel_requests = execlists_cancel_requests;
> +        *    engine->schedule = execlists_schedule;
> +        *
> +        * But we need to override the actual submission backend in order
> +        * to talk to the GVT with PV notification message.
> +        */
> +       intel_execlists_set_default_submission(engine);
> +
> +       engine->execlists.tasklet.func = vgpu_pv_submission_tasklet;

You need to pin the breadcrumbs irq, or it will not fire on every
request.

I'd push for this to live in intel_pv_submission.c or
intel_vgpu_submission.c

> @@ -401,6 +551,12 @@ void intel_vgpu_config_pv_caps(struct drm_i915_private *dev_priv,
>                 ppgtt->vm.insert_entries = gen8_ppgtt_insert_4lvl_pv;
>                 ppgtt->vm.clear_range = gen8_ppgtt_clear_4lvl_pv;
>         }
> +
> +       if (cap == PV_SUBMISSION) {
> +               engine = (struct intel_engine_cs *)data;
> +               engine->set_default_submission = vgpu_pv_set_default_submission;
> +               engine->set_default_submission(engine);
> +       }
>  }

> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index c1b9780..0a66714 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -2352,6 +2352,9 @@ logical_ring_default_vfuncs(struct intel_engine_cs *engine)
>                  */
>         }
>         engine->emit_bb_start = gen8_emit_bb_start;
> +
> +       if (intel_vgpu_active(engine->i915))
> +               intel_vgpu_config_pv_caps(engine->i915, PV_SUBMISSION, engine);

That pair is ugly. Should clean up the engine initialisation so that it
doesn't involve placing a chunk of code in a foreign class.
-Chris
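
For reference, a minimal sketch of the engine->flag suggestion above; the flag and helper names are assumptions for illustration, not code from this series:

/*
 * Hypothetical sketch only: the backend that installs
 * vgpu_pv_submission_tasklet (or the GuC tasklet) would set a per-engine
 * flag, and the IRQ handler would test that flag instead of a device-wide
 * USES_*_SUBMISSION() macro. Flag and helper names are illustrative.
 */
#define I915_ENGINE_NEEDS_BREADCRUMB_TASKLET	BIT(4) /* assumed free bit */

static inline bool
intel_engine_needs_breadcrumb_tasklet(const struct intel_engine_cs *engine)
{
	return engine->flags & I915_ENGINE_NEEDS_BREADCRUMB_TASKLET;
}

static void gen8_cs_irq_handler(struct intel_engine_cs *engine, u32 iir)
{
	bool tasklet = false;

	/* context-switch interrupt handling elided for brevity */
	if (iir & GT_RENDER_USER_INTERRUPT) {
		intel_engine_breadcrumbs_irq(engine);
		tasklet |= intel_engine_needs_breadcrumb_tasklet(engine);
	}

	if (tasklet)
		tasklet_hi_schedule(&engine->execlists.tasklet);
}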
Chris Wilson March 29, 2019, 7:14 p.m. UTC | #3
Quoting Xiaolin Zhang (2019-03-29 13:32:40)
> +       spin_lock(&engine->i915->vgpu.shared_page_lock);
> +       shared_page->ring_id = engine->id;
> +       for (n = 0; n < execlists_num_ports(execlists); n++)
> +               shared_page->descs[n] = descs[n];
> +
> +       __raw_i915_write32(uncore, vgtif_reg(g2v_notify),
> +                       VGT_G2V_PV_SUBMISSION);

There's no serialisation with the consumer here. The
shared_page->descs[] will be clobbered by a second engine before they
are read by pv. There should be a wait-for-ack here, or separate
channels for each engine. In execlists, we avoid this by not dequeuing
until after we get a response/ack from HW.

Preemption? I guess preemption was turned off, or else you have to worry
about the same engine overwriting the desc[] (if no ack).
-Chris
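
For illustration, a minimal sketch of the wait-for-ack variant inside pv_submit(); the shared_page->submitted field and the 10ms timeout are assumptions, not part of this patch:

	/*
	 * Hypothetical sketch: make the G2V notification synchronous. The
	 * guest clears an ack flag, notifies GVT, then waits for GVT to set
	 * the flag after latching descs[]. "submitted" is illustrative only.
	 */
	spin_lock(&engine->i915->vgpu.shared_page_lock);
	shared_page->ring_id = engine->id;
	for (n = 0; n < execlists_num_ports(execlists); n++)
		shared_page->descs[n] = descs[n];
	WRITE_ONCE(shared_page->submitted, false);

	__raw_i915_write32(uncore, vgtif_reg(g2v_notify), VGT_G2V_PV_SUBMISSION);

	/* do not reuse the channel until GVT has consumed the descriptors */
	if (wait_for_atomic(READ_ONCE(shared_page->submitted), 10))
		DRM_ERROR("timed out waiting for PV submission ack\n");
	spin_unlock(&engine->i915->vgpu.shared_page_lock);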
Xiaolin Zhang April 11, 2019, 5:46 a.m. UTC | #4
On 03/29/2019 11:40 PM, Chris Wilson wrote:
> Quoting Xiaolin Zhang (2019-03-29 13:32:40)
>> diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
>> index 2f78829..28e8ee0 100644
>> --- a/drivers/gpu/drm/i915/i915_irq.c
>> +++ b/drivers/gpu/drm/i915/i915_irq.c
>> @@ -37,6 +37,7 @@
>>  #include "i915_drv.h"
>>  #include "i915_trace.h"
>>  #include "intel_drv.h"
>> +#include "i915_vgpu.h"
>>  
>>  /**
>>   * DOC: interrupt handling
>> @@ -1470,6 +1471,7 @@ gen8_cs_irq_handler(struct intel_engine_cs *engine, u32 iir)
>>         if (iir & GT_RENDER_USER_INTERRUPT) {
>>                 intel_engine_breadcrumbs_irq(engine);
>>                 tasklet |= USES_GUC_SUBMISSION(engine->i915);
>> +               tasklet |= USES_PV_SUBMISSION(engine->i915);
> We should move this to an engine->flag.
Sure, I will remove this in the next version.
>
>>         }
>>  
>>         if (tasklet)
>> +static void vgpu_pv_set_default_submission(struct intel_engine_cs *engine)
>> +{
>> +       /*
>> +        * We inherit a bunch of functions from execlists that we'd like
>> +        * to keep using:
>> +        *
>> +        *    engine->submit_request = execlists_submit_request;
>> +        *    engine->cancel_requests = execlists_cancel_requests;
>> +        *    engine->schedule = execlists_schedule;
>> +        *
>> +        * But we need to override the actual submission backend in order
>> +        * to talk to the GVT with PV notification message.
>> +        */
>> +       intel_execlists_set_default_submission(engine);
>> +
>> +       engine->execlists.tasklet.func = vgpu_pv_submission_tasklet;
> You need to pin the breadcrumbs irq, or it will not fire on every
> request.
Indeed, I should. Thanks for the great point.
> I'd push for this to live in intel_pv_submission.c or
> intel_vgpu_submission.c
This will create a new file and require a change in the Makefile. If this
kind of change is OK, I will create one using the intel_pv_submission.c name.
>> @@ -401,6 +551,12 @@ void intel_vgpu_config_pv_caps(struct drm_i915_private *dev_priv,
>>                 ppgtt->vm.insert_entries = gen8_ppgtt_insert_4lvl_pv;
>>                 ppgtt->vm.clear_range = gen8_ppgtt_clear_4lvl_pv;
>>         }
>> +
>> +       if (cap == PV_SUBMISSION) {
>> +               engine = (struct intel_engine_cs *)data;
>> +               engine->set_default_submission = vgpu_pv_set_default_submission;
>> +               engine->set_default_submission(engine);
>> +       }
>>  }
>> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
>> index c1b9780..0a66714 100644
>> --- a/drivers/gpu/drm/i915/intel_lrc.c
>> +++ b/drivers/gpu/drm/i915/intel_lrc.c
>> @@ -2352,6 +2352,9 @@ logical_ring_default_vfuncs(struct intel_engine_cs *engine)
>>                  */
>>         }
>>         engine->emit_bb_start = gen8_emit_bb_start;
>> +
>> +       if (intel_vgpu_active(engine->i915))
>> +               intel_vgpu_config_pv_caps(engine->i915, PV_SUBMISSION, engine);
> That pair is ugly. Should clean up the engine initialisation so that it
> doesn't involve placing a chunk of code in a foreign class.
> -Chris
>
Yep, will move it to another location.
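
For the breadcrumbs point agreed above, a sketch of how the PV backend could pin the breadcrumb irq when it is installed; the helper name intel_engine_pin_breadcrumbs_irq() is assumed from the i915 code of this period and should be checked against the tree:

/*
 * Sketch only: the PV backend has no context-switch interrupt, so the user
 * interrupt is the only thing that runs vgpu_pv_submission_tasklet. Keep the
 * breadcrumb irq pinned while this backend is active.
 */
static void vgpu_pv_set_default_submission(struct intel_engine_cs *engine)
{
	intel_execlists_set_default_submission(engine);
	engine->execlists.tasklet.func = vgpu_pv_submission_tasklet;

	/* assumed helper: keep the user interrupt permanently enabled */
	intel_engine_pin_breadcrumbs_irq(engine);
}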
Xiaolin Zhang April 11, 2019, 5:49 a.m. UTC | #5
On 03/30/2019 03:14 AM, Chris Wilson wrote:
> Quoting Xiaolin Zhang (2019-03-29 13:32:40)
>> +       spin_lock(&engine->i915->vgpu.shared_page_lock);
>> +       shared_page->ring_id = engine->id;
>> +       for (n = 0; n < execlists_num_ports(execlists); n++)
>> +               shared_page->descs[n] = descs[n];
>> +
>> +       __raw_i915_write32(uncore, vgtif_reg(g2v_notify),
>> +                       VGT_G2V_PV_SUBMISSION);
> There's no serialisation with the consumer here. The
> shared_page->descs[] will be clobbered by a second engine before they
> are read by pv. There should be a wait-for-ack here, or separate
> channels for each engine. In execlists, we avoid this by not dequeuing
> until after we get a response/ack from HW.
>
> Preemption? I guess preemption was turned off, or else you have to worry
> about the same engine overwriting the desc[] (if no ack).
> -Chris
>
Chris, thank you for pointing out my flaw here. I will use the per-engine
channel for notification. Preemption is turned off and not supported in the
current version.
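
A rough sketch of the per-engine channel mentioned in the reply above; the slot layout and field names are hypothetical, not from this series:

/*
 * Hypothetical layout: give each engine its own descriptor slot in the
 * shared page so that pv_submit() on one engine cannot clobber another
 * engine's descs[] before GVT has consumed them. All names illustrative.
 */
#define PV_MAX_ENGINES		8
#define PV_EXECLIST_PORTS	2

struct pv_submission_slot {
	u64 descs[PV_EXECLIST_PORTS];
	u32 submitted;	/* written by the guest, cleared by GVT on consume */
};

struct gvt_shared_page {
	/* ... existing fields ... */
	struct pv_submission_slot slots[PV_MAX_ENGINES];
};

/* pv_submit() would then touch only its own engine's slot */
static void pv_submit_slot(struct gvt_shared_page *sp, unsigned int engine_id,
			   const u64 *descs, unsigned int n_ports)
{
	struct pv_submission_slot *slot = &sp->slots[engine_id];
	unsigned int n;

	for (n = 0; n < n_ports; n++)
		slot->descs[n] = descs[n];
	WRITE_ONCE(slot->submitted, 1);
	/* a single g2v_notify write follows; GVT reads slots[engine_id] */
}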

Patch

diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index 2f78829..28e8ee0 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -37,6 +37,7 @@ 
 #include "i915_drv.h"
 #include "i915_trace.h"
 #include "intel_drv.h"
+#include "i915_vgpu.h"
 
 /**
  * DOC: interrupt handling
@@ -1470,6 +1471,7 @@  gen8_cs_irq_handler(struct intel_engine_cs *engine, u32 iir)
 	if (iir & GT_RENDER_USER_INTERRUPT) {
 		intel_engine_breadcrumbs_irq(engine);
 		tasklet |= USES_GUC_SUBMISSION(engine->i915);
+		tasklet |= USES_PV_SUBMISSION(engine->i915);
 	}
 
 	if (tasklet)
diff --git a/drivers/gpu/drm/i915/i915_pvinfo.h b/drivers/gpu/drm/i915/i915_pvinfo.h
index 2408a9d..362d898 100644
--- a/drivers/gpu/drm/i915/i915_pvinfo.h
+++ b/drivers/gpu/drm/i915/i915_pvinfo.h
@@ -50,6 +50,7 @@  enum vgt_g2v_type {
 	VGT_G2V_PPGTT_L4_ALLOC,
 	VGT_G2V_PPGTT_L4_CLEAR,
 	VGT_G2V_PPGTT_L4_INSERT,
+	VGT_G2V_PV_SUBMISSION,
 	VGT_G2V_MAX,
 };
 
diff --git a/drivers/gpu/drm/i915/i915_vgpu.c b/drivers/gpu/drm/i915/i915_vgpu.c
index 87a0ca5..53d05b3 100644
--- a/drivers/gpu/drm/i915/i915_vgpu.c
+++ b/drivers/gpu/drm/i915/i915_vgpu.c
@@ -23,6 +23,7 @@ 
 
 #include "intel_drv.h"
 #include "i915_vgpu.h"
+#include "intel_lrc_reg.h"
 
 /**
  * DOC: Intel GVT-g guest support
@@ -81,7 +82,7 @@  void i915_check_vgpu(struct drm_i915_private *dev_priv)
 	dev_priv->vgpu.active = true;
 
 	/* guest driver PV capability */
-	dev_priv->vgpu.pv_caps = PV_PPGTT_UPDATE;
+	dev_priv->vgpu.pv_caps = PV_PPGTT_UPDATE | PV_SUBMISSION;
 
 	if (!intel_vgpu_check_pv_caps(dev_priv)) {
 		DRM_INFO("Virtual GPU for Intel GVT-g detected.\n");
@@ -292,6 +293,154 @@  int intel_vgt_balloon(struct drm_i915_private *dev_priv)
  * i915 vgpu PV support for Linux
  */
 
+static u64 execlists_update_context(struct i915_request *rq)
+{
+	struct intel_context *ce = rq->hw_context;
+	u32 *reg_state = ce->lrc_reg_state;
+
+	reg_state[CTX_RING_TAIL+1] = intel_ring_set_tail(rq->ring, rq->tail);
+
+	return ce->lrc_desc;
+}
+
+static inline struct i915_priolist *to_priolist(struct rb_node *rb)
+{
+	return rb_entry(rb, struct i915_priolist, node);
+}
+
+static void pv_submit(struct intel_engine_cs *engine)
+{
+	struct intel_uncore *uncore = &engine->i915->uncore;
+	struct intel_engine_execlists * const execlists = &engine->execlists;
+	struct execlist_port *port = execlists->port;
+	unsigned int n;
+	struct gvt_shared_page *shared_page = engine->i915->vgpu.shared_page;
+	u64 descs[2];
+
+	for (n = 0; n < execlists_num_ports(execlists); n++) {
+		struct i915_request *rq;
+		unsigned int count = 0;
+
+		descs[n] = 0;
+		rq = port_unpack(&port[n], &count);
+		if (rq && count == 0) {
+			port_set(&port[n], port_pack(rq, ++count));
+			descs[n] = execlists_update_context(rq);
+		}
+	}
+
+	spin_lock(&engine->i915->vgpu.shared_page_lock);
+	shared_page->ring_id = engine->id;
+	for (n = 0; n < execlists_num_ports(execlists); n++)
+		shared_page->descs[n] = descs[n];
+
+	__raw_i915_write32(uncore, vgtif_reg(g2v_notify),
+			VGT_G2V_PV_SUBMISSION);
+	spin_unlock(&engine->i915->vgpu.shared_page_lock);
+}
+
+static void pv_dequeue(struct intel_engine_cs *engine)
+{
+	struct intel_engine_execlists * const execlists = &engine->execlists;
+	struct execlist_port *port = execlists->port;
+	struct i915_request *last = NULL;
+	bool submit = false;
+	struct rb_node *rb;
+
+	lockdep_assert_held(&engine->timeline.lock);
+
+	GEM_BUG_ON(port_isset(port));
+
+	while ((rb = rb_first_cached(&execlists->queue))) {
+		struct i915_priolist *p = to_priolist(rb);
+		struct i915_request *rq, *rn;
+		int i;
+
+		priolist_for_each_request_consume(rq, rn, p, i) {
+			if (last && rq->hw_context != last->hw_context)
+				goto done;
+
+			list_del_init(&rq->sched.link);
+
+			__i915_request_submit(rq);
+			trace_i915_request_in(rq, port_index(port, execlists));
+
+			last = rq;
+			submit = true;
+		}
+
+		rb_erase_cached(&p->node, &execlists->queue);
+		i915_priolist_free(p);
+	}
+done:
+	execlists->queue_priority_hint =
+			rb ? to_priolist(rb)->priority : INT_MIN;
+	if (submit) {
+		port_set(port, i915_request_get(last));
+		pv_submit(engine);
+	}
+	if (last)
+		execlists_user_begin(execlists, execlists->port);
+
+	/* We must always keep the beast fed if we have work piled up */
+	GEM_BUG_ON(port_isset(execlists->port) &&
+		   !execlists_is_active(execlists, EXECLISTS_ACTIVE_USER));
+	GEM_BUG_ON(rb_first_cached(&execlists->queue) &&
+		   !port_isset(execlists->port));
+}
+
+static void vgpu_pv_submission_tasklet(unsigned long data)
+{
+	struct intel_engine_cs * const engine = (struct intel_engine_cs *)data;
+	struct intel_engine_execlists * const execlists = &engine->execlists;
+	struct execlist_port *port = execlists->port;
+	struct i915_request *rq;
+	unsigned long flags;
+	bool rq_finished = false;
+
+	spin_lock_irqsave(&engine->timeline.lock, flags);
+
+	rq = port_request(port);
+	while (rq && i915_request_completed(rq)) {
+		trace_i915_request_out(rq);
+		rq_finished = true;
+		i915_request_put(rq);
+
+		port = execlists_port_complete(execlists, port);
+		if (port_isset(port)) {
+			rq_finished = false;
+			execlists_user_begin(execlists, port);
+			rq = port_request(port);
+		} else {
+			execlists_user_end(execlists);
+			rq = NULL;
+		}
+	}
+
+	if (rq_finished || !rq)
+		pv_dequeue(engine);
+
+	spin_unlock_irqrestore(&engine->timeline.lock, flags);
+}
+
+static void vgpu_pv_set_default_submission(struct intel_engine_cs *engine)
+{
+	/*
+	 * We inherit a bunch of functions from execlists that we'd like
+	 * to keep using:
+	 *
+	 *    engine->submit_request = execlists_submit_request;
+	 *    engine->cancel_requests = execlists_cancel_requests;
+	 *    engine->schedule = execlists_schedule;
+	 *
+	 * But we need to override the actual submission backend in order
+	 * to talk to the GVT with PV notification message.
+	 */
+	intel_execlists_set_default_submission(engine);
+
+	engine->execlists.tasklet.func = vgpu_pv_submission_tasklet;
+}
+
 static void gen8_ppgtt_clear_4lvl_pv(struct i915_address_space *vm,
 				  u64 start, u64 length)
 {
@@ -391,6 +540,7 @@  void intel_vgpu_config_pv_caps(struct drm_i915_private *dev_priv,
 		enum pv_caps cap, void *data)
 {
 	struct i915_hw_ppgtt *ppgtt;
+	struct intel_engine_cs *engine;
 
 	if (!intel_vgpu_enabled_pv_caps(dev_priv, cap))
 		return;
@@ -401,6 +551,12 @@  void intel_vgpu_config_pv_caps(struct drm_i915_private *dev_priv,
 		ppgtt->vm.insert_entries = gen8_ppgtt_insert_4lvl_pv;
 		ppgtt->vm.clear_range = gen8_ppgtt_clear_4lvl_pv;
 	}
+
+	if (cap == PV_SUBMISSION) {
+		engine = (struct intel_engine_cs *)data;
+		engine->set_default_submission = vgpu_pv_set_default_submission;
+		engine->set_default_submission(engine);
+	}
 }
 
 /**
diff --git a/drivers/gpu/drm/i915/i915_vgpu.h b/drivers/gpu/drm/i915/i915_vgpu.h
index dfe2eb4..cae776d 100644
--- a/drivers/gpu/drm/i915/i915_vgpu.h
+++ b/drivers/gpu/drm/i915/i915_vgpu.h
@@ -31,6 +31,7 @@ 
  */
 enum pv_caps {
 	PV_PPGTT_UPDATE = 0x1,
+	PV_SUBMISSION = 0x2,
 };
 
 /*
@@ -80,6 +81,12 @@  intel_vgpu_enabled_pv_caps(struct drm_i915_private *dev_priv,
 			&& (dev_priv->vgpu.pv_caps & cap);
 }
 
+static inline bool
+intel_vgpu_is_using_pv_submission(struct drm_i915_private *dev_priv)
+{
+	return intel_vgpu_enabled_pv_caps(dev_priv, PV_SUBMISSION);
+}
+
 int intel_vgt_balloon(struct drm_i915_private *dev_priv);
 void intel_vgt_deballoon(struct drm_i915_private *dev_priv);
 
@@ -87,4 +94,7 @@  void intel_vgt_deballoon(struct drm_i915_private *dev_priv);
 bool intel_vgpu_check_pv_caps(struct drm_i915_private *dev_priv);
 void intel_vgpu_config_pv_caps(struct drm_i915_private *dev_priv,
 		enum pv_caps cap, void *data);
+#define USES_PV_SUBMISSION(dev_priv) \
+	intel_vgpu_is_using_pv_submission(dev_priv)
+
 #endif /* _I915_VGPU_H_ */
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index c1b9780..0a66714 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -2352,6 +2352,9 @@  logical_ring_default_vfuncs(struct intel_engine_cs *engine)
 		 */
 	}
 	engine->emit_bb_start = gen8_emit_bb_start;
+
+	if (intel_vgpu_active(engine->i915))
+		intel_vgpu_config_pv_caps(engine->i915,	PV_SUBMISSION, engine);
 }
 
 static inline void