@@ -382,6 +382,9 @@ static int prepare_execlist_workload(struct intel_vgpu_workload *workload)
int ring_id = workload->ring_id;
int ret;
+ if (VGPU_PVCAP(vgpu, PV_SUBMISSION))
+ return 0;
+
if (!workload->emulate_schedule_in)
return 0;
@@ -429,6 +432,9 @@ static int complete_execlist_workload(struct intel_vgpu_workload *workload)
goto out;
}
+ if (VGPU_PVCAP(vgpu, PV_SUBMISSION))
+ goto out;
+
ret = emulate_execlist_ctx_schedule_out(execlist, &workload->ctx_desc);
out:
intel_vgpu_unpin_mm(workload->shadow_mm);
@@ -1810,6 +1810,31 @@ static int mmio_read_from_hw(struct intel_vgpu *vgpu,
return intel_vgpu_default_mmio_read(vgpu, offset, p_data, bytes);
}
+/*
+ * handle_pv_submission - service a guest PV ELSP submission notification
+ * @vgpu: vGPU that issued the notification
+ * @ring_id: engine index; used directly to index the engine and execlist
+ *           arrays, so it must already be range-checked by the caller
+ *
+ * Reads 16 bytes of descriptor data from this engine's pv_submission slot in
+ * the shared page into execlist->elsp_dwords.data, submits the execlist, and
+ * then writes 'false' to the slot's 'submitted' field.
+ *
+ * Returns 0 on success, -EINVAL if the descriptors cannot be read, otherwise
+ * the submission error or, failing that, the shared-page write result.
+ */
+static int handle_pv_submission(struct intel_vgpu *vgpu, int ring_id)
+{
+	struct intel_vgpu_execlist *execlist;
+	u32 hw_id = vgpu->gvt->dev_priv->engine[ring_id]->hw_id;
+	/* Each engine owns one pv_submission slot, selected by hw_id. */
+	u32 base = PV_ELSP_OFF + hw_id * sizeof(struct pv_submission);
+	u32 desc_off = offsetof(struct pv_submission, descs);
+	u32 submitted_off = offsetof(struct pv_submission, submitted);
+	bool submitted = false;
+	int submit_ret;
+	int ret;
+
+	execlist = &vgpu->submission.execlist[ring_id];
+	desc_off += base;
+	if (intel_gvt_read_shared_page(vgpu, desc_off,
+				       &execlist->elsp_dwords.data, 16))
+		return -EINVAL;
+
+	submit_ret = intel_vgpu_submit_execlist(vgpu, ring_id);
+	if (submit_ret)
+		gvt_vgpu_err("fail submit workload on ring %d\n", ring_id);
+
+	/*
+	 * The flag is written back even when submission failed.
+	 * NOTE(review): presumably the guest waits on this flag - confirm.
+	 */
+	submitted_off += base;
+	ret = intel_gvt_write_shared_page(vgpu, submitted_off, &submitted,
+					  sizeof(submitted));
+
+	/* Do not let a successful flag write mask a failed submission. */
+	return submit_ret ? submit_ret : ret;
+}
+
static int elsp_mmio_write(struct intel_vgpu *vgpu, unsigned int offset,
void *p_data, unsigned int bytes)
{
@@ -1821,8 +1846,11 @@ static int elsp_mmio_write(struct intel_vgpu *vgpu, unsigned int offset,
if (WARN_ON(ring_id < 0 || ring_id >= I915_NUM_ENGINES))
return -EINVAL;
- execlist = &vgpu->submission.execlist[ring_id];
+ if (VGPU_PVCAP(vgpu, PV_SUBMISSION) &&
+ data == PV_ACTION_ELSP_SUBMISSION)
+ return handle_pv_submission(vgpu, ring_id);
+ execlist = &vgpu->submission.execlist[ring_id];
execlist->elsp_dwords.data[3 - execlist->elsp_dwords.index] = data;
if (execlist->elsp_dwords.index == 3) {
ret = intel_vgpu_submit_execlist(vgpu, ring_id);
@@ -51,6 +51,8 @@ void populate_pvinfo_page(struct intel_vgpu *vgpu)
if (!intel_vtd_active())
vgpu_vreg_t(vgpu, vgtif_reg(pv_caps)) = PV_PPGTT_UPDATE;
+ vgpu_vreg_t(vgpu, vgtif_reg(pv_caps)) |= PV_SUBMISSION;
+
vgpu_vreg_t(vgpu, vgtif_reg(avail_rs.mappable_gmadr.base)) =
vgpu_aperture_gmadr_base(vgpu);
vgpu_vreg_t(vgpu, vgtif_reg(avail_rs.mappable_gmadr.size)) =
Implement context submission PV optimization within GVTg. GVTg reads context submission data (elsp_data) from the shared_page directly without trap cost, and eliminates execlist HW behavior emulation, without injecting context switch interrupts to the guest, under the PV submission mechanism. v0: RFC. v1: rebase. v2: rebase. v3: report pv context submission cap and handle VGT_G2V_ELSP_SUBMIT g2v pv notification. v4: eliminate execlist HW emulation and don't inject context switch interrupt to guest under PV submission mechanism. v5: rebase. v6: rebase. v7: rebase. v8: addressed v7 pv submission comments. Signed-off-by: Xiaolin Zhang <xiaolin.zhang@intel.com> --- drivers/gpu/drm/i915/gvt/execlist.c | 6 ++++++ drivers/gpu/drm/i915/gvt/handlers.c | 30 +++++++++++++++++++++++++++++- drivers/gpu/drm/i915/gvt/vgpu.c | 2 ++ 3 files changed, 37 insertions(+), 1 deletion(-)