@@ -198,7 +198,7 @@ i915-$(CONFIG_DRM_I915_SELFTEST) += \
selftests/igt_spinner.o
# virtual gpu code
-i915-y += i915_vgpu.o
+i915-y += i915_vgpu.o intel_pv_submission.o
# perf code
i915-y += i915_perf.o \
@@ -2380,11 +2380,15 @@ void intel_execlists_set_default_submission(struct intel_engine_cs *engine)
engine->unpark = NULL;
engine->flags |= I915_ENGINE_SUPPORTS_STATS;
- if (!intel_vgpu_active(engine->i915))
- engine->flags |= I915_ENGINE_HAS_SEMAPHORES;
+ engine->flags |= I915_ENGINE_HAS_SEMAPHORES;
if (engine->preempt_context &&
HAS_LOGICAL_RING_PREEMPTION(engine->i915))
engine->flags |= I915_ENGINE_HAS_PREEMPTION;
+
+ if (intel_vgpu_active(engine->i915)) {
+ engine->flags &= ~I915_ENGINE_HAS_SEMAPHORES;
+ intel_vgpu_config_pv_caps(engine->i915, PV_SUBMISSION, engine);
+ }
}
static void
@@ -50,6 +50,7 @@ enum vgt_g2v_type {
VGT_G2V_PPGTT_L4_ALLOC,
VGT_G2V_PPGTT_L4_CLEAR,
VGT_G2V_PPGTT_L4_INSERT,
+ VGT_G2V_PV_SUBMISSION,
VGT_G2V_MAX,
};
@@ -81,7 +81,7 @@ void i915_check_vgpu(struct drm_i915_private *dev_priv)
dev_priv->vgpu.active = true;
/* guest driver PV capability */
- dev_priv->vgpu.pv_caps = PV_PPGTT_UPDATE;
+ dev_priv->vgpu.pv_caps = PV_PPGTT_UPDATE | PV_SUBMISSION;
if (!intel_vgpu_check_pv_caps(dev_priv)) {
DRM_INFO("Virtual GPU for Intel GVT-g detected.\n");
@@ -361,6 +361,7 @@ void intel_vgpu_config_pv_caps(struct drm_i915_private *dev_priv,
enum pv_caps cap, void *data)
{
struct i915_hw_ppgtt *ppgtt;
+ struct intel_engine_cs *engine;
if (!intel_vgpu_enabled_pv_caps(dev_priv, cap))
return;
@@ -371,6 +372,11 @@ void intel_vgpu_config_pv_caps(struct drm_i915_private *dev_priv,
ppgtt->vm.insert_entries = gen8_ppgtt_insert_4lvl_pv;
ppgtt->vm.clear_range = gen8_ppgtt_clear_4lvl_pv;
}
+
+ if (cap == PV_SUBMISSION) {
+ engine = (struct intel_engine_cs *)data;
+ vgpu_set_pv_submission(engine);
+ }
}
/*
@@ -31,6 +31,7 @@
*/
enum pv_caps {
PV_PPGTT_UPDATE = 0x1,
+ PV_SUBMISSION = 0x2,
};
/*
@@ -90,4 +91,6 @@ void intel_vgt_deballoon(struct drm_i915_private *dev_priv);
bool intel_vgpu_check_pv_caps(struct drm_i915_private *dev_priv);
void intel_vgpu_config_pv_caps(struct drm_i915_private *dev_priv,
enum pv_caps cap, void *data);
+void vgpu_set_pv_submission(struct intel_engine_cs *engine);
+
#endif /* _I915_VGPU_H_ */
new file mode 100644
@@ -0,0 +1,177 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2018 Intel Corporation
+ */
+
+#include "intel_drv.h"
+#include "i915_vgpu.h"
+#include "gt/intel_lrc_reg.h"
+
+/*
+ * Refresh the ring tail stored in the request's context register state.
+ *
+ * The tail value returned by intel_ring_set_tail() for rq->tail is written
+ * to the CTX_RING_TAIL + 1 slot of the context's lrc_reg_state array.
+ *
+ * Returns the context descriptor (lrc_desc) of the request's hw_context.
+ */
+static u64 execlists_update_context(struct i915_request *rq)
+{
+	struct intel_context *ctx = rq->hw_context;
+	u32 *regs = ctx->lrc_reg_state;
+	const u32 tail = intel_ring_set_tail(rq->ring, rq->tail);
+
+	regs[CTX_RING_TAIL + 1] = tail;
+
+	return ctx->lrc_desc;
+}
+
+/* Map an rb-tree node back to the i915_priolist that embeds it as ->node. */
+static inline struct i915_priolist *to_priolist(struct rb_node *rb)
+{
+	struct i915_priolist *plist = rb_entry(rb, struct i915_priolist, node);
+
+	return plist;
+}
+
+/*
+ * Hand the engine's freshly filled ports to the host through the shared page.
+ *
+ * For every port that holds a request with a submission count of zero, the
+ * count is bumped to one and the context descriptor returned by
+ * execlists_update_context() is collected; all other ports contribute a zero
+ * descriptor.  The descriptors are then copied into this engine's pv_elsp
+ * slot of the shared page and VGT_G2V_PV_SUBMISSION is written to the
+ * submit register, both under the per-engine shared_page_lock.
+ */
+static void pv_submit(struct intel_engine_cs *engine)
+{
+	struct intel_engine_execlists * const execlists = &engine->execlists;
+	struct execlist_port *port = execlists->port;
+	struct gvt_shared_page *shared_page = engine->i915->vgpu.shared_page;
+	const unsigned int num_ports = execlists_num_ports(execlists);
+	/* Sized by the port array capacity rather than a bare literal. */
+	u64 descs[EXECLIST_MAX_PORTS];
+	unsigned int n;
+
+	/* descs[] is indexed by port number below; never run past its end. */
+	GEM_BUG_ON(num_ports > ARRAY_SIZE(descs));
+
+	for (n = 0; n < num_ports; n++) {
+		struct i915_request *rq;
+		unsigned int count = 0;
+
+		descs[n] = 0;
+		rq = port_unpack(&port[n], &count);
+		/* Only ports whose request has not been counted yet are sent. */
+		if (rq && count == 0) {
+			port_set(&port[n], port_pack(rq, ++count));
+			descs[n] = execlists_update_context(rq);
+		}
+	}
+
+	spin_lock(&engine->i915->vgpu.shared_page_lock[engine->id]);
+	for (n = 0; n < num_ports; n++)
+		shared_page->pv_elsp[engine->id].descs[n] = descs[n];
+
+	/* Notify only after all descriptors are in the shared page. */
+	writel(VGT_G2V_PV_SUBMISSION, execlists->submit_reg);
+	spin_unlock(&engine->i915->vgpu.shared_page_lock[engine->id]);
+}
+
+/* Return the scheduling priority recorded in the request's sched attributes. */
+static inline int rq_prio(const struct i915_request *rq)
+{
+	const int prio = rq->sched.attr.priority;
+
+	return prio;
+}
+
+/*
+ * Return the priority of the request held in @port with the
+ * __NO_PREEMPTION bits OR-ed in.
+ */
+static inline int port_prio(const struct execlist_port *port)
+{
+	const struct i915_request *rq = port_request(port);
+
+	return rq_prio(rq) | __NO_PREEMPTION;
+}
+
+/*
+ * Pull runnable requests off the priority queue and submit them.
+ *
+ * Requests are consumed in queue order for as long as they share one
+ * hw_context; the first request belonging to a different context stops the
+ * walk and stays queued.  Every consumed request goes through
+ * __i915_request_submit(); the last one is referenced, placed in the first
+ * port and passed on with pv_submit().  queue_priority_hint is updated to
+ * the priority of the first priolist left in the queue, or INT_MIN when the
+ * queue was drained.
+ *
+ * Caller must hold engine->timeline.lock.
+ */
+static void pv_dequeue(struct intel_engine_cs *engine)
+{
+	struct intel_engine_execlists * const execlists = &engine->execlists;
+	struct execlist_port * const first_port = execlists->port;
+	struct i915_request *tail_rq = NULL;
+	struct rb_node *node;
+
+	lockdep_assert_held(&engine->timeline.lock);
+
+	while ((node = rb_first_cached(&execlists->queue))) {
+		struct i915_priolist *plist = to_priolist(node);
+		struct i915_request *rq, *next;
+		int idx;
+
+		priolist_for_each_request_consume(rq, next, plist, idx) {
+			/* A change of context ends this submission. */
+			if (tail_rq && rq->hw_context != tail_rq->hw_context)
+				goto done;
+
+			list_del_init(&rq->sched.link);
+
+			__i915_request_submit(rq);
+			trace_i915_request_in(rq,
+					      port_index(first_port, execlists));
+
+			tail_rq = rq;
+		}
+
+		/* This priority level is empty now; drop it from the tree. */
+		rb_erase_cached(&plist->node, &execlists->queue);
+		i915_priolist_free(plist);
+	}
+done:
+	/* node is non-NULL only when the walk stopped early via the goto. */
+	if (node)
+		execlists->queue_priority_hint = to_priolist(node)->priority;
+	else
+		execlists->queue_priority_hint = INT_MIN;
+
+	if (tail_rq) {
+		port_set(first_port, i915_request_get(tail_rq));
+		pv_submit(engine);
+		execlists_user_begin(execlists, execlists->port);
+	}
+
+	/* We must always keep the beast fed if we have work piled up */
+	GEM_BUG_ON(port_isset(execlists->port) &&
+		   !execlists_is_active(execlists, EXECLISTS_ACTIVE_USER));
+	GEM_BUG_ON(rb_first_cached(&execlists->queue) &&
+		   !port_isset(execlists->port));
+}
+
+/*
+ * Submission tasklet for the PV backend.
+ *
+ * @data is the engine pointer cast to unsigned long.  With
+ * engine->timeline.lock held (interrupts saved and disabled) it releases
+ * the reference on every completed request at the head of the ports,
+ * advancing through them with execlists_port_complete(), and then calls
+ * pv_dequeue() when the ports have run dry or no request is left in flight.
+ */
+static void vgpu_pv_submission_tasklet(unsigned long data)
+{
+	struct intel_engine_cs * const engine = (struct intel_engine_cs *)data;
+	struct intel_engine_execlists * const execlists = &engine->execlists;
+	struct execlist_port *port = execlists->port;
+	struct i915_request *rq;
+	unsigned long irqflags;
+	bool drained = false;
+
+	spin_lock_irqsave(&engine->timeline.lock, irqflags);
+
+	rq = port_request(port);
+	while (rq && i915_request_completed(rq)) {
+		trace_i915_request_out(rq);
+		i915_request_put(rq);
+
+		port = execlists_port_complete(execlists, port);
+		if (!port_isset(port)) {
+			/* Nothing queued behind the request just completed. */
+			execlists_user_end(execlists);
+			drained = true;
+			rq = NULL;
+			break;
+		}
+
+		/* Another request is already in the ports; look at it next. */
+		drained = false;
+		execlists_user_begin(execlists, port);
+		rq = port_request(port);
+	}
+
+	if (drained || !rq)
+		pv_dequeue(engine);
+
+	spin_unlock_irqrestore(&engine->timeline.lock, irqflags);
+}
+
+/*
+ * engine->park hook for PV submission: unpin the breadcrumb interrupt first,
+ * then clear I915_ENGINE_NEEDS_BREADCRUMB_TASKLET from the engine flags.
+ */
+static void vgpu_pv_submission_park(struct intel_engine_cs *engine)
+{
+	intel_engine_unpin_breadcrumbs_irq(engine);
+
+	engine->flags = engine->flags & ~I915_ENGINE_NEEDS_BREADCRUMB_TASKLET;
+}
+
+/*
+ * engine->unpark hook for PV submission: set
+ * I915_ENGINE_NEEDS_BREADCRUMB_TASKLET in the engine flags, then pin the
+ * breadcrumb interrupt (the reverse order of the park hook).
+ */
+static void vgpu_pv_submission_unpark(struct intel_engine_cs *engine)
+{
+	engine->flags = engine->flags | I915_ENGINE_NEEDS_BREADCRUMB_TASKLET;
+
+	intel_engine_pin_breadcrumbs_irq(engine);
+}
+
+/*
+ * Switch @engine over to the PV submission backend.
+ *
+ * Only the submission tasklet and the park/unpark hooks are replaced, and
+ * I915_ENGINE_HAS_PREEMPTION is cleared from the engine flags.  Everything
+ * else that was installed on the engine before this call is left untouched.
+ */
+void vgpu_set_pv_submission(struct intel_engine_cs *engine)
+{
+	/*
+	 * The following execlists callbacks are inherited as they are and
+	 * deliberately not overridden here:
+	 *
+	 * engine->submit_request = execlists_submit_request;
+	 * engine->cancel_requests = execlists_cancel_requests;
+	 * engine->schedule = execlists_schedule;
+	 *
+	 * Only the actual submission backend changes, so that work is handed
+	 * to GVT through the PV notification message.
+	 */
+
+	/* The PV tasklet does not advertise preemption. */
+	engine->flags &= ~I915_ENGINE_HAS_PREEMPTION;
+
+	engine->park = vgpu_pv_submission_park;
+	engine->unpark = vgpu_pv_submission_unpark;
+
+	engine->execlists.tasklet.func = vgpu_pv_submission_tasklet;
+}
This is a performance optimization: it overrides the actual submission backend in order to eliminate execlists CSB processing and to reduce the number of MMIO traps for workload submission without a context switch interrupt, by talking to GVT via the PV submission notification mechanism between the guest and GVT.

Use PV_SUBMISSION to control this level of PV optimization.

v0: RFC
v1: rebase
v2: added pv ops for pv context submission. To maximize code reuse, introduced 2 more ops (submit_ports & preempt_context) instead of 1 op (set_default_submission) in the engine structure. PV versions of submit_ports and preempt_context implemented.
v3:
1. to reduce code duplication further, refactored the code and replaced the 2 ops "submit_ports & preempt_context" from v2 with 1 op "write_desc" in the engine structure. PV version of write_desc implemented.
2. added VGT_G2V_ELSP_SUBMIT for g2v pv notification.
v4: implemented the pv elsp submission tasklet as the backend workload submission by talking to GVT with the PV notification mechanism and renamed VGT_G2V_ELSP_SUBMIT to VGT_G2V_PV_SUBMISSION.
v5: addressed v4 comments from Chris, intel_pv_submission.c added.

Signed-off-by: Xiaolin Zhang <xiaolin.zhang@intel.com>
---
 drivers/gpu/drm/i915/Makefile              |   2 +-
 drivers/gpu/drm/i915/gt/intel_lrc.c        |   8 +-
 drivers/gpu/drm/i915/i915_pvinfo.h         |   1 +
 drivers/gpu/drm/i915/i915_vgpu.c           |   8 +-
 drivers/gpu/drm/i915/i915_vgpu.h           |   3 +
 drivers/gpu/drm/i915/intel_pv_submission.c | 177 +++++++++++++++++++++++++++++
 6 files changed, 195 insertions(+), 4 deletions(-)
 create mode 100644 drivers/gpu/drm/i915/intel_pv_submission.c