
[v10,3/9] drm/i915: vgpu pv command buffer support

Message ID 1568699301-2799-4-git-send-email-xiaolin.zhang@intel.com (mailing list archive)
State New, archived
Series i915 vgpu PV to improve vgpu performance

Commit Message

Xiaolin Zhang Sept. 17, 2019, 5:48 a.m. UTC
Based on the shared memory set up between the guest and GVT, this patch
introduces a simple PV command buffer ring used to perform guest-to-GVT
single-direction communication.
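
From the guest driver's point of view, the intended usage is roughly the
sketch below (illustration only, not part of this patch; PV_ACTION_EXAMPLE
is a placeholder action code, real action codes are introduced by later
patches in this series):

static int send_pv_action_example(struct drm_i915_private *dev_priv,
				  u64 start, u64 length)
{
	u32 action[] = {
		PV_ACTION_EXAMPLE,	/* action code, packed into header DW0 */
		lower_32_bits(start),	/* request specific data */
		upper_32_bits(start),
		lower_32_bits(length),
		upper_32_bits(length),
	};

	/* len is in dwords; the fence dword is appended internally */
	return intel_vgpu_pv_send(dev_priv, action, ARRAY_SIZE(action));
}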

v1: initial support, added to address comments on the i915 PV v6 patch set.

Signed-off-by: Xiaolin Zhang <xiaolin.zhang@intel.com>
---
 drivers/gpu/drm/i915/i915_pvinfo.h |   1 +
 drivers/gpu/drm/i915/i915_vgpu.c   | 193 +++++++++++++++++++++++++++++++++++++
 drivers/gpu/drm/i915/i915_vgpu.h   |  66 +++++++++++++
 3 files changed, 260 insertions(+)

Comments

Chris Wilson Sept. 17, 2019, 9:45 a.m. UTC | #1
Quoting Xiaolin Zhang (2019-09-17 06:48:14)
> Based on the shared memory set up between the guest and GVT, this patch
> introduces a simple PV command buffer ring used to perform guest-to-GVT
> single-direction communication.
> 
> v1: initial support, added to address comments on the i915 PV v6 patch set.
> 
> Signed-off-by: Xiaolin Zhang <xiaolin.zhang@intel.com>
> ---
>  drivers/gpu/drm/i915/i915_pvinfo.h |   1 +
>  drivers/gpu/drm/i915/i915_vgpu.c   | 193 +++++++++++++++++++++++++++++++++++++
>  drivers/gpu/drm/i915/i915_vgpu.h   |  66 +++++++++++++
>  3 files changed, 260 insertions(+)
> 
> diff --git a/drivers/gpu/drm/i915/i915_pvinfo.h b/drivers/gpu/drm/i915/i915_pvinfo.h
> index 3c63603..db9eebb 100644
> --- a/drivers/gpu/drm/i915/i915_pvinfo.h
> +++ b/drivers/gpu/drm/i915/i915_pvinfo.h
> @@ -49,6 +49,7 @@ enum vgt_g2v_type {
>         VGT_G2V_EXECLIST_CONTEXT_CREATE,
>         VGT_G2V_EXECLIST_CONTEXT_DESTROY,
>         VGT_G2V_SHARED_PAGE_SETUP,
> +       VGT_G2V_PV_SEND_TRIGGER,
>         VGT_G2V_MAX,
>  };
>  
> diff --git a/drivers/gpu/drm/i915/i915_vgpu.c b/drivers/gpu/drm/i915/i915_vgpu.c
> index 68fecfd..6e29a52 100644
> --- a/drivers/gpu/drm/i915/i915_vgpu.c
> +++ b/drivers/gpu/drm/i915/i915_vgpu.c
> @@ -322,6 +322,187 @@ int intel_vgt_balloon(struct i915_ggtt *ggtt)
>   * i915 vgpu PV support for Linux
>   */
>  
> +/**
> + * wait_for_desc_update - Wait for the command buffer descriptor update.
> + * @desc:      buffer descriptor
> + * @fence:     response fence
> + * @status:    placeholder for status
> + *
> + * GVT will update command buffer descriptor with new fence and status
> + * after processing the command identified by the fence. Wait for
> + * specified fence and then read from the descriptor status of the
> + * command.
> + *
> + * Return:
> + * *   0 response received (status is valid)
> + * *   -ETIMEDOUT no response within hardcoded timeout
> + * *   -EPROTO no response, CT buffer is in error
> + */
> +static int wait_for_desc_update(struct vgpu_pv_ct_buffer_desc *desc,
> +               u32 fence, u32 *status)
> +{
> +       int err;
> +
> +#define done (READ_ONCE(desc->fence) == fence)
> +       err = wait_for_us(done, 5);
> +       if (err)
> +               err = wait_for(done, 10);
> +#undef done
> +
> +       if (unlikely(err)) {
> +               DRM_ERROR("CT: fence %u failed; reported fence=%u\n",
> +                               fence, desc->fence);
> +       }
> +
> +       *status = desc->status;
> +       return err;
> +}
> +
> +/**
> + * DOC: CTB Guest to GVT request
> + *
> + * Format of the CTB Guest to GVT request message is as follows::
> + *
> + *      +------------+---------+---------+---------+---------+
> + *      |   msg[0]   |   [1]   |   [2]   |   ...   |  [n-1]  |
> + *      +------------+---------+---------+---------+---------+
> + *      |   MESSAGE  |       MESSAGE PAYLOAD                 |
> + *      +   HEADER   +---------+---------+---------+---------+
> + *      |            |    0    |    1    |   ...   |    n    |
> + *      +============+=========+=========+=========+=========+
> + *      |  len >= 1  |  FENCE  |     request specific data   |
> + *      +------+-----+---------+---------+---------+---------+
> + *
> + *                   ^-----------------len-------------------^
> + */
> +static int pv_command_buffer_write(struct i915_virtual_gpu_pv *pv,
> +               const u32 *action, u32 len /* in dwords */, u32 fence)
> +{
> +       struct vgpu_pv_ct_buffer_desc *desc = pv->ctb.desc;
> +       u32 head = desc->head / 4;      /* in dwords */
> +       u32 tail = desc->tail / 4;      /* in dwords */
> +       u32 size = desc->size / 4;      /* in dwords */
> +       u32 used;                       /* in dwords */
> +       u32 header;
> +       u32 *cmds = pv->ctb.cmds;
> +       unsigned int i;
> +
> +       GEM_BUG_ON(desc->size % 4);
> +       GEM_BUG_ON(desc->head % 4);
> +       GEM_BUG_ON(desc->tail % 4);
> +       GEM_BUG_ON(tail >= size);
> +
> +       /*
> +        * tail == head condition indicates empty.
> +        */
> +       if (tail < head)
> +               used = (size - head) + tail;
> +       else
> +               used = tail - head;
> +
> +       /* make sure there is a space including extra dw for the fence */
> +       if (unlikely(used + len + 1 >= size))
> +               return -ENOSPC;
> +
> +       /*
> +        * Write the message. The format is the following:
> +        * DW0: header (including action code)
> +        * DW1: fence
> +        * DW2+: action data
> +        */
> +       header = (len << PV_CT_MSG_LEN_SHIFT) |
> +                (PV_CT_MSG_WRITE_FENCE_TO_DESC) |
> +                (action[0] << PV_CT_MSG_ACTION_SHIFT);
> +
> +       cmds[tail] = header;
> +       tail = (tail + 1) % size;
> +
> +       cmds[tail] = fence;
> +       tail = (tail + 1) % size;
> +
> +       for (i = 1; i < len; i++) {
> +               cmds[tail] = action[i];
> +               tail = (tail + 1) % size;
> +       }
> +
> +       /* now update desc tail (back in bytes) */

wmb();

Answers on a postcard as to why this is required and where the
complementary rmb/mb should be.

> +       desc->tail = tail * 4;
> +       GEM_BUG_ON(desc->tail > desc->size);
> +
> +       return 0;
> +}
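
For reference, the ordering being asked about is the usual producer/consumer
barrier pairing over the shared page (a sketch of the idea only, not the fix
eventually applied): the guest must make the command dwords visible before it
publishes the new tail, and the GVT-side reader needs the matching read
barrier between loading the tail and loading the commands it covers.

	/* guest (producer) side, sketch: */
	cmds[tail] = header;		/* ... write all message dwords ... */
	wmb();				/* commands visible before the tail update */
	desc->tail = tail * 4;		/* publish the new tail to GVT */

	/* GVT (consumer) side, sketch: */
	tail = READ_ONCE(desc->tail);
	rmb();				/* read the tail before the commands it covers */
	/* ... consume cmds[head..tail) and advance desc->head ... */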

Patch

diff --git a/drivers/gpu/drm/i915/i915_pvinfo.h b/drivers/gpu/drm/i915/i915_pvinfo.h
index 3c63603..db9eebb 100644
--- a/drivers/gpu/drm/i915/i915_pvinfo.h
+++ b/drivers/gpu/drm/i915/i915_pvinfo.h
@@ -49,6 +49,7 @@  enum vgt_g2v_type {
 	VGT_G2V_EXECLIST_CONTEXT_CREATE,
 	VGT_G2V_EXECLIST_CONTEXT_DESTROY,
 	VGT_G2V_SHARED_PAGE_SETUP,
+	VGT_G2V_PV_SEND_TRIGGER,
 	VGT_G2V_MAX,
 };
 
diff --git a/drivers/gpu/drm/i915/i915_vgpu.c b/drivers/gpu/drm/i915/i915_vgpu.c
index 68fecfd..6e29a52 100644
--- a/drivers/gpu/drm/i915/i915_vgpu.c
+++ b/drivers/gpu/drm/i915/i915_vgpu.c
@@ -322,6 +322,187 @@  int intel_vgt_balloon(struct i915_ggtt *ggtt)
  * i915 vgpu PV support for Linux
  */
 
+/**
+ * wait_for_desc_update - Wait for the command buffer descriptor update.
+ * @desc:	buffer descriptor
+ * @fence:	response fence
+ * @status:	placeholder for status
+ *
+ * GVT will update command buffer descriptor with new fence and status
+ * after processing the command identified by the fence. Wait for
+ * specified fence and then read from the descriptor status of the
+ * command.
+ *
+ * Return:
+ * *	0 response received (status is valid)
+ * *	-ETIMEDOUT no response within hardcoded timeout
+ * *	-EPROTO no response, CT buffer is in error
+ */
+static int wait_for_desc_update(struct vgpu_pv_ct_buffer_desc *desc,
+		u32 fence, u32 *status)
+{
+	int err;
+
+#define done (READ_ONCE(desc->fence) == fence)
+	err = wait_for_us(done, 5);
+	if (err)
+		err = wait_for(done, 10);
+#undef done
+
+	if (unlikely(err)) {
+		DRM_ERROR("CT: fence %u failed; reported fence=%u\n",
+				fence, desc->fence);
+	}
+
+	*status = desc->status;
+	return err;
+}
+
+/**
+ * DOC: CTB Guest to GVT request
+ *
+ * Format of the CTB Guest to GVT request message is as follows::
+ *
+ *      +------------+---------+---------+---------+---------+
+ *      |   msg[0]   |   [1]   |   [2]   |   ...   |  [n-1]  |
+ *      +------------+---------+---------+---------+---------+
+ *      |   MESSAGE  |       MESSAGE PAYLOAD                 |
+ *      +   HEADER   +---------+---------+---------+---------+
+ *      |            |    0    |    1    |   ...   |    n    |
+ *      +============+=========+=========+=========+=========+
+ *      |  len >= 1  |  FENCE  |     request specific data   |
+ *      +------+-----+---------+---------+---------+---------+
+ *
+ *                   ^-----------------len-------------------^
+ */
+static int pv_command_buffer_write(struct i915_virtual_gpu_pv *pv,
+		const u32 *action, u32 len /* in dwords */, u32 fence)
+{
+	struct vgpu_pv_ct_buffer_desc *desc = pv->ctb.desc;
+	u32 head = desc->head / 4;	/* in dwords */
+	u32 tail = desc->tail / 4;	/* in dwords */
+	u32 size = desc->size / 4;	/* in dwords */
+	u32 used;			/* in dwords */
+	u32 header;
+	u32 *cmds = pv->ctb.cmds;
+	unsigned int i;
+
+	GEM_BUG_ON(desc->size % 4);
+	GEM_BUG_ON(desc->head % 4);
+	GEM_BUG_ON(desc->tail % 4);
+	GEM_BUG_ON(tail >= size);
+
+	/*
+	 * tail == head condition indicates empty.
+	 */
+	if (tail < head)
+		used = (size - head) + tail;
+	else
+		used = tail - head;
+
+	/* make sure there is a space including extra dw for the fence */
+	if (unlikely(used + len + 1 >= size))
+		return -ENOSPC;
+
+	/*
+	 * Write the message. The format is the following:
+	 * DW0: header (including action code)
+	 * DW1: fence
+	 * DW2+: action data
+	 */
+	header = (len << PV_CT_MSG_LEN_SHIFT) |
+		 (PV_CT_MSG_WRITE_FENCE_TO_DESC) |
+		 (action[0] << PV_CT_MSG_ACTION_SHIFT);
+
+	cmds[tail] = header;
+	tail = (tail + 1) % size;
+
+	cmds[tail] = fence;
+	tail = (tail + 1) % size;
+
+	for (i = 1; i < len; i++) {
+		cmds[tail] = action[i];
+		tail = (tail + 1) % size;
+	}
+
+	/* now update desc tail (back in bytes) */
+	desc->tail = tail * 4;
+	GEM_BUG_ON(desc->tail > desc->size);
+
+	return 0;
+}
+
+static u32 pv_get_next_fence(struct i915_virtual_gpu_pv *pv)
+{
+	/* For now it's trivial */
+	return ++pv->next_fence;
+}
+
+static int pv_send(struct drm_i915_private *dev_priv,
+		const u32 *action, u32 len, u32 *status)
+{
+	struct i915_virtual_gpu *vgpu = &dev_priv->vgpu;
+	struct i915_virtual_gpu_pv *pv = vgpu->pv;
+
+	struct vgpu_pv_ct_buffer_desc *desc = pv->ctb.desc;
+
+	u32 fence;
+	int err;
+
+	GEM_BUG_ON(!pv->enabled);
+	GEM_BUG_ON(!len);
+	GEM_BUG_ON(len & ~PV_CT_MSG_LEN_MASK);
+
+	fence = pv_get_next_fence(pv);
+	err = pv_command_buffer_write(pv, action, len, fence);
+	if (unlikely(err))
+		goto unlink;
+
+	intel_vgpu_pv_notify(dev_priv);
+
+	err = wait_for_desc_update(desc, fence, status);
+	if (unlikely(err))
+		goto unlink;
+
+	if ((*status)) {
+		err = -EIO;
+		goto unlink;
+	}
+
+	err = (*status);
+unlink:
+	return err;
+}
+
+static int intel_vgpu_pv_send_command_buffer(
+		struct drm_i915_private *dev_priv,
+		u32 *action, u32 len)
+{
+	struct i915_virtual_gpu *vgpu = &dev_priv->vgpu;
+
+	u32 status = ~0; /* undefined */
+	int ret;
+
+	mutex_lock(&vgpu->pv->send_mutex);
+
+	ret = pv_send(dev_priv, action, len, &status);
+	if (unlikely(ret < 0)) {
+		DRM_ERROR("PV: send action %#X failed; err=%d status=%#X\n",
+			  action[0], ret, status);
+	} else if (unlikely(ret)) {
+		DRM_ERROR("PV: send action %#x returned %d (%#x)\n",
+				action[0], ret, ret);
+	}
+
+	mutex_unlock(&vgpu->pv->send_mutex);
+	return ret;
+}
+
+static void intel_vgpu_pv_notify_mmio(struct drm_i915_private *dev_priv)
+{
+	I915_WRITE(vgtif_reg(g2v_notify), VGT_G2V_PV_SEND_TRIGGER);
+}
+
 /*
  * shared_page setup for VGPU PV features
  */
@@ -394,6 +575,18 @@  static int intel_vgpu_setup_shared_page(struct drm_i915_private *dev_priv,
 	dev_priv->vgpu.pv = pv;
 	pv->shared_page = base;
 	pv->enabled = true;
+
+	/* setup PV command buffer ptr */
+	pv->ctb.cmds = (void *)base + PV_CMD_OFF;
+	pv->ctb.desc = (void *)base + PV_DESC_OFF;
+	pv->ctb.desc->size = PAGE_SIZE/2;
+	pv->ctb.desc->addr = PV_CMD_OFF;
+
+	/* setup PV command buffer callback */
+	pv->send = intel_vgpu_pv_send_command_buffer;
+	pv->notify = intel_vgpu_pv_notify_mmio;
+	mutex_init(&pv->send_mutex);
+
 	return ret;
 err:
 	__free_page(virt_to_page(base));
diff --git a/drivers/gpu/drm/i915/i915_vgpu.h b/drivers/gpu/drm/i915/i915_vgpu.h
index 401af24..c494beb 100644
--- a/drivers/gpu/drm/i915/i915_vgpu.h
+++ b/drivers/gpu/drm/i915/i915_vgpu.h
@@ -29,6 +29,8 @@ 
 
 #define PV_MAJOR		1
 #define PV_MINOR		0
+#define PV_DESC_OFF		(PAGE_SIZE/4)
+#define PV_CMD_OFF		(PAGE_SIZE/2)
 
 /*
  * A shared page(4KB) between gvt and VM, could be allocated by guest driver
@@ -39,9 +41,60 @@  struct gvt_shared_page {
 	u16 ver_minor;
 };
 
+/*
+ * Definition of the command transport message header (DW0)
+ *
+ * bit[4..0]	message len (in dwords)
+ * bit[7..5]	reserved
+ * bit[8]	write fence to desc
+ * bit[15..9]	reserved
+ * bit[31..16]	action code
+ */
+#define PV_CT_MSG_LEN_SHIFT				0
+#define PV_CT_MSG_LEN_MASK				0x1F
+#define PV_CT_MSG_WRITE_FENCE_TO_DESC	(1 << 8)
+#define PV_CT_MSG_ACTION_SHIFT			16
+#define PV_CT_MSG_ACTION_MASK			0xFFFF
+
+/* PV command transport buffer descriptor */
+struct vgpu_pv_ct_buffer_desc {
+	u32 addr;		/* gfx address */
+	u32 size;		/* size in bytes */
+	u32 head;		/* offset updated by GVT */
+	u32 tail;		/* offset updated by owner */
+
+	u32 fence;		/* fence updated by GVT */
+	u32 status;		/* status updated by GVT */
+} __packed;
+
+/** PV single command transport buffer.
+ *
+ * A single command transport buffer consists of two parts, the header
+ * record (command transport buffer descriptor) and the actual buffer which
+ * holds the commands.
+ *
+ * @desc: pointer to the buffer descriptor
+ * @cmds: pointer to the commands buffer
+ */
+struct vgpu_pv_ct_buffer {
+	struct vgpu_pv_ct_buffer_desc *desc;
+	u32 *cmds;
+};
+
 struct i915_virtual_gpu_pv {
 	struct gvt_shared_page *shared_page;
 	bool enabled;
+
+	/* PV command buffer support */
+	struct vgpu_pv_ct_buffer ctb;
+	u32 next_fence;
+
+	/* To serialize the vgpu PV send actions */
+	struct mutex send_mutex;
+
+	/* VGPU's PV specific send function */
+	int (*send)(struct drm_i915_private *dev_priv, u32 *data, u32 len);
+	void (*notify)(struct drm_i915_private *dev_priv);
 };
 
 void i915_detect_vgpu(struct drm_i915_private *dev_priv);
@@ -67,6 +120,19 @@  struct i915_virtual_gpu_pv {
 	return dev_priv->vgpu.caps & VGT_CAPS_PV;
 }
 
+static inline void
+intel_vgpu_pv_notify(struct drm_i915_private *dev_priv)
+{
+	dev_priv->vgpu.pv->notify(dev_priv);
+}
+
+static inline int
+intel_vgpu_pv_send(struct drm_i915_private *dev_priv,
+		u32 *action, u32 len)
+{
+	return dev_priv->vgpu.pv->send(dev_priv, action, len);
+}
+
 int intel_vgt_balloon(struct i915_ggtt *ggtt);
 void intel_vgt_deballoon(struct i915_ggtt *ggtt);
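
As a quick illustration of the message header layout documented above (a
sketch only, not code from this patch), DW0 of a message decodes with the
masks and shifts defined in this header as:

	u32 dw0 = cmds[head];
	u32 len = (dw0 >> PV_CT_MSG_LEN_SHIFT) & PV_CT_MSG_LEN_MASK;
	u32 action = (dw0 >> PV_CT_MSG_ACTION_SHIFT) & PV_CT_MSG_ACTION_MASK;
	bool write_fence = dw0 & PV_CT_MSG_WRITE_FENCE_TO_DESC;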