[06/40] vhost: make svq work with gpa without iova translation

Message ID 1701970793-6865-7-git-send-email-si-wei.liu@oracle.com (mailing list archive)
State New, archived
Series vdpa-net: improve migration downtime through descriptor ASID and persistent IOTLB

Commit Message

Si-Wei Liu Dec. 7, 2023, 5:39 p.m. UTC
Make vhost_svq_vring_write_descs able to work with GPA directly,
without going through the IOVA tree for translation. This will be
needed in the next few patches, where the SVQ gets a dedicated
address space to host its virtqueues. With a dedicated or
isolated address space for SVQ descriptors, the IOVA is exactly
the same as the guest's GPA space, so there is no longer any need
to translate QEMU's VA to IOVA via the IOVA tree.

Signed-off-by: Si-Wei Liu <si-wei.liu@oracle.com>
---
 hw/virtio/vhost-shadow-virtqueue.c | 35 +++++++++++++++++++++++------------
 1 file changed, 23 insertions(+), 12 deletions(-)

Comments

Eugenio Perez Martin Dec. 11, 2023, 11:17 a.m. UTC | #1
On Thu, Dec 7, 2023 at 7:50 PM Si-Wei Liu <si-wei.liu@oracle.com> wrote:
>
> Make vhost_svq_vring_write_descs able to work with GPA directly,
> without going through the IOVA tree for translation. This will be
> needed in the next few patches, where the SVQ gets a dedicated
> address space to host its virtqueues. With a dedicated or
> isolated address space for SVQ descriptors, the IOVA is exactly
> the same as the guest's GPA space, so there is no longer any need
> to translate QEMU's VA to IOVA via the IOVA tree.
>
> Signed-off-by: Si-Wei Liu <si-wei.liu@oracle.com>
> ---
>  hw/virtio/vhost-shadow-virtqueue.c | 35 +++++++++++++++++++++++------------
>  1 file changed, 23 insertions(+), 12 deletions(-)
>
> diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c
> index fc5f408..97ccd45 100644
> --- a/hw/virtio/vhost-shadow-virtqueue.c
> +++ b/hw/virtio/vhost-shadow-virtqueue.c
> @@ -136,8 +136,8 @@ static bool vhost_svq_translate_addr(const VhostShadowVirtqueue *svq,
>   * Return true if success, false otherwise and print error.
>   */
>  static bool vhost_svq_vring_write_descs(VhostShadowVirtqueue *svq, hwaddr *sg,
> -                                        const struct iovec *iovec, size_t num,
> -                                        bool more_descs, bool write)
> +                                        const struct iovec *iovec, hwaddr *addr,
> +                                        size_t num, bool more_descs, bool write)
>  {
>      uint16_t i = svq->free_head, last = svq->free_head;
>      unsigned n;
> @@ -149,8 +149,15 @@ static bool vhost_svq_vring_write_descs(VhostShadowVirtqueue *svq, hwaddr *sg,
>          return true;
>      }
>
> -    ok = vhost_svq_translate_addr(svq, sg, iovec, num);
> -    if (unlikely(!ok)) {
> +    if (svq->iova_tree) {
> +        ok = vhost_svq_translate_addr(svq, sg, iovec, num);
> +        if (unlikely(!ok)) {
> +            return false;
> +        }
> +    } else if (!addr) {
> +        qemu_log_mask(LOG_GUEST_ERROR,
> +                      "No translation found for vaddr 0x%p\n",
> +                      iovec[0].iov_base);
>          return false;
>      }
>
> @@ -161,7 +168,7 @@ static bool vhost_svq_vring_write_descs(VhostShadowVirtqueue *svq, hwaddr *sg,
>          } else {
>              descs[i].flags = flags;
>          }
> -        descs[i].addr = cpu_to_le64(sg[n]);
> +        descs[i].addr = cpu_to_le64(svq->iova_tree ? sg[n] : addr[n]);
>          descs[i].len = cpu_to_le32(iovec[n].iov_len);
>
>          last = i;
> @@ -173,9 +180,10 @@ static bool vhost_svq_vring_write_descs(VhostShadowVirtqueue *svq, hwaddr *sg,
>  }
>
>  static bool vhost_svq_add_split(VhostShadowVirtqueue *svq,
> -                                const struct iovec *out_sg, size_t out_num,
> -                                const struct iovec *in_sg, size_t in_num,
> -                                unsigned *head)
> +                                const struct iovec *out_sg, hwaddr *out_addr,
> +                                size_t out_num,
> +                                const struct iovec *in_sg, hwaddr *in_addr,
> +                                size_t in_num, unsigned *head)
>  {
>      unsigned avail_idx;
>      vring_avail_t *avail = svq->vring.avail;
> @@ -191,13 +199,14 @@ static bool vhost_svq_add_split(VhostShadowVirtqueue *svq,
>          return false;
>      }
>
> -    ok = vhost_svq_vring_write_descs(svq, sgs, out_sg, out_num, in_num > 0,
> -                                     false);
> +    ok = vhost_svq_vring_write_descs(svq, sgs, out_sg, out_addr, out_num,
> +                                     in_num > 0, false);
>      if (unlikely(!ok)) {
>          return false;
>      }
>
> -    ok = vhost_svq_vring_write_descs(svq, sgs, in_sg, in_num, false, true);
> +    ok = vhost_svq_vring_write_descs(svq, sgs, in_sg, in_addr, in_num,
> +                                     false, true);
>      if (unlikely(!ok)) {
>          return false;
>      }
> @@ -258,7 +267,9 @@ int vhost_svq_add(VhostShadowVirtqueue *svq, const struct iovec *out_sg,
>          return -ENOSPC;
>      }
>
> -    ok = vhost_svq_add_split(svq, out_sg, out_num, in_sg, in_num, &qemu_head);
> +    ok = vhost_svq_add_split(svq, out_sg, elem ? elem->out_addr : NULL,
> +                             out_num, in_sg, elem ? elem->in_addr : NULL,
> +                             in_num, &qemu_head);

This function uses in_sg and out_sg intentionally, as CVQ buffers
do not use VirtQueueElement addressing. You can check the calls in
net/vhost-vdpa.c for more info. The right place for this change is
actually vhost_svq_add_element, and I suggest checking for
svq->iova_tree there, as in the rest of the patch.
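
For illustration only, a rough sketch of what that relocation could
look like; it assumes vhost_svq_add() would grow matching GPA
parameters, which is not what this patch does:

/*
 * Illustrative sketch (not part of the patch): keep the generic
 * vhost_svq_add() path untouched and decide about GPA usage in
 * vhost_svq_add_element(), the only caller that owns a
 * VirtQueueElement.  The extra addr parameters on vhost_svq_add()
 * are assumed here purely for the example.
 */
static int vhost_svq_add_element(VhostShadowVirtqueue *svq,
                                 VirtQueueElement *elem)
{
    /*
     * Only hand over the element's GPA arrays when SVQ has no
     * iova_tree, i.e. when its vrings live directly in guest
     * physical address space.  CVQ buffers, which are submitted
     * with elem == NULL from net/vhost-vdpa.c, never take this path.
     */
    hwaddr *out_addr = svq->iova_tree ? NULL : elem->out_addr;
    hwaddr *in_addr = svq->iova_tree ? NULL : elem->in_addr;

    return vhost_svq_add(svq, elem->out_sg, out_addr, elem->out_num,
                         elem->in_sg, in_addr, elem->in_num, elem);
}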

Apart from that,

Reviewed-by: Eugenio Pérez <eperezma@redhat.com>

Thanks!

>      if (unlikely(!ok)) {
>          return -EINVAL;
>      }
> --
> 1.8.3.1
>
Jason Wang Jan. 11, 2024, 7:31 a.m. UTC | #2
On Fri, Dec 8, 2023 at 2:50 AM Si-Wei Liu <si-wei.liu@oracle.com> wrote:
>
> Make vhost_svq_vring_write_descs able to work with GPA directly,
> without going through the IOVA tree for translation. This will be
> needed in the next few patches, where the SVQ gets a dedicated
> address space to host its virtqueues. With a dedicated or
> isolated address space for SVQ descriptors, the IOVA is exactly
> the same as the guest's GPA space, so there is no longer any need
> to translate QEMU's VA to IOVA via the IOVA tree.
>
> Signed-off-by: Si-Wei Liu <si-wei.liu@oracle.com>
> ---
>  hw/virtio/vhost-shadow-virtqueue.c | 35 +++++++++++++++++++++++------------
>  1 file changed, 23 insertions(+), 12 deletions(-)
>
> diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c
> index fc5f408..97ccd45 100644
> --- a/hw/virtio/vhost-shadow-virtqueue.c
> +++ b/hw/virtio/vhost-shadow-virtqueue.c
> @@ -136,8 +136,8 @@ static bool vhost_svq_translate_addr(const VhostShadowVirtqueue *svq,
>   * Return true if success, false otherwise and print error.
>   */
>  static bool vhost_svq_vring_write_descs(VhostShadowVirtqueue *svq, hwaddr *sg,
> -                                        const struct iovec *iovec, size_t num,
> -                                        bool more_descs, bool write)
> +                                        const struct iovec *iovec, hwaddr *addr,
> +                                        size_t num, bool more_descs, bool write)
>  {
>      uint16_t i = svq->free_head, last = svq->free_head;
>      unsigned n;
> @@ -149,8 +149,15 @@ static bool vhost_svq_vring_write_descs(VhostShadowVirtqueue *svq, hwaddr *sg,
>          return true;
>      }
>
> -    ok = vhost_svq_translate_addr(svq, sg, iovec, num);
> -    if (unlikely(!ok)) {
> +    if (svq->iova_tree) {
> +        ok = vhost_svq_translate_addr(svq, sg, iovec, num);
> +        if (unlikely(!ok)) {
> +            return false;
> +        }

So the idea is that when the shadow virtqueue can work directly with
GPA, there won't be an iova_tree here?

If yes, I think we need a comment around iova_tree, or here, to explain this.
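
For instance (illustrative wording only, assuming the field keeps
living in VhostShadowVirtqueue as it does today), the comment could
read something like:

    /*
     * IOVA mapping used by the Shadow Virtqueue.  NULL when the SVQ
     * vrings are mapped directly into the guest's GPA space; in that
     * case guest physical addresses are used as IOVA and no
     * translation is needed.
     */
    VhostIOVATree *iova_tree;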

> +    } else if (!addr) {
> +        qemu_log_mask(LOG_GUEST_ERROR,
> +                      "No translation found for vaddr 0x%p\n",
> +                      iovec[0].iov_base);
>          return false;
>      }
>
> @@ -161,7 +168,7 @@ static bool vhost_svq_vring_write_descs(VhostShadowVirtqueue *svq, hwaddr *sg,
>          } else {
>              descs[i].flags = flags;
>          }
> -        descs[i].addr = cpu_to_le64(sg[n]);
> +        descs[i].addr = cpu_to_le64(svq->iova_tree ? sg[n] : addr[n]);

Or maybe add a helper and do the switch there, with the comments.
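
A rough sketch of such a helper (name and placement are made up for
illustration, not taken from the patch):

/*
 * Illustrative helper: pick the descriptor address either from the
 * IOVA tree translation (sg[]) or, when SVQ runs directly on guest
 * physical addresses, from the caller-provided GPA array (addr[]).
 */
static hwaddr vhost_svq_desc_addr(const VhostShadowVirtqueue *svq,
                                  const hwaddr *sg, const hwaddr *addr,
                                  unsigned n)
{
    return svq->iova_tree ? sg[n] : addr[n];
}

so the assignment above would become:

    descs[i].addr = cpu_to_le64(vhost_svq_desc_addr(svq, sg, addr, n));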

Thanks

>          descs[i].len = cpu_to_le32(iovec[n].iov_len);
>
>          last = i;
> @@ -173,9 +180,10 @@ static bool vhost_svq_vring_write_descs(VhostShadowVirtqueue *svq, hwaddr *sg,
>  }
>
>  static bool vhost_svq_add_split(VhostShadowVirtqueue *svq,
> -                                const struct iovec *out_sg, size_t out_num,
> -                                const struct iovec *in_sg, size_t in_num,
> -                                unsigned *head)
> +                                const struct iovec *out_sg, hwaddr *out_addr,
> +                                size_t out_num,
> +                                const struct iovec *in_sg, hwaddr *in_addr,
> +                                size_t in_num, unsigned *head)
>  {
>      unsigned avail_idx;
>      vring_avail_t *avail = svq->vring.avail;
> @@ -191,13 +199,14 @@ static bool vhost_svq_add_split(VhostShadowVirtqueue *svq,
>          return false;
>      }
>
> -    ok = vhost_svq_vring_write_descs(svq, sgs, out_sg, out_num, in_num > 0,
> -                                     false);
> +    ok = vhost_svq_vring_write_descs(svq, sgs, out_sg, out_addr, out_num,
> +                                     in_num > 0, false);
>      if (unlikely(!ok)) {
>          return false;
>      }
>
> -    ok = vhost_svq_vring_write_descs(svq, sgs, in_sg, in_num, false, true);
> +    ok = vhost_svq_vring_write_descs(svq, sgs, in_sg, in_addr, in_num,
> +                                     false, true);
>      if (unlikely(!ok)) {
>          return false;
>      }
> @@ -258,7 +267,9 @@ int vhost_svq_add(VhostShadowVirtqueue *svq, const struct iovec *out_sg,
>          return -ENOSPC;
>      }
>
> -    ok = vhost_svq_add_split(svq, out_sg, out_num, in_sg, in_num, &qemu_head);
> +    ok = vhost_svq_add_split(svq, out_sg, elem ? elem->out_addr : NULL,
> +                             out_num, in_sg, elem ? elem->in_addr : NULL,
> +                             in_num, &qemu_head);
>      if (unlikely(!ok)) {
>          return -EINVAL;
>      }
> --
> 1.8.3.1
>

Patch

diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c
index fc5f408..97ccd45 100644
--- a/hw/virtio/vhost-shadow-virtqueue.c
+++ b/hw/virtio/vhost-shadow-virtqueue.c
@@ -136,8 +136,8 @@  static bool vhost_svq_translate_addr(const VhostShadowVirtqueue *svq,
  * Return true if success, false otherwise and print error.
  */
 static bool vhost_svq_vring_write_descs(VhostShadowVirtqueue *svq, hwaddr *sg,
-                                        const struct iovec *iovec, size_t num,
-                                        bool more_descs, bool write)
+                                        const struct iovec *iovec, hwaddr *addr,
+                                        size_t num, bool more_descs, bool write)
 {
     uint16_t i = svq->free_head, last = svq->free_head;
     unsigned n;
@@ -149,8 +149,15 @@  static bool vhost_svq_vring_write_descs(VhostShadowVirtqueue *svq, hwaddr *sg,
         return true;
     }
 
-    ok = vhost_svq_translate_addr(svq, sg, iovec, num);
-    if (unlikely(!ok)) {
+    if (svq->iova_tree) {
+        ok = vhost_svq_translate_addr(svq, sg, iovec, num);
+        if (unlikely(!ok)) {
+            return false;
+        }
+    } else if (!addr) {
+        qemu_log_mask(LOG_GUEST_ERROR,
+                      "No translation found for vaddr 0x%p\n",
+                      iovec[0].iov_base);
         return false;
     }
 
@@ -161,7 +168,7 @@  static bool vhost_svq_vring_write_descs(VhostShadowVirtqueue *svq, hwaddr *sg,
         } else {
             descs[i].flags = flags;
         }
-        descs[i].addr = cpu_to_le64(sg[n]);
+        descs[i].addr = cpu_to_le64(svq->iova_tree ? sg[n] : addr[n]);
         descs[i].len = cpu_to_le32(iovec[n].iov_len);
 
         last = i;
@@ -173,9 +180,10 @@  static bool vhost_svq_vring_write_descs(VhostShadowVirtqueue *svq, hwaddr *sg,
 }
 
 static bool vhost_svq_add_split(VhostShadowVirtqueue *svq,
-                                const struct iovec *out_sg, size_t out_num,
-                                const struct iovec *in_sg, size_t in_num,
-                                unsigned *head)
+                                const struct iovec *out_sg, hwaddr *out_addr,
+                                size_t out_num,
+                                const struct iovec *in_sg, hwaddr *in_addr,
+                                size_t in_num, unsigned *head)
 {
     unsigned avail_idx;
     vring_avail_t *avail = svq->vring.avail;
@@ -191,13 +199,14 @@  static bool vhost_svq_add_split(VhostShadowVirtqueue *svq,
         return false;
     }
 
-    ok = vhost_svq_vring_write_descs(svq, sgs, out_sg, out_num, in_num > 0,
-                                     false);
+    ok = vhost_svq_vring_write_descs(svq, sgs, out_sg, out_addr, out_num,
+                                     in_num > 0, false);
     if (unlikely(!ok)) {
         return false;
     }
 
-    ok = vhost_svq_vring_write_descs(svq, sgs, in_sg, in_num, false, true);
+    ok = vhost_svq_vring_write_descs(svq, sgs, in_sg, in_addr, in_num,
+                                     false, true);
     if (unlikely(!ok)) {
         return false;
     }
@@ -258,7 +267,9 @@  int vhost_svq_add(VhostShadowVirtqueue *svq, const struct iovec *out_sg,
         return -ENOSPC;
     }
 
-    ok = vhost_svq_add_split(svq, out_sg, out_num, in_sg, in_num, &qemu_head);
+    ok = vhost_svq_add_split(svq, out_sg, elem ? elem->out_addr : NULL,
+                             out_num, in_sg, elem ? elem->in_addr : NULL,
+                             in_num, &qemu_head);
     if (unlikely(!ok)) {
         return -EINVAL;
     }