diff mbox series

[v14,4/4] vhost-vdpa: Add support for vIOMMU.

Message ID 20230320161959.650841-5-lulu@redhat.com (mailing list archive)
State New, archived
Headers show
Series vhost-vdpa: add support for vIOMMU | expand

Commit Message

Cindy Lu March 20, 2023, 4:19 p.m. UTC
1. vIOMMU support allows vDPA to work in IOMMU mode. This
fixes the security issues of running in no-IOMMU mode.
To support this feature we need to add new functions that handle the
addition and deletion of IOMMU MRs.

Also, since SVQ does not support vIOMMU yet, add a check for IOMMU
in vhost_vdpa_dev_start(): if SVQ and IOMMU are enabled at the same time,
the function returns an error.

2. Skip the iova_max check in vhost_vdpa_listener_skipped_section() when
the MR is an IOMMU, and move this check to vhost_vdpa_iommu_map_notify().

Verified with the vp_vdpa and vdpa_sim_net drivers.

Signed-off-by: Cindy Lu <lulu@redhat.com>
---
 hw/virtio/vhost-vdpa.c         | 149 +++++++++++++++++++++++++++++++--
 include/hw/virtio/vhost-vdpa.h |  11 +++
 2 files changed, 152 insertions(+), 8 deletions(-)

Comments

Jason Wang March 21, 2023, 3:21 a.m. UTC | #1
On Tue, Mar 21, 2023 at 12:20 AM Cindy Lu <lulu@redhat.com> wrote:
>
> 1. The vIOMMU support will make vDPA can work in IOMMU mode. This
> will fix security issues while using the no-IOMMU mode.
> To support this feature we need to add new functions for IOMMU MR adds and
> deletes.
>
> Also since the SVQ does not support vIOMMU yet, add the check for IOMMU
> in vhost_vdpa_dev_start, if the SVQ and IOMMU enable at the same time
> the function will return fail.
>
> 2. Skip the iova_max check vhost_vdpa_listener_skipped_section(). While
> MR is IOMMU, move this check to  vhost_vdpa_iommu_map_notify()
>
> Verified in vp_vdpa and vdpa_sim_net driver
>
> Signed-off-by: Cindy Lu <lulu@redhat.com>
> ---
>  hw/virtio/vhost-vdpa.c         | 149 +++++++++++++++++++++++++++++++--
>  include/hw/virtio/vhost-vdpa.h |  11 +++
>  2 files changed, 152 insertions(+), 8 deletions(-)
>
> diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c
> index 0c8c37e786..b36922b365 100644
> --- a/hw/virtio/vhost-vdpa.c
> +++ b/hw/virtio/vhost-vdpa.c
> @@ -26,6 +26,7 @@
>  #include "cpu.h"
>  #include "trace.h"
>  #include "qapi/error.h"
> +#include "hw/virtio/virtio-access.h"
>
>  /*
>   * Return one past the end of the end of section. Be careful with uint64_t
> @@ -60,15 +61,22 @@ static bool vhost_vdpa_listener_skipped_section(MemoryRegionSection *section,
>                       iova_min, section->offset_within_address_space);
>          return true;
>      }
> +    /*
> +     * While using vIOMMU, sometimes the section will be larger than iova_max,
> +     * but the memory that actually maps is smaller, so move the check to
> +     * function vhost_vdpa_iommu_map_notify(). That function will use the actual
> +     * size that maps to the kernel
> +     */
>
> -    llend = vhost_vdpa_section_end(section);
> -    if (int128_gt(llend, int128_make64(iova_max))) {
> -        error_report("RAM section out of device range (max=0x%" PRIx64
> -                     ", end addr=0x%" PRIx64 ")",
> -                     iova_max, int128_get64(llend));
> -        return true;
> +    if (!memory_region_is_iommu(section->mr)) {
> +        llend = vhost_vdpa_section_end(section);
> +        if (int128_gt(llend, int128_make64(iova_max))) {
> +            error_report("RAM section out of device range (max=0x%" PRIx64
> +                         ", end addr=0x%" PRIx64 ")",
> +                         iova_max, int128_get64(llend));
> +            return true;
> +        }
>      }
> -

Unnecessary changes.

>      return false;
>  }
>
> @@ -185,6 +193,118 @@ static void vhost_vdpa_listener_commit(MemoryListener *listener)
>      v->iotlb_batch_begin_sent = false;
>  }
>
> +static void vhost_vdpa_iommu_map_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb)
> +{
> +    struct vdpa_iommu *iommu = container_of(n, struct vdpa_iommu, n);
> +
> +    hwaddr iova = iotlb->iova + iommu->iommu_offset;
> +    struct vhost_vdpa *v = iommu->dev;
> +    void *vaddr;
> +    int ret;
> +    Int128 llend;
> +
> +    if (iotlb->target_as != &address_space_memory) {
> +        error_report("Wrong target AS \"%s\", only system memory is allowed",
> +                     iotlb->target_as->name ? iotlb->target_as->name : "none");
> +        return;
> +    }
> +    RCU_READ_LOCK_GUARD();
> +    /* check if RAM section out of device range */
> +    llend = int128_add(int128_makes64(iotlb->addr_mask), int128_makes64(iova));
> +    if (int128_gt(llend, int128_make64(v->iova_range.last))) {
> +        error_report("RAM section out of device range (max=0x%" PRIx64
> +                     ", end addr=0x%" PRIx64 ")",
> +                     v->iova_range.last, int128_get64(llend));
> +        return;
> +    }
> +
> +    vhost_vdpa_iotlb_batch_begin_once(v);

Quoted from your answer in V1:

"
the VHOST_IOTLB_BATCH_END message was send by
vhost_vdpa_listener_commit, because we only use
one vhost_vdpa_memory_listener and no-iommu mode will also need to use
this listener, So we still need to add the batch begin here, based on
my testing after the notify function was called,  the listener_commit
function was also called .so it works well in this situation
"

This assumes that map_notify is called within a memory
transaction, which is not necessarily the case.

I think it could be triggered when the guest tries to establish a new
mapping in the vIOMMU. In that case there are no memory transactions at
all?

Thanks
Cindy Lu March 21, 2023, 8:20 a.m. UTC | #2
On Tue, Mar 21, 2023 at 11:21 AM Jason Wang <jasowang@redhat.com> wrote:
>
> On Tue, Mar 21, 2023 at 12:20 AM Cindy Lu <lulu@redhat.com> wrote:
> >
> > 1. The vIOMMU support will make vDPA can work in IOMMU mode. This
> > will fix security issues while using the no-IOMMU mode.
> > To support this feature we need to add new functions for IOMMU MR adds and
> > deletes.
> >
> > Also since the SVQ does not support vIOMMU yet, add the check for IOMMU
> > in vhost_vdpa_dev_start, if the SVQ and IOMMU enable at the same time
> > the function will return fail.
> >
> > 2. Skip the iova_max check vhost_vdpa_listener_skipped_section(). While
> > MR is IOMMU, move this check to  vhost_vdpa_iommu_map_notify()
> >
> > Verified in vp_vdpa and vdpa_sim_net driver
> >
> > Signed-off-by: Cindy Lu <lulu@redhat.com>
> > ---
> >  hw/virtio/vhost-vdpa.c         | 149 +++++++++++++++++++++++++++++++--
> >  include/hw/virtio/vhost-vdpa.h |  11 +++
> >  2 files changed, 152 insertions(+), 8 deletions(-)
> >
> > diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c
> > index 0c8c37e786..b36922b365 100644
> > --- a/hw/virtio/vhost-vdpa.c
> > +++ b/hw/virtio/vhost-vdpa.c
> > @@ -26,6 +26,7 @@
> >  #include "cpu.h"
> >  #include "trace.h"
> >  #include "qapi/error.h"
> > +#include "hw/virtio/virtio-access.h"
> >
> >  /*
> >   * Return one past the end of the end of section. Be careful with uint64_t
> > @@ -60,15 +61,22 @@ static bool vhost_vdpa_listener_skipped_section(MemoryRegionSection *section,
> >                       iova_min, section->offset_within_address_space);
> >          return true;
> >      }
> > +    /*
> > +     * While using vIOMMU, sometimes the section will be larger than iova_max,
> > +     * but the memory that actually maps is smaller, so move the check to
> > +     * function vhost_vdpa_iommu_map_notify(). That function will use the actual
> > +     * size that maps to the kernel
> > +     */
> >
> > -    llend = vhost_vdpa_section_end(section);
> > -    if (int128_gt(llend, int128_make64(iova_max))) {
> > -        error_report("RAM section out of device range (max=0x%" PRIx64
> > -                     ", end addr=0x%" PRIx64 ")",
> > -                     iova_max, int128_get64(llend));
> > -        return true;
> > +    if (!memory_region_is_iommu(section->mr)) {
> > +        llend = vhost_vdpa_section_end(section);
> > +        if (int128_gt(llend, int128_make64(iova_max))) {
> > +            error_report("RAM section out of device range (max=0x%" PRIx64
> > +                         ", end addr=0x%" PRIx64 ")",
> > +                         iova_max, int128_get64(llend));
> > +            return true;
> > +        }
> >      }
> > -
>
> Unnecessary changes.
>
will fix this
> >      return false;
> >  }
> >
> > @@ -185,6 +193,118 @@ static void vhost_vdpa_listener_commit(MemoryListener *listener)
> >      v->iotlb_batch_begin_sent = false;
> >  }
> >
> > +static void vhost_vdpa_iommu_map_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb)
> > +{
> > +    struct vdpa_iommu *iommu = container_of(n, struct vdpa_iommu, n);
> > +
> > +    hwaddr iova = iotlb->iova + iommu->iommu_offset;
> > +    struct vhost_vdpa *v = iommu->dev;
> > +    void *vaddr;
> > +    int ret;
> > +    Int128 llend;
> > +
> > +    if (iotlb->target_as != &address_space_memory) {
> > +        error_report("Wrong target AS \"%s\", only system memory is allowed",
> > +                     iotlb->target_as->name ? iotlb->target_as->name : "none");
> > +        return;
> > +    }
> > +    RCU_READ_LOCK_GUARD();
> > +    /* check if RAM section out of device range */
> > +    llend = int128_add(int128_makes64(iotlb->addr_mask), int128_makes64(iova));
> > +    if (int128_gt(llend, int128_make64(v->iova_range.last))) {
> > +        error_report("RAM section out of device range (max=0x%" PRIx64
> > +                     ", end addr=0x%" PRIx64 ")",
> > +                     v->iova_range.last, int128_get64(llend));
> > +        return;
> > +    }
> > +
> > +    vhost_vdpa_iotlb_batch_begin_once(v);
>
> Quoted from you answer in V1:
>
> "
> the VHOST_IOTLB_BATCH_END message was send by
> vhost_vdpa_listener_commit, because we only use
> one vhost_vdpa_memory_listener and no-iommu mode will also need to use
> this listener, So we still need to add the batch begin here, based on
> my testing after the notify function was called,  the listener_commit
> function was also called .so it works well in this situation
> "
>
> This assumes the map_notify to be called within the memory
> transactions which is not necessarily the case.
>
> I think it could be triggered when guest tries to establish a new
> mapping in the vIOMMU. In this case there's no memory transactions at
> all?
>
sure, thanks will fix this
> Thanks
>
diff mbox series

Patch

diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c
index 0c8c37e786..b36922b365 100644
--- a/hw/virtio/vhost-vdpa.c
+++ b/hw/virtio/vhost-vdpa.c
@@ -26,6 +26,7 @@ 
 #include "cpu.h"
 #include "trace.h"
 #include "qapi/error.h"
+#include "hw/virtio/virtio-access.h"
 
 /*
  * Return one past the end of the end of section. Be careful with uint64_t
@@ -60,15 +61,22 @@  static bool vhost_vdpa_listener_skipped_section(MemoryRegionSection *section,
                      iova_min, section->offset_within_address_space);
         return true;
     }
+    /*
+     * While using vIOMMU, sometimes the section will be larger than iova_max,
+     * but the memory that actually maps is smaller, so move the check to
+     * function vhost_vdpa_iommu_map_notify(). That function will use the actual
+     * size that maps to the kernel
+     */
 
-    llend = vhost_vdpa_section_end(section);
-    if (int128_gt(llend, int128_make64(iova_max))) {
-        error_report("RAM section out of device range (max=0x%" PRIx64
-                     ", end addr=0x%" PRIx64 ")",
-                     iova_max, int128_get64(llend));
-        return true;
+    if (!memory_region_is_iommu(section->mr)) {
+        llend = vhost_vdpa_section_end(section);
+        if (int128_gt(llend, int128_make64(iova_max))) {
+            error_report("RAM section out of device range (max=0x%" PRIx64
+                         ", end addr=0x%" PRIx64 ")",
+                         iova_max, int128_get64(llend));
+            return true;
+        }
     }
-
     return false;
 }
 
@@ -185,6 +193,118 @@  static void vhost_vdpa_listener_commit(MemoryListener *listener)
     v->iotlb_batch_begin_sent = false;
 }
 
+static void vhost_vdpa_iommu_map_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb)
+{
+    struct vdpa_iommu *iommu = container_of(n, struct vdpa_iommu, n);
+
+    hwaddr iova = iotlb->iova + iommu->iommu_offset;
+    struct vhost_vdpa *v = iommu->dev;
+    void *vaddr;
+    int ret;
+    Int128 llend;
+
+    if (iotlb->target_as != &address_space_memory) {
+        error_report("Wrong target AS \"%s\", only system memory is allowed",
+                     iotlb->target_as->name ? iotlb->target_as->name : "none");
+        return;
+    }
+    RCU_READ_LOCK_GUARD();
+    /* check if RAM section out of device range */
+    llend = int128_add(int128_makes64(iotlb->addr_mask), int128_makes64(iova));
+    if (int128_gt(llend, int128_make64(v->iova_range.last))) {
+        error_report("RAM section out of device range (max=0x%" PRIx64
+                     ", end addr=0x%" PRIx64 ")",
+                     v->iova_range.last, int128_get64(llend));
+        return;
+    }
+
+    vhost_vdpa_iotlb_batch_begin_once(v);
+
+    if ((iotlb->perm & IOMMU_RW) != IOMMU_NONE) {
+        bool read_only;
+
+        if (!memory_get_xlat_addr(iotlb, &vaddr, NULL, &read_only, NULL)) {
+            return;
+        }
+
+        ret = vhost_vdpa_dma_map(v, VHOST_VDPA_GUEST_PA_ASID, iova,
+                                 iotlb->addr_mask + 1, vaddr, read_only);
+        if (ret) {
+            error_report("vhost_vdpa_dma_map(%p, 0x%" HWADDR_PRIx ", "
+                         "0x%" HWADDR_PRIx ", %p) = %d (%m)",
+                         v, iova, iotlb->addr_mask + 1, vaddr, ret);
+        }
+    } else {
+        ret = vhost_vdpa_dma_unmap(v, VHOST_VDPA_GUEST_PA_ASID, iova,
+                                   iotlb->addr_mask + 1);
+        if (ret) {
+            error_report("vhost_vdpa_dma_unmap(%p, 0x%" HWADDR_PRIx ", "
+                         "0x%" HWADDR_PRIx ") = %d (%m)",
+                         v, iova, iotlb->addr_mask + 1, ret);
+        }
+    }
+}
+
+static void vhost_vdpa_iommu_region_add(MemoryListener *listener,
+                                        MemoryRegionSection *section)
+{
+    struct vhost_vdpa *v = container_of(listener, struct vhost_vdpa, listener);
+
+    struct vdpa_iommu *iommu;
+    Int128 end;
+    int iommu_idx;
+    IOMMUMemoryRegion *iommu_mr;
+    int ret;
+
+    iommu_mr = IOMMU_MEMORY_REGION(section->mr);
+
+    iommu = g_malloc0(sizeof(*iommu));
+    end = int128_add(int128_make64(section->offset_within_region),
+                     section->size);
+    end = int128_sub(end, int128_one());
+    iommu_idx = memory_region_iommu_attrs_to_index(iommu_mr,
+                                                   MEMTXATTRS_UNSPECIFIED);
+    iommu->iommu_mr = iommu_mr;
+    iommu_notifier_init(&iommu->n, vhost_vdpa_iommu_map_notify,
+                        IOMMU_NOTIFIER_IOTLB_EVENTS,
+                        section->offset_within_region,
+                        int128_get64(end),
+                        iommu_idx);
+    iommu->iommu_offset = section->offset_within_address_space -
+                          section->offset_within_region;
+    iommu->dev = v;
+
+    ret = memory_region_register_iommu_notifier(section->mr, &iommu->n, NULL);
+    if (ret) {
+        g_free(iommu);
+        return;
+    }
+
+    QLIST_INSERT_HEAD(&v->iommu_list, iommu, iommu_next);
+    memory_region_iommu_replay(iommu->iommu_mr, &iommu->n);
+
+    return;
+}
+
+static void vhost_vdpa_iommu_region_del(MemoryListener *listener,
+                                        MemoryRegionSection *section)
+{
+    struct vhost_vdpa *v = container_of(listener, struct vhost_vdpa, listener);
+
+    struct vdpa_iommu *iommu;
+
+    QLIST_FOREACH(iommu, &v->iommu_list, iommu_next)
+    {
+        if (MEMORY_REGION(iommu->iommu_mr) == section->mr &&
+            iommu->n.start == section->offset_within_region) {
+            memory_region_unregister_iommu_notifier(section->mr, &iommu->n);
+            QLIST_REMOVE(iommu, iommu_next);
+            g_free(iommu);
+            break;
+        }
+    }
+}
+
 static void vhost_vdpa_listener_region_add(MemoryListener *listener,
                                            MemoryRegionSection *section)
 {
@@ -199,6 +319,10 @@  static void vhost_vdpa_listener_region_add(MemoryListener *listener,
                                             v->iova_range.last)) {
         return;
     }
+    if (memory_region_is_iommu(section->mr)) {
+        vhost_vdpa_iommu_region_add(listener, section);
+        return;
+    }
 
     if (unlikely((section->offset_within_address_space & ~TARGET_PAGE_MASK) !=
                  (section->offset_within_region & ~TARGET_PAGE_MASK))) {
@@ -278,6 +402,9 @@  static void vhost_vdpa_listener_region_del(MemoryListener *listener,
                                             v->iova_range.last)) {
         return;
     }
+    if (memory_region_is_iommu(section->mr)) {
+        vhost_vdpa_iommu_region_del(listener, section);
+    }
 
     if (unlikely((section->offset_within_address_space & ~TARGET_PAGE_MASK) !=
                  (section->offset_within_region & ~TARGET_PAGE_MASK))) {
@@ -1182,7 +1309,13 @@  static int vhost_vdpa_dev_start(struct vhost_dev *dev, bool started)
     }
 
     if (started) {
-        memory_listener_register(&v->listener, &address_space_memory);
+        if (vhost_dev_has_iommu(dev) && (v->shadow_vqs_enabled)) {
+            error_report("SVQ can not work while IOMMU enable, please disable"
+                         "IOMMU and try again");
+            return -1;
+        }
+        memory_listener_register(&v->listener, dev->vdev->dma_as);
+
         return vhost_vdpa_add_status(dev, VIRTIO_CONFIG_S_DRIVER_OK);
     }
 
diff --git a/include/hw/virtio/vhost-vdpa.h b/include/hw/virtio/vhost-vdpa.h
index c278a2a8de..e64bfc7f98 100644
--- a/include/hw/virtio/vhost-vdpa.h
+++ b/include/hw/virtio/vhost-vdpa.h
@@ -52,6 +52,8 @@  typedef struct vhost_vdpa {
     struct vhost_dev *dev;
     Error *migration_blocker;
     VhostVDPAHostNotifier notifier[VIRTIO_QUEUE_MAX];
+    QLIST_HEAD(, vdpa_iommu) iommu_list;
+    IOMMUNotifier n;
 } VhostVDPA;
 
 int vhost_vdpa_get_iova_range(int fd, struct vhost_vdpa_iova_range *iova_range);
@@ -61,4 +63,13 @@  int vhost_vdpa_dma_map(struct vhost_vdpa *v, uint32_t asid, hwaddr iova,
 int vhost_vdpa_dma_unmap(struct vhost_vdpa *v, uint32_t asid, hwaddr iova,
                          hwaddr size);
 
+typedef struct vdpa_iommu {
+    struct vhost_vdpa *dev;
+    IOMMUMemoryRegion *iommu_mr;
+    hwaddr iommu_offset;
+    IOMMUNotifier n;
+    QLIST_ENTRY(vdpa_iommu) iommu_next;
+} VDPAIOMMUState;
+
+
 #endif