[qemu,v16,16/19] vfio: Add host side DMA window capabilities

Message ID 1462344751-28281-17-git-send-email-aik@ozlabs.ru (mailing list archive)
State New, archived

Commit Message

Alexey Kardashevskiy May 4, 2016, 6:52 a.m. UTC
There are going to be multiple IOMMUs per container. This moves
the single host IOMMU parameter set to a list of VFIOHostDMAWindow.

This should cause no behavioral change and will be used later by
the SPAPR TCE IOMMU v2, which will also add a vfio_host_win_del() helper.

Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
---
Changes:
v16:
* adjusted commit log with changes from v15

v15:
* s/vfio_host_iommu_add/vfio_host_win_add/
* s/VFIOHostIOMMU/VFIOHostDMAWindow/
---
 hw/vfio/common.c              | 65 +++++++++++++++++++++++++++++++++----------
 include/hw/vfio/vfio-common.h |  9 ++++--
 2 files changed, 57 insertions(+), 17 deletions(-)
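
For reference, the window descriptor and the per-container list that replace
the old min_iova/max_iova/iova_pgsizes fields look like this in the diff below:

typedef struct VFIOHostDMAWindow {
    hwaddr min_iova, max_iova;
    uint64_t iova_pgsizes;
    QLIST_ENTRY(VFIOHostDMAWindow) hostwin_next;
} VFIOHostDMAWindow;

/* in VFIOContainer: one entry per host DMA window */
QLIST_HEAD(, VFIOHostDMAWindow) hostwin_list;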

Comments

Alex Williamson May 13, 2016, 10:25 p.m. UTC | #1
On Wed,  4 May 2016 16:52:28 +1000
Alexey Kardashevskiy <aik@ozlabs.ru> wrote:

> There are going to be multiple IOMMUs per a container. This moves
> the single host IOMMU parameter set to a list of VFIOHostDMAWindow.
> 
> This should cause no behavioral change and will be used later by
> the SPAPR TCE IOMMU v2 which will also add a vfio_host_win_del() helper.
> 
> Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
> Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
> ---
> Changes:
> v16:
> * adjusted commit log with changes from v15
> 
> v15:
> * s/vfio_host_iommu_add/vfio_host_win_add/
> * s/VFIOHostIOMMU/VFIOHostDMAWindow/
> ---
>  hw/vfio/common.c              | 65 +++++++++++++++++++++++++++++++++----------
>  include/hw/vfio/vfio-common.h |  9 ++++--
>  2 files changed, 57 insertions(+), 17 deletions(-)
> 
> diff --git a/hw/vfio/common.c b/hw/vfio/common.c
> index 496eb82..3f2fb23 100644
> --- a/hw/vfio/common.c
> +++ b/hw/vfio/common.c
> @@ -29,6 +29,7 @@
>  #include "exec/memory.h"
>  #include "hw/hw.h"
>  #include "qemu/error-report.h"
> +#include "qemu/range.h"
>  #include "sysemu/kvm.h"
>  #include "trace.h"
>  
> @@ -239,6 +240,45 @@ static int vfio_dma_map(VFIOContainer *container, hwaddr iova,
>      return -errno;
>  }
>  
> +static VFIOHostDMAWindow *vfio_host_win_lookup(VFIOContainer *container,
> +                                               hwaddr min_iova, hwaddr max_iova)
> +{
> +    VFIOHostDMAWindow *hostwin;
> +
> +    QLIST_FOREACH(hostwin, &container->hostwin_list, hostwin_next) {
> +        if (hostwin->min_iova <= min_iova && max_iova <= hostwin->max_iova) {
> +            return hostwin;
> +        }
> +    }
> +
> +    return NULL;
> +}
> +
> +static int vfio_host_win_add(VFIOContainer *container,
> +                             hwaddr min_iova, hwaddr max_iova,
> +                             uint64_t iova_pgsizes)
> +{
> +    VFIOHostDMAWindow *hostwin;
> +
> +    QLIST_FOREACH(hostwin, &container->hostwin_list, hostwin_next) {
> +        if (ranges_overlap(min_iova, max_iova - min_iova + 1,
> +                           hostwin->min_iova,
> +                           hostwin->max_iova - hostwin->min_iova + 1)) {

Why does vfio_host_win_lookup() not also use ranges_overlap()?  In
fact, why don't we call vfio_host_win_lookup here to find the conflict?
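
One possible shape for that, as a hypothetical sketch rather than anything in
the posted patch, is a single ranges_overlap()-based helper that
vfio_host_win_add() could call to find the conflicting window (the name
vfio_host_win_overlap is made up here):

static VFIOHostDMAWindow *vfio_host_win_overlap(VFIOContainer *container,
                                                hwaddr min_iova,
                                                hwaddr max_iova)
{
    VFIOHostDMAWindow *hostwin;

    QLIST_FOREACH(hostwin, &container->hostwin_list, hostwin_next) {
        /* Return the first existing window that intersects the range */
        if (ranges_overlap(min_iova, max_iova - min_iova + 1,
                           hostwin->min_iova,
                           hostwin->max_iova - hostwin->min_iova + 1)) {
            return hostwin;
        }
    }

    return NULL;
}

vfio_host_win_add() could then report the conflict whenever this helper
returns non-NULL, instead of open-coding the loop.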

> +            error_report("%s: Overlapped IOMMU are not enabled", __func__);
> +            return -1;

Nobody here tests the return value, shouldn't this be fatal?

> +        }
> +    }
> +
> +    hostwin = g_malloc0(sizeof(*hostwin));
> +
> +    hostwin->min_iova = min_iova;
> +    hostwin->max_iova = max_iova;
> +    hostwin->iova_pgsizes = iova_pgsizes;
> +    QLIST_INSERT_HEAD(&container->hostwin_list, hostwin, hostwin_next);
> +
> +    return 0;
> +}
> +
>  static bool vfio_listener_skipped_section(MemoryRegionSection *section)
>  {
>      return (!memory_region_is_ram(section->mr) &&
> @@ -352,7 +392,7 @@ static void vfio_listener_region_add(MemoryListener *listener,
>      }
>      end = int128_get64(int128_sub(llend, int128_one()));
>  
> -    if ((iova < container->min_iova) || (end > container->max_iova)) {
> +    if (!vfio_host_win_lookup(container, iova, end)) {
>          error_report("vfio: IOMMU container %p can't map guest IOVA region"
>                       " 0x%"HWADDR_PRIx"..0x%"HWADDR_PRIx,
>                       container, iova, end);
> @@ -367,10 +407,6 @@ static void vfio_listener_region_add(MemoryListener *listener,
>  
>          trace_vfio_listener_region_add_iommu(iova, end);
>          /*
> -         * FIXME: We should do some checking to see if the
> -         * capabilities of the host VFIO IOMMU are adequate to model
> -         * the guest IOMMU
> -         *
>           * FIXME: For VFIO iommu types which have KVM acceleration to
>           * avoid bouncing all map/unmaps through qemu this way, this
>           * would be the right place to wire that up (tell the KVM
> @@ -826,16 +862,14 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as)
>           * existing Type1 IOMMUs generally support any IOVA we're
>           * going to actually try in practice.
>           */
> -        container->min_iova = 0;
> -        container->max_iova = (hwaddr)-1;
> -
> -        /* Assume just 4K IOVA page size */
> -        container->iova_pgsizes = 0x1000;
>          info.argsz = sizeof(info);
>          ret = ioctl(fd, VFIO_IOMMU_GET_INFO, &info);
>          /* Ignore errors */
>          if ((ret == 0) && (info.flags & VFIO_IOMMU_INFO_PGSIZES)) {

if (ret || !(info.flags & VFIO_IOMMU_INFO_PGSIZES)) {
    /* Assume 4k IOVA page size */
    info.iova_pgsizes = 4096;
}

vfio_host_win_add(container, 0, (hwaddr)-1, info.iova_pgsizes);

> -            container->iova_pgsizes = info.iova_pgsizes;
> +            vfio_host_win_add(container, 0, (hwaddr)-1, info.iova_pgsizes);
> +        } else {
> +            /* Assume just 4K IOVA page size */
> +            vfio_host_win_add(container, 0, (hwaddr)-1, 0x1000);
>          }
>      } else if (ioctl(fd, VFIO_CHECK_EXTENSION, VFIO_SPAPR_TCE_IOMMU) ||
>                 ioctl(fd, VFIO_CHECK_EXTENSION, VFIO_SPAPR_TCE_v2_IOMMU)) {
> @@ -892,11 +926,12 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as)
>              ret = -errno;
>              goto listener_release_exit;
>          }
> -        container->min_iova = info.dma32_window_start;
> -        container->max_iova = container->min_iova + info.dma32_window_size - 1;
>  
> -        /* Assume just 4K IOVA pages for now */
> -        container->iova_pgsizes = 0x1000;
> +        /* The default table uses 4K pages */
> +        vfio_host_win_add(container, info.dma32_window_start,
> +                          info.dma32_window_start +
> +                          info.dma32_window_size - 1,
> +                          0x1000);
>      } else {
>          error_report("vfio: No available IOMMU models");
>          ret = -EINVAL;
> diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
> index c72e45a..808263b 100644
> --- a/include/hw/vfio/vfio-common.h
> +++ b/include/hw/vfio/vfio-common.h
> @@ -82,9 +82,8 @@ typedef struct VFIOContainer {
>       * contiguous IOVA window.  We may need to generalize that in
>       * future
>       */
> -    hwaddr min_iova, max_iova;
> -    uint64_t iova_pgsizes;
>      QLIST_HEAD(, VFIOGuestIOMMU) giommu_list;
> +    QLIST_HEAD(, VFIOHostDMAWindow) hostwin_list;
>      QLIST_HEAD(, VFIOGroup) group_list;
>      QLIST_ENTRY(VFIOContainer) next;
>  } VFIOContainer;
> @@ -97,6 +96,12 @@ typedef struct VFIOGuestIOMMU {
>      QLIST_ENTRY(VFIOGuestIOMMU) giommu_next;
>  } VFIOGuestIOMMU;
>  
> +typedef struct VFIOHostDMAWindow {
> +    hwaddr min_iova, max_iova;
> +    uint64_t iova_pgsizes;
> +    QLIST_ENTRY(VFIOHostDMAWindow) hostwin_next;
> +} VFIOHostDMAWindow;
> +
>  typedef struct VFIODeviceOps VFIODeviceOps;
>  
>  typedef struct VFIODevice {
David Gibson May 27, 2016, 12:36 a.m. UTC | #2
On Fri, May 13, 2016 at 04:25:59PM -0600, Alex Williamson wrote:
> On Wed,  4 May 2016 16:52:28 +1000
> Alexey Kardashevskiy <aik@ozlabs.ru> wrote:
> 
> > There are going to be multiple IOMMUs per a container. This moves
> > the single host IOMMU parameter set to a list of VFIOHostDMAWindow.
> > 
> > This should cause no behavioral change and will be used later by
> > the SPAPR TCE IOMMU v2 which will also add a vfio_host_win_del() helper.
> > 
> > Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
> > Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
> > ---
> > Changes:
> > v16:
> > * adjusted commit log with changes from v15
> > 
> > v15:
> > * s/vfio_host_iommu_add/vfio_host_win_add/
> > * s/VFIOHostIOMMU/VFIOHostDMAWindow/
> > ---
> >  hw/vfio/common.c              | 65 +++++++++++++++++++++++++++++++++----------
> >  include/hw/vfio/vfio-common.h |  9 ++++--
> >  2 files changed, 57 insertions(+), 17 deletions(-)
> > 
> > diff --git a/hw/vfio/common.c b/hw/vfio/common.c
> > index 496eb82..3f2fb23 100644
> > --- a/hw/vfio/common.c
> > +++ b/hw/vfio/common.c
> > @@ -29,6 +29,7 @@
> >  #include "exec/memory.h"
> >  #include "hw/hw.h"
> >  #include "qemu/error-report.h"
> > +#include "qemu/range.h"
> >  #include "sysemu/kvm.h"
> >  #include "trace.h"
> >  
> > @@ -239,6 +240,45 @@ static int vfio_dma_map(VFIOContainer *container, hwaddr iova,
> >      return -errno;
> >  }
> >  
> > +static VFIOHostDMAWindow *vfio_host_win_lookup(VFIOContainer *container,
> > +                                               hwaddr min_iova, hwaddr max_iova)
> > +{
> > +    VFIOHostDMAWindow *hostwin;
> > +
> > +    QLIST_FOREACH(hostwin, &container->hostwin_list, hostwin_next) {
> > +        if (hostwin->min_iova <= min_iova && max_iova <= hostwin->max_iova) {
> > +            return hostwin;
> > +        }
> > +    }
> > +
> > +    return NULL;
> > +}
> > +
> > +static int vfio_host_win_add(VFIOContainer *container,
> > +                             hwaddr min_iova, hwaddr max_iova,
> > +                             uint64_t iova_pgsizes)
> > +{
> > +    VFIOHostDMAWindow *hostwin;
> > +
> > +    QLIST_FOREACH(hostwin, &container->hostwin_list, hostwin_next) {
> > +        if (ranges_overlap(min_iova, max_iova - min_iova + 1,
> > +                           hostwin->min_iova,
> > +                           hostwin->max_iova - hostwin->min_iova + 1)) {
> 
> Why does vfio_host_win_lookup() not also use ranges_overlap()?  In
> fact, why don't we call vfio_host_win_lookup here to find the conflict?
> 
> > +            error_report("%s: Overlapped IOMMU are not enabled", __func__);
> > +            return -1;
> 
> Nobody here tests the return value, shouldn't this be fatal?

Hm, yes.  I think hw_error() would be the right choice here.  This
would represent either a qemu programming error, or seriously
unexpected behaviour from the host kernel.
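
A minimal sketch of that direction, assuming the check simply becomes fatal via
hw_error() and the function no longer needs a return value (this is not the
code that was posted):

static void vfio_host_win_add(VFIOContainer *container,
                              hwaddr min_iova, hwaddr max_iova,
                              uint64_t iova_pgsizes)
{
    VFIOHostDMAWindow *hostwin;

    QLIST_FOREACH(hostwin, &container->hostwin_list, hostwin_next) {
        if (ranges_overlap(min_iova, max_iova - min_iova + 1,
                           hostwin->min_iova,
                           hostwin->max_iova - hostwin->min_iova + 1)) {
            /* Overlapping host windows mean a QEMU bug or unexpected
             * host kernel behaviour, so treat it as fatal. */
            hw_error("%s: Overlapped IOMMU are not enabled", __func__);
        }
    }

    hostwin = g_malloc0(sizeof(*hostwin));
    hostwin->min_iova = min_iova;
    hostwin->max_iova = max_iova;
    hostwin->iova_pgsizes = iova_pgsizes;
    QLIST_INSERT_HEAD(&container->hostwin_list, hostwin, hostwin_next);
}

With a void return, the callers in vfio_connect_container() would have nothing
to check, which also addresses the unchecked return value.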

Patch

diff --git a/hw/vfio/common.c b/hw/vfio/common.c
index 496eb82..3f2fb23 100644
--- a/hw/vfio/common.c
+++ b/hw/vfio/common.c
@@ -29,6 +29,7 @@ 
 #include "exec/memory.h"
 #include "hw/hw.h"
 #include "qemu/error-report.h"
+#include "qemu/range.h"
 #include "sysemu/kvm.h"
 #include "trace.h"
 
@@ -239,6 +240,45 @@  static int vfio_dma_map(VFIOContainer *container, hwaddr iova,
     return -errno;
 }
 
+static VFIOHostDMAWindow *vfio_host_win_lookup(VFIOContainer *container,
+                                               hwaddr min_iova, hwaddr max_iova)
+{
+    VFIOHostDMAWindow *hostwin;
+
+    QLIST_FOREACH(hostwin, &container->hostwin_list, hostwin_next) {
+        if (hostwin->min_iova <= min_iova && max_iova <= hostwin->max_iova) {
+            return hostwin;
+        }
+    }
+
+    return NULL;
+}
+
+static int vfio_host_win_add(VFIOContainer *container,
+                             hwaddr min_iova, hwaddr max_iova,
+                             uint64_t iova_pgsizes)
+{
+    VFIOHostDMAWindow *hostwin;
+
+    QLIST_FOREACH(hostwin, &container->hostwin_list, hostwin_next) {
+        if (ranges_overlap(min_iova, max_iova - min_iova + 1,
+                           hostwin->min_iova,
+                           hostwin->max_iova - hostwin->min_iova + 1)) {
+            error_report("%s: Overlapped IOMMU are not enabled", __func__);
+            return -1;
+        }
+    }
+
+    hostwin = g_malloc0(sizeof(*hostwin));
+
+    hostwin->min_iova = min_iova;
+    hostwin->max_iova = max_iova;
+    hostwin->iova_pgsizes = iova_pgsizes;
+    QLIST_INSERT_HEAD(&container->hostwin_list, hostwin, hostwin_next);
+
+    return 0;
+}
+
 static bool vfio_listener_skipped_section(MemoryRegionSection *section)
 {
     return (!memory_region_is_ram(section->mr) &&
@@ -352,7 +392,7 @@  static void vfio_listener_region_add(MemoryListener *listener,
     }
     end = int128_get64(int128_sub(llend, int128_one()));
 
-    if ((iova < container->min_iova) || (end > container->max_iova)) {
+    if (!vfio_host_win_lookup(container, iova, end)) {
         error_report("vfio: IOMMU container %p can't map guest IOVA region"
                      " 0x%"HWADDR_PRIx"..0x%"HWADDR_PRIx,
                      container, iova, end);
@@ -367,10 +407,6 @@  static void vfio_listener_region_add(MemoryListener *listener,
 
         trace_vfio_listener_region_add_iommu(iova, end);
         /*
-         * FIXME: We should do some checking to see if the
-         * capabilities of the host VFIO IOMMU are adequate to model
-         * the guest IOMMU
-         *
          * FIXME: For VFIO iommu types which have KVM acceleration to
          * avoid bouncing all map/unmaps through qemu this way, this
          * would be the right place to wire that up (tell the KVM
@@ -826,16 +862,14 @@  static int vfio_connect_container(VFIOGroup *group, AddressSpace *as)
          * existing Type1 IOMMUs generally support any IOVA we're
          * going to actually try in practice.
          */
-        container->min_iova = 0;
-        container->max_iova = (hwaddr)-1;
-
-        /* Assume just 4K IOVA page size */
-        container->iova_pgsizes = 0x1000;
         info.argsz = sizeof(info);
         ret = ioctl(fd, VFIO_IOMMU_GET_INFO, &info);
         /* Ignore errors */
         if ((ret == 0) && (info.flags & VFIO_IOMMU_INFO_PGSIZES)) {
-            container->iova_pgsizes = info.iova_pgsizes;
+            vfio_host_win_add(container, 0, (hwaddr)-1, info.iova_pgsizes);
+        } else {
+            /* Assume just 4K IOVA page size */
+            vfio_host_win_add(container, 0, (hwaddr)-1, 0x1000);
         }
     } else if (ioctl(fd, VFIO_CHECK_EXTENSION, VFIO_SPAPR_TCE_IOMMU) ||
                ioctl(fd, VFIO_CHECK_EXTENSION, VFIO_SPAPR_TCE_v2_IOMMU)) {
@@ -892,11 +926,12 @@  static int vfio_connect_container(VFIOGroup *group, AddressSpace *as)
             ret = -errno;
             goto listener_release_exit;
         }
-        container->min_iova = info.dma32_window_start;
-        container->max_iova = container->min_iova + info.dma32_window_size - 1;
 
-        /* Assume just 4K IOVA pages for now */
-        container->iova_pgsizes = 0x1000;
+        /* The default table uses 4K pages */
+        vfio_host_win_add(container, info.dma32_window_start,
+                          info.dma32_window_start +
+                          info.dma32_window_size - 1,
+                          0x1000);
     } else {
         error_report("vfio: No available IOMMU models");
         ret = -EINVAL;
diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
index c72e45a..808263b 100644
--- a/include/hw/vfio/vfio-common.h
+++ b/include/hw/vfio/vfio-common.h
@@ -82,9 +82,8 @@  typedef struct VFIOContainer {
      * contiguous IOVA window.  We may need to generalize that in
      * future
      */
-    hwaddr min_iova, max_iova;
-    uint64_t iova_pgsizes;
     QLIST_HEAD(, VFIOGuestIOMMU) giommu_list;
+    QLIST_HEAD(, VFIOHostDMAWindow) hostwin_list;
     QLIST_HEAD(, VFIOGroup) group_list;
     QLIST_ENTRY(VFIOContainer) next;
 } VFIOContainer;
@@ -97,6 +96,12 @@  typedef struct VFIOGuestIOMMU {
     QLIST_ENTRY(VFIOGuestIOMMU) giommu_next;
 } VFIOGuestIOMMU;
 
+typedef struct VFIOHostDMAWindow {
+    hwaddr min_iova, max_iova;
+    uint64_t iova_pgsizes;
+    QLIST_ENTRY(VFIOHostDMAWindow) hostwin_next;
+} VFIOHostDMAWindow;
+
 typedef struct VFIODeviceOps VFIODeviceOps;
 
 typedef struct VFIODevice {