diff mbox series

[v2,15/19] util/vfio-helpers: Report error when IOMMU page size is not supported

Message ID 20201026105504.4023620-16-philmd@redhat.com
State New, archived
Headers show
Series util/vfio-helpers: Allow using multiple MSIX IRQs | expand

Commit Message

Philippe Mathieu-Daudé Oct. 26, 2020, 10:55 a.m. UTC
This driver uses the host page size to align its memory regions,
but this size is not always compatible with the IOMMU. Add a
check that the size is supported, and bail out with a hint about
the minimum page size the driver should use.

Suggested-by: Alex Williamson <alex.williamson@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Philippe Mathieu-Daudé <philmd@redhat.com>
---
 util/vfio-helpers.c | 28 ++++++++++++++++++++++++++--
 util/trace-events   |  1 +
 2 files changed, 27 insertions(+), 2 deletions(-)

Comments

Auger Eric Oct. 26, 2020, 4:12 p.m. UTC | #1
Hi Philippe,

On 10/26/20 11:55 AM, Philippe Mathieu-Daudé wrote:
> This driver uses the host page size to align its memory regions,
> but this size is not always compatible with the IOMMU. Add a
> check if the size matches, and bails out providing a hint what
> is the minimum page size the driver should use.
> 
> Suggested-by: Alex Williamson <alex.williamson@redhat.com>
> Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
> Signed-off-by: Philippe Mathieu-Daudé <philmd@redhat.com>
> ---
>  util/vfio-helpers.c | 28 ++++++++++++++++++++++++++--
>  util/trace-events   |  1 +
>  2 files changed, 27 insertions(+), 2 deletions(-)
> 
> diff --git a/util/vfio-helpers.c b/util/vfio-helpers.c
> index 5e288dfa113..874d76c2a2a 100644
> --- a/util/vfio-helpers.c
> +++ b/util/vfio-helpers.c
> @@ -11,6 +11,7 @@
>   */
>  
>  #include "qemu/osdep.h"
> +#include "qemu/cutils.h"
>  #include <sys/ioctl.h>
>  #include <linux/vfio.h>
>  #include "qapi/error.h"
> @@ -288,7 +289,7 @@ static void collect_usable_iova_ranges(QEMUVFIOState *s, void *buf)
>  }
>  
>  static int qemu_vfio_init_pci(QEMUVFIOState *s, const char *device,
> -                              Error **errp)
> +                              size_t *requested_page_size, Error **errp)
>  {
>      int ret;
>      int i;
> @@ -299,6 +300,8 @@ static int qemu_vfio_init_pci(QEMUVFIOState *s, const char *device,
>      struct vfio_device_info device_info = { .argsz = sizeof(device_info) };
>      char *group_file = NULL;
>  
> +    assert(requested_page_size && is_power_of_2(*requested_page_size));
> +
>      s->usable_iova_ranges = NULL;
>  
>      /* Create a new container */
> @@ -373,6 +376,27 @@ static int qemu_vfio_init_pci(QEMUVFIOState *s, const char *device,
>          ret = -errno;
>          goto fail;
>      }
> +    if (!(iommu_info->flags & VFIO_IOMMU_INFO_PGSIZES)) {
> +        error_setg(errp, "Failed to get IOMMU page size info");
> +        ret = -EINVAL;
> +        goto fail;
> +    }
> +    trace_qemu_vfio_iommu_iova_pgsizes(iommu_info->iova_pgsizes);
> +    if (!(iommu_info->iova_pgsizes & *requested_page_size)) {
> +        g_autofree char *req_page_size_str = size_to_str(*requested_page_size);
> +        g_autofree char *min_page_size_str = NULL;
> +        uint64_t pgsizes_masked;
> +
> +        pgsizes_masked = MAKE_64BIT_MASK(0, ctz64(*requested_page_size));
> +        *requested_page_size = 1U << ctz64(iommu_info->iova_pgsizes
> +                                           & ~pgsizes_masked);
> +        min_page_size_str = size_to_str(*requested_page_size);
> +        error_setg(errp, "Unsupported IOMMU page size: %s", req_page_size_str);
> +        error_append_hint(errp, "Minimum IOMMU page size: %s\n",
> +                          min_page_size_str);
This blocks the tentative 64kB support. Before this patch I was able to
run the use case with a 64kB page host while the MPS used by the device
is 4kB. Of course I have no evidence yet that my work is correct - besides
the fact that it works in my case for a specific device - but at least we
should make sure we do not introduce a new blocker here.

Also, as we discussed together, commit
f68453237b  block/nvme: Map doorbells pages write-only
causes trouble with 64kB pages, since there you attempt to map 2
consecutive 4kB pages with different attributes. The 2nd mmap fails.

Thanks

Eric
> +        ret = -EINVAL;
> +        goto fail;
> +    }
>  
>      /*
>       * if the kernel does not report usable IOVA regions, choose
> @@ -520,7 +544,7 @@ QEMUVFIOState *qemu_vfio_open_pci(const char *device, size_t *min_page_size,
>      int r;
>      QEMUVFIOState *s = g_new0(QEMUVFIOState, 1);
>  
> -    r = qemu_vfio_init_pci(s, device, errp);
> +    r = qemu_vfio_init_pci(s, device, min_page_size, errp);
>      if (r) {
>          g_free(s);
>          return NULL;
> diff --git a/util/trace-events b/util/trace-events
> index 7faad2a718c..3c36def9f30 100644
> --- a/util/trace-events
> +++ b/util/trace-events
> @@ -87,6 +87,7 @@ qemu_vfio_do_mapping(void *s, void *host, uint64_t iova, size_t size) "s %p host
>  qemu_vfio_dma_map(void *s, void *host, size_t size, bool temporary, uint64_t *iova) "s %p host %p size 0x%zx temporary %d &iova %p"
>  qemu_vfio_dma_mapped(void *s, void *host, uint64_t iova, size_t size) "s %p host %p <-> iova 0x%"PRIx64" size 0x%zx"
>  qemu_vfio_dma_unmap(void *s, void *host) "s %p host %p"
> +qemu_vfio_iommu_iova_pgsizes(uint64_t iova_pgsizes) "iommu page size bitmask: 0x%08"PRIx64
>  qemu_vfio_pci_read_config(void *buf, int ofs, int size, uint64_t region_ofs, uint64_t region_size) "read cfg ptr %p ofs 0x%x size %d (region ofs 0x%"PRIx64" size %"PRId64")"
>  qemu_vfio_pci_write_config(void *buf, int ofs, int size, uint64_t region_ofs, uint64_t region_size) "write cfg ptr %p ofs 0x%x size %d (region ofs 0x%"PRIx64" size %"PRId64")"
>  qemu_vfio_region_info(const char *desc, uint64_t offset, uint64_t size, uint32_t cap_offset) "region '%s' ofs 0x%"PRIx64" size %"PRId64" cap_ofs %"PRId32
>
diff mbox series

Patch

diff --git a/util/vfio-helpers.c b/util/vfio-helpers.c
index 5e288dfa113..874d76c2a2a 100644
--- a/util/vfio-helpers.c
+++ b/util/vfio-helpers.c
@@ -11,6 +11,7 @@ 
  */
 
 #include "qemu/osdep.h"
+#include "qemu/cutils.h"
 #include <sys/ioctl.h>
 #include <linux/vfio.h>
 #include "qapi/error.h"
@@ -288,7 +289,7 @@  static void collect_usable_iova_ranges(QEMUVFIOState *s, void *buf)
 }
 
 static int qemu_vfio_init_pci(QEMUVFIOState *s, const char *device,
-                              Error **errp)
+                              size_t *requested_page_size, Error **errp)
 {
     int ret;
     int i;
@@ -299,6 +300,8 @@  static int qemu_vfio_init_pci(QEMUVFIOState *s, const char *device,
     struct vfio_device_info device_info = { .argsz = sizeof(device_info) };
     char *group_file = NULL;
 
+    assert(requested_page_size && is_power_of_2(*requested_page_size));
+
     s->usable_iova_ranges = NULL;
 
     /* Create a new container */
@@ -373,6 +376,27 @@  static int qemu_vfio_init_pci(QEMUVFIOState *s, const char *device,
         ret = -errno;
         goto fail;
     }
+    if (!(iommu_info->flags & VFIO_IOMMU_INFO_PGSIZES)) {
+        error_setg(errp, "Failed to get IOMMU page size info");
+        ret = -EINVAL;
+        goto fail;
+    }
+    trace_qemu_vfio_iommu_iova_pgsizes(iommu_info->iova_pgsizes);
+    if (!(iommu_info->iova_pgsizes & *requested_page_size)) {
+        g_autofree char *req_page_size_str = size_to_str(*requested_page_size);
+        g_autofree char *min_page_size_str = NULL;
+        uint64_t pgsizes_masked;
+
+        pgsizes_masked = MAKE_64BIT_MASK(0, ctz64(*requested_page_size));
+        *requested_page_size = 1U << ctz64(iommu_info->iova_pgsizes
+                                           & ~pgsizes_masked);
+        min_page_size_str = size_to_str(*requested_page_size);
+        error_setg(errp, "Unsupported IOMMU page size: %s", req_page_size_str);
+        error_append_hint(errp, "Minimum IOMMU page size: %s\n",
+                          min_page_size_str);
+        ret = -EINVAL;
+        goto fail;
+    }
 
     /*
      * if the kernel does not report usable IOVA regions, choose
@@ -520,7 +544,7 @@  QEMUVFIOState *qemu_vfio_open_pci(const char *device, size_t *min_page_size,
     int r;
     QEMUVFIOState *s = g_new0(QEMUVFIOState, 1);
 
-    r = qemu_vfio_init_pci(s, device, errp);
+    r = qemu_vfio_init_pci(s, device, min_page_size, errp);
     if (r) {
         g_free(s);
         return NULL;
diff --git a/util/trace-events b/util/trace-events
index 7faad2a718c..3c36def9f30 100644
--- a/util/trace-events
+++ b/util/trace-events
@@ -87,6 +87,7 @@  qemu_vfio_do_mapping(void *s, void *host, uint64_t iova, size_t size) "s %p host
 qemu_vfio_dma_map(void *s, void *host, size_t size, bool temporary, uint64_t *iova) "s %p host %p size 0x%zx temporary %d &iova %p"
 qemu_vfio_dma_mapped(void *s, void *host, uint64_t iova, size_t size) "s %p host %p <-> iova 0x%"PRIx64" size 0x%zx"
 qemu_vfio_dma_unmap(void *s, void *host) "s %p host %p"
+qemu_vfio_iommu_iova_pgsizes(uint64_t iova_pgsizes) "iommu page size bitmask: 0x%08"PRIx64
 qemu_vfio_pci_read_config(void *buf, int ofs, int size, uint64_t region_ofs, uint64_t region_size) "read cfg ptr %p ofs 0x%x size %d (region ofs 0x%"PRIx64" size %"PRId64")"
 qemu_vfio_pci_write_config(void *buf, int ofs, int size, uint64_t region_ofs, uint64_t region_size) "write cfg ptr %p ofs 0x%x size %d (region ofs 0x%"PRIx64" size %"PRId64")"
 qemu_vfio_region_info(const char *desc, uint64_t offset, uint64_t size, uint32_t cap_offset) "region '%s' ofs 0x%"PRIx64" size %"PRId64" cap_ofs %"PRId32