diff mbox series

[08/18] vfio/common: Record DMA mapped IOVA ranges

Message ID 20230126184948.10478-9-avihaih@nvidia.com (mailing list archive)
State New, archived
Headers show
Series vfio: Add migration pre-copy support and device dirty tracking | expand

Commit Message

Avihai Horon Jan. 26, 2023, 6:49 p.m. UTC
From: Joao Martins <joao.m.martins@oracle.com>

According to the device DMA logging uAPI, IOVA ranges to be logged by
the device must be provided all at once upon DMA logging start.

As preparation for the following patches which will add device dirty
page tracking, keep a record of all DMA mapped IOVA ranges so later they
can be used for DMA logging start.

Note that when vIOMMU is enabled DMA mapped IOVA ranges are not tracked.
This is due to the dynamic nature of vIOMMU DMA mapping/unmapping.
Following patches will address the vIOMMU case specifically.

Signed-off-by: Joao Martins <joao.m.martins@oracle.com>
Signed-off-by: Avihai Horon <avihaih@nvidia.com>
---
 include/hw/vfio/vfio-common.h |  3 ++
 hw/vfio/common.c              | 86 +++++++++++++++++++++++++++++++++--
 2 files changed, 86 insertions(+), 3 deletions(-)

Comments

Alex Williamson Jan. 27, 2023, 9:42 p.m. UTC | #1
On Thu, 26 Jan 2023 20:49:38 +0200
Avihai Horon <avihaih@nvidia.com> wrote:

> From: Joao Martins <joao.m.martins@oracle.com>
> 
> According to the device DMA logging uAPI, IOVA ranges to be logged by
> the device must be provided all at once upon DMA logging start.
> 
> As preparation for the following patches which will add device dirty
> page tracking, keep a record of all DMA mapped IOVA ranges so later they
> can be used for DMA logging start.
> 
> Note that when vIOMMU is enabled DMA mapped IOVA ranges are not tracked.
> This is due to the dynamic nature of vIOMMU DMA mapping/unmapping.
> Following patches will address the vIOMMU case specifically.
> 
> Signed-off-by: Joao Martins <joao.m.martins@oracle.com>
> Signed-off-by: Avihai Horon <avihaih@nvidia.com>
> ---
>  include/hw/vfio/vfio-common.h |  3 ++
>  hw/vfio/common.c              | 86 +++++++++++++++++++++++++++++++++--
>  2 files changed, 86 insertions(+), 3 deletions(-)
> 
> diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
> index 88c2194fb9..d54000d7ae 100644
> --- a/include/hw/vfio/vfio-common.h
> +++ b/include/hw/vfio/vfio-common.h
> @@ -23,6 +23,7 @@
>  
>  #include "exec/memory.h"
>  #include "qemu/queue.h"
> +#include "qemu/iova-tree.h"
>  #include "qemu/notify.h"
>  #include "ui/console.h"
>  #include "hw/display/ramfb.h"
> @@ -94,6 +95,8 @@ typedef struct VFIOContainer {
>      uint64_t max_dirty_bitmap_size;
>      unsigned long pgsizes;
>      unsigned int dma_max_mappings;
> +    IOVATree *mappings;
> +    QemuMutex mappings_mutex;
>      QLIST_HEAD(, VFIOGuestIOMMU) giommu_list;
>      QLIST_HEAD(, VFIOHostDMAWindow) hostwin_list;
>      QLIST_HEAD(, VFIOGroup) group_list;
> diff --git a/hw/vfio/common.c b/hw/vfio/common.c
> index e554573eb5..fafc361cea 100644
> --- a/hw/vfio/common.c
> +++ b/hw/vfio/common.c
> @@ -43,6 +43,7 @@
>  #include "migration/misc.h"
>  #include "migration/qemu-file.h"
>  #include "sysemu/tpm.h"
> +#include "qemu/iova-tree.h"
>  
>  VFIOGroupList vfio_group_list =
>      QLIST_HEAD_INITIALIZER(vfio_group_list);
> @@ -373,6 +374,11 @@ bool vfio_mig_active(void)
>      return true;
>  }
>  
> +static bool vfio_have_giommu(VFIOContainer *container)
> +{
> +    return !QLIST_EMPTY(&container->giommu_list);
> +}
> +
>  static void vfio_set_migration_error(int err)
>  {
>      MigrationState *ms = migrate_get_current();
> @@ -450,6 +456,51 @@ static bool vfio_devices_all_running_and_mig_active(VFIOContainer *container)
>      return true;
>  }
>  
> +static int vfio_record_mapping(VFIOContainer *container, hwaddr iova,
> +                               hwaddr size, bool readonly)
> +{
> +    DMAMap map = {
> +        .iova = iova,
> +        .size = size - 1, /* IOVATree is inclusive, so subtract 1 from size */

<facepalm>
Avihai Horon Feb. 12, 2023, 3:40 p.m. UTC | #2
On 27/01/2023 23:42, Alex Williamson wrote:
> External email: Use caution opening links or attachments
>
>
> On Thu, 26 Jan 2023 20:49:38 +0200
> Avihai Horon <avihaih@nvidia.com> wrote:
>
>> From: Joao Martins <joao.m.martins@oracle.com>
>>
>> According to the device DMA logging uAPI, IOVA ranges to be logged by
>> the device must be provided all at once upon DMA logging start.
>>
>> As preparation for the following patches which will add device dirty
>> page tracking, keep a record of all DMA mapped IOVA ranges so later they
>> can be used for DMA logging start.
>>
>> Note that when vIOMMU is enabled DMA mapped IOVA ranges are not tracked.
>> This is due to the dynamic nature of vIOMMU DMA mapping/unmapping.
>> Following patches will address the vIOMMU case specifically.
>>
>> Signed-off-by: Joao Martins <joao.m.martins@oracle.com>
>> Signed-off-by: Avihai Horon <avihaih@nvidia.com>
>> ---
>>   include/hw/vfio/vfio-common.h |  3 ++
>>   hw/vfio/common.c              | 86 +++++++++++++++++++++++++++++++++--
>>   2 files changed, 86 insertions(+), 3 deletions(-)
>>
>> diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
>> index 88c2194fb9..d54000d7ae 100644
>> --- a/include/hw/vfio/vfio-common.h
>> +++ b/include/hw/vfio/vfio-common.h
>> @@ -23,6 +23,7 @@
>>
>>   #include "exec/memory.h"
>>   #include "qemu/queue.h"
>> +#include "qemu/iova-tree.h"
>>   #include "qemu/notify.h"
>>   #include "ui/console.h"
>>   #include "hw/display/ramfb.h"
>> @@ -94,6 +95,8 @@ typedef struct VFIOContainer {
>>       uint64_t max_dirty_bitmap_size;
>>       unsigned long pgsizes;
>>       unsigned int dma_max_mappings;
>> +    IOVATree *mappings;
>> +    QemuMutex mappings_mutex;
>>       QLIST_HEAD(, VFIOGuestIOMMU) giommu_list;
>>       QLIST_HEAD(, VFIOHostDMAWindow) hostwin_list;
>>       QLIST_HEAD(, VFIOGroup) group_list;
>> diff --git a/hw/vfio/common.c b/hw/vfio/common.c
>> index e554573eb5..fafc361cea 100644
>> --- a/hw/vfio/common.c
>> +++ b/hw/vfio/common.c
>> @@ -43,6 +43,7 @@
>>   #include "migration/misc.h"
>>   #include "migration/qemu-file.h"
>>   #include "sysemu/tpm.h"
>> +#include "qemu/iova-tree.h"
>>
>>   VFIOGroupList vfio_group_list =
>>       QLIST_HEAD_INITIALIZER(vfio_group_list);
>> @@ -373,6 +374,11 @@ bool vfio_mig_active(void)
>>       return true;
>>   }
>>
>> +static bool vfio_have_giommu(VFIOContainer *container)
>> +{
>> +    return !QLIST_EMPTY(&container->giommu_list);
>> +}
>> +
>>   static void vfio_set_migration_error(int err)
>>   {
>>       MigrationState *ms = migrate_get_current();
>> @@ -450,6 +456,51 @@ static bool vfio_devices_all_running_and_mig_active(VFIOContainer *container)
>>       return true;
>>   }
>>
>> +static int vfio_record_mapping(VFIOContainer *container, hwaddr iova,
>> +                               hwaddr size, bool readonly)
>> +{
>> +    DMAMap map = {
>> +        .iova = iova,
>> +        .size = size - 1, /* IOVATree is inclusive, so subtract 1 from size */
> <facepalm>

I am not sure what's the error you are referring here.
Is it because DMAMap.size is not actually the size?
Something else?

Thanks.
Alex Williamson Feb. 13, 2023, 3:25 p.m. UTC | #3
On Sun, 12 Feb 2023 17:40:06 +0200
Avihai Horon <avihaih@nvidia.com> wrote:

> On 27/01/2023 23:42, Alex Williamson wrote:
> > External email: Use caution opening links or attachments
> >
> >
> > On Thu, 26 Jan 2023 20:49:38 +0200
> > Avihai Horon <avihaih@nvidia.com> wrote:
> >  
> >> From: Joao Martins <joao.m.martins@oracle.com>
> >>
> >> According to the device DMA logging uAPI, IOVA ranges to be logged by
> >> the device must be provided all at once upon DMA logging start.
> >>
> >> As preparation for the following patches which will add device dirty
> >> page tracking, keep a record of all DMA mapped IOVA ranges so later they
> >> can be used for DMA logging start.
> >>
> >> Note that when vIOMMU is enabled DMA mapped IOVA ranges are not tracked.
> >> This is due to the dynamic nature of vIOMMU DMA mapping/unmapping.
> >> Following patches will address the vIOMMU case specifically.
> >>
> >> Signed-off-by: Joao Martins <joao.m.martins@oracle.com>
> >> Signed-off-by: Avihai Horon <avihaih@nvidia.com>
> >> ---
> >>   include/hw/vfio/vfio-common.h |  3 ++
> >>   hw/vfio/common.c              | 86 +++++++++++++++++++++++++++++++++--
> >>   2 files changed, 86 insertions(+), 3 deletions(-)
> >>
> >> diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
> >> index 88c2194fb9..d54000d7ae 100644
> >> --- a/include/hw/vfio/vfio-common.h
> >> +++ b/include/hw/vfio/vfio-common.h
> >> @@ -23,6 +23,7 @@
> >>
> >>   #include "exec/memory.h"
> >>   #include "qemu/queue.h"
> >> +#include "qemu/iova-tree.h"
> >>   #include "qemu/notify.h"
> >>   #include "ui/console.h"
> >>   #include "hw/display/ramfb.h"
> >> @@ -94,6 +95,8 @@ typedef struct VFIOContainer {
> >>       uint64_t max_dirty_bitmap_size;
> >>       unsigned long pgsizes;
> >>       unsigned int dma_max_mappings;
> >> +    IOVATree *mappings;
> >> +    QemuMutex mappings_mutex;
> >>       QLIST_HEAD(, VFIOGuestIOMMU) giommu_list;
> >>       QLIST_HEAD(, VFIOHostDMAWindow) hostwin_list;
> >>       QLIST_HEAD(, VFIOGroup) group_list;
> >> diff --git a/hw/vfio/common.c b/hw/vfio/common.c
> >> index e554573eb5..fafc361cea 100644
> >> --- a/hw/vfio/common.c
> >> +++ b/hw/vfio/common.c
> >> @@ -43,6 +43,7 @@
> >>   #include "migration/misc.h"
> >>   #include "migration/qemu-file.h"
> >>   #include "sysemu/tpm.h"
> >> +#include "qemu/iova-tree.h"
> >>
> >>   VFIOGroupList vfio_group_list =
> >>       QLIST_HEAD_INITIALIZER(vfio_group_list);
> >> @@ -373,6 +374,11 @@ bool vfio_mig_active(void)
> >>       return true;
> >>   }
> >>
> >> +static bool vfio_have_giommu(VFIOContainer *container)
> >> +{
> >> +    return !QLIST_EMPTY(&container->giommu_list);
> >> +}
> >> +
> >>   static void vfio_set_migration_error(int err)
> >>   {
> >>       MigrationState *ms = migrate_get_current();
> >> @@ -450,6 +456,51 @@ static bool vfio_devices_all_running_and_mig_active(VFIOContainer *container)
> >>       return true;
> >>   }
> >>
> >> +static int vfio_record_mapping(VFIOContainer *container, hwaddr iova,
> >> +                               hwaddr size, bool readonly)
> >> +{
> >> +    DMAMap map = {
> >> +        .iova = iova,
> >> +        .size = size - 1, /* IOVATree is inclusive, so subtract 1 from size */  
> > <facepalm>  
> 
> I am not sure what's the error you are referring here.
> Is it because DMAMap.size is not actually the size?
> Something else?

Sorry, I'm just lamenting what a terrible interface IOVATree provides
with this inclusive range nonsense.  Thanks,

Alex
diff mbox series

Patch

diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
index 88c2194fb9..d54000d7ae 100644
--- a/include/hw/vfio/vfio-common.h
+++ b/include/hw/vfio/vfio-common.h
@@ -23,6 +23,7 @@ 
 
 #include "exec/memory.h"
 #include "qemu/queue.h"
+#include "qemu/iova-tree.h"
 #include "qemu/notify.h"
 #include "ui/console.h"
 #include "hw/display/ramfb.h"
@@ -94,6 +95,8 @@  typedef struct VFIOContainer {
     uint64_t max_dirty_bitmap_size;
     unsigned long pgsizes;
     unsigned int dma_max_mappings;
+    IOVATree *mappings;
+    QemuMutex mappings_mutex;
     QLIST_HEAD(, VFIOGuestIOMMU) giommu_list;
     QLIST_HEAD(, VFIOHostDMAWindow) hostwin_list;
     QLIST_HEAD(, VFIOGroup) group_list;
diff --git a/hw/vfio/common.c b/hw/vfio/common.c
index e554573eb5..fafc361cea 100644
--- a/hw/vfio/common.c
+++ b/hw/vfio/common.c
@@ -43,6 +43,7 @@ 
 #include "migration/misc.h"
 #include "migration/qemu-file.h"
 #include "sysemu/tpm.h"
+#include "qemu/iova-tree.h"
 
 VFIOGroupList vfio_group_list =
     QLIST_HEAD_INITIALIZER(vfio_group_list);
@@ -373,6 +374,11 @@  bool vfio_mig_active(void)
     return true;
 }
 
+static bool vfio_have_giommu(VFIOContainer *container)
+{
+    return !QLIST_EMPTY(&container->giommu_list);
+}
+
 static void vfio_set_migration_error(int err)
 {
     MigrationState *ms = migrate_get_current();
@@ -450,6 +456,51 @@  static bool vfio_devices_all_running_and_mig_active(VFIOContainer *container)
     return true;
 }
 
+static int vfio_record_mapping(VFIOContainer *container, hwaddr iova,
+                               hwaddr size, bool readonly)
+{
+    DMAMap map = {
+        .iova = iova,
+        .size = size - 1, /* IOVATree is inclusive, so subtract 1 from size */
+        .perm = readonly ? IOMMU_RO : IOMMU_RW,
+    };
+    int ret;
+
+    if (vfio_have_giommu(container)) {
+        return 0;
+    }
+
+    WITH_QEMU_LOCK_GUARD(&container->mappings_mutex) {
+        ret = iova_tree_insert(container->mappings, &map);
+        if (ret) {
+            if (ret == IOVA_ERR_INVALID) {
+                ret = -EINVAL;
+            } else if (ret == IOVA_ERR_OVERLAP) {
+                ret = -EEXIST;
+            }
+        }
+    }
+
+    return ret;
+}
+
+static void vfio_erase_mapping(VFIOContainer *container, hwaddr iova,
+                                hwaddr size)
+{
+    DMAMap map = {
+        .iova = iova,
+        .size = size - 1, /* IOVATree is inclusive, so subtract 1 from size */
+    };
+
+    if (vfio_have_giommu(container)) {
+        return;
+    }
+
+    WITH_QEMU_LOCK_GUARD(&container->mappings_mutex) {
+        iova_tree_remove(container->mappings, map);
+    }
+}
+
 static int vfio_dma_unmap_bitmap(VFIOContainer *container,
                                  hwaddr iova, ram_addr_t size,
                                  IOMMUTLBEntry *iotlb)
@@ -550,6 +601,8 @@  static int vfio_dma_unmap(VFIOContainer *container,
                                             DIRTY_CLIENTS_NOCODE);
     }
 
+    vfio_erase_mapping(container, iova, size);
+
     return 0;
 }
 
@@ -563,6 +616,16 @@  static int vfio_dma_map(VFIOContainer *container, hwaddr iova,
         .iova = iova,
         .size = size,
     };
+    int ret;
+
+    ret = vfio_record_mapping(container, iova, size, readonly);
+    if (ret) {
+        error_report("vfio: Failed to record mapping, iova: 0x%" HWADDR_PRIx
+                     ", size: 0x" RAM_ADDR_FMT ", ret: %d (%s)",
+                     iova, size, ret, strerror(-ret));
+
+        return ret;
+    }
 
     if (!readonly) {
         map.flags |= VFIO_DMA_MAP_FLAG_WRITE;
@@ -579,8 +642,12 @@  static int vfio_dma_map(VFIOContainer *container, hwaddr iova,
         return 0;
     }
 
+    ret = -errno;
     error_report("VFIO_MAP_DMA failed: %s", strerror(errno));
-    return -errno;
+
+    vfio_erase_mapping(container, iova, size);
+
+    return ret;
 }
 
 static void vfio_host_win_add(VFIOContainer *container,
@@ -2134,16 +2201,23 @@  static int vfio_connect_container(VFIOGroup *group, AddressSpace *as,
     QLIST_INIT(&container->giommu_list);
     QLIST_INIT(&container->hostwin_list);
     QLIST_INIT(&container->vrdl_list);
+    container->mappings = iova_tree_new();
+    if (!container->mappings) {
+        error_setg(errp, "Cannot allocate DMA mappings tree");
+        ret = -ENOMEM;
+        goto free_container_exit;
+    }
+    qemu_mutex_init(&container->mappings_mutex);
 
     ret = vfio_init_container(container, group->fd, errp);
     if (ret) {
-        goto free_container_exit;
+        goto destroy_mappings_exit;
     }
 
     ret = vfio_ram_block_discard_disable(container, true);
     if (ret) {
         error_setg_errno(errp, -ret, "Cannot set discarding of RAM broken");
-        goto free_container_exit;
+        goto destroy_mappings_exit;
     }
 
     switch (container->iommu_type) {
@@ -2279,6 +2353,10 @@  listener_release_exit:
 enable_discards_exit:
     vfio_ram_block_discard_disable(container, false);
 
+destroy_mappings_exit:
+    qemu_mutex_destroy(&container->mappings_mutex);
+    iova_tree_destroy(container->mappings);
+
 free_container_exit:
     g_free(container);
 
@@ -2333,6 +2411,8 @@  static void vfio_disconnect_container(VFIOGroup *group)
         }
 
         trace_vfio_disconnect_container(container->fd);
+        qemu_mutex_destroy(&container->mappings_mutex);
+        iova_tree_destroy(container->mappings);
         close(container->fd);
         g_free(container);