
[11/18] vfio/common: Add device dirty page bitmap sync

Message ID: 20230126184948.10478-12-avihaih@nvidia.com
State: New, archived
Series: vfio: Add migration pre-copy support and device dirty tracking

Commit Message

Avihai Horon Jan. 26, 2023, 6:49 p.m. UTC
From: Joao Martins <joao.m.martins@oracle.com>

Add device dirty page bitmap sync functionality. This uses the device
DMA logging uAPI to sync the dirty page bitmap from the device.

Device dirty page bitmap sync is used only if all devices within a
container support device dirty page tracking.

Signed-off-by: Joao Martins <joao.m.martins@oracle.com>
Signed-off-by: Avihai Horon <avihaih@nvidia.com>
---
 hw/vfio/common.c | 93 ++++++++++++++++++++++++++++++++++++++++++------
 1 file changed, 82 insertions(+), 11 deletions(-)
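
For context, the sync added here is built on the
VFIO_DEVICE_FEATURE_DMA_LOGGING_REPORT feature ioctl. Below is a minimal,
self-contained userspace sketch of that call, mirroring what
vfio_device_dma_logging_report() in the patch does; device_fd and the range
arguments are placeholders for illustration, not part of the posted patch:

    #include <errno.h>
    #include <stdint.h>
    #include <string.h>
    #include <sys/ioctl.h>
    #include <linux/vfio.h>

    /* Sketch: ask one device to report dirty pages for an IOVA range.
     * The kernel fills `bitmap` with one bit per `page_size` chunk. */
    static int dma_logging_report(int device_fd, uint64_t iova,
                                  uint64_t length, uint64_t page_size,
                                  void *bitmap)
    {
        /* vfio_device_feature and its payload share one buffer. */
        uint64_t buf[(sizeof(struct vfio_device_feature) +
                      sizeof(struct vfio_device_feature_dma_logging_report) +
                      sizeof(uint64_t) - 1) / sizeof(uint64_t)];
        struct vfio_device_feature *feature = (void *)buf;
        struct vfio_device_feature_dma_logging_report *report =
            (void *)feature->data;

        memset(buf, 0, sizeof(buf));
        report->iova = iova;
        report->length = length;
        report->page_size = page_size;      /* granularity of one bit */
        report->bitmap = (uintptr_t)bitmap; /* user pointer passed as u64 */

        feature->argsz = sizeof(buf);
        feature->flags = VFIO_DEVICE_FEATURE_GET |
                         VFIO_DEVICE_FEATURE_DMA_LOGGING_REPORT;

        return ioctl(device_fd, VFIO_DEVICE_FEATURE, feature) ? -errno : 0;
    }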

Comments

Alex Williamson Jan. 27, 2023, 11:37 p.m. UTC | #1
On Thu, 26 Jan 2023 20:49:41 +0200
Avihai Horon <avihaih@nvidia.com> wrote:

> From: Joao Martins <joao.m.martins@oracle.com>
> 
> Add device dirty page bitmap sync functionality. This uses the device
> DMA logging uAPI to sync the dirty page bitmap from the device.
> 
> Device dirty page bitmap sync is used only if all devices within a
> container support device dirty page tracking.
> 
> Signed-off-by: Joao Martins <joao.m.martins@oracle.com>
> Signed-off-by: Avihai Horon <avihaih@nvidia.com>
> ---
>  hw/vfio/common.c | 93 ++++++++++++++++++++++++++++++++++++++++++------
>  1 file changed, 82 insertions(+), 11 deletions(-)
> 
> diff --git a/hw/vfio/common.c b/hw/vfio/common.c
> index 3caa73d6f7..0003f2421d 100644
> --- a/hw/vfio/common.c
> +++ b/hw/vfio/common.c
> @@ -355,6 +355,9 @@ static void vfio_bitmap_dealloc(VFIOBitmap *vbmap)
>      g_free(vbmap);
>  }
>  
> +static int vfio_get_dirty_bitmap(VFIOContainer *container, uint64_t iova,
> +                                 uint64_t size, ram_addr_t ram_addr);
> +
>  bool vfio_mig_active(void)
>  {
>      VFIOGroup *group;
> @@ -582,10 +585,19 @@ static int vfio_dma_unmap(VFIOContainer *container,
>          .iova = iova,
>          .size = size,
>      };
> +    int ret;
>  
> -    if (iotlb && container->dirty_pages_supported &&
> -        vfio_devices_all_running_and_mig_active(container)) {
> -        return vfio_dma_unmap_bitmap(container, iova, size, iotlb);
> +    if (iotlb && vfio_devices_all_running_and_mig_active(container)) {
> +        if (!vfio_devices_all_device_dirty_tracking(container) &&
> +            container->dirty_pages_supported) {
> +            return vfio_dma_unmap_bitmap(container, iova, size, iotlb);
> +        }
> +
> +        ret = vfio_get_dirty_bitmap(container, iova, size,
> +                                    iotlb->translated_addr);
> +        if (ret) {
> +            return ret;
> +        }

Isn't the ordering backwards here?  Only after the range is unmapped
can we know that this container can no longer dirty pages within the
range.  Thanks,

Alex
Avihai Horon Feb. 12, 2023, 3:49 p.m. UTC | #2
On 28/01/2023 1:37, Alex Williamson wrote:
>
> On Thu, 26 Jan 2023 20:49:41 +0200
> Avihai Horon <avihaih@nvidia.com> wrote:
>
>> From: Joao Martins <joao.m.martins@oracle.com>
>>
>> Add device dirty page bitmap sync functionality. This uses the device
>> DMA logging uAPI to sync the dirty page bitmap from the device.
>>
>> Device dirty page bitmap sync is used only if all devices within a
>> container support device dirty page tracking.
>>
>> Signed-off-by: Joao Martins <joao.m.martins@oracle.com>
>> Signed-off-by: Avihai Horon <avihaih@nvidia.com>
>> ---
>>   hw/vfio/common.c | 93 ++++++++++++++++++++++++++++++++++++++++++------
>>   1 file changed, 82 insertions(+), 11 deletions(-)
>>
>> diff --git a/hw/vfio/common.c b/hw/vfio/common.c
>> index 3caa73d6f7..0003f2421d 100644
>> --- a/hw/vfio/common.c
>> +++ b/hw/vfio/common.c
>> @@ -355,6 +355,9 @@ static void vfio_bitmap_dealloc(VFIOBitmap *vbmap)
>>       g_free(vbmap);
>>   }
>>
>> +static int vfio_get_dirty_bitmap(VFIOContainer *container, uint64_t iova,
>> +                                 uint64_t size, ram_addr_t ram_addr);
>> +
>>   bool vfio_mig_active(void)
>>   {
>>       VFIOGroup *group;
>> @@ -582,10 +585,19 @@ static int vfio_dma_unmap(VFIOContainer *container,
>>           .iova = iova,
>>           .size = size,
>>       };
>> +    int ret;
>>
>> -    if (iotlb && container->dirty_pages_supported &&
>> -        vfio_devices_all_running_and_mig_active(container)) {
>> -        return vfio_dma_unmap_bitmap(container, iova, size, iotlb);
>> +    if (iotlb && vfio_devices_all_running_and_mig_active(container)) {
>> +        if (!vfio_devices_all_device_dirty_tracking(container) &&
>> +            container->dirty_pages_supported) {
>> +            return vfio_dma_unmap_bitmap(container, iova, size, iotlb);
>> +        }
>> +
>> +        ret = vfio_get_dirty_bitmap(container, iova, size,
>> +                                    iotlb->translated_addr);
>> +        if (ret) {
>> +            return ret;
>> +        }
> Isn't the ordering backwards here?  Only after the range is unmapped
> can we know that this container can no longer dirty pages within the
> range.

Oops, I thought it was OK to query the dirty bitmap when we get the
vIOMMU unmap notification.
I will reverse the order.

Thanks.
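
For readers following the thread, here is a minimal sketch of the reversed
flow in vfio_dma_unmap() that the review asks for. It uses the same helper
names as the patch; the `need_dirty_sync` local is introduced only for this
sketch and is not from the posted patch:

    bool need_dirty_sync = false;
    int ret;

    if (iotlb && vfio_devices_all_running_and_mig_active(container)) {
        if (!vfio_devices_all_device_dirty_tracking(container) &&
            container->dirty_pages_supported) {
            /* The IOMMU unmap ioctl reports dirty pages atomically with
             * the unmap itself, so this path keeps its placement. */
            return vfio_dma_unmap_bitmap(container, iova, size, iotlb);
        }

        need_dirty_sync = true;
    }

    while (ioctl(container->fd, VFIO_IOMMU_UNMAP_DMA, &unmap)) {
        /* ... retry/error handling as in the existing code ... */
    }

    if (need_dirty_sync) {
        /* Only now, with the range unmapped, can the device no longer
         * dirty pages in it, so it is safe to harvest its bitmap. */
        ret = vfio_get_dirty_bitmap(container, iova, size,
                                    iotlb->translated_addr);
        if (ret) {
            return ret;
        }
    }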

Patch

diff --git a/hw/vfio/common.c b/hw/vfio/common.c
index 3caa73d6f7..0003f2421d 100644
--- a/hw/vfio/common.c
+++ b/hw/vfio/common.c
@@ -355,6 +355,9 @@ static void vfio_bitmap_dealloc(VFIOBitmap *vbmap)
     g_free(vbmap);
 }
 
+static int vfio_get_dirty_bitmap(VFIOContainer *container, uint64_t iova,
+                                 uint64_t size, ram_addr_t ram_addr);
+
 bool vfio_mig_active(void)
 {
     VFIOGroup *group;
@@ -582,10 +585,19 @@ static int vfio_dma_unmap(VFIOContainer *container,
         .iova = iova,
         .size = size,
     };
+    int ret;
 
-    if (iotlb && container->dirty_pages_supported &&
-        vfio_devices_all_running_and_mig_active(container)) {
-        return vfio_dma_unmap_bitmap(container, iova, size, iotlb);
+    if (iotlb && vfio_devices_all_running_and_mig_active(container)) {
+        if (!vfio_devices_all_device_dirty_tracking(container) &&
+            container->dirty_pages_supported) {
+            return vfio_dma_unmap_bitmap(container, iova, size, iotlb);
+        }
+
+        ret = vfio_get_dirty_bitmap(container, iova, size,
+                                    iotlb->translated_addr);
+        if (ret) {
+            return ret;
+        }
     }
 
     while (ioctl(container->fd, VFIO_IOMMU_UNMAP_DMA, &unmap)) {
@@ -611,12 +623,6 @@ static int vfio_dma_unmap(VFIOContainer *container,
         return -errno;
     }
 
-    if (iotlb && vfio_devices_all_running_and_mig_active(container)) {
-        cpu_physical_memory_set_dirty_range(iotlb->translated_addr, size,
-                                            tcg_enabled() ? DIRTY_CLIENTS_ALL :
-                                            DIRTY_CLIENTS_NOCODE);
-    }
-
     vfio_erase_mapping(container, iova, size);
 
     return 0;
@@ -1584,6 +1590,65 @@ static void vfio_listener_log_global_stop(MemoryListener *listener)
     }
 }
 
+static int vfio_device_dma_logging_report(VFIODevice *vbasedev, hwaddr iova,
+                                          hwaddr size, void *bitmap)
+{
+    uint64_t buf[DIV_ROUND_UP(sizeof(struct vfio_device_feature) +
+                        sizeof(struct vfio_device_feature_dma_logging_report),
+                        sizeof(uint64_t))] = {};
+    struct vfio_device_feature *feature = (struct vfio_device_feature *)buf;
+    struct vfio_device_feature_dma_logging_report *report =
+        (struct vfio_device_feature_dma_logging_report *)feature->data;
+
+    report->iova = iova;
+    report->length = size;
+    report->page_size = qemu_real_host_page_size();
+    report->bitmap = (uint64_t)bitmap;
+
+    feature->argsz = sizeof(buf);
+    feature->flags =
+        VFIO_DEVICE_FEATURE_GET | VFIO_DEVICE_FEATURE_DMA_LOGGING_REPORT;
+
+    if (ioctl(vbasedev->fd, VFIO_DEVICE_FEATURE, feature)) {
+        return -errno;
+    }
+
+    return 0;
+}
+
+static int vfio_devices_query_dirty_bitmap(VFIOContainer *container,
+                                           VFIOBitmap *vbmap, hwaddr iova,
+                                           hwaddr size)
+{
+    VFIODevice *vbasedev;
+    VFIOGroup *group;
+    int ret;
+
+    if (vfio_have_giommu(container)) {
+        /* Device dirty page tracking currently doesn't support vIOMMU */
+        bitmap_set(vbmap->bitmap, 0, vbmap->pages);
+
+        return 0;
+    }
+
+    QLIST_FOREACH(group, &container->group_list, container_next) {
+        QLIST_FOREACH(vbasedev, &group->device_list, next) {
+            ret = vfio_device_dma_logging_report(vbasedev, iova, size,
+                                                 vbmap->bitmap);
+            if (ret) {
+                error_report("%s: Failed to get DMA logging report, iova: "
+                             "0x%" HWADDR_PRIx ", size: 0x%" HWADDR_PRIx
+                             ", err: %d (%s)",
+                             vbasedev->name, iova, size, ret, strerror(-ret));
+
+                return ret;
+            }
+        }
+    }
+
+    return 0;
+}
+
 static int vfio_query_dirty_bitmap(VFIOContainer *container, VFIOBitmap *vbmap,
                                    hwaddr iova, hwaddr size)
 {
@@ -1627,7 +1692,8 @@ static int vfio_get_dirty_bitmap(VFIOContainer *container, uint64_t iova,
     VFIOBitmap *vbmap;
     int ret;
 
-    if (!container->dirty_pages_supported) {
+    if (!container->dirty_pages_supported &&
+        !vfio_devices_all_device_dirty_tracking(container)) {
         cpu_physical_memory_set_dirty_range(ram_addr, size,
                                             tcg_enabled() ? DIRTY_CLIENTS_ALL :
                                             DIRTY_CLIENTS_NOCODE);
@@ -1639,7 +1705,12 @@ static int vfio_get_dirty_bitmap(VFIOContainer *container, uint64_t iova,
         return -errno;
     }
 
-    ret = vfio_query_dirty_bitmap(container, vbmap, iova, size);
+    if (vfio_devices_all_device_dirty_tracking(container)) {
+        ret = vfio_devices_query_dirty_bitmap(container, vbmap, iova, size);
+    } else {
+        ret = vfio_query_dirty_bitmap(container, vbmap, iova, size);
+    }
+
     if (ret) {
         goto out;
     }