diff mbox series

[09/18] vfio/common: Add device dirty page tracking start/stop

Message ID 20230126184948.10478-10-avihaih@nvidia.com (mailing list archive)
State New, archived
Headers show
Series vfio: Add migration pre-copy support and device dirty tracking | expand

Commit Message

Avihai Horon Jan. 26, 2023, 6:49 p.m. UTC
From: Joao Martins <joao.m.martins@oracle.com>

Add device dirty page tracking start/stop functionality. This uses the
device DMA logging uAPI to start and stop dirty page tracking by device.

Device dirty page tracking is used only if all devices within a
container support device dirty page tracking.

Signed-off-by: Joao Martins <joao.m.martins@oracle.com>
Signed-off-by: Avihai Horon <avihaih@nvidia.com>
---
 include/hw/vfio/vfio-common.h |   2 +
 hw/vfio/common.c              | 211 +++++++++++++++++++++++++++++++++-
 2 files changed, 211 insertions(+), 2 deletions(-)
diff mbox series

Patch

diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
index d54000d7ae..cde6ffb9d6 100644
--- a/include/hw/vfio/vfio-common.h
+++ b/include/hw/vfio/vfio-common.h
@@ -151,6 +151,8 @@  typedef struct VFIODevice {
     VFIOMigration *migration;
     Error *migration_blocker;
     OnOffAuto pre_copy_dirty_page_tracking;
+    bool dirty_pages_supported;
+    bool dirty_tracking;
 } VFIODevice;
 
 struct VFIODeviceOps {
diff --git a/hw/vfio/common.c b/hw/vfio/common.c
index fafc361cea..005c060c67 100644
--- a/hw/vfio/common.c
+++ b/hw/vfio/common.c
@@ -422,6 +422,22 @@  static bool vfio_devices_all_dirty_tracking(VFIOContainer *container)
     return true;
 }
 
+static bool vfio_devices_all_device_dirty_tracking(VFIOContainer *container)
+{
+    VFIOGroup *group;
+    VFIODevice *vbasedev;
+
+    QLIST_FOREACH(group, &container->group_list, container_next) {
+        QLIST_FOREACH(vbasedev, &group->device_list, next) {
+            if (!vbasedev->dirty_pages_supported) {
+                return false;
+            }
+        }
+    }
+
+    return true;
+}
+
 /*
  * Check if all VFIO devices are running and migration is active, which is
  * essentially equivalent to the migration being in pre-copy phase.
@@ -1355,13 +1371,192 @@  static int vfio_set_dirty_page_tracking(VFIOContainer *container, bool start)
     return ret;
 }
 
+static int vfio_devices_dma_logging_set(VFIOContainer *container,
+                                        struct vfio_device_feature *feature)
+{
+    bool status = (feature->flags & VFIO_DEVICE_FEATURE_MASK) ==
+                  VFIO_DEVICE_FEATURE_DMA_LOGGING_START;
+    VFIODevice *vbasedev;
+    VFIOGroup *group;
+    int ret = 0;
+
+    QLIST_FOREACH(group, &container->group_list, container_next) {
+        QLIST_FOREACH(vbasedev, &group->device_list, next) {
+            if (vbasedev->dirty_tracking == status) {
+                continue;
+            }
+
+            ret = ioctl(vbasedev->fd, VFIO_DEVICE_FEATURE, feature);
+            if (ret) {
+                ret = -errno;
+                error_report("%s: Failed to set DMA logging %s, err %d (%s)",
+                             vbasedev->name, status ? "start" : "stop", ret,
+                             strerror(errno));
+                goto out;
+            }
+            vbasedev->dirty_tracking = status;
+        }
+    }
+
+out:
+    return ret;
+}
+
+static int vfio_devices_dma_logging_stop(VFIOContainer *container)
+{
+    uint64_t buf[DIV_ROUND_UP(sizeof(struct vfio_device_feature),
+                              sizeof(uint64_t))] = {};
+    struct vfio_device_feature *feature = (struct vfio_device_feature *)buf;
+
+    feature->argsz = sizeof(buf);
+    feature->flags = VFIO_DEVICE_FEATURE_SET;
+    feature->flags |= VFIO_DEVICE_FEATURE_DMA_LOGGING_STOP;
+
+    return vfio_devices_dma_logging_set(container, feature);
+}
+
+static gboolean vfio_device_dma_logging_range_add(DMAMap *map, gpointer data)
+{
+    struct vfio_device_feature_dma_logging_range **out = data;
+    struct vfio_device_feature_dma_logging_range *range = *out;
+
+    range->iova = map->iova;
+    /* IOVATree is inclusive, DMA logging uAPI isn't, so add 1 to length */
+    range->length = map->size + 1;
+
+    *out = ++range;
+
+    return false;
+}
+
+static gboolean vfio_iova_tree_get_first(DMAMap *map, gpointer data)
+{
+    DMAMap *first = data;
+
+    first->iova = map->iova;
+    first->size = map->size;
+
+    return true;
+}
+
+static gboolean vfio_iova_tree_get_last(DMAMap *map, gpointer data)
+{
+    DMAMap *last = data;
+
+    last->iova = map->iova;
+    last->size = map->size;
+
+    return false;
+}
+
+static struct vfio_device_feature *
+vfio_device_feature_dma_logging_start_create(VFIOContainer *container)
+{
+    struct vfio_device_feature *feature;
+    size_t feature_size;
+    struct vfio_device_feature_dma_logging_control *control;
+    struct vfio_device_feature_dma_logging_range *ranges;
+    unsigned int max_ranges;
+    unsigned int cur_ranges;
+
+    feature_size = sizeof(struct vfio_device_feature) +
+                   sizeof(struct vfio_device_feature_dma_logging_control);
+    feature = g_malloc0(feature_size);
+    feature->argsz = feature_size;
+    feature->flags = VFIO_DEVICE_FEATURE_SET;
+    feature->flags |= VFIO_DEVICE_FEATURE_DMA_LOGGING_START;
+
+    control = (struct vfio_device_feature_dma_logging_control *)feature->data;
+    control->page_size = qemu_real_host_page_size();
+
+    QEMU_LOCK_GUARD(&container->mappings_mutex);
+
+    /*
+     * DMA logging uAPI guarantees to support at least num_ranges that fits into
+     * a single host kernel page. To be on the safe side, use this as a limit
+     * from which to merge to a single range.
+     */
+    max_ranges = qemu_real_host_page_size() / sizeof(*ranges);
+    cur_ranges = iova_tree_nnodes(container->mappings);
+    control->num_ranges = (cur_ranges <= max_ranges) ? cur_ranges : 1;
+    ranges = g_try_new0(struct vfio_device_feature_dma_logging_range,
+                        control->num_ranges);
+    if (!ranges) {
+        g_free(feature);
+        errno = ENOMEM;
+
+        return NULL;
+    }
+
+    control->ranges = (uint64_t)ranges;
+    if (cur_ranges <= max_ranges) {
+        iova_tree_foreach(container->mappings,
+                          vfio_device_dma_logging_range_add, &ranges);
+    } else {
+        DMAMap first, last;
+
+        iova_tree_foreach(container->mappings, vfio_iova_tree_get_first,
+                          &first);
+        iova_tree_foreach(container->mappings, vfio_iova_tree_get_last, &last);
+        ranges->iova = first.iova;
+        /* IOVATree is inclusive, DMA logging uAPI isn't, so add 1 to length */
+        ranges->length = (last.iova - first.iova) + last.size + 1;
+    }
+
+    return feature;
+}
+
+static void vfio_device_feature_dma_logging_start_destroy(
+    struct vfio_device_feature *feature)
+{
+    struct vfio_device_feature_dma_logging_control *control =
+        (struct vfio_device_feature_dma_logging_control *)feature->data;
+    struct vfio_device_feature_dma_logging_range *ranges =
+        (struct vfio_device_feature_dma_logging_range *)control->ranges;
+
+    g_free(ranges);
+    g_free(feature);
+}
+
+static int vfio_devices_dma_logging_start(VFIOContainer *container)
+{
+    struct vfio_device_feature *feature;
+    int ret;
+
+    feature = vfio_device_feature_dma_logging_start_create(container);
+    if (!feature) {
+        return -errno;
+    }
+
+    ret = vfio_devices_dma_logging_set(container, feature);
+    if (ret) {
+        vfio_devices_dma_logging_stop(container);
+    }
+
+    vfio_device_feature_dma_logging_start_destroy(feature);
+
+    return ret;
+}
+
 static void vfio_listener_log_global_start(MemoryListener *listener)
 {
     VFIOContainer *container = container_of(listener, VFIOContainer, listener);
     int ret;
 
-    ret = vfio_set_dirty_page_tracking(container, true);
+    if (vfio_devices_all_device_dirty_tracking(container)) {
+        if (vfio_have_giommu(container)) {
+            /* Device dirty page tracking currently doesn't support vIOMMU */
+            return;
+        }
+
+        ret = vfio_devices_dma_logging_start(container);
+    } else {
+        ret = vfio_set_dirty_page_tracking(container, true);
+    }
+
     if (ret) {
+        error_report("vfio: Could not start dirty page tracking, err: %d (%s)",
+                     ret, strerror(-ret));
         vfio_set_migration_error(ret);
     }
 }
@@ -1371,8 +1566,20 @@  static void vfio_listener_log_global_stop(MemoryListener *listener)
     VFIOContainer *container = container_of(listener, VFIOContainer, listener);
     int ret;
 
-    ret = vfio_set_dirty_page_tracking(container, false);
+    if (vfio_devices_all_device_dirty_tracking(container)) {
+        if (vfio_have_giommu(container)) {
+            /* Device dirty page tracking currently doesn't support vIOMMU */
+            return;
+        }
+
+        ret = vfio_devices_dma_logging_stop(container);
+    } else {
+        ret = vfio_set_dirty_page_tracking(container, false);
+    }
+
     if (ret) {
+        error_report("vfio: Could not stop dirty page tracking, err: %d (%s)",
+                     ret, strerror(-ret));
         vfio_set_migration_error(ret);
     }
 }