diff mbox series

[V2,09/45] vfio/container: preserve descriptors

Message ID 1739542467-226739-10-git-send-email-steven.sistare@oracle.com (mailing list archive)
State New
Headers show
Series Live update: vfio and iommufd | expand

Commit Message

Steven Sistare Feb. 14, 2025, 2:13 p.m. UTC
At vfio creation time, save the values of the vfio container, group, and
device descriptors in CPR state.  On QEMU restart, vfio_realize() finds and
uses the saved descriptors, and records the reused status for use by
subsequent patches.  The reused status is cleared when vmstate load finishes.

During reuse, device and IOMMU state are already configured, so operations
in vfio_realize() that would modify the configuration, such as vfio ioctls,
are skipped.  The result is that vfio_realize() constructs QEMU data
structures that reflect the current state of the device.

Signed-off-by: Steve Sistare <steven.sistare@oracle.com>
---
 hw/vfio/container.c           | 57 +++++++++++++++++++++++++++++++++++++------
 hw/vfio/cpr-legacy.c          | 45 ++++++++++++++++++++++++++++++++++
 include/hw/vfio/vfio-common.h |  1 +
 include/hw/vfio/vfio-cpr.h    |  9 +++++++
 4 files changed, 104 insertions(+), 8 deletions(-)
diff mbox series

Patch

diff --git a/hw/vfio/container.c b/hw/vfio/container.c
index eca3362..21f2706 100644
--- a/hw/vfio/container.c
+++ b/hw/vfio/container.c
@@ -31,6 +31,8 @@ 
 #include "system/reset.h"
 #include "trace.h"
 #include "qapi/error.h"
+#include "migration/cpr.h"
+#include "migration/blocker.h"
 #include "pci.h"
 
 VFIOGroupList vfio_group_list =
@@ -413,7 +415,7 @@  static bool vfio_set_iommu(int container_fd, int group_fd,
 }
 
 static VFIOContainer *vfio_create_container(int fd, VFIOGroup *group,
-                                            Error **errp)
+                                            bool cpr_reused, Error **errp)
 {
     int iommu_type;
     const char *vioc_name;
@@ -424,7 +426,11 @@  static VFIOContainer *vfio_create_container(int fd, VFIOGroup *group,
         return NULL;
     }
 
-    if (!vfio_set_iommu(fd, group->fd, &iommu_type, errp)) {
+    /*
+     * If container is reused, just set its type and skip the ioctls, as the
+     * container and group are already configured in the kernel.
+     */
+    if (!cpr_reused && !vfio_set_iommu(fd, group->fd, &iommu_type, errp)) {
         return NULL;
     }
 
@@ -432,6 +438,7 @@  static VFIOContainer *vfio_create_container(int fd, VFIOGroup *group,
 
     container = VFIO_IOMMU_LEGACY(object_new(vioc_name));
     container->fd = fd;
+    container->cpr.reused = cpr_reused;
     container->iommu_type = iommu_type;
     return container;
 }
@@ -591,6 +598,7 @@  static bool vfio_container_group_add(VFIOContainer *container, VFIOGroup *group,
     group->container = container;
     QLIST_INSERT_HEAD(&container->group_list, group, container_next);
     vfio_kvm_device_add_group(group);
+    cpr_resave_fd("vfio_container_for_group", group->groupid, container->fd);
     return true;
 }
 
@@ -600,6 +608,7 @@  static void vfio_container_group_del(VFIOContainer *container, VFIOGroup *group)
     group->container = NULL;
     vfio_kvm_device_del_group(group);
     vfio_ram_block_discard_disable(container, false);
+    cpr_delete_fd("vfio_container_for_group", group->groupid);
 }
 
 static bool vfio_connect_container(VFIOGroup *group, AddressSpace *as,
@@ -612,17 +621,37 @@  static bool vfio_connect_container(VFIOGroup *group, AddressSpace *as,
     VFIOIOMMUClass *vioc = NULL;
     bool new_container = false;
     bool group_was_added = false;
+    bool cpr_reused;
 
     space = vfio_get_address_space(as);
+    fd = cpr_find_fd("vfio_container_for_group", group->groupid);
+    cpr_reused = (fd > 0);
+
+    /*
+     * If the container is reused, then the group is already attached in the
+     * kernel.  If a container with matching fd is found, then update the
+     * userland group list and return.  If not, then after the loop, create
+     * the container struct and group list.
+     */
 
     QLIST_FOREACH(bcontainer, &space->containers, next) {
         container = container_of(bcontainer, VFIOContainer, bcontainer);
-        if (!ioctl(group->fd, VFIO_GROUP_SET_CONTAINER, &container->fd)) {
-            return vfio_container_group_add(container, group, errp);
+
+        if (cpr_reused) {
+            if (!vfio_cpr_container_match(container, group, &fd)) {
+                continue;
+            }
+        } else if (ioctl(group->fd, VFIO_GROUP_SET_CONTAINER, &container->fd)) {
+            continue;
         }
+
+        return vfio_container_group_add(container, group, errp);
+    }
+
+    if (!cpr_reused) {
+        fd = qemu_open("/dev/vfio/vfio", O_RDWR, errp);
     }
 
-    fd = qemu_open("/dev/vfio/vfio", O_RDWR, errp);
     if (fd < 0) {
         goto fail;
     }
@@ -634,7 +663,7 @@  static bool vfio_connect_container(VFIOGroup *group, AddressSpace *as,
         goto fail;
     }
 
-    container = vfio_create_container(fd, group, errp);
+    container = vfio_create_container(fd, group, cpr_reused, errp);
     if (!container) {
         goto fail;
     }
@@ -700,6 +729,7 @@  static void vfio_disconnect_container(VFIOGroup *group)
 
     QLIST_REMOVE(group, container_next);
     group->container = NULL;
+    cpr_delete_fd("vfio_container_for_group", group->groupid);
 
     /*
      * Explicitly release the listener first before unset container,
@@ -753,7 +783,7 @@  static VFIOGroup *vfio_get_group(int groupid, AddressSpace *as, Error **errp)
     group = g_malloc0(sizeof(*group));
 
     snprintf(path, sizeof(path), "/dev/vfio/%d", groupid);
-    group->fd = qemu_open(path, O_RDWR, errp);
+    group->fd = cpr_open_fd(path, O_RDWR, "vfio_group", groupid, NULL, errp);
     if (group->fd < 0) {
         goto free_group_exit;
     }
@@ -785,6 +815,7 @@  static VFIOGroup *vfio_get_group(int groupid, AddressSpace *as, Error **errp)
     return group;
 
 close_fd_exit:
+    cpr_delete_fd("vfio_group", groupid);
     close(group->fd);
 
 free_group_exit:
@@ -806,6 +837,7 @@  static void vfio_put_group(VFIOGroup *group)
     vfio_disconnect_container(group);
     QLIST_REMOVE(group, next);
     trace_vfio_put_group(group->fd);
+    cpr_delete_fd("vfio_group", group->groupid);
     close(group->fd);
     g_free(group);
 }
@@ -815,8 +847,14 @@  static bool vfio_get_device(VFIOGroup *group, const char *name,
 {
     g_autofree struct vfio_device_info *info = NULL;
     int fd;
+    bool cpr_reused;
+
+    fd = cpr_find_fd(name, 0);
+    cpr_reused = (fd >= 0);
+    if (!cpr_reused) {
+        fd = ioctl(group->fd, VFIO_GROUP_GET_DEVICE_FD, name);
+    }
 
-    fd = ioctl(group->fd, VFIO_GROUP_GET_DEVICE_FD, name);
     if (fd < 0) {
         error_setg_errno(errp, errno, "error getting device from group %d",
                          group->groupid);
@@ -861,6 +899,8 @@  static bool vfio_get_device(VFIOGroup *group, const char *name,
     vbasedev->num_irqs = info->num_irqs;
     vbasedev->num_regions = info->num_regions;
     vbasedev->flags = info->flags;
+    vbasedev->cpr.reused = cpr_reused;
+    cpr_resave_fd(name, 0, fd);
 
     trace_vfio_get_device(name, info->flags, info->num_regions, info->num_irqs);
 
@@ -877,6 +917,7 @@  static void vfio_put_base_device(VFIODevice *vbasedev)
     QLIST_REMOVE(vbasedev, next);
     vbasedev->group = NULL;
     trace_vfio_put_base_device(vbasedev->fd);
+    cpr_delete_fd(vbasedev->name, 0);
     close(vbasedev->fd);
 }
 
diff --git a/hw/vfio/cpr-legacy.c b/hw/vfio/cpr-legacy.c
index d0557af..cee0f4e 100644
--- a/hw/vfio/cpr-legacy.c
+++ b/hw/vfio/cpr-legacy.c
@@ -30,10 +30,27 @@  static bool vfio_cpr_supported(VFIOContainer *container, Error **errp)
     }
 }
 
+static int vfio_container_post_load(void *opaque, int version_id)
+{
+    VFIOContainer *container = opaque;
+    VFIOGroup *group;
+    VFIODevice *vbasedev;
+
+    container->cpr.reused = false;
+
+    QLIST_FOREACH(group, &container->group_list, container_next) {
+        QLIST_FOREACH(vbasedev, &group->device_list, next) {
+            vbasedev->cpr.reused = false;
+        }
+    }
+    return 0;
+}
+
 static const VMStateDescription vfio_container_vmstate = {
     .name = "vfio-container",
     .version_id = 0,
     .minimum_version_id = 0,
+    .post_load = vfio_container_post_load,
     .needed = cpr_needed_for_reuse,
     .fields = (VMStateField[]) {
         VMSTATE_END_OF_LIST()
@@ -67,3 +84,31 @@  void vfio_legacy_cpr_unregister_container(VFIOContainer *container)
     migrate_del_blocker(&container->cpr.blocker);
     vmstate_unregister(NULL, &vfio_container_vmstate, container);
 }
+
+static bool same_device(int fd1, int fd2)
+{
+    struct stat st1, st2;
+
+    return !fstat(fd1, &st1) && !fstat(fd2, &st2) && st1.st_dev == st2.st_dev;
+}
+
+bool vfio_cpr_container_match(VFIOContainer *container, VFIOGroup *group,
+                              int *pfd)
+{
+    if (container->fd == *pfd) {
+        return true;
+    }
+    if (!same_device(container->fd, *pfd)) {
+        return false;
+    }
+    /*
+     * Same device, different fd.  This occurs when the container fd is
+     * cpr_save'd multiple times, once for each groupid, so SCM_RIGHTS
+     * produces duplicates.  De-dup it.
+     */
+    cpr_delete_fd("vfio_container_for_group", group->groupid);
+    close(*pfd);
+    cpr_save_fd("vfio_container_for_group", group->groupid, container->fd);
+    *pfd = container->fd;
+    return true;
+}
diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
index c482364..780646e 100644
--- a/include/hw/vfio/vfio-common.h
+++ b/include/hw/vfio/vfio-common.h
@@ -152,6 +152,7 @@  typedef struct VFIODevice {
     IOMMUFDBackend *iommufd;
     VFIOIOASHwpt *hwpt;
     QLIST_ENTRY(VFIODevice) hwpt_next;
+    VFIODeviceCPR cpr;
 } VFIODevice;
 
 struct VFIODeviceOps {
diff --git a/include/hw/vfio/vfio-cpr.h b/include/hw/vfio/vfio-cpr.h
index d4f8346..1a3eee9 100644
--- a/include/hw/vfio/vfio-cpr.h
+++ b/include/hw/vfio/vfio-cpr.h
@@ -12,9 +12,15 @@ 
 
 typedef struct VFIOContainerCPR {
     Error *blocker;
+    bool reused;
 } VFIOContainerCPR;
 
+typedef struct VFIODeviceCPR {
+    bool reused;
+} VFIODeviceCPR;
+
 struct VFIOContainer;
+struct VFIOGroup;
 
 int vfio_cpr_reboot_notifier(NotifierWithReturn *notifier, MigrationEvent *e,
                              Error **errp);
@@ -22,4 +28,7 @@  int vfio_cpr_reboot_notifier(NotifierWithReturn *notifier, MigrationEvent *e,
 bool vfio_legacy_cpr_register_container(struct VFIOContainer *container,
                                         Error **errp);
 void vfio_legacy_cpr_unregister_container(struct VFIOContainer *container);
+
+bool vfio_cpr_container_match(struct VFIOContainer *container,
+                              struct VFIOGroup *group, int *fd);
 #endif