@@ -16,6 +16,13 @@ helps to reduce the total downtime of the VM. VFIO devices opt-in to pre-copy
support by reporting the VFIO_MIGRATION_PRE_COPY flag in the
VFIO_DEVICE_FEATURE_MIGRATION ioctl.
+When pre-copy is supported, it's possible to further reduce downtime by
+enabling "switchover-ack" migration capability.
+VFIO migration uAPI defines "initial bytes" as part of its pre-copy data stream
+and recommends that the initial bytes are sent and loaded in the destination
+before stopping the source VM. Enabling this migration capability will
+guarantee that and thus, can potentially reduce downtime even further.
+
Note that currently VFIO migration is supported only for a single device. This
is due to VFIO migration's lack of P2P support. However, P2P support is planned
to be added later on.
@@ -45,6 +52,9 @@ VFIO implements the device hooks for the iterative approach as follows:
* A ``save_live_iterate`` function that reads the VFIO device's data from the
vendor driver during iterative pre-copy phase.
+* A ``switchover_ack_needed`` function that checks if the VFIO device uses
+ "switchover-ack" migration capability when this capability is enabled.
+
* A ``save_state`` function to save the device config space if it is present.
* A ``save_live_complete_precopy`` function that sets the VFIO device in
@@ -69,6 +69,7 @@ typedef struct VFIOMigration {
uint64_t mig_flags;
uint64_t precopy_init_size;
uint64_t precopy_dirty_size;
+ bool initial_data_sent;
} VFIOMigration;
typedef struct VFIOAddressSpace {
@@ -18,6 +18,8 @@
#include "sysemu/runstate.h"
#include "hw/vfio/vfio-common.h"
#include "migration/migration.h"
+#include "migration/options.h"
+#include "migration/savevm.h"
#include "migration/vmstate.h"
#include "migration/qemu-file.h"
#include "migration/register.h"
@@ -45,6 +47,7 @@
#define VFIO_MIG_FLAG_DEV_CONFIG_STATE (0xffffffffef100002ULL)
#define VFIO_MIG_FLAG_DEV_SETUP_STATE (0xffffffffef100003ULL)
#define VFIO_MIG_FLAG_DEV_DATA_STATE (0xffffffffef100004ULL)
+#define VFIO_MIG_FLAG_DEV_INIT_DATA_SENT (0xffffffffef100005ULL)
/*
* This is an arbitrary size based on migration of mlx5 devices, where typically
@@ -384,6 +387,7 @@ static void vfio_save_cleanup(void *opaque)
migration->data_buffer = NULL;
migration->precopy_init_size = 0;
migration->precopy_dirty_size = 0;
+ migration->initial_data_sent = false;
vfio_migration_cleanup(vbasedev);
trace_vfio_save_cleanup(vbasedev->name);
}
@@ -457,10 +461,17 @@ static int vfio_save_iterate(QEMUFile *f, void *opaque)
if (data_size < 0) {
return data_size;
}
- qemu_put_be64(f, VFIO_MIG_FLAG_END_OF_STATE);
vfio_update_estimated_pending_data(migration, data_size);
+ if (migrate_switchover_ack() && !migration->precopy_init_size &&
+ !migration->initial_data_sent) {
+ qemu_put_be64(f, VFIO_MIG_FLAG_DEV_INIT_DATA_SENT);
+ migration->initial_data_sent = true;
+ } else {
+ qemu_put_be64(f, VFIO_MIG_FLAG_END_OF_STATE);
+ }
+
trace_vfio_save_iterate(vbasedev->name, migration->precopy_init_size,
migration->precopy_dirty_size);
@@ -579,6 +590,24 @@ static int vfio_load_state(QEMUFile *f, void *opaque, int version_id)
}
break;
}
+ case VFIO_MIG_FLAG_DEV_INIT_DATA_SENT:
+ {
+ if (!vfio_precopy_supported(vbasedev) ||
+ !migrate_switchover_ack()) {
+ error_report("%s: Received INIT_DATA_SENT but switchover ack "
+ "is not used", vbasedev->name);
+ return -EINVAL;
+ }
+
+ ret = qemu_loadvm_approve_switchover();
+ if (ret) {
+ error_report(
+ "%s: qemu_loadvm_approve_switchover failed, err=%d (%s)",
+ vbasedev->name, ret, strerror(-ret));
+ }
+
+ return ret;
+ }
default:
error_report("%s: Unknown tag 0x%"PRIx64, vbasedev->name, data);
return -EINVAL;
@@ -593,6 +622,13 @@ static int vfio_load_state(QEMUFile *f, void *opaque, int version_id)
return ret;
}
+static bool vfio_switchover_ack_needed(void *opaque)
+{
+ VFIODevice *vbasedev = opaque;
+
+ return vfio_precopy_supported(vbasedev);
+}
+
static const SaveVMHandlers savevm_vfio_handlers = {
.save_setup = vfio_save_setup,
.save_cleanup = vfio_save_cleanup,
@@ -605,6 +641,7 @@ static const SaveVMHandlers savevm_vfio_handlers = {
.load_setup = vfio_load_setup,
.load_cleanup = vfio_load_cleanup,
.load_state = vfio_load_state,
+ .switchover_ack_needed = vfio_switchover_ack_needed,
};
/* ---------------------------------------------------------------------- */