diff mbox series

[v2,1/7] migration: Add precopy initial data capability

Message ID 20230517155219.10691-2-avihaih@nvidia.com (mailing list archive)
State New, archived
Headers show
Series migration: Add precopy initial data capability and VFIO precopy support | expand

Commit Message

Avihai Horon May 17, 2023, 3:52 p.m. UTC
Migration downtime estimation is calculated based on bandwidth and
remaining migration data. This assumes that loading of migration data in
the destination takes a negligible amount of time and that downtime
depends only on network speed.

While this may be true for RAM, it's not necessarily true for other
migration users. For example, loading the data of a VFIO device in the
destination might require from the device to allocate resources, prepare
internal data structures and so on. These operations can take a
significant amount of time which can increase migration downtime.

This patch adds a new capability "precopy initial data" that allows the
source to send initial precopy data and the destination to ACK that this
data has been loaded. Migration will not attempt to stop the source VM
and complete the migration until this ACK is received.

This will allow migration users to send initial precopy data which can
be used to reduce downtime (e.g., by pre-allocating resources), while
making sure that the source will stop the VM and complete the migration
only after this initial precopy data is sent and loaded in the
destination so it will have full effect.

This new capability relies on the return path capability to communicate
from the destination back to the source.

The actual implementation of the capability will be added in the
following patches.

Signed-off-by: Avihai Horon <avihaih@nvidia.com>
---
 qapi/migration.json |  9 ++++++++-
 migration/options.h |  1 +
 migration/options.c | 21 +++++++++++++++++++++
 3 files changed, 30 insertions(+), 1 deletion(-)
diff mbox series

Patch

diff --git a/qapi/migration.json b/qapi/migration.json
index 179af0c4d8..a6c1942064 100644
--- a/qapi/migration.json
+++ b/qapi/migration.json
@@ -487,6 +487,13 @@ 
 #     and should not affect the correctness of postcopy migration.
 #     (since 7.1)
 #
+# @precopy-initial-data: If enabled, migration will not attempt to
+#     stop source VM and complete the migration until an ACK is
+#     received from the destination that initial precopy data has been
+#     loaded.  This can improve downtime if there are migration users
+#     that support precopy initial data.  'return-path' capability
+#     must be enabled to use it.  (since 8.1)
+#
 # Features:
 #
 # @unstable: Members @x-colo and @x-ignore-shared are experimental.
@@ -502,7 +509,7 @@ 
            'dirty-bitmaps', 'postcopy-blocktime', 'late-block-activate',
            { 'name': 'x-ignore-shared', 'features': [ 'unstable' ] },
            'validate-uuid', 'background-snapshot',
-           'zero-copy-send', 'postcopy-preempt'] }
+           'zero-copy-send', 'postcopy-preempt', 'precopy-initial-data'] }
 
 ##
 # @MigrationCapabilityStatus:
diff --git a/migration/options.h b/migration/options.h
index 5cca3326d6..bba70a33bf 100644
--- a/migration/options.h
+++ b/migration/options.h
@@ -44,6 +44,7 @@  bool migrate_pause_before_switchover(void);
 bool migrate_postcopy_blocktime(void);
 bool migrate_postcopy_preempt(void);
 bool migrate_postcopy_ram(void);
+bool migrate_precopy_initial_data(void);
 bool migrate_rdma_pin_all(void);
 bool migrate_release_ram(void);
 bool migrate_return_path(void);
diff --git a/migration/options.c b/migration/options.c
index c2a278ee2d..0a31921a7a 100644
--- a/migration/options.c
+++ b/migration/options.c
@@ -184,6 +184,8 @@  Property migration_properties[] = {
     DEFINE_PROP_MIG_CAP("x-zero-copy-send",
             MIGRATION_CAPABILITY_ZERO_COPY_SEND),
 #endif
+    DEFINE_PROP_MIG_CAP("x-precopy-initial-data",
+                        MIGRATION_CAPABILITY_PRECOPY_INITIAL_DATA),
 
     DEFINE_PROP_END_OF_LIST(),
 };
@@ -286,6 +288,13 @@  bool migrate_postcopy_ram(void)
     return s->capabilities[MIGRATION_CAPABILITY_POSTCOPY_RAM];
 }
 
+bool migrate_precopy_initial_data(void)
+{
+    MigrationState *s = migrate_get_current();
+
+    return s->capabilities[MIGRATION_CAPABILITY_PRECOPY_INITIAL_DATA];
+}
+
 bool migrate_rdma_pin_all(void)
 {
     MigrationState *s = migrate_get_current();
@@ -546,6 +555,18 @@  bool migrate_caps_check(bool *old_caps, bool *new_caps, Error **errp)
         }
     }
 
+    if (new_caps[MIGRATION_CAPABILITY_PRECOPY_INITIAL_DATA]) {
+        if (!new_caps[MIGRATION_CAPABILITY_RETURN_PATH]) {
+            error_setg(errp, "Capability 'precopy-initial-data' requires "
+                             "capability 'return-path'");
+            return false;
+        }
+
+        /* Disable this capability until it's implemented */
+        error_setg(errp, "'precopy-initial-data' is not implemented yet");
+        return false;
+    }
+
     return true;
 }