diff mbox series

[RFC,V1,6/6] migration: cpr-transfer mode

Message ID 1719776648-435073-7-git-send-email-steven.sistare@oracle.com (mailing list archive)
State New
Headers show
Series Live update: cpr-transfer | expand

Commit Message

Steve Sistare June 30, 2024, 7:44 p.m. UTC
Add the cpr-transfer migration mode.  Usage:
  qemu-system-$arch -machine anon-alloc=memfd ...

  start new QEMU with "-incoming <uri-1> -cpr-uri <uri-2>"

  Issue commands to old QEMU:
  migrate_set_parameter mode cpr-transfer
  migrate_set_parameter cpr-uri <uri-2>
  migrate -d <uri-1>

The migrate command stops the VM, saves CPR state to uri-2, saves
normal migration state to uri-1, and old QEMU enters the postmigrate
state.  The user starts new QEMU on the same host as old QEMU, with the
same arguments as old QEMU, plus the -incoming and -cpr-uri options.  Guest RAM is
preserved in place, albeit with new virtual addresses in new QEMU.

This mode requires a second migration channel, specified by the
cpr-uri migration property on the outgoing side, and by the cpr-uri
QEMU command-line option on the incoming side.  The channel must
be a type, such as unix socket, that supports SCM_RIGHTS.

Memory-backend objects must have the share=on attribute;
memory-backend-epc is not supported.  The VM must be started with
the '-machine anon-alloc=memfd' option, which allows anonymous
memory to be transferred in place to the new process.  The memfds
are kept open by sending the descriptors to new QEMU via the
cpr-uri, which must support SCM_RIGHTS, and they are mmap'd
in new QEMU.

Signed-off-by: Steve Sistare <steven.sistare@oracle.com>
---
 migration/cpr.c           |  9 ++++++++-
 migration/migration.c     | 37 +++++++++++++++++++++++++++++++++++++
 migration/ram.c           |  1 +
 migration/vmstate-types.c |  5 +++--
 qapi/migration.json       | 26 +++++++++++++++++++++++++-
 stubs/vmstate.c           |  7 +++++++
 6 files changed, 81 insertions(+), 4 deletions(-)
diff mbox series

Patch

diff --git a/migration/cpr.c b/migration/cpr.c
index 50c130c..7ac01a9 100644
--- a/migration/cpr.c
+++ b/migration/cpr.c
@@ -58,7 +58,7 @@  static const VMStateDescription vmstate_cpr_fd = {
         VMSTATE_UINT32(namelen, CprFd),
         VMSTATE_VBUFFER_ALLOC_UINT32(name, CprFd, 0, NULL, namelen),
         VMSTATE_INT32(id, CprFd),
-        VMSTATE_INT32(fd, CprFd),
+        VMSTATE_FD(fd, CprFd),
         VMSTATE_END_OF_LIST()
     }
 };
@@ -172,6 +172,8 @@  int cpr_state_save(Error **errp)
 
     if (mode == MIG_MODE_CPR_EXEC) {
         f = cpr_exec_output(errp);
+    } else if (mode == MIG_MODE_CPR_TRANSFER) {
+        f = cpr_transfer_output(migrate_cpr_uri(), errp);
     } else {
         return 0;
     }
@@ -209,6 +211,11 @@  int cpr_state_load(Error **errp)
      */
     if (cpr_exec_has_state()) {
         f = cpr_exec_input(errp);
+        if (cpr_uri) {
+            warn_report("ignoring cpr-uri option for migration mode cpr-exec");
+        }
+    } else if (cpr_uri) {
+        f = cpr_transfer_input(cpr_uri, errp);
     } else {
         return 0;
     }
diff --git a/migration/migration.c b/migration/migration.c
index a4a020e..65a36a6 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -77,6 +77,7 @@  static NotifierWithReturnList migration_state_notifiers[MIG_MODE__MAX] = {
     NOTIFIER_ELEM_INIT(migration_state_notifiers, MIG_MODE_NORMAL),
     NOTIFIER_ELEM_INIT(migration_state_notifiers, MIG_MODE_CPR_REBOOT),
     NOTIFIER_ELEM_INIT(migration_state_notifiers, MIG_MODE_CPR_EXEC),
+    NOTIFIER_ELEM_INIT(migration_state_notifiers, MIG_MODE_CPR_TRANSFER),
 };
 
 /* Messages sent on the return path from destination to source */
@@ -205,6 +206,12 @@  migration_channels_and_transport_compatible(MigrationAddress *addr,
         return false;
     }
 
+    if (migrate_mode() == MIG_MODE_CPR_TRANSFER &&
+        addr->transport == MIGRATION_ADDRESS_TYPE_FILE) {
+        error_setg(errp, "Migration requires streamable transport (eg unix)");
+        return false;
+    }
+
     return true;
 }
 
@@ -1697,6 +1704,7 @@  bool migrate_mode_is_cpr(MigrationState *s)
 {
     MigMode mode = s->parameters.mode;
     return mode == MIG_MODE_CPR_REBOOT ||
+           mode == MIG_MODE_CPR_TRANSFER ||
            mode == MIG_MODE_CPR_EXEC;
 }
 
@@ -2038,6 +2046,12 @@  static bool migrate_prepare(MigrationState *s, bool resume, Error **errp)
         return false;
     }
 
+    if (migrate_mode() == MIG_MODE_CPR_TRANSFER &&
+        !s->parameters.cpr_uri) {
+        error_setg(errp, "cpr-transfer mode requires setting cpr-uri");
+        return false;
+    }
+
     if (migration_is_blocked(errp)) {
         return false;
     }
@@ -2144,6 +2158,29 @@  void qmp_migrate(const char *uri, bool has_channels,
         goto out;
     }
 
+    /*
+     * For cpr-transfer mode, the target first reads CPR state, which cannot
+     * complete until cpr_state_save above finishes, then the target creates
+     * the migration channel and listens.  We must wait for the channel to
+     * be created before connecting to it.
+     *
+     * This implementation of waiting is a hack.  It restricts the channel
+     * type, and will loop forever if the target dies.  It should be defined
+     * as a main-loop event that calls connect on the back end.
+     */
+    if (s->parameters.mode == MIG_MODE_CPR_TRANSFER) {
+        SocketAddress *saddr = &addr->u.socket;
+        if (addr->transport == MIGRATION_ADDRESS_TYPE_SOCKET &&
+            saddr->type == SOCKET_ADDRESS_TYPE_UNIX) {
+            while (access(saddr->u.fd.str, F_OK)) {
+                usleep(1000000);
+            }
+        } else {
+            error_setg(&local_err, "cpr-transfer requires a unix channel");
+            goto out;
+        }
+    }
+
     if (addr->transport == MIGRATION_ADDRESS_TYPE_SOCKET) {
         SocketAddress *saddr = &addr->u.socket;
         if (saddr->type == SOCKET_ADDRESS_TYPE_INET ||
diff --git a/migration/ram.c b/migration/ram.c
index 45b8f00..1e1e05e 100644
--- a/migration/ram.c
+++ b/migration/ram.c
@@ -219,6 +219,7 @@  bool migrate_ram_is_ignored(RAMBlock *block)
     MigMode mode = migrate_mode();
     return !qemu_ram_is_migratable(block) ||
            mode == MIG_MODE_CPR_EXEC ||
+           mode == MIG_MODE_CPR_TRANSFER ||
            (migrate_ignore_shared() && qemu_ram_is_shared(block)
                                     && qemu_ram_is_named_file(block));
 }
diff --git a/migration/vmstate-types.c b/migration/vmstate-types.c
index 6e45a4a..618b7fb 100644
--- a/migration/vmstate-types.c
+++ b/migration/vmstate-types.c
@@ -15,6 +15,7 @@ 
 #include "qemu-file.h"
 #include "migration.h"
 #include "migration/vmstate.h"
+#include "migration/client-options.h"
 #include "qemu/error-report.h"
 #include "qemu/queue.h"
 #include "trace.h"
@@ -321,7 +322,7 @@  static int get_fd(QEMUFile *f, void *pv, size_t size,
 {
     int32_t *v = pv;
     qemu_get_sbe32s(f, v);
-    if (*v < 0) {
+    if (*v < 0 || migrate_mode() == MIG_MODE_CPR_EXEC) {
         return 0;
     }
     *v = qemu_file_get_fd(f);
@@ -334,7 +335,7 @@  static int put_fd(QEMUFile *f, void *pv, size_t size,
     int32_t *v = pv;
 
     qemu_put_sbe32s(f, v);
-    if (*v < 0) {
+    if (*v < 0 || migrate_mode() == MIG_MODE_CPR_EXEC) {
         return 0;
     }
     return qemu_file_put_fd(f, *v);
diff --git a/qapi/migration.json b/qapi/migration.json
index df62456..cd2d949 100644
--- a/qapi/migration.json
+++ b/qapi/migration.json
@@ -626,9 +626,33 @@ 
 #     with the '-machine anon-alloc=memfd' option.
 #
 #     (since 9.1)
+#
+# @cpr-transfer: This mode allows the user to transfer a guest to a
+#     new QEMU instance on the same host with minimal guest pause
+#     time, by preserving guest RAM in place, albeit with new virtual
+#     addresses in new QEMU.
+#
+#     The user starts new QEMU on the same host as old QEMU, with the
+#     same arguments as old QEMU, plus the -incoming option.  The
+#     user issues the migrate command to old QEMU, which stops the VM,
+#     saves state to the migration channels, and enters the postmigrate
+#     state.  Execution resumes in new QEMU.  Guest RAM is preserved in
+#     place, albeit with new virtual addresses in new QEMU.
+#
+#     This mode requires a second migration channel, specified by the
+#     cpr-uri migration property on the outgoing side, and by
+#     the cpr-uri QEMU command-line option on the incoming
+#     side.  The channel must be a type, such as unix socket, that
+#     supports SCM_RIGHTS.
+#
+#     Memory-backend objects must have the share=on attribute;
+#     memory-backend-epc is not supported.  The VM must be started
+#     with the '-machine anon-alloc=memfd' option.
+#
+#     (since 9.1)
 ##
 { 'enum': 'MigMode',
-  'data': [ 'normal', 'cpr-reboot', 'cpr-exec' ] }
+  'data': [ 'normal', 'cpr-reboot', 'cpr-exec', 'cpr-transfer' ] }
 
 ##
 # @ZeroPageDetection:
diff --git a/stubs/vmstate.c b/stubs/vmstate.c
index 8513d92..c190762 100644
--- a/stubs/vmstate.c
+++ b/stubs/vmstate.c
@@ -1,5 +1,7 @@ 
 #include "qemu/osdep.h"
 #include "migration/vmstate.h"
+#include "qapi/qapi-types-migration.h"
+#include "migration/client-options.h"
 
 int vmstate_register_with_alias_id(VMStateIf *obj,
                                    uint32_t instance_id,
@@ -21,3 +23,8 @@  bool vmstate_check_only_migratable(const VMStateDescription *vmsd)
 {
     return true;
 }
+
+MigMode migrate_mode(void)
+{
+    return MIG_MODE_NORMAL;
+}