@@ -2,7 +2,8 @@
% Andrew Cooper <<andrew.cooper3@citrix.com>>
Wen Congyang <<wency@cn.fujitsu.com>>
Yang Hongyang <<hongyang.yang@easystack.cn>>
-% Revision 2
+ Joshua Otto <<jtotto@uwaterloo.ca>>
+% Revision 3
Introduction
============
@@ -123,7 +124,9 @@ type 0x00000000: END
0x00000005: CHECKPOINT_STATE
- 0x00000006 - 0x7FFFFFFF: Reserved for future _mandatory_
+ 0x00000006: POSTCOPY_TRANSITION_END
+
+ 0x00000007 - 0x7FFFFFFF: Reserved for future _mandatory_
records.
0x80000000 - 0xFFFFFFFF: Reserved for future _optional_
@@ -304,6 +307,18 @@ While Secondary is running in below loop:
b. Send _CHECKPOINT\_SVM\_SUSPENDED_ to primary
4. Checkpoint
+POSTCOPY\_TRANSITION\_END
+-------------------------
+
+A postcopy transition end record marks the end of a postcopy transition in a
+libxl live migration stream. It indicates that control of the stream should be
+returned to libxc for the postcopy memory migration phase.
+
+ 0 1 2 3 4 5 6 7 octet
+ +-------------------------------------------------+
+
+The postcopy transition end record contains no fields; its body_length is 0.
+
Future Extensions
=================
@@ -1375,10 +1375,12 @@ int libxl_domain_suspend(libxl_ctx *ctx, uint32_t domid, int fd,
#define LIBXL_SUSPEND_DEBUG 1
#define LIBXL_SUSPEND_LIVE 2
-int libxl_domain_live_migrate(libxl_ctx *ctx, uint32_t domid, int fd,
+int libxl_domain_live_migrate(libxl_ctx *ctx, uint32_t domid, int send_fd,
int flags, /* LIBXL_SUSPEND_* */
unsigned int precopy_iterations,
unsigned int precopy_dirty_threshold,
+ int recv_fd,
+ bool *postcopy_transitioned, /* OUT */
const libxl_asyncop_how *ao_how)
LIBXL_EXTERNAL_CALLERS_ONLY;
@@ -349,10 +349,31 @@ static int libxl__save_live_migration_simple_precopy_policy(
return XGS_POLICY_CONTINUE_PRECOPY;
}
+static void postcopy_transition_done(libxl__egc *egc,
+ libxl__stream_write_state *sws, int rc);
+
static void libxl__save_live_migration_postcopy_transition_callback(void *user)
{
- /* XXX we're not yet ready to deal with this */
- assert(0);
+ libxl__save_helper_state *shs = user;
+ libxl__stream_write_state *sws = CONTAINER_OF(shs, *sws, shs);
+ sws->postcopy_transition_callback = postcopy_transition_done;
+ libxl__stream_write_start_postcopy_transition(shs->egc, sws);
+}
+
+static void postcopy_transition_done(libxl__egc *egc,
+ libxl__stream_write_state *sws,
+ int rc)
+{
+ libxl__domain_save_state *dss = sws->dss;
+
+ /* Past here, it's _possible_ that the domain may execute at the
+ * destination, so - unless we're given positive confirmation by the
+ * destination that it failed to resume there - we must assume it has. */
+ assert(dss->postcopy_transitioned);
+ *dss->postcopy_transitioned = !rc;
+
+ /* Return control to libxc. */
+ libxl__xc_domain_saverestore_async_callback_done(egc, &sws->shs, !rc);
}
/*----- main code for saving, in order of execution -----*/
@@ -488,7 +488,8 @@ static void domain_suspend_cb(libxl__egc *egc,
static int do_domain_suspend(libxl_ctx *ctx, uint32_t domid, int fd, int flags,
unsigned int precopy_iterations,
- unsigned int precopy_dirty_threshold,
+ unsigned int precopy_dirty_threshold, int recv_fd,
+ bool *postcopy_transitioned,
const libxl_asyncop_how *ao_how)
{
AO_CREATE(ctx, domid, ao_how);
@@ -508,6 +509,8 @@ static int do_domain_suspend(libxl_ctx *ctx, uint32_t domid, int fd, int flags,
dss->domid = domid;
dss->fd = fd;
+ dss->recv_fd = recv_fd;
+ dss->postcopy_transitioned = postcopy_transitioned; /* caller's OUT flag */
dss->type = type;
dss->live = flags & LIBXL_SUSPEND_LIVE;
dss->debug = flags & LIBXL_SUSPEND_DEBUG;
@@ -532,18 +535,26 @@ int libxl_domain_suspend(libxl_ctx *ctx, uint32_t domid, int fd, int flags,
{
return do_domain_suspend(ctx, domid, fd, flags,
LIBXL_LM_PRECOPY_ITERATIONS_DEFAULT,
- LIBXL_LM_DIRTY_THRESHOLD_DEFAULT, ao_how);
+ LIBXL_LM_DIRTY_THRESHOLD_DEFAULT, -1,
+ NULL, ao_how);
}
-int libxl_domain_live_migrate(libxl_ctx *ctx, uint32_t domid, int fd, int flags,
- unsigned int precopy_iterations,
- unsigned int precopy_dirty_threshold,
+int libxl_domain_live_migrate(libxl_ctx *ctx, uint32_t domid, int send_fd,
+ int flags, unsigned int precopy_iterations,
+ unsigned int precopy_dirty_threshold, int recv_fd,
+ bool *postcopy_transitioned,
const libxl_asyncop_how *ao_how)
{
+ if (!postcopy_transitioned) {
+ errno = EINVAL;
+ return -1;
+ }
+
flags |= LIBXL_SUSPEND_LIVE;
- return do_domain_suspend(ctx, domid, fd, flags, precopy_iterations,
- precopy_dirty_threshold, ao_how);
+ return do_domain_suspend(ctx, domid, send_fd, flags, precopy_iterations,
+ precopy_dirty_threshold, recv_fd,
+ postcopy_transitioned, ao_how);
}
int libxl_domain_pause(libxl_ctx *ctx, uint32_t domid)
@@ -3194,17 +3194,25 @@ struct libxl__stream_write_state {
void (*completion_callback)(libxl__egc *egc,
libxl__stream_write_state *sws,
int rc);
- void (*checkpoint_callback)(libxl__egc *egc,
- libxl__stream_write_state *sws,
- int rc);
+ /* Checkpointing and postcopy live migration are mutually exclusive. */
+ union {
+ void (*checkpoint_callback)(libxl__egc *egc,
+ libxl__stream_write_state *sws,
+ int rc);
+ void (*postcopy_transition_callback)(libxl__egc *egc,
+ libxl__stream_write_state *sws,
+ int rc);
+ };
/* Private */
int rc;
bool running;
enum {
SWS_PHASE_NORMAL,
SWS_PHASE_CHECKPOINT,
- SWS_PHASE_CHECKPOINT_STATE
+ SWS_PHASE_CHECKPOINT_STATE,
+ SWS_PHASE_POSTCOPY_TRANSITION
} phase;
+ bool postcopy_transitioned;
bool sync_teardown; /* Only used to coordinate shutdown on error path. */
libxl__save_helper_state shs;
@@ -3227,6 +3235,10 @@ _hidden void libxl__stream_write_init(libxl__stream_write_state *stream);
_hidden void libxl__stream_write_start(libxl__egc *egc,
libxl__stream_write_state *stream);
_hidden void
+libxl__stream_write_start_postcopy_transition(
+ libxl__egc *egc,
+ libxl__stream_write_state *stream);
+_hidden void
libxl__stream_write_start_checkpoint(libxl__egc *egc,
libxl__stream_write_state *stream);
_hidden void
@@ -3290,6 +3302,7 @@ struct libxl__domain_save_state {
int fd;
int fdfl; /* original flags on fd */
int recv_fd;
+ bool *postcopy_transitioned;
libxl_domain_type type;
int live;
int debug;
@@ -31,12 +31,13 @@ typedef struct libxl__sr_rec_hdr
/* All records must be aligned up to an 8 octet boundary */
#define REC_ALIGN_ORDER 3U
-#define REC_TYPE_END 0x00000000U
-#define REC_TYPE_LIBXC_CONTEXT 0x00000001U
-#define REC_TYPE_EMULATOR_XENSTORE_DATA 0x00000002U
-#define REC_TYPE_EMULATOR_CONTEXT 0x00000003U
-#define REC_TYPE_CHECKPOINT_END 0x00000004U
-#define REC_TYPE_CHECKPOINT_STATE 0x00000005U
+#define REC_TYPE_END 0x00000000U
+#define REC_TYPE_LIBXC_CONTEXT 0x00000001U
+#define REC_TYPE_EMULATOR_XENSTORE_DATA 0x00000002U
+#define REC_TYPE_EMULATOR_CONTEXT 0x00000003U
+#define REC_TYPE_CHECKPOINT_END 0x00000004U
+#define REC_TYPE_CHECKPOINT_STATE 0x00000005U
+#define REC_TYPE_POSTCOPY_TRANSITION_END 0x00000006U
typedef struct libxl__sr_emulator_hdr
{
@@ -22,6 +22,9 @@
* Entry points from outside:
* - libxl__stream_write_start()
* - Start writing a stream from the start.
+ * - libxl__stream_write_start_postcopy_transition()
+ * - Write the records required to permit postcopy resumption at the
+ * migration target.
* - libxl__stream_write_start_checkpoint()
* - Write the records which form a checkpoint into a stream.
*
@@ -65,6 +68,9 @@ static void stream_complete(libxl__egc *egc,
libxl__stream_write_state *stream, int rc);
static void stream_done(libxl__egc *egc,
libxl__stream_write_state *stream, int rc);
+static void postcopy_transition_done(libxl__egc *egc,
+ libxl__stream_write_state *stream,
+ int rc);
static void checkpoint_done(libxl__egc *egc,
libxl__stream_write_state *stream,
int rc);
@@ -91,7 +97,9 @@ static void emulator_context_record_done(libxl__egc *egc,
libxl__stream_write_state *stream);
static void write_phase_end_record(libxl__egc *egc,
libxl__stream_write_state *stream);
-
+static void postcopy_transition_end_record_done(
+ libxl__egc *egc,
+ libxl__stream_write_state *stream);
static void checkpoint_end_record_done(libxl__egc *egc,
libxl__stream_write_state *stream);
@@ -211,6 +219,7 @@ void libxl__stream_write_init(libxl__stream_write_state *stream)
stream->rc = 0;
stream->running = false;
stream->phase = SWS_PHASE_NORMAL;
+ stream->postcopy_transitioned = false;
stream->sync_teardown = false;
FILLZERO(stream->dc);
stream->record_done_callback = NULL;
@@ -287,6 +296,22 @@ void libxl__stream_write_start(libxl__egc *egc,
stream_complete(egc, stream, rc);
}
+void libxl__stream_write_start_postcopy_transition(
+ libxl__egc *egc,
+ libxl__stream_write_state *stream)
+{
+ libxl__domain_save_state *dss = stream->dss;
+
+ assert(stream->running);
+ assert(dss->checkpointed_stream == LIBXL_CHECKPOINTED_STREAM_NONE);
+ assert(stream->phase == SWS_PHASE_NORMAL);
+ assert(!stream->postcopy_transitioned);
+
+ stream->phase = SWS_PHASE_POSTCOPY_TRANSITION;
+
+ write_emulator_xenstore_record(egc, stream);
+}
+
void libxl__stream_write_start_checkpoint(libxl__egc *egc,
libxl__stream_write_state *stream)
{
@@ -369,7 +394,7 @@ void libxl__xc_domain_save_done(libxl__egc *egc, void *dss_void,
* If the stream is not still alive, we must not continue any work.
*/
if (libxl__stream_write_inuse(stream)) {
- if (dss->checkpointed_stream != LIBXL_CHECKPOINTED_STREAM_NONE)
+ if (dss->checkpointed_stream != LIBXL_CHECKPOINTED_STREAM_NONE) {
/*
* For remus, if libxl__xc_domain_save_done() completes,
* there was an error sending data to the secondary.
@@ -377,8 +402,17 @@ void libxl__xc_domain_save_done(libxl__egc *egc, void *dss_void,
* return value (Please refer to libxl__remus_teardown())
*/
stream_complete(egc, stream, 0);
- else
+ } else if (stream->postcopy_transitioned) {
+ /*
+ * If, on the other hand, this is a normal migration that had a
+ * postcopy migration stage, we're completely done at this point and
+ * want to report any error received here to our caller.
+ */
+ assert(stream->phase == SWS_PHASE_NORMAL);
+ write_phase_end_record(egc, stream);
+ } else {
write_emulator_xenstore_record(egc, stream);
+ }
}
}
@@ -550,6 +584,11 @@ static void write_phase_end_record(libxl__egc *egc,
what = "checkpoint end record";
cb = checkpoint_end_record_done;
break;
+ case SWS_PHASE_POSTCOPY_TRANSITION:
+ rec.type = REC_TYPE_POSTCOPY_TRANSITION_END;
+ what = "postcopy transition end record";
+ cb = postcopy_transition_end_record_done;
+ break;
default:
/* SWS_PHASE_CHECKPOINT_STATE has no end record */
assert(false);
@@ -558,6 +597,13 @@ static void write_phase_end_record(libxl__egc *egc,
setup_write(egc, stream, what, &rec, NULL, cb);
}
+static void postcopy_transition_end_record_done(
+ libxl__egc *egc,
+ libxl__stream_write_state *stream)
+{
+ postcopy_transition_done(egc, stream, 0);
+}
+
static void checkpoint_end_record_done(libxl__egc *egc,
libxl__stream_write_state *stream)
{
@@ -600,6 +646,13 @@ static void stream_complete(libxl__egc *egc,
*/
checkpoint_state_done(egc, stream, rc);
break;
+ case SWS_PHASE_POSTCOPY_TRANSITION:
+ /*
+ * To deal with errors during the postcopy transition, we use the same
+ * strategy as during checkpoints.
+ */
+ postcopy_transition_done(egc, stream, rc);
+ break;
}
}
@@ -627,6 +680,16 @@ static void stream_done(libxl__egc *egc,
}
}
+static void postcopy_transition_done(libxl__egc *egc,
+ libxl__stream_write_state *stream,
+ int rc)
+{
+ assert(stream->phase == SWS_PHASE_POSTCOPY_TRANSITION);
+ stream->postcopy_transitioned = true;
+ stream->phase = SWS_PHASE_NORMAL;
+ stream->postcopy_transition_callback(egc, stream, rc);
+}
+
static void checkpoint_done(libxl__egc *egc,
libxl__stream_write_state *stream,
int rc)
@@ -188,6 +188,7 @@ static void migrate_domain(uint32_t domid, const char *rune, int debug,
char rc_buf;
uint8_t *config_data;
int config_len, flags = LIBXL_SUSPEND_LIVE;
+ bool postcopy_transitioned = false; /* defined even if libxl never writes it */
save_domain_core_begin(domid, override_config_file,
&config_data, &config_len);
@@ -209,7 +210,9 @@ static void migrate_domain(uint32_t domid, const char *rune, int debug,
flags |= LIBXL_SUSPEND_DEBUG;
rc = libxl_domain_live_migrate(ctx, domid, send_fd, flags,
precopy_iterations, precopy_dirty_threshold,
- NULL);
+ recv_fd, &postcopy_transitioned, NULL);
+ assert(!postcopy_transitioned);
+
if (rc) {
fprintf(stderr, "migration sender: libxl_domain_suspend failed"
" (rc=%d)\n", rc);
To make the libxl sender capable of supporting postcopy live migration:

- Add a postcopy transition callback chain through the stream writer (this
  callback chain is nearly identical to the checkpoint callback chain, and
  differs meaningfully only in its failure/completion behaviour)
- Wire this callback chain up to the xc postcopy callback entries in the
  domain save logic.
- Add parameters to libxl_domain_live_migrate() to permit bidirectional
  communication between the sender and receiver and enable the caller to
  reason about the safety of recovery from a postcopy failure.

No mechanism is introduced yet to enable library clients to induce a postcopy
live migration - this will follow after the libxl postcopy receiver logic.

Signed-off-by: Joshua Otto <jtotto@uwaterloo.ca>
---
 docs/specs/libxl-migration-stream.pandoc | 19 ++++++++-
 tools/libxl/libxl.h                      |  4 +-
 tools/libxl/libxl_dom_save.c             | 25 +++++++++++-
 tools/libxl/libxl_domain.c               | 25 ++++++++----
 tools/libxl/libxl_internal.h             | 21 ++++++++--
 tools/libxl/libxl_sr_stream_format.h     | 13 +++---
 tools/libxl/libxl_stream_write.c         | 69 ++++++++++++++++++++++++++++++--
 tools/xl/xl_migrate.c                    |  5 ++-
 8 files changed, 155 insertions(+), 26 deletions(-)