@@ -78,6 +78,7 @@ struct save_callbacks {
typedef enum {
XC_MIG_STREAM_NONE, /* plain stream */
XC_MIG_STREAM_REMUS,
+ XC_MIG_STREAM_COLO, /* COLO checkpointed stream */
} xc_migration_stream_t;
/**
@@ -97,6 +98,16 @@ int xc_domain_save(xc_interface *xch, int io_fd, uint32_t dom, uint32_t max_iter
/* callbacks provided by xc_domain_restore */
struct restore_callbacks {
+ /* Called after a new checkpoint to suspend the guest.
+ */
+ int (*suspend)(void* data);
+
+ /* Called after the secondary vm is ready to resume.
+ * Callback function resumes the guest & the device model,
+ * returns to xc_domain_restore.
+ */
+ int (*postcopy)(void* data);
+
/* A checkpoint record has been found in the stream.
* returns: */
#define XGR_CHECKPOINT_ERROR 0 /* Terminate processing */
@@ -104,6 +115,15 @@ struct restore_callbacks {
#define XGR_CHECKPOINT_FAILOVER 2 /* Failover and resume VM */
int (*checkpoint)(void* data);
+ /*
+ * Called after the checkpoint callback.
+ *
+ * returns:
+ * 0: terminate checkpointing gracefully
+ * 1: take another checkpoint
+ */
+ int (*wait_checkpoint)(void* data);
+
/* to be provided as the last argument to each callback function */
void* data;
};
@@ -846,7 +846,8 @@ int xc_domain_save(xc_interface *xch, int io_fd, uint32_t dom,
/* If altering migration_stream update this assert too. */
assert(stream_type == XC_MIG_STREAM_NONE ||
- stream_type == XC_MIG_STREAM_REMUS);
+ stream_type == XC_MIG_STREAM_REMUS ||
+ stream_type == XC_MIG_STREAM_COLO);
/*
* TODO: Find some time to better tweak the live migration algorithm.
@@ -65,6 +65,7 @@ LIBXL_OBJS-y += libxl_no_convert_callout.o
endif
LIBXL_OBJS-y += libxl_remus.o libxl_checkpoint_device.o libxl_remus_disk_drbd.o
+LIBXL_OBJS-y += libxl_colo_restore.o
LIBXL_OBJS-$(CONFIG_X86) += libxl_cpuid.o libxl_x86.o libxl_psr.o
LIBXL_OBJS-$(CONFIG_ARM) += libxl_nocpuid.o libxl_arm.o libxl_libfdt_compat.o
new file mode 100644
@@ -0,0 +1,55 @@
+/*
+ * Copyright (C) 2016 FUJITSU LIMITED
+ * Author: Wen Congyang <wency@cn.fujitsu.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation; version 2.1 only. with the special
+ * exception on linking described in file LICENSE.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ */
+
+#ifndef LIBXL_COLO_H
+#define LIBXL_COLO_H
+
+struct libxl__ao;
+struct libxl__egc;
+/* Values stored in libxl__colo_restore_checkpoint_state.status. */
+enum {
+ LIBXL_COLO_SETUPED, /* set by libxl__colo_restore_setup() */
+ LIBXL_COLO_SUSPENDED, /* set once the secondary vm has been suspended */
+ LIBXL_COLO_RESUMED, /* set once the secondary vm has been resumed */
+};
+/* Completion callback type of domain creation (rc, plus the domid). */
+typedef struct libxl__domain_create_state libxl__domain_create_state;
+typedef void libxl__domain_create_cb(struct libxl__egc *egc,
+ libxl__domain_create_state *dcs,
+ int rc, uint32_t domid);
+/* Type of crs->callback, which libxl__colo_restore_setup() calls with rc. */
+typedef struct libxl__colo_restore_state libxl__colo_restore_state;
+typedef void libxl__colo_callback(struct libxl__egc *egc,
+ libxl__colo_restore_state *crs, int rc);
+/* State for COLO restore of the secondary vm; embedded in the create state as dcs->crs. */
+struct libxl__colo_restore_state {
+ /* must be set by the caller of libxl__colo_restore_(setup|teardown) */
+ struct libxl__ao *ao;
+ uint32_t domid;
+ int send_back_fd; /* fd of the write stream used to send checkpoint state back */
+ int recv_fd; /* the restore fd of the domain creation */
+ int hvm; /* non-zero for an HVM guest */
+ libxl__colo_callback *callback; /* called when libxl__colo_restore_setup() finishes */
+
+ /* private, colo restore checkpoint state */
+ libxl__domain_create_cb *saved_cb; /* original dcs->callback; NULL once put back */
+ void *crcs; /* libxl__colo_restore_checkpoint_state, allocated by setup */
+};
+/* COLO restore entry points; teardown receives the libxl__domain_create_state as void *. */
+extern void libxl__colo_restore_setup(struct libxl__egc *egc,
+ libxl__colo_restore_state *crs);
+extern void libxl__colo_restore_teardown(struct libxl__egc *egc, void *dcs_void,
+ int ret, int retval, int errnoval);
+#endif
new file mode 100644
@@ -0,0 +1,1029 @@
+/*
+ * Copyright (C) 2016 FUJITSU LIMITED
+ * Author: Wen Congyang <wency@cn.fujitsu.com>
+ * Yang Hongyang <hongyang.yang@easystack.cn>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation; version 2.1 only. with the special
+ * exception on linking described in file LICENSE.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ */
+
+#include "libxl_osdeps.h" /* must come before any other headers */
+
+#include "libxl_internal.h"
+#include "libxl_sr_stream_format.h"
+/* Private checkpoint state, allocated in libxl__colo_restore_setup(). */
+typedef struct libxl__colo_restore_checkpoint_state libxl__colo_restore_checkpoint_state;
+struct libxl__colo_restore_checkpoint_state {
+ libxl__domain_suspend_state dsps; /* used to suspend the secondary vm */
+ libxl__logdirty_switch lds; /* log-dirty switch state; lds.callback is its continuation */
+ libxl__colo_restore_state *crs; /* back pointer to the owning restore state */
+ libxl__stream_write_state sws; /* write stream on crs->send_back_fd */
+ int status; /* one of LIBXL_COLO_SETUPED/SUSPENDED/RESUMED */
+ bool preresume; /* false until the first CHECKPOINT_SVM_READY has been written */
+ /* used for teardown */
+ int teardown_devices; /* set to 1 when checkpoint device setup is started */
+ int saved_rc; /* 0 for failover, 1 otherwise; set in libxl__colo_restore_teardown() */
+ char *state_file; /* path passed to libxl__qmp_restore() when resuming */
+ /* continuation of the current step; NULL makes the common stream callbacks report ok */
+ void (*callback)(libxl__egc *,
+ libxl__colo_restore_checkpoint_state *,
+ int);
+};
+/* Checkpoint device ops handed to cds->ops; holds only a NULL entry (no devices yet). */
+static const libxl__checkpoint_device_instance_ops *colo_restore_ops[] = {
+ NULL,
+};
+
+/* ===================== colo: common functions ===================== */
+/* Enable log-dirty for the secondary vm; the outcome goes to crcs->lds.callback. */
+static void colo_enable_logdirty(libxl__colo_restore_state *crs, libxl__egc *egc)
+{
+ libxl__domain_create_state *dcs = CONTAINER_OF(crs, *dcs, crs);
+ libxl__colo_restore_checkpoint_state *crcs = crs->crcs;
+
+ /* Convenience aliases */
+ const uint32_t domid = crs->domid;
+ libxl__logdirty_switch *const lds = &crcs->lds;
+
+ EGC_GC;
+
+ /* we need to know which pages are dirty to restore the guest */
+ if (xc_shadow_control(CTX->xch, domid,
+ XEN_DOMCTL_SHADOW_OP_ENABLE_LOGDIRTY,
+ NULL, 0, NULL, 0, NULL) < 0) {
+ LOG(ERROR, "cannot enable secondary vm's logdirty");
+ lds->callback(egc, lds, ERROR_FAIL);
+ return;
+ }
+ /* HVM: hand over to the qemu log-dirty switch (presumably it completes via lds->callback) */
+ if (crs->hvm) {
+ libxl__domain_common_switch_qemu_logdirty(egc, domid, 1, lds);
+ return;
+ }
+ /* non-HVM: nothing else to switch, report success right away */
+ lds->callback(egc, lds, 0);
+}
+/* Turn log-dirty off for the secondary vm; completes via crcs->lds.callback. */
+static void colo_disable_logdirty(libxl__colo_restore_state *crs,
+ libxl__egc *egc)
+{
+ libxl__domain_create_state *dcs = CONTAINER_OF(crs, *dcs, crs);
+ libxl__colo_restore_checkpoint_state *crcs = crs->crcs;
+
+ /* Convenience aliases */
+ const uint32_t domid = crs->domid;
+ libxl__logdirty_switch *const lds = &crcs->lds;
+
+ EGC_GC;
+
+ /* switch shadow log-dirty off; a failure is only warned about, not fatal */
+ if (xc_shadow_control(CTX->xch, domid, XEN_DOMCTL_SHADOW_OP_OFF,
+ NULL, 0, NULL, 0, NULL) < 0)
+ LOG(WARN, "cannot disable secondary vm's logdirty");
+
+ if (crs->hvm) {
+ libxl__domain_common_switch_qemu_logdirty(egc, domid, 0, lds);
+ return;
+ }
+
+ lds->callback(egc, lds, 0);
+}
+/* Resume the secondary vm, or finish domain creation while saved_cb is still set. */
+static void colo_resume_vm(libxl__egc *egc,
+ libxl__colo_restore_checkpoint_state *crcs,
+ int restore_device_model)
+{
+ libxl__domain_create_state *dcs = CONTAINER_OF(crcs->crs, *dcs, crs);
+ int rc;
+
+ /* Convenience aliases */
+ libxl__colo_restore_state *const crs = crcs->crs;
+
+ EGC_GC;
+ /* saved_cb is cleared by colo_resume_vm_done() after the first resume */
+ if (!crs->saved_cb) {
+ /* TODO: sync mmu for hvm? */
+ if (restore_device_model) {
+ rc = libxl__qmp_restore(gc, crs->domid, crcs->state_file);
+ if (rc) {
+ LOG(ERROR, "cannot restore device model for secondary vm");
+ crcs->callback(egc, crcs, rc);
+ return;
+ }
+ }
+ rc = libxl__domain_resume(gc, crs->domid, 0);
+ if (rc)
+ LOG(ERROR, "cannot resume secondary vm");
+ /* rc (0 or the resume error) is handed to the continuation either way */
+ crcs->callback(egc, crcs, rc);
+ return;
+ }
+
+ /*
+ * TODO: get store gfn and console gfn
+ * We should call the callback restore_results in
+ * xc_domain_restore() before resuming the guest.
+ */
+ libxl__xc_domain_restore_done(egc, dcs, 0, 0, 0);
+
+ return;
+}
+/* Placeholder for device subkind initialisation; currently always returns 0. */
+static int init_device_subkind(libxl__checkpoint_devices_state *cds)
+{
+ /* init device subkind-specific state in the libxl ctx */
+ int rc;
+ STATE_AO_GC(cds->ao);
+
+ rc = 0;
+ return rc;
+}
+/* Placeholder counterpart of init_device_subkind(); nothing to clean up yet. */
+static void cleanup_device_subkind(libxl__checkpoint_devices_state *cds)
+{
+ /* cleanup device subkind-specific state in the libxl ctx */
+ STATE_AO_GC(cds->ao);
+}
+
+/* ================ colo: setup restore environment ================ */
+
+static void libxl__colo_domain_create_cb(libxl__egc *egc,
+ libxl__domain_create_state *dcs,
+ int rc, uint32_t domid);
+/* Prepare dsps (ao and domid preset by caller); 0, or ERROR_FAIL on invalid domain type. */
+static int init_dsps(libxl__domain_suspend_state *dsps)
+{
+ int rc = ERROR_FAIL;
+ libxl_domain_type type;
+
+ STATE_AO_GC(dsps->ao);
+
+ libxl__xswait_init(&dsps->pvcontrol);
+ libxl__ev_evtchn_init(&dsps->guest_evtchn);
+ libxl__ev_xswatch_init(&dsps->guest_watch);
+ libxl__ev_time_init(&dsps->guest_timeout);
+
+ type = libxl__domain_type(gc, dsps->domid);
+ if (type == LIBXL_DOMAIN_TYPE_INVALID)
+ goto out;
+
+ dsps->type = type;
+ /* the event channel and its lock fd are left unset (-1) here */
+ dsps->guest_evtchn.port = -1;
+ dsps->guest_evtchn_lockfd = -1;
+ dsps->guest_responded = 0;
+ dsps->dm_savefile = libxl__device_model_savefile(gc, dsps->domid);
+
+ /* Secondary vm is not created, so we cannot get evtchn port */
+
+ rc = 0;
+
+out:
+ return rc;
+}
+
+/*
+ * checkpoint callbacks are called in the following order:
+ * 1. resume
+ * 2. wait checkpoint
+ * 3. suspend
+ * 4. checkpoint
+ */
+static void libxl__colo_restore_domain_resume_callback(void *data);
+static void libxl__colo_restore_domain_wait_checkpoint_callback(void *data);
+static void libxl__colo_restore_domain_suspend_callback(void *data);
+static void libxl__colo_restore_domain_checkpoint_callback(void *data);
+/* Allocate and wire up the COLO restore state; the result goes to crs->callback. */
+void libxl__colo_restore_setup(libxl__egc *egc,
+ libxl__colo_restore_state *crs)
+{
+ libxl__domain_create_state *dcs = CONTAINER_OF(crs, *dcs, crs);
+ libxl__colo_restore_checkpoint_state *crcs;
+ int rc = ERROR_FAIL;
+
+ /* Convenience aliases */
+ libxl__srm_restore_autogen_callbacks *const callbacks =
+ &dcs->srs.shs.callbacks.restore.a;
+ const int domid = crs->domid;
+
+ STATE_AO_GC(crs->ao);
+
+ GCNEW(crcs);
+ crs->crcs = crcs;
+ crcs->crs = crs;
+
+ /* setup dsps */
+ crcs->dsps.ao = ao;
+ crcs->dsps.domid = domid;
+ if (init_dsps(&crcs->dsps))
+ goto out;
+ /* hook the COLO handlers into the restore helper's callback table */
+ callbacks->postcopy = libxl__colo_restore_domain_resume_callback;
+ callbacks->wait_checkpoint = libxl__colo_restore_domain_wait_checkpoint_callback;
+ callbacks->suspend = libxl__colo_restore_domain_suspend_callback;
+ callbacks->checkpoint = libxl__colo_restore_domain_checkpoint_callback;
+
+ /*
+ * Secondary vm is running in colo mode, so we need to call
+ * libxl__xc_domain_restore_done() to create secondary vm.
+ * But we will exit in domain_create_cb(). So replace the
+ * callback here.
+ */
+ crs->saved_cb = dcs->callback;
+ dcs->callback = libxl__colo_domain_create_cb;
+ crcs->state_file = GCSPRINTF(LIBXL_DEVICE_MODEL_RESTORE_FILE".%d", domid);
+ crcs->status = LIBXL_COLO_SETUPED;
+
+ libxl__logdirty_init(&crcs->lds);
+ crcs->lds.ao = ao;
+ /* write stream used to send checkpoint state back over send_back_fd */
+ crcs->sws.fd = crs->send_back_fd;
+ crcs->sws.ao = ao;
+ crcs->sws.back_channel = true;
+ /* lets the checkpoint-device callbacks find crs again via cds->concrete_data */
+ dcs->cds.concrete_data = crs;
+
+ libxl__stream_write_start(egc, &crcs->sws);
+
+ rc = 0;
+ /* success and failure both complete through crs->callback */
+out:
+ crs->callback(egc, crs, rc);
+ return;
+}
+/* Stand-in for dcs->callback while saved_cb is set: forwards rc to crcs->callback. */
+static void libxl__colo_domain_create_cb(libxl__egc *egc,
+ libxl__domain_create_state *dcs,
+ int rc, uint32_t domid)
+{
+ libxl__colo_restore_checkpoint_state *crcs = dcs->crs.crcs;
+ /* NOTE(review): assumes crcs->callback is non-NULL whenever this runs - confirm */
+ crcs->callback(egc, crcs, rc);
+}
+
+/* ================ colo: teardown restore environment ================ */
+
+static void colo_restore_teardown_devices_done(libxl__egc *egc,
+ libxl__checkpoint_devices_state *cds, int rc);
+static void do_failover(libxl__egc *egc, libxl__colo_restore_state *crs);
+static void do_failover_done(libxl__egc *egc,
+ libxl__colo_restore_checkpoint_state* crcs,
+ int rc);
+static void colo_disable_logdirty_done(libxl__egc *egc,
+ libxl__logdirty_switch *lds,
+ int rc);
+static void libxl__colo_restore_teardown_done(libxl__egc *egc,
+ libxl__colo_restore_state *crs,
+ int rc);
+/* Tear down COLO restore; ret == 0 && retval == 0 means failover, anything else failure. */
+void libxl__colo_restore_teardown(libxl__egc *egc, void *dcs_void,
+ int ret, int retval, int errnoval)
+{
+ libxl__domain_create_state *dcs = dcs_void;
+ libxl__colo_restore_checkpoint_state *crcs = dcs->crs.crcs;
+ int rc = 1; /* NOTE(review): this 1 later reaches dcs->callback as rc; confirm a positive value is intended */
+
+ /* convenience aliases */
+ libxl__colo_restore_state *const crs = &dcs->crs;
+ EGC_GC;
+
+ if (ret == 0 && retval == 0)
+ rc = 0;
+
+ LOG(INFO, "%s", rc ? "colo fails" : "failover");
+
+ libxl__stream_write_abort(egc, &crcs->sws, 1);
+ if (crs->saved_cb) {
+ /* crcs->status is LIBXL_COLO_SETUPED */
+ dcs->srs.completion_callback = NULL;
+ }
+ libxl__xc_domain_restore_done(egc, dcs, ret, retval, errnoval);
+ /* saved_rc selects failover (0) or plain teardown in the devices-done step */
+ crcs->saved_rc = rc;
+ if (!crcs->teardown_devices) {
+ colo_restore_teardown_devices_done(egc, &dcs->cds, 0);
+ return;
+ }
+
+ dcs->cds.callback = colo_restore_teardown_devices_done;
+ libxl__checkpoint_devices_teardown(egc, &dcs->cds);
+}
+/* After device teardown (or directly if none): start failover or finish the teardown. */
+static void colo_restore_teardown_devices_done(libxl__egc *egc,
+ libxl__checkpoint_devices_state *cds, int rc)
+{
+ libxl__colo_restore_state *crs = cds->concrete_data;
+ libxl__colo_restore_checkpoint_state *crcs = crs->crcs;
+ libxl__domain_create_state *dcs = CONTAINER_OF(crs, *dcs, crs);
+
+ EGC_GC;
+
+ if (rc)
+ LOG(ERROR, "COLO: failed to teardown device for guest with domid %u,"
+ " rc %d", cds->domid, rc);
+
+ if (crcs->teardown_devices)
+ cleanup_device_subkind(cds);
+ /* a device teardown error is only logged; saved_rc decides what happens next */
+ rc = crcs->saved_rc;
+ if (!rc) {
+ crcs->callback = do_failover_done;
+ do_failover(egc, crs);
+ return;
+ }
+
+ libxl__colo_restore_teardown_done(egc, crs, rc);
+}
+/* Start failover according to crcs->status; the result is passed to crcs->callback. */
+static void do_failover(libxl__egc *egc, libxl__colo_restore_state *crs)
+{
+ libxl__colo_restore_checkpoint_state *crcs = crs->crcs;
+
+ /* Convenience aliases */
+ const int status = crcs->status;
+ libxl__logdirty_switch *const lds = &crcs->lds;
+
+ EGC_GC;
+
+ switch(status) {
+ case LIBXL_COLO_SETUPED:
+ /*
+ * We will come here only when reading emulator xenstore data or
+ * emulator context fails, and libxl__xc_domain_restore_done()
+ * is not called. In this case, the migration is not finished,
+ * so we cannot do failover.
+ */
+ LOG(ERROR, "migration fails");
+ crcs->callback(egc, crcs, ERROR_FAIL);
+ return;
+ case LIBXL_COLO_SUSPENDED:
+ case LIBXL_COLO_RESUMED:
+ /* disable logdirty first */
+ lds->callback = colo_disable_logdirty_done;
+ colo_disable_logdirty(crs, egc);
+ return;
+ default:
+ LOG(ERROR, "invalid status: %d", status);
+ crcs->callback(egc, crcs, ERROR_FAIL);
+ }
+}
+/* crcs->callback during failover: log a failure and finish the teardown with rc. */
+static void do_failover_done(libxl__egc *egc,
+ libxl__colo_restore_checkpoint_state* crcs,
+ int rc)
+{
+ libxl__domain_create_state *dcs = CONTAINER_OF(crcs->crs, *dcs, crs);
+
+ /* Convenience aliases */
+ libxl__colo_restore_state *const crs = crcs->crs;
+
+ EGC_GC;
+
+ if (rc)
+ LOG(ERROR, "cannot do failover");
+
+ libxl__colo_restore_teardown_done(egc, crs, rc);
+}
+/* lds callback during failover: resume the vm if it is suspended, then complete. */
+static void colo_disable_logdirty_done(libxl__egc *egc,
+ libxl__logdirty_switch *lds,
+ int rc)
+{
+ libxl__colo_restore_checkpoint_state *crcs = CONTAINER_OF(lds, *crcs, lds);
+
+ EGC_GC;
+
+ if (rc)
+ LOG(WARN, "cannot disable logdirty");
+ /* SUSPENDED: the vm still has to be resumed; otherwise complete right away */
+ if (crcs->status == LIBXL_COLO_SUSPENDED) {
+ /*
+ * failover when reading state from master, so no need to
+ * call libxl__qmp_restore().
+ */
+ colo_resume_vm(egc, crcs, 0);
+ return;
+ }
+
+ /* If we cannot disable logdirty, we still can do failover */
+ crcs->callback(egc, crcs, 0);
+}
+
+static void libxl__colo_restore_teardown_done(libxl__egc *egc,
+                                              libxl__colo_restore_state *crs,
+                                              int rc)
+{
+    /*
+     * Final teardown step: destroy the secondary vm unless this is a
+     * failover (rc == 0), then report rc through the creation callback.
+     */
+    libxl__domain_create_state *dcs = CONTAINER_OF(crs, *dcs, crs);
+    EGC_GC;
+
+    /* convenience aliases */
+    const uint32_t domid = crs->domid;
+
+    /* failover keeps the secondary vm; a failed destroy is logged, not fatal */
+    if (rc && xc_domain_destroy(CTX->xch, domid) < 0)
+        LOG(WARN, "cannot destroy secondary vm %u", domid);
+
+    /* put the caller's callback back if setup's replacement is still there */
+    if (crs->saved_cb) {
+        dcs->callback = crs->saved_cb;
+        crs->saved_cb = NULL;
+    }
+
+    dcs->callback(egc, dcs, rc, domid);
+}
+
+static void colo_common_write_stream_done(libxl__egc *egc,
+ libxl__stream_write_state *stream,
+ int rc);
+static void colo_common_read_stream_done(libxl__egc *egc,
+ libxl__stream_read_state *stream,
+ int rc);
+
+/* ======================== colo: checkpoint ======================= */
+
+/*
+ * Do the following things when resuming secondary vm:
+ * 1. read emulator xenstore data
+ * 2. read emulator context
+ * 3. REC_TYPE_CHECKPOINT_END
+ */
+static void libxl__colo_restore_domain_checkpoint_callback(void *data)
+{
+ libxl__save_helper_state *shs = data;
+ libxl__stream_read_state *srs = CONTAINER_OF(shs, *srs, shs);
+ libxl__domain_create_state *dcs = CONTAINER_OF(srs, *dcs, srs);
+ libxl__colo_restore_checkpoint_state *crcs = dcs->crs.crcs;
+ /* NULL continuation: colo_common_read_stream_done() itself reports ok to the helper */
+ crcs->callback = NULL;
+ dcs->srs.checkpoint_callback = colo_common_read_stream_done;
+ libxl__stream_read_start_checkpoint(shs->egc, &dcs->srs);
+}
+
+/* ===================== colo: resume secondary vm ===================== */
+
+/*
+ * Do the following things when resuming secondary vm the first time:
+ * 1. resume secondary vm
+ * 2. enable log dirty
+ * 3. setup checkpoint devices
+ * 4. write CHECKPOINT_SVM_READY
+ * 5. unpause secondary vm
+ * 6. write CHECKPOINT_SVM_RESUMED
+ *
+ * Do the following things when resuming secondary vm:
+ * 1. write CHECKPOINT_SVM_READY
+ * 2. resume secondary vm
+ * 3. write CHECKPOINT_SVM_RESUMED
+ */
+static void colo_send_svm_ready(libxl__egc *egc,
+ libxl__colo_restore_checkpoint_state *crcs);
+static void colo_send_svm_ready_done(libxl__egc *egc,
+ libxl__colo_restore_checkpoint_state *crcs,
+ int rc);
+static void colo_restore_preresume_cb(libxl__egc *egc,
+ libxl__checkpoint_devices_state *cds,
+ int rc);
+static void colo_restore_resume_vm(libxl__egc *egc,
+ libxl__colo_restore_checkpoint_state *crcs);
+static void colo_resume_vm_done(libxl__egc *egc,
+ libxl__colo_restore_checkpoint_state *crcs,
+ int rc);
+static void colo_write_svm_resumed(libxl__egc *egc,
+ libxl__colo_restore_checkpoint_state *crcs);
+static void colo_enable_logdirty_done(libxl__egc *egc,
+                                      libxl__logdirty_switch *lds,
+                                      int rc); /* rc: result of enabling log-dirty */
+static void colo_reenable_logdirty(libxl__egc *egc,
+ libxl__logdirty_switch *lds,
+ int rc);
+static void colo_reenable_logdirty_done(libxl__egc *egc,
+ libxl__logdirty_switch *lds,
+ int rc);
+static void colo_setup_checkpoint_devices(libxl__egc *egc,
+ libxl__colo_restore_state *crs);
+static void colo_restore_setup_cds_done(libxl__egc *egc,
+ libxl__checkpoint_devices_state *cds,
+ int rc);
+static void colo_unpause_svm(libxl__egc *egc,
+ libxl__colo_restore_checkpoint_state *crcs);
+/* Installed as the postcopy callback by setup: resume the secondary vm. */
+static void libxl__colo_restore_domain_resume_callback(void *data)
+{
+ libxl__save_helper_state *shs = data;
+ libxl__stream_read_state *srs = CONTAINER_OF(shs, *srs, shs);
+ libxl__domain_create_state *dcs = CONTAINER_OF(srs, *dcs, srs);
+ libxl__colo_restore_checkpoint_state *crcs = dcs->crs.crcs;
+ /* teardown_devices is only set once the first resume has started device setup */
+ if (crcs->teardown_devices)
+ colo_send_svm_ready(shs->egc, crcs);
+ else
+ colo_restore_resume_vm(shs->egc, crcs);
+}
+/* Write CHECKPOINT_SVM_READY on sws, then continue in colo_send_svm_ready_done(). */
+static void colo_send_svm_ready(libxl__egc *egc,
+ libxl__colo_restore_checkpoint_state *crcs)
+{
+ libxl_sr_checkpoint_state srcs = { .id = CHECKPOINT_SVM_READY };
+ /* colo_common_write_stream_done() runs crcs->callback after a successful write */
+ crcs->callback = colo_send_svm_ready_done;
+ crcs->sws.checkpoint_callback = colo_common_write_stream_done;
+ libxl__stream_write_checkpoint_state(egc, &crcs->sws, &srcs);
+}
+/* After SVM_READY: unpause on the first pass, otherwise run the device preresume. */
+static void colo_send_svm_ready_done(libxl__egc *egc,
+ libxl__colo_restore_checkpoint_state *crcs,
+ int rc)
+{
+ libxl__domain_create_state *dcs = CONTAINER_OF(crcs->crs, *dcs, crs);
+
+ /* Convenience aliases */
+ libxl__checkpoint_devices_state *cds = &dcs->cds;
+ /* rc is not examined: colo_common_write_stream_done() only calls this with 0 */
+ if (!crcs->preresume) {
+ crcs->preresume = true;
+ colo_unpause_svm(egc, crcs);
+ return;
+ }
+ /* later passes: the checkpoint devices get their preresume step first */
+ cds->callback = colo_restore_preresume_cb;
+ libxl__checkpoint_devices_preresume(egc, cds);
+}
+
+static void colo_restore_preresume_cb(libxl__egc *egc,
+                                      libxl__checkpoint_devices_state *cds,
+                                      int rc)
+{
+    /*
+     * Completion of libxl__checkpoint_devices_preresume(): on success go
+     * on to resume the secondary vm, otherwise report failure (0) to the
+     * save helper.
+     */
+    libxl__colo_restore_state *crs = cds->concrete_data;
+    libxl__domain_create_state *dcs = CONTAINER_OF(crs, *dcs, crs);
+
+    /* Convenience aliases */
+    libxl__save_helper_state *const shs = &dcs->srs.shs;
+
+    EGC_GC;
+
+    if (!rc) {
+        colo_restore_resume_vm(egc, crs->crcs);
+        return;
+    }
+
+    LOG(ERROR, "preresume fails");
+    libxl__xc_domain_saverestore_async_callback_done(egc, shs, 0);
+}
+/* Resume the secondary vm during a checkpoint; continues in colo_resume_vm_done(). */
+static void colo_restore_resume_vm(libxl__egc *egc,
+ libxl__colo_restore_checkpoint_state *crcs)
+{
+ /* the final 1 asks colo_resume_vm() to restore the device model as well */
+ crcs->callback = colo_resume_vm_done;
+ colo_resume_vm(egc, crcs, 1);
+}
+/* Continuation of colo_restore_resume_vm(); rc != 0 is reported to the helper as 0. */
+static void colo_resume_vm_done(libxl__egc *egc,
+ libxl__colo_restore_checkpoint_state *crcs,
+ int rc)
+{
+ libxl__domain_create_state *dcs = CONTAINER_OF(crcs->crs, *dcs, crs);
+
+ /* Convenience aliases */
+ libxl__colo_restore_state *const crs = crcs->crs;
+ libxl__logdirty_switch *const lds = &crcs->lds;
+ libxl__save_helper_state *const shs = &dcs->srs.shs;
+
+ EGC_GC;
+
+ if (rc) {
+ LOG(ERROR, "cannot resume secondary vm");
+ goto out;
+ }
+
+ crcs->status = LIBXL_COLO_RESUMED;
+
+ /* avoid calling stream->completion_callback() more than once */
+ if (crs->saved_cb) {
+ dcs->callback = crs->saved_cb;
+ crs->saved_cb = NULL;
+
+ dcs->srs.completion_callback = NULL;
+ /* first resume: continue with log-dirty, then checkpoint device setup */
+ lds->callback = colo_enable_logdirty_done;
+ colo_enable_logdirty(crs, egc);
+ return;
+ }
+ /* later resumes: only CHECKPOINT_SVM_RESUMED is left to write */
+ colo_write_svm_resumed(egc, crcs);
+ return;
+
+out:
+ libxl__xc_domain_saverestore_async_callback_done(egc, shs, 0);
+}
+/* Write CHECKPOINT_SVM_RESUMED on sws; there is no further continuation. */
+static void colo_write_svm_resumed(libxl__egc *egc,
+ libxl__colo_restore_checkpoint_state *crcs)
+{
+ libxl_sr_checkpoint_state srcs = { .id = CHECKPOINT_SVM_RESUMED };
+ /* NULL callback: colo_common_write_stream_done() reports ok to the helper itself */
+ crcs->callback = NULL;
+ crcs->sws.checkpoint_callback = colo_common_write_stream_done;
+ libxl__stream_write_checkpoint_state(egc, &crcs->sws, &srcs);
+}
+/* Result of the first log-dirty enable: retry through a disable, or set up devices. */
+static void colo_enable_logdirty_done(libxl__egc *egc,
+ libxl__logdirty_switch *lds,
+ int rc)
+{
+ libxl__colo_restore_checkpoint_state *crcs = CONTAINER_OF(lds, *crcs, lds);
+
+ /* Convenience aliases */
+ libxl__colo_restore_state *const crs = crcs->crs;
+
+ EGC_GC;
+
+ if (rc) {
+ /*
+ * log-dirty already enabled? There's no test op,
+ * so attempt to disable then reenable it
+ */
+ lds->callback = colo_reenable_logdirty;
+ colo_disable_logdirty(crs, egc);
+ return;
+ }
+
+ colo_setup_checkpoint_devices(egc, crs);
+}
+/* Runs after the retry's disable step: try enabling log-dirty a second time. */
+static void colo_reenable_logdirty(libxl__egc *egc,
+ libxl__logdirty_switch *lds,
+ int rc)
+{
+ libxl__colo_restore_checkpoint_state *crcs = CONTAINER_OF(lds, *crcs, lds);
+ libxl__domain_create_state *dcs = CONTAINER_OF(crcs->crs, *dcs, crs);
+
+ /* Convenience aliases */
+ libxl__colo_restore_state *const crs = crcs->crs;
+ libxl__save_helper_state *const shs = &dcs->srs.shs;
+
+ EGC_GC;
+
+ if (rc) {
+ LOG(ERROR, "cannot enable logdirty");
+ goto out;
+ }
+ /* the second attempt reports to colo_reenable_logdirty_done() */
+ lds->callback = colo_reenable_logdirty_done;
+ colo_enable_logdirty(crs, egc);
+
+ return;
+
+out:
+ libxl__xc_domain_saverestore_async_callback_done(egc, shs, 0);
+}
+
+static void colo_reenable_logdirty_done(libxl__egc *egc,
+                                        libxl__logdirty_switch *lds,
+                                        int rc)
+{
+    /*
+     * Result of the second attempt to enable log-dirty (made after a
+     * disable).  There is no further retry: on failure the save helper
+     * is told 0, on success checkpoint device setup starts.
+     */
+    libxl__colo_restore_checkpoint_state *crcs = CONTAINER_OF(lds, *crcs, lds);
+    libxl__domain_create_state *dcs = CONTAINER_OF(crcs->crs, *dcs, crs);
+    /* Convenience aliases */
+    libxl__save_helper_state *const shs = &dcs->srs.shs;
+
+    EGC_GC;
+
+    if (!rc) {
+        colo_setup_checkpoint_devices(egc, crcs->crs);
+        return;
+    }
+
+    LOG(ERROR, "cannot enable logdirty");
+    libxl__xc_domain_saverestore_async_callback_done(egc, shs, 0);
+}
+
+/*
+ * We cannot setup checkpoint devices in libxl__colo_restore_setup(),
+ * because the guest is not ready.
+ */
+static void colo_setup_checkpoint_devices(libxl__egc *egc,
+ libxl__colo_restore_state *crs)
+{
+ libxl__domain_create_state *dcs = CONTAINER_OF(crs, *dcs, crs);
+ libxl__colo_restore_checkpoint_state *crcs = crs->crcs;
+
+ /* Convenience aliases */
+ libxl__checkpoint_devices_state *cds = &dcs->cds;
+ libxl__save_helper_state *const shs = &dcs->srs.shs;
+
+ STATE_AO_GC(crs->ao);
+
+ /* TODO: disk/nic support */
+ cds->device_kind_flags = 0;
+ cds->callback = colo_restore_setup_cds_done;
+ cds->ao = ao;
+ cds->domid = crs->domid;
+ cds->ops = colo_restore_ops;
+
+ if (init_device_subkind(cds))
+ goto out;
+ /* from here on libxl__colo_restore_teardown() also tears the devices down */
+ crcs->teardown_devices = 1;
+
+ libxl__checkpoint_devices_setup(egc, cds);
+ return;
+
+out:
+ libxl__xc_domain_saverestore_async_callback_done(egc, shs, 0);
+}
+/* cds callback for device setup: send SVM_READY on success, else report 0 to the helper. */
+static void colo_restore_setup_cds_done(libxl__egc *egc,
+ libxl__checkpoint_devices_state *cds,
+ int rc)
+{
+ libxl__colo_restore_state *crs = cds->concrete_data;
+ libxl__domain_create_state *dcs = CONTAINER_OF(crs, *dcs, crs);
+ libxl__colo_restore_checkpoint_state *crcs = crs->crcs;
+
+ /* Convenience aliases */
+ libxl__save_helper_state *const shs = &dcs->srs.shs;
+
+ EGC_GC;
+
+ if (rc) {
+ LOG(ERROR, "COLO: failed to setup device for guest with domid %u",
+ cds->domid);
+ goto out;
+ }
+
+ colo_send_svm_ready(egc, crcs);
+
+ return;
+
+out:
+ libxl__xc_domain_saverestore_async_callback_done(egc, shs, 0);
+}
+
+static void colo_unpause_svm(libxl__egc *egc,
+                             libxl__colo_restore_checkpoint_state *crcs)
+{
+    /*
+     * Used on the first resume only (see colo_send_svm_ready_done()):
+     * unpause the secondary vm and then write CHECKPOINT_SVM_RESUMED.
+     * If unpausing fails, the save helper is told 0 instead.
+     */
+    libxl__domain_create_state *dcs = CONTAINER_OF(crcs->crs, *dcs, crs);
+    /* Convenience aliases */
+    libxl__save_helper_state *const shs = &dcs->srs.shs;
+    int rc;
+
+    EGC_GC;
+
+    /* We have enabled secondary vm's logdirty, so we can unpause it now */
+    rc = libxl_domain_unpause(CTX, crcs->crs->domid);
+    if (rc) {
+        LOG(ERROR, "cannot unpause secondary vm");
+        libxl__xc_domain_saverestore_async_callback_done(egc, shs, 0);
+        return;
+    }
+
+    /* unpaused: write CHECKPOINT_SVM_RESUMED on the back channel */
+    colo_write_svm_resumed(egc, crcs);
+}
+
+/* ===================== colo: wait new checkpoint ===================== */
+
+static void colo_restore_commit_cb(libxl__egc *egc,
+ libxl__checkpoint_devices_state *cds,
+ int rc);
+static void colo_stream_read_done(libxl__egc *egc,
+                                  libxl__colo_restore_checkpoint_state *crcs,
+                                  int id); /* id: checkpoint state id that was read */
+/* Installed as the wait_checkpoint callback: commit devices, then await the next checkpoint. */
+static void libxl__colo_restore_domain_wait_checkpoint_callback(void *data)
+{
+ libxl__save_helper_state *shs = data;
+ libxl__stream_read_state *srs = CONTAINER_OF(shs, *srs, shs);
+ libxl__domain_create_state *dcs = CONTAINER_OF(srs, *dcs, srs);
+
+ /* Convenience aliases */
+ libxl__checkpoint_devices_state *cds = &dcs->cds;
+ /* the wait itself continues in colo_restore_commit_cb() */
+ cds->callback = colo_restore_commit_cb;
+ libxl__checkpoint_devices_commit(shs->egc, cds);
+}
+/* cds callback for commit: read the next checkpoint state record, or report 0. */
+static void colo_restore_commit_cb(libxl__egc *egc,
+ libxl__checkpoint_devices_state *cds,
+ int rc)
+{
+ libxl__colo_restore_state *crs = cds->concrete_data;
+ libxl__domain_create_state *dcs = CONTAINER_OF(crs, *dcs, crs);
+ libxl__colo_restore_checkpoint_state *crcs = crs->crcs;
+
+ EGC_GC;
+
+ if (rc) {
+ LOG(ERROR, "commit fails");
+ goto out;
+ }
+ /* the id that is read is handed to colo_stream_read_done() by the common callback */
+ crcs->callback = colo_stream_read_done;
+ dcs->srs.checkpoint_callback = colo_common_read_stream_done;
+ libxl__stream_read_checkpoint_state(egc, &dcs->srs);
+
+ return;
+
+out:
+ libxl__xc_domain_saverestore_async_callback_done(egc, &dcs->srs.shs, 0);
+}
+
+static void colo_stream_read_done(libxl__egc *egc,
+                                  libxl__colo_restore_checkpoint_state *crcs,
+                                  int id)
+{
+    /*
+     * Continuation of the checkpoint state read started by
+     * colo_restore_commit_cb(): only CHECKPOINT_NEW is accepted here.
+     */
+    libxl__domain_create_state *dcs = CONTAINER_OF(crcs->crs, *dcs, crs);
+    libxl__save_helper_state *const shs = &dcs->srs.shs;
+    const int is_new_checkpoint = (id == CHECKPOINT_NEW);
+
+    EGC_GC;
+
+    if (!is_new_checkpoint)
+        LOG(ERROR, "invalid section: %d", id);
+
+    libxl__xc_domain_saverestore_async_callback_done(egc, shs, is_new_checkpoint);
+}
+
+/* ===================== colo: suspend secondary vm ===================== */
+
+/*
+ * Do the following things when suspending secondary vm:
+ * 1. suspend secondary vm
+ * 2. send CHECKPOINT_SVM_SUSPENDED
+ */
+static void colo_suspend_vm_done(libxl__egc *egc,
+                                 libxl__domain_suspend_state *dsps,
+                                 int rc); /* rc: non-zero means the suspend failed */
+static void colo_restore_postsuspend_cb(libxl__egc *egc,
+ libxl__checkpoint_devices_state *cds,
+ int rc);
+/* Installed as the suspend callback by setup: suspend the secondary vm via dsps. */
+static void libxl__colo_restore_domain_suspend_callback(void *data)
+{
+ libxl__save_helper_state *shs = data;
+ libxl__stream_read_state *srs = CONTAINER_OF(shs, *srs, shs);
+ libxl__domain_create_state *dcs = CONTAINER_OF(srs, *dcs, srs);
+ libxl__colo_restore_checkpoint_state *crcs = dcs->crs.crcs;
+
+ STATE_AO_GC(dcs->ao);
+
+ /* Convenience aliases */
+ libxl__domain_suspend_state *const dsps = &crcs->dsps;
+
+ /* suspend secondary vm */
+ dsps->callback_common_done = colo_suspend_vm_done;
+ /* the result of the suspend is delivered to colo_suspend_vm_done() */
+ libxl__domain_suspend(shs->egc, dsps);
+}
+/* Suspend finished: record SUSPENDED and run the device postsuspend, or report 0. */
+static void colo_suspend_vm_done(libxl__egc *egc,
+ libxl__domain_suspend_state *dsps,
+ int rc)
+{
+ libxl__colo_restore_checkpoint_state *crcs = CONTAINER_OF(dsps, *crcs, dsps);
+ libxl__colo_restore_state *crs = crcs->crs;
+ libxl__domain_create_state *dcs = CONTAINER_OF(crs, *dcs, crs);
+
+ /* Convenience aliases */
+ libxl__checkpoint_devices_state *cds = &dcs->cds;
+
+ EGC_GC;
+
+ if (rc) {
+ LOG(ERROR, "cannot suspend secondary vm");
+ goto out;
+ }
+
+ crcs->status = LIBXL_COLO_SUSPENDED;
+
+ cds->callback = colo_restore_postsuspend_cb;
+ libxl__checkpoint_devices_postsuspend(egc, cds);
+
+ return;
+ /* only reached with rc != 0, so !rc tells the helper 0 */
+out:
+ libxl__xc_domain_saverestore_async_callback_done(egc, &dcs->srs.shs, !rc);
+}
+/* cds callback for postsuspend: write CHECKPOINT_SVM_SUSPENDED, or report 0 on error. */
+static void colo_restore_postsuspend_cb(libxl__egc *egc,
+ libxl__checkpoint_devices_state *cds,
+ int rc)
+{
+ libxl__colo_restore_state *crs = cds->concrete_data;
+ libxl__domain_create_state *dcs = CONTAINER_OF(crs, *dcs, crs);
+ libxl__colo_restore_checkpoint_state *crcs = crs->crcs;
+ libxl_sr_checkpoint_state srcs = { .id = CHECKPOINT_SVM_SUSPENDED };
+
+ EGC_GC;
+
+ if (rc) {
+ LOG(ERROR, "postsuspend fails");
+ goto out;
+ }
+ /* NULL continuation: the common write callback reports ok to the helper */
+ crcs->callback = NULL;
+ crcs->sws.checkpoint_callback = colo_common_write_stream_done;
+ libxl__stream_write_checkpoint_state(egc, &crcs->sws, &srcs);
+
+ return;
+ /* only reached with rc != 0, so !rc tells the helper 0 */
+out:
+ libxl__xc_domain_saverestore_async_callback_done(egc, &dcs->srs.shs, !rc);
+}
+
+/* ===================== colo: common callback ===================== */
+/* sws.checkpoint_callback: run crcs->callback, or report the result to the helper. */
+static void colo_common_write_stream_done(libxl__egc *egc,
+ libxl__stream_write_state *stream,
+ int rc)
+{
+ libxl__colo_restore_checkpoint_state *crcs =
+ CONTAINER_OF(stream, *crcs, sws);
+ libxl__domain_create_state *dcs = CONTAINER_OF(crcs->crs, *dcs, crs);
+ int ok;
+
+ EGC_GC;
+
+ if (rc < 0) {
+ /* TODO: it may be an internal error, but we don't know */
+ LOG(ERROR, "sending data fails");
+ ok = 2; /* presumably XGR_CHECKPOINT_FAILOVER - TODO confirm */
+ goto out;
+ }
+
+ if (!crcs->callback) {
+ /* Everything is OK */
+ ok = 1;
+ goto out;
+ }
+ /* a continuation is pending; it is always called with 0 from here */
+ crcs->callback(egc, crcs, 0);
+
+ return;
+
+out:
+ libxl__xc_domain_saverestore_async_callback_done(egc, &dcs->srs.shs, ok);
+}
+/* srs.checkpoint_callback: pass the id to crcs->callback, or report to the helper. */
+static void colo_common_read_stream_done(libxl__egc *egc,
+ libxl__stream_read_state *stream,
+ int rc)
+{
+ libxl__domain_create_state *dcs = CONTAINER_OF(stream, *dcs, srs);
+ libxl__colo_restore_checkpoint_state *crcs = dcs->crs.crcs;
+ int ok;
+
+ EGC_GC;
+
+ if (rc < 0) {
+ /* TODO: it may be an internal error, but we don't know */
+ LOG(ERROR, "reading data fails");
+ ok = 2; /* presumably XGR_CHECKPOINT_FAILOVER - TODO confirm */
+ goto out;
+ }
+
+ if (!crcs->callback) {
+ /* Everything is OK */
+ ok = 1;
+ goto out;
+ }
+
+ /* rc contains the id */
+ crcs->callback(egc, crcs, rc);
+
+ return;
+
+out:
+ libxl__xc_domain_saverestore_async_callback_done(egc, &dcs->srs.shs, ok);
+}
@@ -985,6 +985,23 @@ static void domcreate_console_available(libxl__egc *egc,
dcs->aop_console_how.for_event));
}
+static void libxl__colo_restore_setup_done(libxl__egc *egc,
+                                           libxl__colo_restore_state *crs,
+                                           int rc)
+{
+    /* Completion of libxl__colo_restore_setup(): start or abort the read. */
+    libxl__domain_create_state *dcs = CONTAINER_OF(crs, *dcs, crs);
+    libxl__stream_read_state *const srs = &dcs->srs;
+    EGC_GC;
+
+    if (!rc) {
+        libxl__stream_read_start(egc, srs);
+        return;
+    }
+    LOG(ERROR, "colo restore setup fails: %d", rc);
+    domcreate_stream_done(egc, srs, rc);
+}
+
static void domcreate_bootloader_done(libxl__egc *egc,
libxl__bootloader_state *bl,
int rc)
@@ -998,6 +1015,8 @@ static void domcreate_bootloader_done(libxl__egc *egc,
const int restore_fd = dcs->restore_fd;
libxl__domain_build_state *const state = &dcs->build_state;
const int checkpointed_stream = dcs->restore_params.checkpointed_stream;
+ libxl__colo_restore_state *const crs = &dcs->crs;
+ libxl_domain_build_info *const info = &d_config->b_info;
if (rc) {
domcreate_rebuild_done(egc, dcs, rc);
@@ -1026,6 +1045,22 @@ static void domcreate_bootloader_done(libxl__egc *egc,
/* Restore */
+ /* COLO only supports HVM now because it does not work very
+ * well with pv drivers:
+ * 1. We need to resume vm in the slow path. In this case we
+ * need to disconnect/reconnect backend and frontend. It
+ * will take too much time and the performance is very slow.
+ * 2. PV disk cannot reuse block replication that is implemented
+ * in QEMU.
+ */
+ if (info->type != LIBXL_DOMAIN_TYPE_HVM &&
+ checkpointed_stream == LIBXL_CHECKPOINTED_STREAM_COLO) {
+ LOG(ERROR, "COLO only supports HVM, unable to restore domain %d",
+ domid);
+ rc = ERROR_FAIL;
+ goto out;
+ }
+
rc = libxl__build_pre(gc, domid, d_config, state);
if (rc)
goto out;
@@ -1039,6 +1074,16 @@ static void domcreate_bootloader_done(libxl__egc *egc,
if (restore_fd >= 0) {
switch (checkpointed_stream) {
+ case LIBXL_CHECKPOINTED_STREAM_COLO:
+ /* colo restore setup */
+ crs->ao = ao;
+ crs->domid = domid;
+ crs->send_back_fd = dcs->send_back_fd;
+ crs->recv_fd = restore_fd;
+ crs->hvm = (info->type == LIBXL_DOMAIN_TYPE_HVM);
+ crs->callback = libxl__colo_restore_setup_done;
+ libxl__colo_restore_setup(egc, crs);
+ break;
case LIBXL_CHECKPOINTED_STREAM_REMUS:
libxl__remus_restore_setup(egc, dcs);
/* fall through */
@@ -87,6 +87,8 @@
#include "_libxl_types_internal.h"
#include "_libxl_types_internal_json.h"
+#include "libxl_colo.h"
+
#define LIBXL_INIT_TIMEOUT 10
#define LIBXL_DESTROY_TIMEOUT 10
#define LIBXL_HOTPLUG_TIMEOUT 40
@@ -3422,12 +3424,6 @@ _hidden int libxl__destroy_qdisk_backend(libxl__gc *gc, uint32_t domid);
/*----- Domain creation -----*/
-typedef struct libxl__domain_create_state libxl__domain_create_state;
-
-typedef void libxl__domain_create_cb(libxl__egc *egc,
- libxl__domain_create_state*,
- int rc, uint32_t domid);
-
/* State for manipulating a libxl migration v2 stream */
typedef struct libxl__stream_read_state libxl__stream_read_state;
@@ -3510,6 +3506,8 @@ struct libxl__domain_create_state {
/* private to domain_create */
int guest_domid;
libxl__domain_build_state build_state;
+ libxl__colo_restore_state crs;
+ libxl__checkpoint_devices_state cds;
libxl__bootloader_state bl;
libxl__stub_dm_spawn_state dmss;
/* If we're not doing stubdom, we use only dmss.dm,
@@ -68,7 +68,11 @@ void libxl__xc_domain_restore(libxl__egc *egc, libxl__domain_create_state *dcs,
shs->ao = ao;
shs->domid = domid;
shs->recv_callback = libxl__srm_callout_received_restore;
- shs->completion_callback = libxl__xc_domain_restore_done;
+ if (dcs->restore_params.checkpointed_stream ==
+ LIBXL_CHECKPOINTED_STREAM_COLO)
+ shs->completion_callback = libxl__colo_restore_teardown;
+ else
+ shs->completion_callback = libxl__xc_domain_restore_done;
shs->caller_state = dcs;
shs->need_results = 1;
@@ -23,14 +23,15 @@ our @msgs = (
STRING doing_what),
'unsigned long', 'done',
'unsigned long', 'total'] ],
- [ 3, 'scxA', "suspend", [] ],
- [ 4, 'scxA', "postcopy", [] ],
+ [ 3, 'srcxA', "suspend", [] ],
+ [ 4, 'srcxA', "postcopy", [] ],
[ 5, 'srcxA', "checkpoint", [] ],
- [ 6, 'scxA', "switch_qemu_logdirty", [qw(int domid
+ [ 6, 'rcxA', "wait_checkpoint", [] ],
+ [ 7, 'scxA', "switch_qemu_logdirty", [qw(int domid
unsigned enable)] ],
- [ 7, 'r', "restore_results", ['unsigned long', 'store_mfn',
+ [ 8, 'r', "restore_results", ['unsigned long', 'store_mfn',
'unsigned long', 'console_mfn'] ],
- [ 8, 'srW', "complete", [qw(int retval
+ [ 9, 'srW', "complete", [qw(int retval
int errnoval)] ],
);
@@ -850,6 +850,18 @@ void libxl__xc_domain_restore_done(libxl__egc *egc, void *dcs_void,
*/
if (libxl__stream_read_inuse(stream)) {
switch (checkpointed_stream) {
+ case LIBXL_CHECKPOINTED_STREAM_COLO:
+ if (stream->completion_callback) {
+ /*
+ * restore, just build the secondary vm, don't close
+ * the stream
+ */
+ stream->completion_callback(egc, stream, 0);
+ } else {
+ /* failover, just close the stream */
+ stream_complete(egc, stream, 0);
+ }
+ break;
case LIBXL_CHECKPOINTED_STREAM_REMUS:
/*
* Failover from primary. Domain state is currently at a
@@ -233,6 +233,7 @@ libxl_hdtype = Enumeration("hdtype", [
libxl_checkpointed_stream = Enumeration("checkpointed_stream", [
(0, "NONE"),
(1, "REMUS"),
+ (2, "COLO"),
])
#