diff mbox

[v13,12/26] secondary vm suspend/resume/checkpoint code

Message ID 1458888273-7469-13-git-send-email-xiecl.fnst@cn.fujitsu.com (mailing list archive)
State New, archived
Headers show

Commit Message

Changlong Xie March 25, 2016, 6:44 a.m. UTC
From: Wen Congyang <wency@cn.fujitsu.com>

Secondary vm is running in colo mode. So we will do
the following things again and again:
1. Resume secondary vm
   a. Send CHECKPOINT_SVM_READY to master.
   b. If it is not the first resume, call libxl__checkpoint_devices_preresume().
   c. If it is the first resume(resume right after live migration),
      - call libxl__xc_domain_restore_done() to build the secondary vm.
      - enable secondary vm's logdirty.
      - call libxl__domain_resume() to resume secondary vm.
      - call libxl__checkpoint_devices_setup() to setup checkpoint devices.
   d. Send CHECKPOINT_SVM_RESUMED to master.
2. Wait a new checkpoint
   a. Call libxl__checkpoint_devices_commit().
   b. Read CHECKPOINT_NEW from master.
3. Suspend secondary vm
   a. Suspend secondary vm.
   b. Call libxl__checkpoint_devices_postsuspend().
   c. Send CHECKPOINT_SVM_SUSPENDED to master.
4. Checkpoint
   a. Read emulator xenstore data and emulator context
   b. REC_TYPE_CHECKPOINT_END

Signed-off-by: Wen Congyang <wency@cn.fujitsu.com>
Signed-off-by: Yang Hongyang <hongyang.yang@easystack.cn>
Signed-off-by: Changlong Xie <xiecl.fnst@cn.fujitsu.com>
---
 tools/libxc/include/xenguest.h     |   20 +
 tools/libxc/xc_sr_save.c           |    3 +-
 tools/libxl/Makefile               |    1 +
 tools/libxl/libxl_colo.h           |   55 ++
 tools/libxl/libxl_colo_restore.c   | 1029 ++++++++++++++++++++++++++++++++++++
 tools/libxl/libxl_create.c         |   45 ++
 tools/libxl/libxl_internal.h       |   10 +-
 tools/libxl/libxl_save_callout.c   |    6 +-
 tools/libxl/libxl_save_msgs_gen.pl |   11 +-
 tools/libxl/libxl_stream_read.c    |   12 +
 tools/libxl/libxl_types.idl        |    1 +
 11 files changed, 1180 insertions(+), 13 deletions(-)
 create mode 100644 tools/libxl/libxl_colo.h
 create mode 100644 tools/libxl/libxl_colo_restore.c

Comments

Ian Jackson March 30, 2016, 2:07 p.m. UTC | #1
Changlong Xie writes ("[PATCH v13 12/26] secondary vm suspend/resume/checkpoint code"):
> From: Wen Congyang <wency@cn.fujitsu.com>
> 
> Secondary vm is running in colo mode. So we will do
> the following things again and again:
> 1. Resume secondary vm
>    a. Send CHECKPOINT_SVM_READY to master.
>    b. If it is not the first resume, call libxl__checkpoint_devices_preresume().
>    c. If it is the first resume(resume right after live migration),
>       - call libxl__xc_domain_restore_done() to build the secondary vm.
>       - enable secondary vm's logdirty.
>       - call libxl__domain_resume() to resume secondary vm.
>       - call libxl__checkpoint_devices_setup() to setup checkpoint devices.
>    d. Send CHECKPOINT_SVM_RESUMED to master.
> 2. Wait a new checkpoint
>    a. Call libxl__checkpoint_devices_commit().
>    b. Read CHECKPOINT_NEW from master.
> 3. Suspend secondary vm
>    a. Suspend secondary vm.
>    b. Call libxl__checkpoint_devices_postsuspend().
>    c. Send CHECKPOINT_SVM_SUSPENDED to master.
> 4. Checkpoint
>    a. Read emulator xenstore data and emulator context
>    b. REC_TYPE_CHECKPOINT_END
> 
> Signed-off-by: Wen Congyang <wency@cn.fujitsu.com>
> Signed-off-by: Yang Hongyang <hongyang.yang@easystack.cn>
> Signed-off-by: Changlong Xie <xiecl.fnst@cn.fujitsu.com>

Acked-by: Ian Jackson <ian.jackson@eu.citrix.com>
diff mbox

Patch

diff --git a/tools/libxc/include/xenguest.h b/tools/libxc/include/xenguest.h
index b4f4bfb..3193d0f 100644
--- a/tools/libxc/include/xenguest.h
+++ b/tools/libxc/include/xenguest.h
@@ -78,6 +78,7 @@  struct save_callbacks {
 typedef enum {
     XC_MIG_STREAM_NONE, /* plain stream */
     XC_MIG_STREAM_REMUS,
+    XC_MIG_STREAM_COLO,
 } xc_migration_stream_t;
 
 /**
@@ -97,6 +98,16 @@  int xc_domain_save(xc_interface *xch, int io_fd, uint32_t dom, uint32_t max_iter
 
 /* callbacks provided by xc_domain_restore */
 struct restore_callbacks {
+    /* Called after a new checkpoint to suspend the guest.
+     */
+    int (*suspend)(void* data);
+
+    /* Called after the secondary vm is ready to resume.
+     * Callback function resumes the guest & the device model,
+     * returns to xc_domain_restore.
+     */
+    int (*postcopy)(void* data);
+
     /* A checkpoint record has been found in the stream.
      * returns: */
 #define XGR_CHECKPOINT_ERROR    0 /* Terminate processing */
@@ -104,6 +115,15 @@  struct restore_callbacks {
 #define XGR_CHECKPOINT_FAILOVER 2 /* Failover and resume VM */
     int (*checkpoint)(void* data);
 
+    /*
+     * Called after the checkpoint callback.
+     *
+     * returns:
+     * 0: terminate checkpointing gracefully
+     * 1: take another checkpoint
+     */
+    int (*wait_checkpoint)(void* data);
+
     /* to be provided as the last argument to each callback function */
     void* data;
 };
diff --git a/tools/libxc/xc_sr_save.c b/tools/libxc/xc_sr_save.c
index 1ccdbbb..d3d95d4 100644
--- a/tools/libxc/xc_sr_save.c
+++ b/tools/libxc/xc_sr_save.c
@@ -846,7 +846,8 @@  int xc_domain_save(xc_interface *xch, int io_fd, uint32_t dom,
 
     /* If altering migration_stream update this assert too. */
     assert(stream_type == XC_MIG_STREAM_NONE ||
-           stream_type == XC_MIG_STREAM_REMUS);
+           stream_type == XC_MIG_STREAM_REMUS ||
+           stream_type == XC_MIG_STREAM_COLO);
 
     /*
      * TODO: Find some time to better tweak the live migration algorithm.
diff --git a/tools/libxl/Makefile b/tools/libxl/Makefile
index 8fa7b87..35a07a7 100644
--- a/tools/libxl/Makefile
+++ b/tools/libxl/Makefile
@@ -65,6 +65,7 @@  LIBXL_OBJS-y += libxl_no_convert_callout.o
 endif
 
 LIBXL_OBJS-y += libxl_remus.o libxl_checkpoint_device.o libxl_remus_disk_drbd.o
+LIBXL_OBJS-y += libxl_colo_restore.o
 
 LIBXL_OBJS-$(CONFIG_X86) += libxl_cpuid.o libxl_x86.o libxl_psr.o
 LIBXL_OBJS-$(CONFIG_ARM) += libxl_nocpuid.o libxl_arm.o libxl_libfdt_compat.o
diff --git a/tools/libxl/libxl_colo.h b/tools/libxl/libxl_colo.h
new file mode 100644
index 0000000..f2b98cc
--- /dev/null
+++ b/tools/libxl/libxl_colo.h
@@ -0,0 +1,55 @@ 
+/*
+ * Copyright (C) 2016 FUJITSU LIMITED
+ * Author: Wen Congyang <wency@cn.fujitsu.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation; version 2.1 only. with the special
+ * exception on linking described in file LICENSE.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ */
+
+#ifndef LIBXL_COLO_H
+#define LIBXL_COLO_H
+
+struct libxl__ao;
+struct libxl__egc;
+
+enum {
+    LIBXL_COLO_SETUPED,
+    LIBXL_COLO_SUSPENDED,
+    LIBXL_COLO_RESUMED,
+};
+
+typedef struct libxl__domain_create_state libxl__domain_create_state;
+typedef void libxl__domain_create_cb(struct libxl__egc *egc,
+                                     libxl__domain_create_state *dcs,
+                                     int rc, uint32_t domid);
+
+typedef struct libxl__colo_restore_state libxl__colo_restore_state;
+typedef void libxl__colo_callback(struct libxl__egc *egc,
+                                  libxl__colo_restore_state *crs, int rc);
+
+struct libxl__colo_restore_state {
+    /* must set by caller of libxl__colo_(setup|teardown) */
+    struct libxl__ao *ao;
+    uint32_t domid;
+    int send_back_fd;
+    int recv_fd;
+    int hvm;
+    libxl__colo_callback *callback;
+
+    /* private, colo restore checkpoint state */
+    libxl__domain_create_cb *saved_cb;
+    void *crcs;
+};
+
+extern void libxl__colo_restore_setup(struct libxl__egc *egc,
+                                      libxl__colo_restore_state *crs);
+extern void libxl__colo_restore_teardown(struct libxl__egc *egc, void *dcs_void,
+                                         int ret, int retval, int errnoval);
+#endif
diff --git a/tools/libxl/libxl_colo_restore.c b/tools/libxl/libxl_colo_restore.c
new file mode 100644
index 0000000..a8f74a7
--- /dev/null
+++ b/tools/libxl/libxl_colo_restore.c
@@ -0,0 +1,1029 @@ 
+/*
+ * Copyright (C) 2016 FUJITSU LIMITED
+ * Author: Wen Congyang <wency@cn.fujitsu.com>
+ *         Yang Hongyang <hongyang.yang@easystack.cn>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation; version 2.1 only. with the special
+ * exception on linking described in file LICENSE.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ */
+
+#include "libxl_osdeps.h" /* must come before any other headers */
+
+#include "libxl_internal.h"
+#include "libxl_sr_stream_format.h"
+
+typedef struct libxl__colo_restore_checkpoint_state libxl__colo_restore_checkpoint_state;
+struct libxl__colo_restore_checkpoint_state {
+    libxl__domain_suspend_state dsps;
+    libxl__logdirty_switch lds;
+    libxl__colo_restore_state *crs;
+    libxl__stream_write_state sws;
+    int status;
+    bool preresume;
+    /* used for teardown */
+    int teardown_devices;
+    int saved_rc;
+    char *state_file;
+
+    void (*callback)(libxl__egc *,
+                     libxl__colo_restore_checkpoint_state *,
+                     int);
+};
+
+static const libxl__checkpoint_device_instance_ops *colo_restore_ops[] = {
+    NULL,
+};
+
+/* ===================== colo: common functions ===================== */
+
+static void colo_enable_logdirty(libxl__colo_restore_state *crs, libxl__egc *egc)
+{
+    libxl__domain_create_state *dcs = CONTAINER_OF(crs, *dcs, crs);
+    libxl__colo_restore_checkpoint_state *crcs = crs->crcs;
+
+    /* Convenience aliases */
+    const uint32_t domid = crs->domid;
+    libxl__logdirty_switch *const lds = &crcs->lds;
+
+    EGC_GC;
+
+    /* we need to know which pages are dirty to restore the guest */
+    if (xc_shadow_control(CTX->xch, domid,
+                          XEN_DOMCTL_SHADOW_OP_ENABLE_LOGDIRTY,
+                          NULL, 0, NULL, 0, NULL) < 0) {
+        LOG(ERROR, "cannot enable secondary vm's logdirty");
+        lds->callback(egc, lds, ERROR_FAIL);
+        return;
+    }
+
+    if (crs->hvm) {
+        libxl__domain_common_switch_qemu_logdirty(egc, domid, 1, lds);
+        return;
+    }
+
+    lds->callback(egc, lds, 0);
+}
+
+static void colo_disable_logdirty(libxl__colo_restore_state *crs,
+                                  libxl__egc *egc)
+{
+    libxl__domain_create_state *dcs = CONTAINER_OF(crs, *dcs, crs);
+    libxl__colo_restore_checkpoint_state *crcs = crs->crcs;
+
+    /* Convenience aliases */
+    const uint32_t domid = crs->domid;
+    libxl__logdirty_switch *const lds = &crcs->lds;
+
+    EGC_GC;
+
+    /* we need to know which pages are dirty to restore the guest */
+    if (xc_shadow_control(CTX->xch, domid, XEN_DOMCTL_SHADOW_OP_OFF,
+                          NULL, 0, NULL, 0, NULL) < 0)
+        LOG(WARN, "cannot disable secondary vm's logdirty");
+
+    if (crs->hvm) {
+        libxl__domain_common_switch_qemu_logdirty(egc, domid, 0, lds);
+        return;
+    }
+
+    lds->callback(egc, lds, 0);
+}
+
+static void colo_resume_vm(libxl__egc *egc,
+                           libxl__colo_restore_checkpoint_state *crcs,
+                           int restore_device_model)
+{
+    libxl__domain_create_state *dcs = CONTAINER_OF(crcs->crs, *dcs, crs);
+    int rc;
+
+    /* Convenience aliases */
+    libxl__colo_restore_state *const crs = crcs->crs;
+
+    EGC_GC;
+
+    if (!crs->saved_cb) {
+        /* TODO: sync mmu for hvm? */
+        if (restore_device_model) {
+            rc = libxl__qmp_restore(gc, crs->domid, crcs->state_file);
+            if (rc) {
+                LOG(ERROR, "cannot restore device model for secondary vm");
+                crcs->callback(egc, crcs, rc);
+                return;
+            }
+        }
+        rc = libxl__domain_resume(gc, crs->domid, 0);
+        if (rc)
+            LOG(ERROR, "cannot resume secondary vm");
+
+        crcs->callback(egc, crcs, rc);
+        return;
+    }
+
+    /*
+     * TODO: get store gfn and console gfn
+     *  We should call the callback restore_results in
+     *  xc_domain_restore() before resuming the guest.
+     */
+    libxl__xc_domain_restore_done(egc, dcs, 0, 0, 0);
+
+    return;
+}
+
+static int init_device_subkind(libxl__checkpoint_devices_state *cds)
+{
+    /* init device subkind-specific state in the libxl ctx */
+    int rc;
+    STATE_AO_GC(cds->ao);
+
+    rc = 0;
+    return rc;
+}
+
+static void cleanup_device_subkind(libxl__checkpoint_devices_state *cds)
+{
+    /* cleanup device subkind-specific state in the libxl ctx */
+    STATE_AO_GC(cds->ao);
+}
+
+/* ================ colo: setup restore environment ================ */
+
+static void libxl__colo_domain_create_cb(libxl__egc *egc,
+                                         libxl__domain_create_state *dcs,
+                                         int rc, uint32_t domid);
+
+static int init_dsps(libxl__domain_suspend_state *dsps)
+{
+    int rc = ERROR_FAIL;
+    libxl_domain_type type;
+
+    STATE_AO_GC(dsps->ao);
+
+    libxl__xswait_init(&dsps->pvcontrol);
+    libxl__ev_evtchn_init(&dsps->guest_evtchn);
+    libxl__ev_xswatch_init(&dsps->guest_watch);
+    libxl__ev_time_init(&dsps->guest_timeout);
+
+    type = libxl__domain_type(gc, dsps->domid);
+    if (type == LIBXL_DOMAIN_TYPE_INVALID)
+        goto out;
+
+    dsps->type = type;
+
+    dsps->guest_evtchn.port = -1;
+    dsps->guest_evtchn_lockfd = -1;
+    dsps->guest_responded = 0;
+    dsps->dm_savefile = libxl__device_model_savefile(gc, dsps->domid);
+
+    /* Secondary vm is not created, so we cannot get evtchn port */
+
+    rc = 0;
+
+out:
+    return rc;
+}
+
+/*
+ * checkpoint callbacks are called in the following order:
+ * 1. resume
+ * 2. wait checkpoint
+ * 3. suspend
+ * 4. checkpoint
+ */
+static void libxl__colo_restore_domain_resume_callback(void *data);
+static void libxl__colo_restore_domain_wait_checkpoint_callback(void *data);
+static void libxl__colo_restore_domain_suspend_callback(void *data);
+static void libxl__colo_restore_domain_checkpoint_callback(void *data);
+
+void libxl__colo_restore_setup(libxl__egc *egc,
+                               libxl__colo_restore_state *crs)
+{
+    libxl__domain_create_state *dcs = CONTAINER_OF(crs, *dcs, crs);
+    libxl__colo_restore_checkpoint_state *crcs;
+    int rc = ERROR_FAIL;
+
+    /* Convenience aliases */
+    libxl__srm_restore_autogen_callbacks *const callbacks =
+        &dcs->srs.shs.callbacks.restore.a;
+    const int domid = crs->domid;
+
+    STATE_AO_GC(crs->ao);
+
+    GCNEW(crcs);
+    crs->crcs = crcs;
+    crcs->crs = crs;
+
+    /* setup dsps */
+    crcs->dsps.ao = ao;
+    crcs->dsps.domid = domid;
+    if (init_dsps(&crcs->dsps))
+        goto out;
+
+    callbacks->postcopy = libxl__colo_restore_domain_resume_callback;
+    callbacks->wait_checkpoint = libxl__colo_restore_domain_wait_checkpoint_callback;
+    callbacks->suspend = libxl__colo_restore_domain_suspend_callback;
+    callbacks->checkpoint = libxl__colo_restore_domain_checkpoint_callback;
+
+    /*
+     * Secondary vm is running in colo mode, so we need to call
+     * libxl__xc_domain_restore_done() to create secondary vm.
+     * But we will exit in domain_create_cb(). So replace the
+     * callback here.
+     */
+    crs->saved_cb = dcs->callback;
+    dcs->callback = libxl__colo_domain_create_cb;
+    crcs->state_file = GCSPRINTF(LIBXL_DEVICE_MODEL_RESTORE_FILE".%d", domid);
+    crcs->status = LIBXL_COLO_SETUPED;
+
+    libxl__logdirty_init(&crcs->lds);
+    crcs->lds.ao = ao;
+
+    crcs->sws.fd = crs->send_back_fd;
+    crcs->sws.ao = ao;
+    crcs->sws.back_channel = true;
+
+    dcs->cds.concrete_data = crs;
+
+    libxl__stream_write_start(egc, &crcs->sws);
+
+    rc = 0;
+
+out:
+    crs->callback(egc, crs, rc);
+    return;
+}
+
+static void libxl__colo_domain_create_cb(libxl__egc *egc,
+                                         libxl__domain_create_state *dcs,
+                                         int rc, uint32_t domid)
+{
+    libxl__colo_restore_checkpoint_state *crcs = dcs->crs.crcs;
+
+    crcs->callback(egc, crcs, rc);
+}
+
+/* ================ colo: teardown restore environment ================ */
+
+static void colo_restore_teardown_devices_done(libxl__egc *egc,
+    libxl__checkpoint_devices_state *cds, int rc);
+static void do_failover(libxl__egc *egc, libxl__colo_restore_state *crs);
+static void do_failover_done(libxl__egc *egc,
+                             libxl__colo_restore_checkpoint_state* crcs,
+                             int rc);
+static void colo_disable_logdirty_done(libxl__egc *egc,
+                                       libxl__logdirty_switch *lds,
+                                       int rc);
+static void libxl__colo_restore_teardown_done(libxl__egc *egc,
+                                              libxl__colo_restore_state *crs,
+                                              int rc);
+
+void libxl__colo_restore_teardown(libxl__egc *egc, void *dcs_void,
+                                  int ret, int retval, int errnoval)
+{
+    libxl__domain_create_state *dcs = dcs_void;
+    libxl__colo_restore_checkpoint_state *crcs = dcs->crs.crcs;
+    int rc = 1;
+
+    /* convenience aliases */
+    libxl__colo_restore_state *const crs = &dcs->crs;
+    EGC_GC;
+
+    if (ret == 0 && retval == 0)
+        rc = 0;
+
+    LOG(INFO, "%s", rc ? "colo fails" : "failover");
+
+    libxl__stream_write_abort(egc, &crcs->sws, 1);
+    if (crs->saved_cb) {
+        /* crcs->status is LIBXL_COLO_SETUPED */
+        dcs->srs.completion_callback = NULL;
+    }
+    libxl__xc_domain_restore_done(egc, dcs, ret, retval, errnoval);
+
+    crcs->saved_rc = rc;
+    if (!crcs->teardown_devices) {
+        colo_restore_teardown_devices_done(egc, &dcs->cds, 0);
+        return;
+    }
+
+    dcs->cds.callback = colo_restore_teardown_devices_done;
+    libxl__checkpoint_devices_teardown(egc, &dcs->cds);
+}
+
+static void colo_restore_teardown_devices_done(libxl__egc *egc,
+    libxl__checkpoint_devices_state *cds, int rc)
+{
+    libxl__colo_restore_state *crs = cds->concrete_data;
+    libxl__colo_restore_checkpoint_state *crcs = crs->crcs;
+    libxl__domain_create_state *dcs = CONTAINER_OF(crs, *dcs, crs);
+
+    EGC_GC;
+
+    if (rc)
+        LOG(ERROR, "COLO: failed to teardown device for guest with domid %u,"
+            " rc %d", cds->domid, rc);
+
+    if (crcs->teardown_devices)
+        cleanup_device_subkind(cds);
+
+    rc = crcs->saved_rc;
+    if (!rc) {
+        crcs->callback = do_failover_done;
+        do_failover(egc, crs);
+        return;
+    }
+
+    libxl__colo_restore_teardown_done(egc, crs, rc);
+}
+
+static void do_failover(libxl__egc *egc, libxl__colo_restore_state *crs)
+{
+    libxl__colo_restore_checkpoint_state *crcs = crs->crcs;
+
+    /* Convenience aliases */
+    const int status = crcs->status;
+    libxl__logdirty_switch *const lds = &crcs->lds;
+
+    EGC_GC;
+
+    switch(status) {
+    case LIBXL_COLO_SETUPED:
+        /*
+         * We will come here only when reading emulator xenstore data or
+         * emulator context fails, and libxl__xc_domain_restore_done()
+         * is not called. In this case, the migration is not finished,
+         * so we cannot do failover.
+         */
+        LOG(ERROR, "migration fails");
+        crcs->callback(egc, crcs, ERROR_FAIL);
+        return;
+    case LIBXL_COLO_SUSPENDED:
+    case LIBXL_COLO_RESUMED:
+        /* disable logdirty first */
+        lds->callback = colo_disable_logdirty_done;
+        colo_disable_logdirty(crs, egc);
+        return;
+    default:
+        LOG(ERROR, "invalid status: %d", status);
+        crcs->callback(egc, crcs, ERROR_FAIL);
+    }
+}
+
+static void do_failover_done(libxl__egc *egc,
+                             libxl__colo_restore_checkpoint_state* crcs,
+                             int rc)
+{
+    libxl__domain_create_state *dcs = CONTAINER_OF(crcs->crs, *dcs, crs);
+
+    /* Convenience aliases */
+    libxl__colo_restore_state *const crs = crcs->crs;
+
+    EGC_GC;
+
+    if (rc)
+        LOG(ERROR, "cannot do failover");
+
+    libxl__colo_restore_teardown_done(egc, crs, rc);
+}
+
+static void colo_disable_logdirty_done(libxl__egc *egc,
+                                       libxl__logdirty_switch *lds,
+                                       int rc)
+{
+    libxl__colo_restore_checkpoint_state *crcs = CONTAINER_OF(lds, *crcs, lds);
+
+    EGC_GC;
+
+    if (rc)
+        LOG(WARN, "cannot disable logdirty");
+
+    if (crcs->status == LIBXL_COLO_SUSPENDED) {
+        /*
+         * failover when reading state from master, so no need to
+         * call libxl__qmp_restore().
+         */
+        colo_resume_vm(egc, crcs, 0);
+        return;
+    }
+
+    /* If we cannot disable logdirty, we still can do failover */
+    crcs->callback(egc, crcs, 0);
+}
+
+static void libxl__colo_restore_teardown_done(libxl__egc *egc,
+                                              libxl__colo_restore_state *crs,
+                                              int rc)
+{
+    libxl__domain_create_state *dcs = CONTAINER_OF(crs, *dcs, crs);
+    EGC_GC;
+
+    /* convenience aliases */
+    const int domid = crs->domid;
+    const libxl_ctx *const ctx = libxl__gc_owner(gc);
+    xc_interface *const xch = ctx->xch;
+
+    if (!rc)
+        /* failover, no need to destroy the secondary vm */
+        goto out;
+
+    xc_domain_destroy(xch, domid);
+
+out:
+    if (crs->saved_cb) {
+        dcs->callback = crs->saved_cb;
+        crs->saved_cb = NULL;
+    }
+
+    dcs->callback(egc, dcs, rc, crs->domid);
+}
+
+static void colo_common_write_stream_done(libxl__egc *egc,
+                                          libxl__stream_write_state *stream,
+                                          int rc);
+static void colo_common_read_stream_done(libxl__egc *egc,
+                                         libxl__stream_read_state *stream,
+                                         int rc);
+
+/* ======================== colo: checkpoint ======================= */
+
+/*
+ * Do the following things when resuming secondary vm:
+ *  1. read emulator xenstore data
+ *  2. read emulator context
+ *  3. REC_TYPE_CHECKPOINT_END
+ */
+static void libxl__colo_restore_domain_checkpoint_callback(void *data)
+{
+    libxl__save_helper_state *shs = data;
+    libxl__stream_read_state *srs = CONTAINER_OF(shs, *srs, shs);
+    libxl__domain_create_state *dcs = CONTAINER_OF(srs, *dcs, srs);
+    libxl__colo_restore_checkpoint_state *crcs = dcs->crs.crcs;
+
+    crcs->callback = NULL;
+    dcs->srs.checkpoint_callback = colo_common_read_stream_done;
+    libxl__stream_read_start_checkpoint(shs->egc, &dcs->srs);
+}
+
+/* ===================== colo: resume secondary vm ===================== */
+
+/*
+ * Do the following things when resuming secondary vm the first time:
+ *  1. resume secondary vm
+ *  2. enable log dirty
+ *  3. setup checkpoint devices
+ *  4. write CHECKPOINT_SVM_READY
+ *  5. unpause secondary vm
+ *  6. write CHECKPOINT_SVM_RESUMED
+ *
+ * Do the following things when resuming secondary vm:
+ *  1. write CHECKPOINT_SVM_READY
+ *  2. resume secondary vm
+ *  3. write CHECKPOINT_SVM_RESUMED
+ */
+static void colo_send_svm_ready(libxl__egc *egc,
+                                libxl__colo_restore_checkpoint_state *crcs);
+static void colo_send_svm_ready_done(libxl__egc *egc,
+                                     libxl__colo_restore_checkpoint_state *crcs,
+                                     int rc);
+static void colo_restore_preresume_cb(libxl__egc *egc,
+                                      libxl__checkpoint_devices_state *cds,
+                                      int rc);
+static void colo_restore_resume_vm(libxl__egc *egc,
+                                   libxl__colo_restore_checkpoint_state *crcs);
+static void colo_resume_vm_done(libxl__egc *egc,
+                                libxl__colo_restore_checkpoint_state *crcs,
+                                int rc);
+static void colo_write_svm_resumed(libxl__egc *egc,
+                                   libxl__colo_restore_checkpoint_state *crcs);
+static void colo_enable_logdirty_done(libxl__egc *egc,
+                                      libxl__logdirty_switch *lds,
+                                      int retval);
+static void colo_reenable_logdirty(libxl__egc *egc,
+                                   libxl__logdirty_switch *lds,
+                                   int rc);
+static void colo_reenable_logdirty_done(libxl__egc *egc,
+                                        libxl__logdirty_switch *lds,
+                                        int rc);
+static void colo_setup_checkpoint_devices(libxl__egc *egc,
+                                          libxl__colo_restore_state *crs);
+static void colo_restore_setup_cds_done(libxl__egc *egc,
+                                        libxl__checkpoint_devices_state *cds,
+                                        int rc);
+static void colo_unpause_svm(libxl__egc *egc,
+                             libxl__colo_restore_checkpoint_state *crcs);
+
+static void libxl__colo_restore_domain_resume_callback(void *data)
+{
+    libxl__save_helper_state *shs = data;
+    libxl__stream_read_state *srs = CONTAINER_OF(shs, *srs, shs);
+    libxl__domain_create_state *dcs = CONTAINER_OF(srs, *dcs, srs);
+    libxl__colo_restore_checkpoint_state *crcs = dcs->crs.crcs;
+
+    if (crcs->teardown_devices)
+        colo_send_svm_ready(shs->egc, crcs);
+    else
+        colo_restore_resume_vm(shs->egc, crcs);
+}
+
+static void colo_send_svm_ready(libxl__egc *egc,
+                               libxl__colo_restore_checkpoint_state *crcs)
+{
+    libxl_sr_checkpoint_state srcs = { .id = CHECKPOINT_SVM_READY };
+
+    crcs->callback = colo_send_svm_ready_done;
+    crcs->sws.checkpoint_callback = colo_common_write_stream_done;
+    libxl__stream_write_checkpoint_state(egc, &crcs->sws, &srcs);
+}
+
+static void colo_send_svm_ready_done(libxl__egc *egc,
+                                     libxl__colo_restore_checkpoint_state *crcs,
+                                     int rc)
+{
+    libxl__domain_create_state *dcs = CONTAINER_OF(crcs->crs, *dcs, crs);
+
+    /* Convenience aliases */
+    libxl__checkpoint_devices_state *cds = &dcs->cds;
+
+    if (!crcs->preresume) {
+        crcs->preresume = true;
+        colo_unpause_svm(egc, crcs);
+        return;
+    }
+
+    cds->callback = colo_restore_preresume_cb;
+    libxl__checkpoint_devices_preresume(egc, cds);
+}
+
+static void colo_restore_preresume_cb(libxl__egc *egc,
+                                      libxl__checkpoint_devices_state *cds,
+                                      int rc)
+{
+    libxl__colo_restore_state *crs = cds->concrete_data;
+    libxl__domain_create_state *dcs = CONTAINER_OF(crs, *dcs, crs);
+    libxl__colo_restore_checkpoint_state *crcs = crs->crcs;
+
+    /* Convenience aliases */
+    libxl__save_helper_state *const shs = &dcs->srs.shs;
+
+    EGC_GC;
+
+    if (rc) {
+        LOG(ERROR, "preresume fails");
+        goto out;
+    }
+
+    colo_restore_resume_vm(egc, crcs);
+
+    return;
+
+out:
+    libxl__xc_domain_saverestore_async_callback_done(egc, shs, 0);
+}
+
+static void colo_restore_resume_vm(libxl__egc *egc,
+                                   libxl__colo_restore_checkpoint_state *crcs)
+{
+
+    crcs->callback = colo_resume_vm_done;
+    colo_resume_vm(egc, crcs, 1);
+}
+
+static void colo_resume_vm_done(libxl__egc *egc,
+                                libxl__colo_restore_checkpoint_state *crcs,
+                                int rc)
+{
+    libxl__domain_create_state *dcs = CONTAINER_OF(crcs->crs, *dcs, crs);
+
+    /* Convenience aliases */
+    libxl__colo_restore_state *const crs = crcs->crs;
+    libxl__logdirty_switch *const lds = &crcs->lds;
+    libxl__save_helper_state *const shs = &dcs->srs.shs;
+
+    EGC_GC;
+
+    if (rc) {
+        LOG(ERROR, "cannot resume secondary vm");
+        goto out;
+    }
+
+    crcs->status = LIBXL_COLO_RESUMED;
+
+    /* avoid calling stream->completion_callback() more than once */
+    if (crs->saved_cb) {
+        dcs->callback = crs->saved_cb;
+        crs->saved_cb = NULL;
+
+        dcs->srs.completion_callback = NULL;
+
+        lds->callback = colo_enable_logdirty_done;
+        colo_enable_logdirty(crs, egc);
+        return;
+    }
+
+    colo_write_svm_resumed(egc, crcs);
+    return;
+
+out:
+    libxl__xc_domain_saverestore_async_callback_done(egc, shs, 0);
+}
+
+static void colo_write_svm_resumed(libxl__egc *egc,
+                                   libxl__colo_restore_checkpoint_state *crcs)
+{
+    libxl_sr_checkpoint_state srcs = { .id = CHECKPOINT_SVM_RESUMED };
+
+    crcs->callback = NULL;
+    crcs->sws.checkpoint_callback = colo_common_write_stream_done;
+    libxl__stream_write_checkpoint_state(egc, &crcs->sws, &srcs);
+}
+
+static void colo_enable_logdirty_done(libxl__egc *egc,
+                                      libxl__logdirty_switch *lds,
+                                      int rc)
+{
+    libxl__colo_restore_checkpoint_state *crcs = CONTAINER_OF(lds, *crcs, lds);
+
+    /* Convenience aliases */
+    libxl__colo_restore_state *const crs = crcs->crs;
+
+    EGC_GC;
+
+    if (rc) {
+        /*
+         * log-dirty already enabled? There's no test op,
+         * so attempt to disable then reenable it
+         */
+        lds->callback = colo_reenable_logdirty;
+        colo_disable_logdirty(crs, egc);
+        return;
+    }
+
+    colo_setup_checkpoint_devices(egc, crs);
+}
+
+static void colo_reenable_logdirty(libxl__egc *egc,
+                                   libxl__logdirty_switch *lds,
+                                   int rc)
+{
+    libxl__colo_restore_checkpoint_state *crcs = CONTAINER_OF(lds, *crcs, lds);
+    libxl__domain_create_state *dcs = CONTAINER_OF(crcs->crs, *dcs, crs);
+
+    /* Convenience aliases */
+    libxl__colo_restore_state *const crs = crcs->crs;
+    libxl__save_helper_state *const shs = &dcs->srs.shs;
+
+    EGC_GC;
+
+    if (rc) {
+        LOG(ERROR, "cannot enable logdirty");
+        goto out;
+    }
+
+    lds->callback = colo_reenable_logdirty_done;
+    colo_enable_logdirty(crs, egc);
+
+    return;
+
+out:
+    libxl__xc_domain_saverestore_async_callback_done(egc, shs, 0);
+}
+
+static void colo_reenable_logdirty_done(libxl__egc *egc,
+                                        libxl__logdirty_switch *lds,
+                                        int rc)
+{
+    libxl__colo_restore_checkpoint_state *crcs = CONTAINER_OF(lds, *crcs, lds);
+    libxl__domain_create_state *dcs = CONTAINER_OF(crcs->crs, *dcs, crs);
+
+    /* Convenience aliases */
+    libxl__save_helper_state *const shs = &dcs->srs.shs;
+
+    EGC_GC;
+
+    if (rc) {
+        LOG(ERROR, "cannot enable logdirty");
+        goto out;
+    }
+
+    colo_setup_checkpoint_devices(egc, crcs->crs);
+
+    return;
+
+out:
+    libxl__xc_domain_saverestore_async_callback_done(egc, shs, 0);
+}
+
+/*
+ * We cannot setup checkpoint devices in libxl__colo_restore_setup(),
+ * because the guest is not ready.
+ */
+static void colo_setup_checkpoint_devices(libxl__egc *egc,
+                                          libxl__colo_restore_state *crs)
+{
+    libxl__domain_create_state *dcs = CONTAINER_OF(crs, *dcs, crs);
+    libxl__colo_restore_checkpoint_state *crcs = crs->crcs;
+
+    /* Convenience aliases */
+    libxl__checkpoint_devices_state *cds = &dcs->cds;
+    libxl__save_helper_state *const shs = &dcs->srs.shs;
+
+    STATE_AO_GC(crs->ao);
+
+    /* TODO: disk/nic support */
+    cds->device_kind_flags = 0;
+    cds->callback = colo_restore_setup_cds_done;
+    cds->ao = ao;
+    cds->domid = crs->domid;
+    cds->ops = colo_restore_ops;
+
+    if (init_device_subkind(cds))
+        goto out;
+
+    crcs->teardown_devices = 1;
+
+    libxl__checkpoint_devices_setup(egc, cds);
+    return;
+
+out:
+    libxl__xc_domain_saverestore_async_callback_done(egc, shs, 0);
+}
+
+static void colo_restore_setup_cds_done(libxl__egc *egc,
+                                        libxl__checkpoint_devices_state *cds,
+                                        int rc)
+{
+    libxl__colo_restore_state *crs = cds->concrete_data;
+    libxl__domain_create_state *dcs = CONTAINER_OF(crs, *dcs, crs);
+    libxl__colo_restore_checkpoint_state *crcs = crs->crcs;
+
+    /* Convenience aliases */
+    libxl__save_helper_state *const shs = &dcs->srs.shs;
+
+    EGC_GC;
+
+    if (rc) {
+        LOG(ERROR, "COLO: failed to setup device for guest with domid %u",
+            cds->domid);
+        goto out;
+    }
+
+    colo_send_svm_ready(egc, crcs);
+
+    return;
+
+out:
+    libxl__xc_domain_saverestore_async_callback_done(egc, shs, 0);
+}
+
+static void colo_unpause_svm(libxl__egc *egc,
+                             libxl__colo_restore_checkpoint_state *crcs)
+{
+    libxl__domain_create_state *dcs = CONTAINER_OF(crcs->crs, *dcs, crs);
+    int rc;
+
+    /* Convenience aliases */
+    const uint32_t domid = crcs->crs->domid;
+    libxl__save_helper_state *const shs = &dcs->srs.shs;
+
+    EGC_GC;
+
+    /* We have enabled secondary vm's logdirty, so we can unpause it now */
+    rc = libxl_domain_unpause(CTX, domid);
+    if (rc) {
+        LOG(ERROR, "cannot unpause secondary vm");
+        goto out;
+    }
+
+    colo_write_svm_resumed(egc, crcs);
+
+    return;
+
+out:
+    libxl__xc_domain_saverestore_async_callback_done(egc, shs, 0);
+}
+
+/* ===================== colo: wait new checkpoint ===================== */
+
+static void colo_restore_commit_cb(libxl__egc *egc,
+                                   libxl__checkpoint_devices_state *cds,
+                                   int rc);
+static void colo_stream_read_done(libxl__egc *egc,
+                                  libxl__colo_restore_checkpoint_state *crcs,
+                                  int real_size);
+
+static void libxl__colo_restore_domain_wait_checkpoint_callback(void *data)
+{
+    libxl__save_helper_state *shs = data;
+    libxl__stream_read_state *srs = CONTAINER_OF(shs, *srs, shs);
+    libxl__domain_create_state *dcs = CONTAINER_OF(srs, *dcs, srs);
+
+    /* Convenience aliases */
+    libxl__checkpoint_devices_state *cds = &dcs->cds;
+
+    cds->callback = colo_restore_commit_cb;
+    libxl__checkpoint_devices_commit(shs->egc, cds);
+}
+
+static void colo_restore_commit_cb(libxl__egc *egc,
+                                   libxl__checkpoint_devices_state *cds,
+                                   int rc)
+{
+    libxl__colo_restore_state *crs = cds->concrete_data;
+    libxl__domain_create_state *dcs = CONTAINER_OF(crs, *dcs, crs);
+    libxl__colo_restore_checkpoint_state *crcs = crs->crcs;
+
+    EGC_GC;
+
+    if (rc) {
+        LOG(ERROR, "commit fails");
+        goto out;
+    }
+
+    crcs->callback = colo_stream_read_done;
+    dcs->srs.checkpoint_callback = colo_common_read_stream_done;
+    libxl__stream_read_checkpoint_state(egc, &dcs->srs);
+
+    return;
+
+out:
+    libxl__xc_domain_saverestore_async_callback_done(egc, &dcs->srs.shs, 0);
+}
+
+static void colo_stream_read_done(libxl__egc *egc,
+                                  libxl__colo_restore_checkpoint_state *crcs,
+                                  int id)
+{
+    libxl__domain_create_state *dcs = CONTAINER_OF(crcs->crs, *dcs, crs);
+    int ok = 0;
+
+    EGC_GC;
+
+    if (id != CHECKPOINT_NEW) {
+        LOG(ERROR, "invalid section: %d", id);
+        goto out;
+    }
+
+    ok = 1;
+
+out:
+    libxl__xc_domain_saverestore_async_callback_done(egc, &dcs->srs.shs, ok);
+}
+
+/* ===================== colo: suspend secondary vm ===================== */
+
+/*
+ * Do the following things when resuming secondary vm:
+ *  1. suspend secondary vm
+ *  2. send CHECKPOINT_SVM_SUSPENDED
+ */
+static void colo_suspend_vm_done(libxl__egc *egc,
+                                 libxl__domain_suspend_state *dsps,
+                                 int ok);
+static void colo_restore_postsuspend_cb(libxl__egc *egc,
+                                        libxl__checkpoint_devices_state *cds,
+                                        int rc);
+
+static void libxl__colo_restore_domain_suspend_callback(void *data)
+{
+    libxl__save_helper_state *shs = data;
+    libxl__stream_read_state *srs = CONTAINER_OF(shs, *srs, shs);
+    libxl__domain_create_state *dcs = CONTAINER_OF(srs, *dcs, srs);
+    libxl__colo_restore_checkpoint_state *crcs = dcs->crs.crcs;
+
+    STATE_AO_GC(dcs->ao);
+
+    /* Convenience aliases */
+    libxl__domain_suspend_state *const dsps = &crcs->dsps;
+
+    /* suspend secondary vm */
+    dsps->callback_common_done = colo_suspend_vm_done;
+
+    libxl__domain_suspend(shs->egc, dsps);
+}
+
+static void colo_suspend_vm_done(libxl__egc *egc,
+                                 libxl__domain_suspend_state *dsps,
+                                 int rc)
+{
+    libxl__colo_restore_checkpoint_state *crcs = CONTAINER_OF(dsps, *crcs, dsps);
+    libxl__colo_restore_state *crs = crcs->crs;
+    libxl__domain_create_state *dcs = CONTAINER_OF(crs, *dcs, crs);
+
+    /* Convenience aliases */
+    libxl__checkpoint_devices_state *cds = &dcs->cds;
+
+    EGC_GC;
+
+    if (rc) {
+        LOG(ERROR, "cannot suspend secondary vm");
+        goto out;
+    }
+
+    crcs->status = LIBXL_COLO_SUSPENDED;
+
+    cds->callback = colo_restore_postsuspend_cb;
+    libxl__checkpoint_devices_postsuspend(egc, cds);
+
+    return;
+
+out:
+    libxl__xc_domain_saverestore_async_callback_done(egc, &dcs->srs.shs, !rc);
+}
+
+static void colo_restore_postsuspend_cb(libxl__egc *egc,
+                                        libxl__checkpoint_devices_state *cds,
+                                        int rc)
+{
+    libxl__colo_restore_state *crs = cds->concrete_data;
+    libxl__domain_create_state *dcs = CONTAINER_OF(crs, *dcs, crs);
+    libxl__colo_restore_checkpoint_state *crcs = crs->crcs;
+    libxl_sr_checkpoint_state srcs = { .id = CHECKPOINT_SVM_SUSPENDED };
+
+    EGC_GC;
+
+    if (rc) {
+        LOG(ERROR, "postsuspend fails");
+        goto out;
+    }
+
+    crcs->callback = NULL;
+    crcs->sws.checkpoint_callback = colo_common_write_stream_done;
+    libxl__stream_write_checkpoint_state(egc, &crcs->sws, &srcs);
+
+    return;
+
+out:
+    libxl__xc_domain_saverestore_async_callback_done(egc, &dcs->srs.shs, !rc);
+}
+
+/* ===================== colo: common callback ===================== */
+
+static void colo_common_write_stream_done(libxl__egc *egc,
+                                          libxl__stream_write_state *stream,
+                                          int rc)
+{
+    libxl__colo_restore_checkpoint_state *crcs =
+        CONTAINER_OF(stream, *crcs, sws);
+    libxl__domain_create_state *dcs = CONTAINER_OF(crcs->crs, *dcs, crs);
+    int ok;
+
+    EGC_GC;
+
+    if (rc < 0) {
+        /* TODO: it may be a internal error, but we don't know */
+        LOG(ERROR, "sending data fails");
+        ok = 2;
+        goto out;
+    }
+
+    if (!crcs->callback) {
+        /* Everythins is OK */
+        ok = 1;
+        goto out;
+    }
+
+    crcs->callback(egc, crcs, 0);
+
+    return;
+
+out:
+    libxl__xc_domain_saverestore_async_callback_done(egc, &dcs->srs.shs, ok);
+}
+
+static void colo_common_read_stream_done(libxl__egc *egc,
+                                         libxl__stream_read_state *stream,
+                                         int rc)
+{
+    libxl__domain_create_state *dcs = CONTAINER_OF(stream, *dcs, srs);
+    libxl__colo_restore_checkpoint_state *crcs = dcs->crs.crcs;
+    int ok;
+
+    EGC_GC;
+
+    if (rc < 0) {
+        /* TODO: it may be a internal error, but we don't know */
+        LOG(ERROR, "reading data fails");
+        ok = 2;
+        goto out;
+    }
+
+    if (!crcs->callback) {
+        /* Everythins is OK */
+        ok = 1;
+        goto out;
+    }
+
+    /* rc contains the id */
+    crcs->callback(egc, crcs, rc);
+
+    return;
+
+out:
+    libxl__xc_domain_saverestore_async_callback_done(egc, &dcs->srs.shs, ok);
+}
diff --git a/tools/libxl/libxl_create.c b/tools/libxl/libxl_create.c
index 4d2b95c..c58dd7e 100644
--- a/tools/libxl/libxl_create.c
+++ b/tools/libxl/libxl_create.c
@@ -985,6 +985,23 @@  static void domcreate_console_available(libxl__egc *egc,
                                         dcs->aop_console_how.for_event));
 }
 
+static void libxl__colo_restore_setup_done(libxl__egc *egc,
+                                           libxl__colo_restore_state *crs,
+                                           int rc)
+{
+    libxl__domain_create_state *dcs = CONTAINER_OF(crs, *dcs, crs);
+
+    EGC_GC;
+
+    if (rc) {
+        LOG(ERROR, "colo restore setup fails: %d", rc);
+        domcreate_stream_done(egc, &dcs->srs, rc);
+        return;
+    }
+
+    libxl__stream_read_start(egc, &dcs->srs);
+}
+
 static void domcreate_bootloader_done(libxl__egc *egc,
                                       libxl__bootloader_state *bl,
                                       int rc)
@@ -998,6 +1015,8 @@  static void domcreate_bootloader_done(libxl__egc *egc,
     const int restore_fd = dcs->restore_fd;
     libxl__domain_build_state *const state = &dcs->build_state;
     const int checkpointed_stream = dcs->restore_params.checkpointed_stream;
+    libxl__colo_restore_state *const crs = &dcs->crs;
+    libxl_domain_build_info *const info = &d_config->b_info;
 
     if (rc) {
         domcreate_rebuild_done(egc, dcs, rc);
@@ -1026,6 +1045,22 @@  static void domcreate_bootloader_done(libxl__egc *egc,
 
     /* Restore */
 
+    /* COLO only supports HVM now because it does not work very
+     * well with pv drivers:
+     * 1. We need to resume vm in the slow path. In this case we
+     *    need to disconnect/reconnect backend and frontend. It
+     *    will take too much time and the performance is very slow.
+     * 2. PV disk cannot reuse block replication that is implemented
+     *    in QEMU.
+     */
+    if (info->type != LIBXL_DOMAIN_TYPE_HVM &&
+        checkpointed_stream == LIBXL_CHECKPOINTED_STREAM_COLO) {
+        LOG(ERROR, "COLO only supports HVM, unable to restore domain %d",
+            domid);
+        rc = ERROR_FAIL;
+        goto out;
+    }
+
     rc = libxl__build_pre(gc, domid, d_config, state);
     if (rc)
         goto out;
@@ -1039,6 +1074,16 @@  static void domcreate_bootloader_done(libxl__egc *egc,
 
     if (restore_fd >= 0) {
         switch (checkpointed_stream) {
+        case LIBXL_CHECKPOINTED_STREAM_COLO:
+            /* colo restore setup */
+            crs->ao = ao;
+            crs->domid = domid;
+            crs->send_back_fd = dcs->send_back_fd;
+            crs->recv_fd = restore_fd;
+            crs->hvm = (info->type == LIBXL_DOMAIN_TYPE_HVM);
+            crs->callback = libxl__colo_restore_setup_done;
+            libxl__colo_restore_setup(egc, crs);
+            break;
         case LIBXL_CHECKPOINTED_STREAM_REMUS:
             libxl__remus_restore_setup(egc, dcs);
             /* fall through */
diff --git a/tools/libxl/libxl_internal.h b/tools/libxl/libxl_internal.h
index 1fafba8..83ac20a 100644
--- a/tools/libxl/libxl_internal.h
+++ b/tools/libxl/libxl_internal.h
@@ -87,6 +87,8 @@ 
 #include "_libxl_types_internal.h"
 #include "_libxl_types_internal_json.h"
 
+#include "libxl_colo.h"
+
 #define LIBXL_INIT_TIMEOUT 10
 #define LIBXL_DESTROY_TIMEOUT 10
 #define LIBXL_HOTPLUG_TIMEOUT 40
@@ -3422,12 +3424,6 @@  _hidden int libxl__destroy_qdisk_backend(libxl__gc *gc, uint32_t domid);
 
 /*----- Domain creation -----*/
 
-typedef struct libxl__domain_create_state libxl__domain_create_state;
-
-typedef void libxl__domain_create_cb(libxl__egc *egc,
-                                     libxl__domain_create_state*,
-                                     int rc, uint32_t domid);
-
 /* State for manipulating a libxl migration v2 stream */
 typedef struct libxl__stream_read_state libxl__stream_read_state;
 
@@ -3510,6 +3506,8 @@  struct libxl__domain_create_state {
     /* private to domain_create */
     int guest_domid;
     libxl__domain_build_state build_state;
+    libxl__colo_restore_state crs;
+    libxl__checkpoint_devices_state cds;
     libxl__bootloader_state bl;
     libxl__stub_dm_spawn_state dmss;
         /* If we're not doing stubdom, we use only dmss.dm,
diff --git a/tools/libxl/libxl_save_callout.c b/tools/libxl/libxl_save_callout.c
index f15c235..2e6267d 100644
--- a/tools/libxl/libxl_save_callout.c
+++ b/tools/libxl/libxl_save_callout.c
@@ -68,7 +68,11 @@  void libxl__xc_domain_restore(libxl__egc *egc, libxl__domain_create_state *dcs,
     shs->ao = ao;
     shs->domid = domid;
     shs->recv_callback = libxl__srm_callout_received_restore;
-    shs->completion_callback = libxl__xc_domain_restore_done;
+    if (dcs->restore_params.checkpointed_stream ==
+        LIBXL_CHECKPOINTED_STREAM_COLO)
+        shs->completion_callback = libxl__colo_restore_teardown;
+    else
+        shs->completion_callback = libxl__xc_domain_restore_done;
     shs->caller_state = dcs;
     shs->need_results = 1;
 
diff --git a/tools/libxl/libxl_save_msgs_gen.pl b/tools/libxl/libxl_save_msgs_gen.pl
index d6d2967..cbb6ca1 100755
--- a/tools/libxl/libxl_save_msgs_gen.pl
+++ b/tools/libxl/libxl_save_msgs_gen.pl
@@ -23,14 +23,15 @@  our @msgs = (
                                                  STRING doing_what),
                                                 'unsigned long', 'done',
                                                 'unsigned long', 'total'] ],
-    [  3, 'scxA',   "suspend", [] ],
-    [  4, 'scxA',   "postcopy", [] ],
+    [  3, 'srcxA',  "suspend", [] ],
+    [  4, 'srcxA',  "postcopy", [] ],
     [  5, 'srcxA',  "checkpoint", [] ],
-    [  6, 'scxA',   "switch_qemu_logdirty",  [qw(int domid
+    [  6, 'rcxA',   "wait_checkpoint", [] ],
+    [  7, 'scxA',   "switch_qemu_logdirty",  [qw(int domid
                                               unsigned enable)] ],
-    [  7, 'r',      "restore_results",       ['unsigned long', 'store_mfn',
+    [  8, 'r',      "restore_results",       ['unsigned long', 'store_mfn',
                                               'unsigned long', 'console_mfn'] ],
-    [  8, 'srW',    "complete",              [qw(int retval
+    [  9, 'srW',    "complete",              [qw(int retval
                                                  int errnoval)] ],
 );
 
diff --git a/tools/libxl/libxl_stream_read.c b/tools/libxl/libxl_stream_read.c
index 302ae53..9659051 100644
--- a/tools/libxl/libxl_stream_read.c
+++ b/tools/libxl/libxl_stream_read.c
@@ -850,6 +850,18 @@  void libxl__xc_domain_restore_done(libxl__egc *egc, void *dcs_void,
      */
     if (libxl__stream_read_inuse(stream)) {
         switch (checkpointed_stream) {
+        case LIBXL_CHECKPOINTED_STREAM_COLO:
+            if (stream->completion_callback) {
+                /*
+                 * restore, just build the secondary vm, don't close
+                 * the stream
+                 */
+                stream->completion_callback(egc, stream, 0);
+            } else {
+                /* failover, just close the stream */
+                stream_complete(egc, stream, 0);
+            }
+            break;
         case LIBXL_CHECKPOINTED_STREAM_REMUS:
             /*
              * Failover from primary. Domain state is currently at a
diff --git a/tools/libxl/libxl_types.idl b/tools/libxl/libxl_types.idl
index 59b183c..4717517 100644
--- a/tools/libxl/libxl_types.idl
+++ b/tools/libxl/libxl_types.idl
@@ -233,6 +233,7 @@  libxl_hdtype = Enumeration("hdtype", [
 libxl_checkpointed_stream = Enumeration("checkpointed_stream", [
     (0, "NONE"),
     (1, "REMUS"),
+    (2, "COLO"),
     ])
 
 #