diff mbox

[v10,25/31] COLO: use qemu block replication

Message ID 1456109555-28299-26-git-send-email-wency@cn.fujitsu.com (mailing list archive)
State New, archived
Headers show

Commit Message

Wen Congyang Feb. 22, 2016, 2:52 a.m. UTC
Use qemu block replication as our block replication solution.
Note that guest must be paused before starting COLO, otherwise,
the disk won't be consistent between primary and secondary.

Signed-off-by: Wen Congyang <wency@cn.fujitsu.com>
Signed-off-by: Yang Hongyang <hongyang.yang@easystack.cn>
---
 tools/libxl/Makefile             |   1 +
 tools/libxl/libxl_colo_qdisk.c   | 226 +++++++++++++++++++++++++++++++++++++++
 tools/libxl/libxl_colo_restore.c |  42 +++++++-
 tools/libxl/libxl_colo_save.c    |  54 +++++++++-
 tools/libxl/libxl_internal.h     |  13 +++
 5 files changed, 331 insertions(+), 5 deletions(-)
 create mode 100644 tools/libxl/libxl_colo_qdisk.c

Comments

Wei Liu March 2, 2016, 3:03 p.m. UTC | #1
On Mon, Feb 22, 2016 at 10:52:29AM +0800, Wen Congyang wrote:
> Use qemu block replication as our block replication solution.
> Note that guest must be paused before starting COLO, otherwise,
> the disk won't be consistent between primary and secondary.
> 
> Signed-off-by: Wen Congyang <wency@cn.fujitsu.com>
> Signed-off-by: Yang Hongyang <hongyang.yang@easystack.cn>
> ---
>  tools/libxl/Makefile             |   1 +
>  tools/libxl/libxl_colo_qdisk.c   | 226 +++++++++++++++++++++++++++++++++++++++
>  tools/libxl/libxl_colo_restore.c |  42 +++++++-
>  tools/libxl/libxl_colo_save.c    |  54 +++++++++-
>  tools/libxl/libxl_internal.h     |  13 +++

All the changes look internal to COLO and are trying to manipulate QEMU
or whatnot. I don't think I can provide very detailed feedback. I am
tempted to just ack this patch.

Wei.
diff mbox

Patch

diff --git a/tools/libxl/Makefile b/tools/libxl/Makefile
index 2016393..28d54d0 100644
--- a/tools/libxl/Makefile
+++ b/tools/libxl/Makefile
@@ -66,6 +66,7 @@  endif
 
 LIBXL_OBJS-y += libxl_remus.o libxl_checkpoint_device.o libxl_remus_disk_drbd.o
 LIBXL_OBJS-y += libxl_colo_restore.o libxl_colo_save.o
+LIBXL_OBJS-y += libxl_colo_qdisk.o
 
 LIBXL_OBJS-$(CONFIG_X86) += libxl_cpuid.o libxl_x86.o libxl_psr.o
 LIBXL_OBJS-$(CONFIG_ARM) += libxl_nocpuid.o libxl_arm.o libxl_libfdt_compat.o
diff --git a/tools/libxl/libxl_colo_qdisk.c b/tools/libxl/libxl_colo_qdisk.c
new file mode 100644
index 0000000..6179947
--- /dev/null
+++ b/tools/libxl/libxl_colo_qdisk.c
@@ -0,0 +1,226 @@ 
+/*
+ * Copyright (C) 2015 FUJITSU LIMITED
+ * Author: Wen Congyang <wency@cn.fujitsu.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation; version 2.1 only. with the special
+ * exception on linking described in file LICENSE.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ */
+
+#include "libxl_osdeps.h" /* must come before any other headers */
+
+#include "libxl_internal.h"
+
+typedef struct libxl__colo_qdisk {
+    bool setuped;
+} libxl__colo_qdisk;
+
+/* ========== init() and cleanup() ========== */
+int init_subkind_qdisk(libxl__checkpoint_devices_state *cds)
+{
+    /*
+     * We don't know if we use qemu block replication, so
+     * we cannot start block replication here.
+     */
+    return 0;
+}
+
+void cleanup_subkind_qdisk(libxl__checkpoint_devices_state *cds)
+{
+}
+
+/* ========== setup() and teardown() ========== */
+static void colo_qdisk_setup(libxl__egc *egc, libxl__checkpoint_device *dev,
+                             bool primary)
+{
+    const libxl_device_disk *disk = dev->backend_dev;
+    int ret, rc = 0;
+    libxl__colo_qdisk *colo_qdisk = NULL;
+
+    /* Convenience aliases */
+    libxl__checkpoint_devices_state *const cds = dev->cds;
+    const char *host = disk->colo_host;
+    const char *port = disk->colo_port;
+    const char *export_name = disk->colo_export;
+    const int domid = cds->domid;
+
+    STATE_AO_GC(dev->cds->ao);
+
+    if (disk->backend != LIBXL_DISK_BACKEND_QDISK ||
+        !libxl_defbool_val(disk->colo_enable)) {
+        rc = ERROR_CHECKPOINT_DEVOPS_DOES_NOT_MATCH;
+        goto out;
+    }
+
+    dev->matched = true;
+
+    GCNEW(colo_qdisk);
+    dev->concrete_data = colo_qdisk;
+
+    if (primary) {
+        libxl__colo_save_state *css = cds->concrete_data;
+
+        css->qdisk_used = true;
+        /* NBD server is not ready, so we cannot start block replication now */
+        goto out;
+    } else {
+        libxl__colo_restore_state *crs = cds->concrete_data;
+
+        if (!crs->qdisk_used) {
+            /* start nbd server */
+            ret = libxl__qmp_nbd_server_start(gc, domid, host, port);
+            if (ret) {
+                rc = ERROR_FAIL;
+                goto out;
+            }
+            crs->host = host;
+            crs->port = port;
+        } else {
+            if (strcmp(crs->host, host) || strcmp(crs->port, port)) {
+                LOG(ERROR, "The host and port of all disks must be the same");
+                rc = ERROR_FAIL;
+                goto out;
+            }
+        }
+
+        crs->qdisk_used = true;
+
+        ret = libxl__qmp_nbd_server_add(gc, domid, export_name);
+        if (ret)
+            rc = ERROR_FAIL;
+
+        colo_qdisk->setuped = true;
+    }
+
+out:
+    dev->aodev.rc = rc;
+    dev->aodev.callback(egc, &dev->aodev);
+}
+
+static void colo_qdisk_teardown(libxl__egc *egc, libxl__checkpoint_device *dev,
+                                bool primary)
+{
+    int ret, rc = 0;
+    const libxl__colo_qdisk *colo_qdisk = dev->concrete_data;
+    const libxl_device_disk *disk = dev->backend_dev;
+
+    /* Convenience aliases */
+    libxl__checkpoint_devices_state *const cds = dev->cds;
+    const int domid = cds->domid;
+    const char *export_name = disk->colo_export;
+
+    EGC_GC;
+
+    if (primary) {
+        if (!colo_qdisk->setuped)
+            goto out;
+
+        /*
+         * There is no way to get the child name, but we know it is children.1
+         */
+        ret = libxl__qmp_x_blockdev_change(gc, domid, export_name,
+                                           "children.1", NULL);
+        if (ret)
+            rc = ERROR_FAIL;
+    } else {
+        libxl__colo_restore_state *crs = cds->concrete_data;
+
+        if (crs->qdisk_used) {
+            ret = libxl__qmp_nbd_server_stop(gc, domid);
+            if (ret)
+                rc = ERROR_FAIL;
+        }
+    }
+
+out:
+    dev->aodev.rc = rc;
+    dev->aodev.callback(egc, &dev->aodev);
+}
+
+/* ========== checkpointing APIs ========== */
+static void colo_qdisk_save_preresume(libxl__egc *egc,
+                                      libxl__checkpoint_device *dev)
+{
+    libxl__colo_qdisk *colo_qdisk = dev->concrete_data;
+    const libxl_device_disk *disk = dev->backend_dev;
+    int ret, rc = 0;
+    char *node = NULL;
+    char *cmd = NULL;
+
+    /* Convenience aliases */
+    const int domid = dev->cds->domid;
+    const char *host = disk->colo_host;
+    const char *port = disk->colo_port;
+    const char *export_name = disk->colo_export;
+
+    EGC_GC;
+
+    if (colo_qdisk->setuped)
+        goto out;
+
+    /* qmp command doesn't support the driver "nbd" */
+    node = GCSPRINTF("colo_node%d",
+                     libxl__device_disk_dev_number(disk->vdev, NULL, NULL));
+    cmd = GCSPRINTF("drive_add buddy driver=replication,mode=primary,"
+                    "file.driver=nbd,file.host=%s,file.port=%s,"
+                    "file.export=%s,node-name=%s,if=none",
+                    host, port, export_name, node);
+    ret = libxl__qmp_hmp(gc, domid, cmd);
+    if (ret)
+        rc = ERROR_FAIL;
+
+    ret = libxl__qmp_x_blockdev_change(gc, domid, export_name, NULL, node);
+    if (ret)
+        rc = ERROR_FAIL;
+
+    colo_qdisk->setuped = true;
+
+out:
+    dev->aodev.rc = rc;
+    dev->aodev.callback(egc, &dev->aodev);
+}
+
+/* ======== primary ======== */
+static void colo_qdisk_save_setup(libxl__egc *egc,
+                                  libxl__checkpoint_device *dev)
+{
+    colo_qdisk_setup(egc, dev, true);
+}
+
+static void colo_qdisk_save_teardown(libxl__egc *egc,
+                                   libxl__checkpoint_device *dev)
+{
+    colo_qdisk_teardown(egc, dev, true);
+}
+
+const libxl__checkpoint_device_instance_ops colo_save_device_qdisk = {
+    .kind = LIBXL__DEVICE_KIND_VBD,
+    .setup = colo_qdisk_save_setup,
+    .teardown = colo_qdisk_save_teardown,
+    .preresume = colo_qdisk_save_preresume,
+};
+
+/* ======== secondary ======== */
+static void colo_qdisk_restore_setup(libxl__egc *egc,
+                                     libxl__checkpoint_device *dev)
+{
+    colo_qdisk_setup(egc, dev, false);
+}
+
+static void colo_qdisk_restore_teardown(libxl__egc *egc,
+                                      libxl__checkpoint_device *dev)
+{
+    colo_qdisk_teardown(egc, dev, false);
+}
+
+const libxl__checkpoint_device_instance_ops colo_restore_device_qdisk = {
+    .kind = LIBXL__DEVICE_KIND_VBD,
+    .setup = colo_qdisk_restore_setup,
+    .teardown = colo_qdisk_restore_teardown,
+};
diff --git a/tools/libxl/libxl_colo_restore.c b/tools/libxl/libxl_colo_restore.c
index 0ef5b5f..eff80ca 100644
--- a/tools/libxl/libxl_colo_restore.c
+++ b/tools/libxl/libxl_colo_restore.c
@@ -50,7 +50,10 @@  static void libxl__colo_restore_domain_checkpoint_callback(void *data);
 static void libxl__colo_restore_domain_wait_checkpoint_callback(void *data);
 static void libxl__colo_restore_domain_suspend_callback(void *data);
 
+extern const libxl__checkpoint_device_instance_ops colo_restore_device_qdisk;
+
 static const libxl__checkpoint_device_instance_ops *colo_restore_ops[] = {
+    &colo_restore_device_qdisk,
     NULL,
 };
 
@@ -150,7 +153,11 @@  static int init_device_subkind(libxl__checkpoint_devices_state *cds)
     int rc;
     STATE_AO_GC(cds->ao);
 
+    rc = init_subkind_qdisk(cds);
+    if (rc)  goto out;
+
     rc = 0;
+out:
     return rc;
 }
 
@@ -158,6 +165,8 @@  static void cleanup_device_subkind(libxl__checkpoint_devices_state *cds)
 {
     /* cleanup device subkind-specific state in the libxl ctx */
     STATE_AO_GC(cds->ao);
+
+    cleanup_subkind_qdisk(cds);
 }
 
 
@@ -214,6 +223,8 @@  void libxl__colo_restore_setup(libxl__egc *egc,
     GCNEW(crcs);
     crs->crcs = crcs;
     crcs->crs = crs;
+    crs->qdisk_setuped = false;
+    crs->qdisk_used = false;
 
     /* setup dsps */
     crcs->dsps.ao = ao;
@@ -305,6 +316,11 @@  void libxl__colo_restore_teardown(libxl__egc *egc, void *dcs_void,
     }
     libxl__xc_domain_restore_done(egc, dcs, ret, retval, errnoval);
 
+    if (crs->qdisk_setuped) {
+        libxl__qmp_stop_replication(gc, crs->domid, false);
+        crs->qdisk_setuped = false;
+    }
+
     crcs->saved_rc = rc;
     if (!crcs->teardown_devices) {
         colo_restore_teardown_devices_done(egc, &dcs->cds, 0);
@@ -582,6 +598,13 @@  static void colo_restore_preresume_cb(libxl__egc *egc,
         goto out;
     }
 
+    if (crs->qdisk_setuped) {
+        if (libxl__qmp_do_checkpoint(gc, crs->domid)) {
+            LOG(ERROR, "doing checkpoint fails");
+            goto out;
+        }
+    }
+
     colo_restore_resume_vm(egc, crcs);
 
     return;
@@ -739,8 +762,8 @@  static void colo_setup_checkpoint_devices(libxl__egc *egc,
 
     STATE_AO_GC(crs->ao);
 
-    /* TODO: disk/nic support */
-    cds->device_kind_flags = 0;
+    /* TODO: nic support */
+    cds->device_kind_flags = (1 << LIBXL__DEVICE_KIND_VBD);
     cds->callback = colo_restore_setup_cds_done;
     cds->ao = ao;
     cds->domid = crs->domid;
@@ -777,6 +800,14 @@  static void colo_restore_setup_cds_done(libxl__egc *egc,
         goto out;
     }
 
+    if (crs->qdisk_used && !crs->qdisk_setuped) {
+        if (libxl__qmp_start_replication(gc, crs->domid, false)) {
+            LOG(ERROR, "starting replication fails");
+            goto out;
+        }
+        crs->qdisk_setuped = true;
+    }
+
     colo_send_svm_ready(egc, crcs);
 
     return;
@@ -931,13 +962,18 @@  static void colo_suspend_vm_done(libxl__egc *egc,
 
     crcs->status = LIBXL_COLO_SUSPENDED;
 
+    if (libxl__qmp_get_replication_error(gc, crs->domid)) {
+        LOG(ERROR, "replication error occurs when secondary vm is running");
+        goto out;
+    }
+
     cds->callback = colo_restore_postsuspend_cb;
     libxl__checkpoint_devices_postsuspend(egc, cds);
 
     return;
 
 out:
-    libxl__xc_domain_saverestore_async_callback_done(egc, &dcs->srs.shs, !rc);
+    libxl__xc_domain_saverestore_async_callback_done(egc, &dcs->srs.shs, 0);
 }
 
 static void colo_restore_postsuspend_cb(libxl__egc *egc,
diff --git a/tools/libxl/libxl_colo_save.c b/tools/libxl/libxl_colo_save.c
index 8b18a91..5f8456c 100644
--- a/tools/libxl/libxl_colo_save.c
+++ b/tools/libxl/libxl_colo_save.c
@@ -19,7 +19,10 @@ 
 #include "libxl_internal.h"
 #include "libxl_colo.h"
 
+extern const libxl__checkpoint_device_instance_ops colo_save_device_qdisk;
+
 static const libxl__checkpoint_device_instance_ops *colo_ops[] = {
+    &colo_save_device_qdisk,
     NULL,
 };
 
@@ -30,7 +33,11 @@  static int init_device_subkind(libxl__checkpoint_devices_state *cds)
     int rc;
     STATE_AO_GC(cds->ao);
 
+    rc = init_subkind_qdisk(cds);
+    if (rc) goto out;
+
     rc = 0;
+out:
     return rc;
 }
 
@@ -38,6 +45,8 @@  static void cleanup_device_subkind(libxl__checkpoint_devices_state *cds)
 {
     /* cleanup device subkind-specific state in the libxl ctx */
     STATE_AO_GC(cds->ao);
+
+    cleanup_subkind_qdisk(cds);
 }
 
 /* ================= colo: setup save environment ================= */
@@ -71,9 +80,12 @@  void libxl__colo_save_setup(libxl__egc *egc, libxl__colo_save_state *css)
     css->send_fd = dss->fd;
     css->recv_fd = dss->recv_fd;
     css->svm_running = false;
+    css->paused = true;
+    css->qdisk_setuped = false;
+    css->qdisk_used = false;
 
-    /* TODO: disk/nic support */
-    cds->device_kind_flags = 0;
+    /* TODO: nic support */
+    cds->device_kind_flags = (1 << LIBXL__DEVICE_KIND_VBD);
     cds->ops = colo_ops;
     cds->callback = colo_save_setup_done;
     cds->ao = ao;
@@ -153,6 +165,11 @@  void libxl__colo_save_teardown(libxl__egc *egc,
 
     libxl__stream_read_abort(egc, &css->srs, 1);
 
+    if (css->qdisk_setuped) {
+        libxl__qmp_stop_replication(gc, dss->domid, true);
+        css->qdisk_setuped = false;
+    }
+
     dss->cds.callback = colo_teardown_done;
     libxl__checkpoint_devices_teardown(egc, &dss->cds);
     return;
@@ -287,6 +304,11 @@  static void colo_read_svm_suspended_done(libxl__egc *egc,
         goto out;
     }
 
+    if (!css->paused && libxl__qmp_get_replication_error(gc, dss->domid)) {
+        LOG(ERROR, "replication error occurs when primary vm is running");
+        goto out;
+    }
+
     ok = 1;
 
 out:
@@ -384,12 +406,40 @@  static void colo_preresume_cb(libxl__egc *egc,
         goto out;
     }
 
+    if (css->qdisk_used && !css->qdisk_setuped) {
+        if (libxl__qmp_start_replication(gc, dss->domid, true)) {
+            LOG(ERROR, "starting replication fails");
+            goto out;
+        }
+        css->qdisk_setuped = true;
+    }
+
+    if (!css->paused) {
+        if (libxl__qmp_do_checkpoint(gc, dss->domid)) {
+            LOG(ERROR, "doing checkpoint fails");
+            goto out;
+        }
+    }
+
     /* Resumes the domain and the device model */
     if (libxl__domain_resume(gc, dss->domid, /* Fast Suspend */1)) {
         LOG(ERROR, "cannot resume primary vm");
         goto out;
     }
 
+    /*
+     * The guest should be paused before doing colo because there is
+     * no disk migration.
+     */
+    if (css->paused) {
+        rc = libxl_domain_unpause(CTX, dss->domid);
+        if (rc) {
+            LOG(ERROR, "cannot unpause primary vm");
+            goto out;
+        }
+        css->paused = false;
+    }
+
     /* read CHECKPOINT_SVM_RESUMED */
     css->callback = colo_read_svm_resumed_done;
     css->srs.checkpoint_callback = colo_common_read_stream_done;
diff --git a/tools/libxl/libxl_internal.h b/tools/libxl/libxl_internal.h
index 227b1d3..3af5fdd 100644
--- a/tools/libxl/libxl_internal.h
+++ b/tools/libxl/libxl_internal.h
@@ -2902,6 +2902,8 @@  int init_subkind_nic(libxl__checkpoint_devices_state *cds);
 void cleanup_subkind_nic(libxl__checkpoint_devices_state *cds);
 int init_subkind_drbd_disk(libxl__checkpoint_devices_state *cds);
 void cleanup_subkind_drbd_disk(libxl__checkpoint_devices_state *cds);
+int init_subkind_qdisk(libxl__checkpoint_devices_state *cds);
+void cleanup_subkind_qdisk(libxl__checkpoint_devices_state *cds);
 
 typedef void libxl__checkpoint_callback(libxl__egc *,
                                         libxl__checkpoint_devices_state *,
@@ -3119,6 +3121,11 @@  struct libxl__colo_save_state {
     libxl__stream_read_state srs;
     void (*callback)(libxl__egc *, libxl__colo_save_state *, int);
     bool svm_running;
+    bool paused;
+
+    /* private, used by qdisk block replication */
+    bool qdisk_used;
+    bool qdisk_setuped;
 };
 
 /*----- Domain suspend (save) state structure -----*/
@@ -3522,6 +3529,12 @@  struct libxl__colo_restore_state {
     /* private, colo restore checkpoint state */
     libxl__domain_create_cb *saved_cb;
     void *crcs;
+
+    /* private, used by qdisk block replication */
+    bool qdisk_used;
+    bool qdisk_setuped;
+    const char *host;
+    const char *port;
 };
 
 struct libxl__domain_create_state {