diff mbox

[v13,24/26] setup and control colo proxy on primary side

Message ID 1458888273-7469-25-git-send-email-xiecl.fnst@cn.fujitsu.com (mailing list archive)
State New, archived
Headers show

Commit Message

Changlong Xie March 25, 2016, 6:44 a.m. UTC
From: Wen Congyang <wency@cn.fujitsu.com>

Signed-off-by: Yang Hongyang <hongyang.yang@easystack.cn>
Signed-off-by: Wen Congyang <wency@cn.fujitsu.com>
Signed-off-by: Changlong Xie <xiecl.fnst@cn.fujitsu.com>
Acked-by: Ian Jackson <ian.jackson@eu.citrix.com>
---
 tools/libxl/libxl_colo.h            | 25 ++++++++++
 tools/libxl/libxl_colo_save.c       | 94 +++++++++++++++++++++++++++++++++----
 tools/libxl/libxl_internal.h        |  1 +
 tools/libxl/libxl_remus_disk_drbd.c | 38 ++-------------
 4 files changed, 115 insertions(+), 43 deletions(-)
diff mbox

Patch

diff --git a/tools/libxl/libxl_colo.h b/tools/libxl/libxl_colo.h
index 5fbb659..30fd1dc 100644
--- a/tools/libxl/libxl_colo.h
+++ b/tools/libxl/libxl_colo.h
@@ -26,6 +26,31 @@  struct libxl__checkpoint_devices_state;
 /* Consistent with the new COLO netlink channel in kernel side */
 #define NETLINK_COLO 28
 
+/* Maximum time (5s) to wait for a colo proxy checkpoint */
+#define COLO_PROXY_CHECKPOINT_TIMEOUT 5000000
+
+#define ASYNC_CALL(egc, ao, child, param, func, callback) do {          \
+    int pid = -1;                                                       \
+    STATE_AO_GC(ao);                                                    \
+                                                                        \
+    pid = libxl__ev_child_fork(gc, child, callback);                    \
+    if (pid == -1) {                                                    \
+        LOG(ERROR, "unable to fork");                                   \
+        goto out;                                                       \
+    }                                                                   \
+                                                                        \
+    if (!pid) {                                                         \
+        /* child */                                                     \
+        func(param);                                                    \
+        /* notreached */                                                \
+        abort();                                                        \
+    }                                                                   \
+                                                                        \
+    return;                                                             \
+out:                                                                    \
+    callback(egc, child, -1, 1);                                        \
+} while (0)
+
 enum {
     LIBXL_COLO_SETUPED,
     LIBXL_COLO_SUSPENDED,
diff --git a/tools/libxl/libxl_colo_save.c b/tools/libxl/libxl_colo_save.c
index d73e632..e2fdc4b 100644
--- a/tools/libxl/libxl_colo_save.c
+++ b/tools/libxl/libxl_colo_save.c
@@ -18,9 +18,11 @@ 
 
 #include "libxl_internal.h"
 
+extern const libxl__checkpoint_device_instance_ops colo_save_device_nic;
 extern const libxl__checkpoint_device_instance_ops colo_save_device_qdisk;
 
 static const libxl__checkpoint_device_instance_ops *colo_ops[] = {
+    &colo_save_device_nic,
     &colo_save_device_qdisk,
     NULL,
 };
@@ -33,9 +35,15 @@  static int init_device_subkind(libxl__checkpoint_devices_state *cds)
     int rc;
     STATE_AO_GC(cds->ao);
 
-    rc = init_subkind_qdisk(cds);
+    rc = init_subkind_colo_nic(cds);
     if (rc) goto out;
 
+    rc = init_subkind_qdisk(cds);
+    if (rc) {
+        cleanup_subkind_colo_nic(cds);
+        goto out;
+    }
+
     rc = 0;
 out:
     return rc;
@@ -46,6 +54,7 @@  static void cleanup_device_subkind(libxl__checkpoint_devices_state *cds)
     /* cleanup device subkind-specific state in the libxl ctx */
     STATE_AO_GC(cds->ao);
 
+    cleanup_subkind_colo_nic(cds);
     cleanup_subkind_qdisk(cds);
 }
 
@@ -91,9 +100,16 @@  void libxl__colo_save_setup(libxl__egc *egc, libxl__colo_save_state *css)
     css->paused = true;
     css->qdisk_setuped = false;
     css->qdisk_used = false;
+    libxl__ev_child_init(&css->child);
+
+    if (dss->remus->netbufscript)
+        css->colo_proxy_script = libxl__strdup(gc, dss->remus->netbufscript);
+    else
+        css->colo_proxy_script = GCSPRINTF("%s/colo-proxy-setup",
+                                           libxl__xen_script_dir_path());
 
-    /* TODO: nic support */
-    cds->device_kind_flags = (1 << LIBXL__DEVICE_KIND_VBD);
+    cds->device_kind_flags = (1 << LIBXL__DEVICE_KIND_VIF) |
+                             (1 << LIBXL__DEVICE_KIND_VBD);
     cds->ops = colo_ops;
     cds->callback = colo_save_setup_done;
     cds->ao = ao;
@@ -104,6 +120,12 @@  void libxl__colo_save_setup(libxl__egc *egc, libxl__colo_save_state *css)
     css->srs.fd = css->recv_fd;
     css->srs.back_channel = true;
     libxl__stream_read_start(egc, &css->srs);
+    css->cps.ao = ao;
+    if (colo_proxy_setup(&css->cps)) {
+        LOG(ERROR, "COLO: failed to setup colo proxy for guest with domid %u",
+            cds->domid);
+        goto out;
+    }
 
     if (init_device_subkind(cds))
         goto out;
@@ -193,6 +215,7 @@  static void colo_teardown_done(libxl__egc *egc,
     libxl__domain_save_state *dss = CONTAINER_OF(css, *dss, css);
 
     cleanup_device_subkind(cds);
+    colo_proxy_teardown(&css->cps);
     dss->callback(egc, dss, rc);
 }
 
@@ -387,6 +410,8 @@  static void colo_read_svm_ready_done(libxl__egc *egc,
         goto out;
     }
 
+    colo_proxy_preresume(&css->cps);
+
     css->svm_running = true;
     dss->cds.callback = colo_preresume_cb;
     libxl__checkpoint_devices_preresume(egc, &dss->cds);
@@ -471,6 +496,8 @@  static void colo_read_svm_resumed_done(libxl__egc *egc,
         goto out;
     }
 
+    colo_proxy_postresume(&css->cps);
+
     ok = 1;
 
 out:
@@ -479,6 +506,61 @@  out:
 
 /* ===================== colo: wait new checkpoint ===================== */
 
+static void colo_start_new_checkpoint(libxl__egc *egc,
+                                      libxl__checkpoint_devices_state *cds,
+                                      int rc);
+static void colo_proxy_async_wait_for_checkpoint(libxl__colo_save_state *css);
+static void colo_proxy_async_call_done(libxl__egc *egc,
+                                       libxl__ev_child *child,
+                                       int pid,
+                                       int status);
+
+static void colo_proxy_wait_for_checkpoint(libxl__egc *egc,
+                                           libxl__colo_save_state *css)
+{
+    libxl__domain_save_state *dss = CONTAINER_OF(css, *dss, css);
+
+    ASYNC_CALL(egc, dss->cds.ao, &css->child, css,
+               colo_proxy_async_wait_for_checkpoint,
+               colo_proxy_async_call_done);
+}
+
+static void colo_proxy_async_wait_for_checkpoint(libxl__colo_save_state *css)
+{
+    int req;
+
+    req = colo_proxy_checkpoint(&css->cps, COLO_PROXY_CHECKPOINT_TIMEOUT);
+    if (req < 0) {
+        /* an error occurred */
+        _exit(1);
+    } else if (!req) {
+        /* no checkpoint is needed, do a checkpoint every 5s */
+        _exit(0);
+    } else {
+        /* net packets are not consistent, we need to start a checkpoint */
+        _exit(0);
+    }
+}
+
+static void colo_proxy_async_call_done(libxl__egc *egc,
+                                       libxl__ev_child *child,
+                                       int pid,
+                                       int status)
+{
+    libxl__colo_save_state *css = CONTAINER_OF(child, *css, child);
+    libxl__domain_save_state *dss = CONTAINER_OF(css, *dss, css);
+
+    EGC_GC;
+
+    if (status) {
+        LOG(ERROR, "failed to wait for new checkpoint");
+        colo_start_new_checkpoint(egc, &dss->cds, ERROR_FAIL);
+        return;
+    }
+
+    colo_start_new_checkpoint(egc, &dss->cds, 0);
+}
+
 /*
  * Do the following things:
  * 1. do commit
@@ -488,9 +570,6 @@  out:
 static void colo_device_commit_cb(libxl__egc *egc,
                                   libxl__checkpoint_devices_state *cds,
                                   int rc);
-static void colo_start_new_checkpoint(libxl__egc *egc,
-                                      libxl__checkpoint_devices_state *cds,
-                                      int rc);
 
 static void libxl__colo_save_domain_wait_checkpoint_callback(void *data)
 {
@@ -520,8 +599,7 @@  static void colo_device_commit_cb(libxl__egc *egc,
         goto out;
     }
 
-    /* TODO: wait a new checkpoint */
-    colo_start_new_checkpoint(egc, cds, 0);
+    colo_proxy_wait_for_checkpoint(egc, css);
     return;
 
 out:
diff --git a/tools/libxl/libxl_internal.h b/tools/libxl/libxl_internal.h
index 759b8d0..e3c919d 100644
--- a/tools/libxl/libxl_internal.h
+++ b/tools/libxl/libxl_internal.h
@@ -3220,6 +3220,7 @@  struct libxl__colo_save_state {
 
     /* private, used by colo-proxy */
     libxl__colo_proxy_state cps;
+    libxl__ev_child child;
 };
 
 typedef struct libxl__logdirty_switch {
diff --git a/tools/libxl/libxl_remus_disk_drbd.c b/tools/libxl/libxl_remus_disk_drbd.c
index 844dd66..d08e470 100644
--- a/tools/libxl/libxl_remus_disk_drbd.c
+++ b/tools/libxl/libxl_remus_disk_drbd.c
@@ -42,38 +42,6 @@  void cleanup_subkind_drbd_disk(libxl__checkpoint_devices_state *cds)
     return;
 }
 
-/*----- helper functions, for async calls -----*/
-static void drbd_async_call(libxl__egc *egc,
-                            libxl__checkpoint_device *dev,
-                            void func(libxl__checkpoint_device *),
-                            libxl__ev_child_callback callback)
-{
-    int pid, rc;
-    libxl__ao_device *aodev = &dev->aodev;
-    STATE_AO_GC(dev->cds->ao);
-
-    /* Fork and call */
-    pid = libxl__ev_child_fork(gc, &aodev->child, callback);
-    if (pid == -1) {
-        LOG(ERROR, "unable to fork");
-        rc = ERROR_FAIL;
-        goto out;
-    }
-
-    if (!pid) {
-        /* child */
-        func(dev);
-        /* notreached */
-        abort();
-    }
-
-    return;
-
-out:
-    aodev->rc = rc;
-    aodev->callback(egc, aodev);
-}
-
 /*----- match(), setup() and teardown() -----*/
 
 /* callbacks */
@@ -213,9 +181,9 @@  static void drbd_preresume_async(libxl__checkpoint_device *dev);
 
 static void drbd_preresume(libxl__egc *egc, libxl__checkpoint_device *dev)
 {
-    STATE_AO_GC(dev->cds->ao);
-
-    drbd_async_call(egc, dev, drbd_preresume_async, checkpoint_async_call_done);
+    ASYNC_CALL(egc, dev->cds->ao, &dev->aodev.child, dev,
+               drbd_preresume_async,
+               checkpoint_async_call_done);
 }
 
 static void drbd_preresume_async(libxl__checkpoint_device *dev)