diff mbox

[COLO-Frame,v16,16/35] COLO: synchronize PVM's state to SVM periodically

Message ID 570CF1C4.5000408@huawei.com (mailing list archive)
State New, archived
Headers show

Commit Message

Zhanghailiang April 12, 2016, 1:01 p.m. UTC
On 2016/4/12 11:02, Li Zhijian wrote:
>
>
> On 04/08/2016 02:26 PM, zhanghailiang wrote:
>> Do checkpoint periodically, the default interval is 200ms.
>>
>> Signed-off-by: zhanghailiang <zhang.zhanghailiang@huawei.com>
>> Signed-off-by: Li Zhijian <lizhijian@cn.fujitsu.com>
>> Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
>> ---
>> v12:
>> - Add Reviewed-by tag
>> v11:
>> - Fix wrong sleep time for checkpoint period. (Dave's comment)
>> ---
>>    migration/colo.c | 12 ++++++++++++
>>    1 file changed, 12 insertions(+)
>>
>> diff --git a/migration/colo.c b/migration/colo.c
>> index 4dae069..4e3b39f 100644
>> --- a/migration/colo.c
>> +++ b/migration/colo.c
>> @@ -11,6 +11,7 @@
>>     */
>>
>>    #include "qemu/osdep.h"
>> +#include "qemu/timer.h"
>>    #include "sysemu/sysemu.h"
>>    #include "migration/colo.h"
>>    #include "trace.h"
>> @@ -231,6 +232,7 @@ out:
>>    static void colo_process_checkpoint(MigrationState *s)
>>    {
>>        QEMUSizedBuffer *buffer = NULL;
>> +    int64_t current_time, checkpoint_time = qemu_clock_get_ms(QEMU_CLOCK_HOST);
>>        Error *local_err = NULL;
>>        int ret;
>>
>> @@ -262,11 +264,21 @@ static void colo_process_checkpoint(MigrationState *s)
>>        trace_colo_vm_state_change("stop", "run");
>>
>>        while (s->state == MIGRATION_STATUS_COLO) {
>> +        current_time = qemu_clock_get_ms(QEMU_CLOCK_HOST);
>> +        if (current_time - checkpoint_time <
>> +            s->parameters[MIGRATION_PARAMETER_X_CHECKPOINT_DELAY]) {
>> +            int64_t delay_ms;
>> +
>> +            delay_ms = s->parameters[MIGRATION_PARAMETER_X_CHECKPOINT_DELAY] -
>> +                       (current_time - checkpoint_time);
>> +            g_usleep(delay_ms * 1000);
>
> Once a large value(e.g. 1000000) is set to s->parameters[MIGRATION_PARAMETER_X_CHECKPOINT_DELAY],
> that means here will sleep 1000 seconds and people can't revert this operation.
> Can we let this sleep operation more flexible ?
>

Good catch, that is really a problem, we can solve it by the follow patch,
it will simplify the delay time process in COLO and benefits the later COLO
that based on colo-proxy. the colo-proxy thread can call colo_checkpoint_notify()
directly to notify COLO thread to do checkpoint.

I will not update COLO frame with this patch for now, since it is experimental
and the related patches have been reviewed, i will keep it as a optimized
patch for later COLO. Thanks.

 From bde668d63c182540a013074a70c7a37474aedf94 Mon Sep 17 00:00:00 2001
From: zhanghailiang <zhang.zhanghailiang@huawei.com>
Date: Wed, 13 Apr 2016 01:11:09 +0800
Subject: [PATCH] COLO: use timer to notify COLO to do checkpoint

Signed-off-by: zhanghailiang <zhang.zhanghailiang@huawei.com>
---
  include/migration/colo.h      |  1 +
  include/migration/migration.h |  2 ++
  migration/colo.c              | 33 ++++++++++++++++++++-------------
  migration/migration.c         |  1 +
  4 files changed, 24 insertions(+), 13 deletions(-)
diff mbox

Patch

diff --git a/include/migration/colo.h b/include/migration/colo.h
index 87ea6d2..9f0098e 100644
--- a/include/migration/colo.h
+++ b/include/migration/colo.h
@@ -38,5 +38,6 @@  void colo_do_failover(MigrationState *s);

  bool colo_shutdown(void);
  void colo_add_buffer_filter(Notifier *notifier, void *data);
+void colo_checkpoint_notify(void *opaque);

  #endif
diff --git a/include/migration/migration.h b/include/migration/migration.h
index 1009918..fed5a14 100644
--- a/include/migration/migration.h
+++ b/include/migration/migration.h
@@ -181,6 +181,8 @@  struct MigrationState
      RAMBlock *last_req_rb;

      QemuSemaphore colo_sem;
+    int64_t checkpoint_time;
+    QEMUTimer *delay_timer;
  };

  void migrate_set_state(int *state, int old_state, int new_state);
diff --git a/migration/colo.c b/migration/colo.c
index 56260d8..9fb1a30 100644
--- a/migration/colo.c
+++ b/migration/colo.c
@@ -474,7 +474,7 @@  void colo_add_buffer_filter(Notifier *notifier, void *data)
  static void colo_process_checkpoint(MigrationState *s)
  {
      QEMUSizedBuffer *buffer = NULL;
-    int64_t current_time, checkpoint_time = qemu_clock_get_ms(QEMU_CLOCK_HOST);
+    int64_t current_time = qemu_clock_get_ms(QEMU_CLOCK_HOST);
      Error *local_err = NULL;
      int ret;

@@ -528,29 +528,22 @@  static void colo_process_checkpoint(MigrationState *s)
      if (ret < 0) {
          goto out;
      }
-
+    timer_mod(s->delay_timer,
+        current_time + s->parameters[MIGRATION_PARAMETER_X_CHECKPOINT_DELAY]);
      while (s->state == MIGRATION_STATUS_COLO) {
          if (failover_request_is_active()) {
              error_report("failover request");
              goto out;
          }

-        current_time = qemu_clock_get_ms(QEMU_CLOCK_HOST);
-        if ((current_time - checkpoint_time <
-            s->parameters[MIGRATION_PARAMETER_X_CHECKPOINT_DELAY]) &&
-            !colo_shutdown_requested) {
-            int64_t delay_ms;
-
-            delay_ms = s->parameters[MIGRATION_PARAMETER_X_CHECKPOINT_DELAY] -
-                       (current_time - checkpoint_time);
-            g_usleep(delay_ms * 1000);
+        if (!colo_shutdown_requested) {
+            qemu_sem_wait(&s->colo_sem);
          }
          /* start a colo checkpoint */
          ret = colo_do_checkpoint_transaction(s, buffer);
          if (ret < 0) {
              goto out;
          }
-        checkpoint_time = qemu_clock_get_ms(QEMU_CLOCK_HOST);
      }

  out:
@@ -572,7 +565,7 @@  out:

      qsb_free(buffer);
      buffer = NULL;
-
+    timer_del(s->delay_timer);
      /* Hope this not to be too long to wait here */
      qemu_sem_wait(&s->colo_sem);
      qemu_sem_destroy(&s->colo_sem);
@@ -586,12 +579,26 @@  out:
      }
  }

+void colo_checkpoint_notify(void *opaque)
+{
+    MigrationState *s = opaque;
+    int64_t next_notify_time;
+
+    qemu_sem_post(&s->colo_sem);
+    s->checkpoint_time = qemu_clock_get_ms(QEMU_CLOCK_HOST);
+    next_notify_time = s->checkpoint_time +
+            s->parameters[MIGRATION_PARAMETER_X_CHECKPOINT_DELAY];
+    timer_mod(s->delay_timer, next_notify_time);
+}
+
  void migrate_start_colo_process(MigrationState *s)
  {
      qemu_mutex_unlock_iothread();
      qemu_sem_init(&s->colo_sem, 0);
      migrate_set_state(&s->state, MIGRATION_STATUS_ACTIVE,
                        MIGRATION_STATUS_COLO);
+    s->delay_timer =  timer_new_ms(QEMU_CLOCK_HOST, colo_checkpoint_notify,
+                                   s);
      colo_process_checkpoint(s);
      qemu_mutex_lock_iothread();
  }
diff --git a/migration/migration.c b/migration/migration.c
index 3bceecc..8907075 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -825,6 +825,7 @@  void qmp_migrate_set_parameters(bool has_compress_level,
      if (has_x_checkpoint_delay) {
          s->parameters[MIGRATION_PARAMETER_X_CHECKPOINT_DELAY] =
                                                      x_checkpoint_delay;
+        colo_checkpoint_notify(s);
      }
  }