Message ID | f60b0a8e2fadaaec792e04819dfc46951842d6ba.1589193382.git.lukasstraub2@web.de (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | colo: migration related bugfixes | expand |
Reviewed-by: zhanghailiang <zhang.zhanghailiang@huawei.com> > -----Original Message----- > From: Lukas Straub [mailto:lukasstraub2@web.de] > Sent: Monday, May 11, 2020 7:11 PM > To: qemu-devel <qemu-devel@nongnu.org> > Cc: Zhanghailiang <zhang.zhanghailiang@huawei.com>; Juan Quintela > <quintela@redhat.com>; Dr. David Alan Gilbert <dgilbert@redhat.com> > Subject: [PATCH 4/6] migration/colo.c: Relaunch failover even if there was an > error > > If vmstate_loading is true, secondary_vm_do_failover will set failover status > to FAILOVER_STATUS_RELAUNCH and return success without initiating > failover. However, if there is an error during the vmstate_loading section, > failover isn't relaunched. Instead we then wait for failover on > colo_incoming_sem. > > Fix this by relaunching failover even if there was an error. Also, to make this > work properly, set vmstate_loading to false when returning during the > vmstate_loading section. > > Signed-off-by: Lukas Straub <lukasstraub2@web.de> > --- > migration/colo.c | 17 ++++++++++++----- > 1 file changed, 12 insertions(+), 5 deletions(-) > > diff --git a/migration/colo.c b/migration/colo.c index > 2947363ae5..a69782efc5 100644 > --- a/migration/colo.c > +++ b/migration/colo.c > @@ -743,6 +743,7 @@ static void > colo_incoming_process_checkpoint(MigrationIncomingState *mis, > ret = qemu_load_device_state(fb); > if (ret < 0) { > error_setg(errp, "COLO: load device state failed"); > + vmstate_loading = false; > qemu_mutex_unlock_iothread(); > return; > } > @@ -751,6 +752,7 @@ static void > colo_incoming_process_checkpoint(MigrationIncomingState *mis, > replication_get_error_all(&local_err); > if (local_err) { > error_propagate(errp, local_err); > + vmstate_loading = false; > qemu_mutex_unlock_iothread(); > return; > } > @@ -759,6 +761,7 @@ static void > colo_incoming_process_checkpoint(MigrationIncomingState *mis, > replication_do_checkpoint_all(&local_err); > if (local_err) { > error_propagate(errp, local_err); > + vmstate_loading = false; > qemu_mutex_unlock_iothread(); > return; > } > @@ -770,6 +773,7 @@ static void > colo_incoming_process_checkpoint(MigrationIncomingState *mis, > > if (local_err) { > error_propagate(errp, local_err); > + vmstate_loading = false; > qemu_mutex_unlock_iothread(); > return; > } > @@ -780,9 +784,6 @@ static void > colo_incoming_process_checkpoint(MigrationIncomingState *mis, > qemu_mutex_unlock_iothread(); > > if (failover_get_state() == FAILOVER_STATUS_RELAUNCH) { > - failover_set_state(FAILOVER_STATUS_RELAUNCH, > - FAILOVER_STATUS_NONE); > - failover_request_active(NULL); > return; > } > > @@ -881,6 +882,14 @@ void *colo_process_incoming_thread(void > *opaque) > error_report_err(local_err); > break; > } > + > + if (failover_get_state() == FAILOVER_STATUS_RELAUNCH) { > + failover_set_state(FAILOVER_STATUS_RELAUNCH, > + FAILOVER_STATUS_NONE); > + failover_request_active(NULL); > + break; > + } > + > if (failover_get_state() != FAILOVER_STATUS_NONE) { > error_report("failover request"); > break; > @@ -888,8 +897,6 @@ void *colo_process_incoming_thread(void *opaque) > } > > out: > - vmstate_loading = false; > - > /* > * There are only two reasons we can get here, some error happened > * or the user triggered failover. > -- > 2.20.1
diff --git a/migration/colo.c b/migration/colo.c index 2947363ae5..a69782efc5 100644 --- a/migration/colo.c +++ b/migration/colo.c @@ -743,6 +743,7 @@ static void colo_incoming_process_checkpoint(MigrationIncomingState *mis, ret = qemu_load_device_state(fb); if (ret < 0) { error_setg(errp, "COLO: load device state failed"); + vmstate_loading = false; qemu_mutex_unlock_iothread(); return; } @@ -751,6 +752,7 @@ static void colo_incoming_process_checkpoint(MigrationIncomingState *mis, replication_get_error_all(&local_err); if (local_err) { error_propagate(errp, local_err); + vmstate_loading = false; qemu_mutex_unlock_iothread(); return; } @@ -759,6 +761,7 @@ static void colo_incoming_process_checkpoint(MigrationIncomingState *mis, replication_do_checkpoint_all(&local_err); if (local_err) { error_propagate(errp, local_err); + vmstate_loading = false; qemu_mutex_unlock_iothread(); return; } @@ -770,6 +773,7 @@ static void colo_incoming_process_checkpoint(MigrationIncomingState *mis, if (local_err) { error_propagate(errp, local_err); + vmstate_loading = false; qemu_mutex_unlock_iothread(); return; } @@ -780,9 +784,6 @@ static void colo_incoming_process_checkpoint(MigrationIncomingState *mis, qemu_mutex_unlock_iothread(); if (failover_get_state() == FAILOVER_STATUS_RELAUNCH) { - failover_set_state(FAILOVER_STATUS_RELAUNCH, - FAILOVER_STATUS_NONE); - failover_request_active(NULL); return; } @@ -881,6 +882,14 @@ void *colo_process_incoming_thread(void *opaque) error_report_err(local_err); break; } + + if (failover_get_state() == FAILOVER_STATUS_RELAUNCH) { + failover_set_state(FAILOVER_STATUS_RELAUNCH, + FAILOVER_STATUS_NONE); + failover_request_active(NULL); + break; + } + if (failover_get_state() != FAILOVER_STATUS_NONE) { error_report("failover request"); break; @@ -888,8 +897,6 @@ void *colo_process_incoming_thread(void *opaque) } out: - vmstate_loading = false; - /* * There are only two reasons we can get here, some error happened * or the user triggered failover.
If vmstate_loading is true, secondary_vm_do_failover will set failover status to FAILOVER_STATUS_RELAUNCH and return success without initiating failover. However, if there is an error during the vmstate_loading section, failover isn't relaunched. Instead we then wait for failover on colo_incoming_sem. Fix this by relaunching failover even if there was an error. Also, to make this work properly, set vmstate_loading to false when returning during the vmstate_loading section. Signed-off-by: Lukas Straub <lukasstraub2@web.de> --- migration/colo.c | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-)