Message ID | 20190724095523.1527-2-quintela@redhat.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | migration: fix migrate_cancel problems of multifd | expand |
* Juan Quintela (quintela@redhat.com) wrote: > From: Ivan Ren <renyime@gmail.com> > > When we 'migrate_cancel' a multifd migration, live_migration thread may > go into endless loop in multifd_send_pages functions. > > Reproduce steps: > > (qemu) migrate_set_capability multifd on > (qemu) migrate -d url > (qemu) [wait a while] > (qemu) migrate_cancel > > Then may get live_migration 100% cpu usage in following stack: > > pthread_mutex_lock > qemu_mutex_lock_impl > multifd_send_pages > multifd_queue_page > ram_save_multifd_page > ram_save_target_page > ram_save_host_page > ram_find_and_save_block > ram_find_and_save_block > ram_save_iterate > qemu_savevm_state_iterate > migration_iteration_run > migration_thread > qemu_thread_start > start_thread > clone > > Signed-off-by: Ivan Ren <ivanren@tencent.com> > Message-Id: <1561468699-9819-2-git-send-email-ivanren@tencent.com> > Reviewed-by: Juan Quintela <quintela@redhat.com> > Signed-off-by: Juan Quintela <quintela@redhat.com> Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com> > --- > migration/ram.c | 36 +++++++++++++++++++++++++++++------- > 1 file changed, 29 insertions(+), 7 deletions(-) > > diff --git a/migration/ram.c b/migration/ram.c > index 2b0774c2bf..52a2d498e4 100644 > --- a/migration/ram.c > +++ b/migration/ram.c > @@ -920,7 +920,7 @@ struct { > * false. > */ > > -static void multifd_send_pages(void) > +static int multifd_send_pages(void) > { > int i; > static int next_channel; > @@ -933,6 +933,11 @@ static void multifd_send_pages(void) > p = &multifd_send_state->params[i]; > > qemu_mutex_lock(&p->mutex); > + if (p->quit) { > + error_report("%s: channel %d has already quit!", __func__, i); > + qemu_mutex_unlock(&p->mutex); > + return -1; > + } > if (!p->pending_job) { > p->pending_job++; > next_channel = (i + 1) % migrate_multifd_channels(); > @@ -951,9 +956,11 @@ static void multifd_send_pages(void) > ram_counters.transferred += transferred;; > qemu_mutex_unlock(&p->mutex); > qemu_sem_post(&p->sem); > + > + return 1; > } > > -static void multifd_queue_page(RAMBlock *block, ram_addr_t offset) > +static int multifd_queue_page(RAMBlock *block, ram_addr_t offset) > { > MultiFDPages_t *pages = multifd_send_state->pages; > > @@ -968,15 +975,19 @@ static void multifd_queue_page(RAMBlock *block, ram_addr_t offset) > pages->used++; > > if (pages->used < pages->allocated) { > - return; > + return 1; > } > } > > - multifd_send_pages(); > + if (multifd_send_pages() < 0) { > + return -1; > + } > > if (pages->block != block) { > - multifd_queue_page(block, offset); > + return multifd_queue_page(block, offset); > } > + > + return 1; > } > > static void multifd_send_terminate_threads(Error *err) > @@ -1049,7 +1060,10 @@ static void multifd_send_sync_main(void) > return; > } > if (multifd_send_state->pages->used) { > - multifd_send_pages(); > + if (multifd_send_pages() < 0) { > + error_report("%s: multifd_send_pages fail", __func__); > + return; > + } > } > for (i = 0; i < migrate_multifd_channels(); i++) { > MultiFDSendParams *p = &multifd_send_state->params[i]; > @@ -1058,6 +1072,12 @@ static void multifd_send_sync_main(void) > > qemu_mutex_lock(&p->mutex); > > + if (p->quit) { > + error_report("%s: channel %d has already quit", __func__, i); > + qemu_mutex_unlock(&p->mutex); > + return; > + } > + > p->packet_num = multifd_send_state->packet_num++; > p->flags |= MULTIFD_FLAG_SYNC; > p->pending_job++; > @@ -2033,7 +2053,9 @@ static int ram_save_page(RAMState *rs, PageSearchStatus *pss, bool last_stage) > static int ram_save_multifd_page(RAMState *rs, RAMBlock *block, > ram_addr_t offset) > { > - multifd_queue_page(block, offset); > + if (multifd_queue_page(block, offset) < 0) { > + return -1; > + } > ram_counters.normal++; > > return 1; > -- > 2.21.0 > -- Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK
diff --git a/migration/ram.c b/migration/ram.c index 2b0774c2bf..52a2d498e4 100644 --- a/migration/ram.c +++ b/migration/ram.c @@ -920,7 +920,7 @@ struct { * false. */ -static void multifd_send_pages(void) +static int multifd_send_pages(void) { int i; static int next_channel; @@ -933,6 +933,11 @@ static void multifd_send_pages(void) p = &multifd_send_state->params[i]; qemu_mutex_lock(&p->mutex); + if (p->quit) { + error_report("%s: channel %d has already quit!", __func__, i); + qemu_mutex_unlock(&p->mutex); + return -1; + } if (!p->pending_job) { p->pending_job++; next_channel = (i + 1) % migrate_multifd_channels(); @@ -951,9 +956,11 @@ static void multifd_send_pages(void) ram_counters.transferred += transferred;; qemu_mutex_unlock(&p->mutex); qemu_sem_post(&p->sem); + + return 1; } -static void multifd_queue_page(RAMBlock *block, ram_addr_t offset) +static int multifd_queue_page(RAMBlock *block, ram_addr_t offset) { MultiFDPages_t *pages = multifd_send_state->pages; @@ -968,15 +975,19 @@ static void multifd_queue_page(RAMBlock *block, ram_addr_t offset) pages->used++; if (pages->used < pages->allocated) { - return; + return 1; } } - multifd_send_pages(); + if (multifd_send_pages() < 0) { + return -1; + } if (pages->block != block) { - multifd_queue_page(block, offset); + return multifd_queue_page(block, offset); } + + return 1; } static void multifd_send_terminate_threads(Error *err) @@ -1049,7 +1060,10 @@ static void multifd_send_sync_main(void) return; } if (multifd_send_state->pages->used) { - multifd_send_pages(); + if (multifd_send_pages() < 0) { + error_report("%s: multifd_send_pages fail", __func__); + return; + } } for (i = 0; i < migrate_multifd_channels(); i++) { MultiFDSendParams *p = &multifd_send_state->params[i]; @@ -1058,6 +1072,12 @@ static void multifd_send_sync_main(void) qemu_mutex_lock(&p->mutex); + if (p->quit) { + error_report("%s: channel %d has already quit", __func__, i); + qemu_mutex_unlock(&p->mutex); + return; + } + p->packet_num = multifd_send_state->packet_num++; p->flags |= MULTIFD_FLAG_SYNC; p->pending_job++; @@ -2033,7 +2053,9 @@ static int ram_save_page(RAMState *rs, PageSearchStatus *pss, bool last_stage) static int ram_save_multifd_page(RAMState *rs, RAMBlock *block, ram_addr_t offset) { - multifd_queue_page(block, offset); + if (multifd_queue_page(block, offset) < 0) { + return -1; + } ram_counters.normal++; return 1;