Message ID | 1533562177-16447-12-git-send-email-lidongchen@tencent.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | Enable postcopy RDMA live migration | expand |
* Lidong Chen (jemmy858585@gmail.com) wrote: > ibv_dereg_mr wait for a long time for big memory size virtual server. > > The test result is: > 10GB 326ms > 20GB 699ms > 30GB 1021ms > 40GB 1387ms > 50GB 1712ms > 60GB 2034ms > 70GB 2457ms > 80GB 2807ms > 90GB 3107ms > 100GB 3474ms > 110GB 3735ms > 120GB 4064ms > 130GB 4567ms > 140GB 4886ms > > this will cause the guest os hang for a while when migration finished. > So create a dedicated thread to release rdma resource. > > Signed-off-by: Lidong Chen <lidongchen@tencent.com> > --- > migration/migration.c | 6 ++++++ > migration/migration.h | 3 +++ > migration/rdma.c | 47 +++++++++++++++++++++++++++++++---------------- > 3 files changed, 40 insertions(+), 16 deletions(-) > > diff --git a/migration/migration.c b/migration/migration.c > index f7d6e26..25d9009 100644 > --- a/migration/migration.c > +++ b/migration/migration.c > @@ -1499,6 +1499,7 @@ void migrate_init(MigrationState *s) > s->vm_was_running = false; > s->iteration_initial_bytes = 0; > s->threshold_size = 0; > + s->rdma_cleanup_thread_quit = true; > } > > static GSList *migration_blockers; > @@ -1660,6 +1661,10 @@ static bool migrate_prepare(MigrationState *s, bool blk, bool blk_inc, > return false; > } > > + if (s->rdma_cleanup_thread_quit != true) { > + return false; > + } That's not good! We error out without saying anything to the user about why; if we error we must always say why, otherwise we'll get bug reports from people without any way to debug them. However, we also don't need to; all we need to do is turn this into a semaphore or similar, and then wait for it at the start of 'rdma_start_outgoing_migration' - that way there's no error exit, it just waits a few seconds and then carries on correctly. (Maybe we need to wait in incoming as well to cope with postcopy-recovery). 
Dave > if (runstate_check(RUN_STATE_INMIGRATE)) { > error_setg(errp, "Guest is waiting for an incoming migration"); > return false; > @@ -3213,6 +3218,7 @@ static void migration_instance_init(Object *obj) > > ms->state = MIGRATION_STATUS_NONE; > ms->mbps = -1; > + ms->rdma_cleanup_thread_quit = true; > qemu_sem_init(&ms->pause_sem, 0); > qemu_mutex_init(&ms->error_mutex); > > diff --git a/migration/migration.h b/migration/migration.h > index 64a7b33..60138dd 100644 > --- a/migration/migration.h > +++ b/migration/migration.h > @@ -224,6 +224,9 @@ struct MigrationState > * do not trigger spurious decompression errors. > */ > bool decompress_error_check; > + > + /* Set this when rdma resource have released */ > + bool rdma_cleanup_thread_quit; > }; > > void migrate_set_state(int *state, int old_state, int new_state); > diff --git a/migration/rdma.c b/migration/rdma.c > index e1498f2..3282f35 100644 > --- a/migration/rdma.c > +++ b/migration/rdma.c > @@ -2995,35 +2995,50 @@ static void qio_channel_rdma_set_aio_fd_handler(QIOChannel *ioc, > } > } > > -static int qio_channel_rdma_close(QIOChannel *ioc, > - Error **errp) > +static void *qio_channel_rdma_close_thread(void *arg) > { > - QIOChannelRDMA *rioc = QIO_CHANNEL_RDMA(ioc); > - RDMAContext *rdmain, *rdmaout; > - trace_qemu_rdma_close(); > - > - rdmain = rioc->rdmain; > - if (rdmain) { > - atomic_rcu_set(&rioc->rdmain, NULL); > - } > + RDMAContext **rdma = arg; > + RDMAContext *rdmain = rdma[0]; > + RDMAContext *rdmaout = rdma[1]; > + MigrationState *s = migrate_get_current(); > > - rdmaout = rioc->rdmaout; > - if (rdmaout) { > - atomic_rcu_set(&rioc->rdmaout, NULL); > - } > + rcu_register_thread(); > > synchronize_rcu(); > - > if (rdmain) { > qemu_rdma_cleanup(rdmain); > } > - > if (rdmaout) { > qemu_rdma_cleanup(rdmaout); > } > > g_free(rdmain); > g_free(rdmaout); > + g_free(rdma); > + > + rcu_unregister_thread(); > + s->rdma_cleanup_thread_quit = true; > + return NULL; > +} > + > +static int 
qio_channel_rdma_close(QIOChannel *ioc, > + Error **errp) > +{ > + QemuThread t; > + QIOChannelRDMA *rioc = QIO_CHANNEL_RDMA(ioc); > + RDMAContext **rdma = g_new0(RDMAContext*, 2); > + MigrationState *s = migrate_get_current(); > + > + trace_qemu_rdma_close(); > + if (rioc->rdmain || rioc->rdmaout) { > + rdma[0] = rioc->rdmain; > + rdma[1] = rioc->rdmaout; > + atomic_rcu_set(&rioc->rdmain, NULL); > + atomic_rcu_set(&rioc->rdmaout, NULL); > + s->rdma_cleanup_thread_quit = false; > + qemu_thread_create(&t, "rdma cleanup", qio_channel_rdma_close_thread, > + rdma, QEMU_THREAD_DETACHED); > + } > > return 0; > } > -- > 1.8.3.1 > -- Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK
On Fri, Aug 17, 2018 at 10:59 PM, Dr. David Alan Gilbert <dgilbert@redhat.com> wrote: > * Lidong Chen (jemmy858585@gmail.com) wrote: >> ibv_dereg_mr wait for a long time for big memory size virtual server. >> >> The test result is: >> 10GB 326ms >> 20GB 699ms >> 30GB 1021ms >> 40GB 1387ms >> 50GB 1712ms >> 60GB 2034ms >> 70GB 2457ms >> 80GB 2807ms >> 90GB 3107ms >> 100GB 3474ms >> 110GB 3735ms >> 120GB 4064ms >> 130GB 4567ms >> 140GB 4886ms >> >> this will cause the guest os hang for a while when migration finished. >> So create a dedicated thread to release rdma resource. >> >> Signed-off-by: Lidong Chen <lidongchen@tencent.com> >> --- >> migration/migration.c | 6 ++++++ >> migration/migration.h | 3 +++ >> migration/rdma.c | 47 +++++++++++++++++++++++++++++++---------------- >> 3 files changed, 40 insertions(+), 16 deletions(-) >> >> diff --git a/migration/migration.c b/migration/migration.c >> index f7d6e26..25d9009 100644 >> --- a/migration/migration.c >> +++ b/migration/migration.c >> @@ -1499,6 +1499,7 @@ void migrate_init(MigrationState *s) >> s->vm_was_running = false; >> s->iteration_initial_bytes = 0; >> s->threshold_size = 0; >> + s->rdma_cleanup_thread_quit = true; >> } >> >> static GSList *migration_blockers; >> @@ -1660,6 +1661,10 @@ static bool migrate_prepare(MigrationState *s, bool blk, bool blk_inc, >> return false; >> } >> >> + if (s->rdma_cleanup_thread_quit != true) { >> + return false; >> + } > > That's not good! We error out without saying anything to the user > about why; if we error we must always say why, otherwise we'll get > bug reports from people without anyway to debug them. Yes, it should give some error message. > > However, we also don't need to; all we need to do is turn this into > a semaphore or similar, and then wait for it at the start of > 'rdma_start_outgoing_migration' - that way there's no error exit, it > just waits a few seconds and then carries on correctly. 
> (Maybe we need to wait in incoming as well to cope with > postcopy-recovery). Yes, this should also consider the postcopy-recovery case. I will fix it. But rdma_start_outgoing_migration is invoked in the main thread, so it will be a little complex to wait there. And if it waits for a long time, it's not easy to troubleshoot. Maybe reporting some error message is an easy way to fix it? Thanks. > > Dave > >> if (runstate_check(RUN_STATE_INMIGRATE)) { >> error_setg(errp, "Guest is waiting for an incoming migration"); >> return false; >> @@ -3213,6 +3218,7 @@ static void migration_instance_init(Object *obj) >> >> ms->state = MIGRATION_STATUS_NONE; >> ms->mbps = -1; >> + ms->rdma_cleanup_thread_quit = true; >> qemu_sem_init(&ms->pause_sem, 0); >> qemu_mutex_init(&ms->error_mutex); >> >> diff --git a/migration/migration.h b/migration/migration.h >> index 64a7b33..60138dd 100644 >> --- a/migration/migration.h >> +++ b/migration/migration.h >> @@ -224,6 +224,9 @@ struct MigrationState >> * do not trigger spurious decompression errors. 
>> */ >> bool decompress_error_check; >> + >> + /* Set this when rdma resource have released */ >> + bool rdma_cleanup_thread_quit; >> }; >> >> void migrate_set_state(int *state, int old_state, int new_state); >> diff --git a/migration/rdma.c b/migration/rdma.c >> index e1498f2..3282f35 100644 >> --- a/migration/rdma.c >> +++ b/migration/rdma.c >> @@ -2995,35 +2995,50 @@ static void qio_channel_rdma_set_aio_fd_handler(QIOChannel *ioc, >> } >> } >> >> -static int qio_channel_rdma_close(QIOChannel *ioc, >> - Error **errp) >> +static void *qio_channel_rdma_close_thread(void *arg) >> { >> - QIOChannelRDMA *rioc = QIO_CHANNEL_RDMA(ioc); >> - RDMAContext *rdmain, *rdmaout; >> - trace_qemu_rdma_close(); >> - >> - rdmain = rioc->rdmain; >> - if (rdmain) { >> - atomic_rcu_set(&rioc->rdmain, NULL); >> - } >> + RDMAContext **rdma = arg; >> + RDMAContext *rdmain = rdma[0]; >> + RDMAContext *rdmaout = rdma[1]; >> + MigrationState *s = migrate_get_current(); >> >> - rdmaout = rioc->rdmaout; >> - if (rdmaout) { >> - atomic_rcu_set(&rioc->rdmaout, NULL); >> - } >> + rcu_register_thread(); >> >> synchronize_rcu(); >> - >> if (rdmain) { >> qemu_rdma_cleanup(rdmain); >> } >> - >> if (rdmaout) { >> qemu_rdma_cleanup(rdmaout); >> } >> >> g_free(rdmain); >> g_free(rdmaout); >> + g_free(rdma); >> + >> + rcu_unregister_thread(); >> + s->rdma_cleanup_thread_quit = true; >> + return NULL; >> +} >> + >> +static int qio_channel_rdma_close(QIOChannel *ioc, >> + Error **errp) >> +{ >> + QemuThread t; >> + QIOChannelRDMA *rioc = QIO_CHANNEL_RDMA(ioc); >> + RDMAContext **rdma = g_new0(RDMAContext*, 2); >> + MigrationState *s = migrate_get_current(); >> + >> + trace_qemu_rdma_close(); >> + if (rioc->rdmain || rioc->rdmaout) { >> + rdma[0] = rioc->rdmain; >> + rdma[1] = rioc->rdmaout; >> + atomic_rcu_set(&rioc->rdmain, NULL); >> + atomic_rcu_set(&rioc->rdmaout, NULL); >> + s->rdma_cleanup_thread_quit = false; >> + qemu_thread_create(&t, "rdma cleanup", qio_channel_rdma_close_thread, >> + rdma, 
QEMU_THREAD_DETACHED); >> + } >> >> return 0; >> } >> -- >> 1.8.3.1 >> > -- > Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK
diff --git a/migration/migration.c b/migration/migration.c index f7d6e26..25d9009 100644 --- a/migration/migration.c +++ b/migration/migration.c @@ -1499,6 +1499,7 @@ void migrate_init(MigrationState *s) s->vm_was_running = false; s->iteration_initial_bytes = 0; s->threshold_size = 0; + s->rdma_cleanup_thread_quit = true; } static GSList *migration_blockers; @@ -1660,6 +1661,10 @@ static bool migrate_prepare(MigrationState *s, bool blk, bool blk_inc, return false; } + if (s->rdma_cleanup_thread_quit != true) { + return false; + } + if (runstate_check(RUN_STATE_INMIGRATE)) { error_setg(errp, "Guest is waiting for an incoming migration"); return false; @@ -3213,6 +3218,7 @@ static void migration_instance_init(Object *obj) ms->state = MIGRATION_STATUS_NONE; ms->mbps = -1; + ms->rdma_cleanup_thread_quit = true; qemu_sem_init(&ms->pause_sem, 0); qemu_mutex_init(&ms->error_mutex); diff --git a/migration/migration.h b/migration/migration.h index 64a7b33..60138dd 100644 --- a/migration/migration.h +++ b/migration/migration.h @@ -224,6 +224,9 @@ struct MigrationState * do not trigger spurious decompression errors. 
*/ bool decompress_error_check; + + /* Set this when rdma resource have released */ + bool rdma_cleanup_thread_quit; }; void migrate_set_state(int *state, int old_state, int new_state); diff --git a/migration/rdma.c b/migration/rdma.c index e1498f2..3282f35 100644 --- a/migration/rdma.c +++ b/migration/rdma.c @@ -2995,35 +2995,50 @@ static void qio_channel_rdma_set_aio_fd_handler(QIOChannel *ioc, } } -static int qio_channel_rdma_close(QIOChannel *ioc, - Error **errp) +static void *qio_channel_rdma_close_thread(void *arg) { - QIOChannelRDMA *rioc = QIO_CHANNEL_RDMA(ioc); - RDMAContext *rdmain, *rdmaout; - trace_qemu_rdma_close(); - - rdmain = rioc->rdmain; - if (rdmain) { - atomic_rcu_set(&rioc->rdmain, NULL); - } + RDMAContext **rdma = arg; + RDMAContext *rdmain = rdma[0]; + RDMAContext *rdmaout = rdma[1]; + MigrationState *s = migrate_get_current(); - rdmaout = rioc->rdmaout; - if (rdmaout) { - atomic_rcu_set(&rioc->rdmaout, NULL); - } + rcu_register_thread(); synchronize_rcu(); - if (rdmain) { qemu_rdma_cleanup(rdmain); } - if (rdmaout) { qemu_rdma_cleanup(rdmaout); } g_free(rdmain); g_free(rdmaout); + g_free(rdma); + + rcu_unregister_thread(); + s->rdma_cleanup_thread_quit = true; + return NULL; +} + +static int qio_channel_rdma_close(QIOChannel *ioc, + Error **errp) +{ + QemuThread t; + QIOChannelRDMA *rioc = QIO_CHANNEL_RDMA(ioc); + RDMAContext **rdma = g_new0(RDMAContext*, 2); + MigrationState *s = migrate_get_current(); + + trace_qemu_rdma_close(); + if (rioc->rdmain || rioc->rdmaout) { + rdma[0] = rioc->rdmain; + rdma[1] = rioc->rdmaout; + atomic_rcu_set(&rioc->rdmain, NULL); + atomic_rcu_set(&rioc->rdmaout, NULL); + s->rdma_cleanup_thread_quit = false; + qemu_thread_create(&t, "rdma cleanup", qio_channel_rdma_close_thread, + rdma, QEMU_THREAD_DETACHED); + } return 0; }
ibv_dereg_mr wait for a long time for big memory size virtual server. The test result is: 10GB 326ms 20GB 699ms 30GB 1021ms 40GB 1387ms 50GB 1712ms 60GB 2034ms 70GB 2457ms 80GB 2807ms 90GB 3107ms 100GB 3474ms 110GB 3735ms 120GB 4064ms 130GB 4567ms 140GB 4886ms this will cause the guest os hang for a while when migration finished. So create a dedicated thread to release rdma resource. Signed-off-by: Lidong Chen <lidongchen@tencent.com> --- migration/migration.c | 6 ++++++ migration/migration.h | 3 +++ migration/rdma.c | 47 +++++++++++++++++++++++++++++++---------------- 3 files changed, 40 insertions(+), 16 deletions(-)