| Message ID | 1527673416-31268-11-git-send-email-lidongchen@tencent.com (mailing list archive) |
|---|---|
| State | New, archived |
* Lidong Chen (jemmy858585@gmail.com) wrote:
> ibv_dereg_mr waits for a long time for virtual servers with a large
> memory size.
>
> The test result is:
>   10GB   326ms
>   20GB   699ms
>   30GB  1021ms
>   40GB  1387ms
>   50GB  1712ms
>   60GB  2034ms
>   70GB  2457ms
>   80GB  2807ms
>   90GB  3107ms
>  100GB  3474ms
>  110GB  3735ms
>  120GB  4064ms
>  130GB  4567ms
>  140GB  4886ms
>
> This causes the guest OS to hang for a while when migration finishes,
> so create a dedicated thread to release the RDMA resources.
>
> Signed-off-by: Lidong Chen <lidongchen@tencent.com>
> ---
>  migration/rdma.c | 21 +++++++++++++++++----
>  1 file changed, 17 insertions(+), 4 deletions(-)
>
> diff --git a/migration/rdma.c b/migration/rdma.c
> index dfa4f77..1b9e261 100644
> --- a/migration/rdma.c
> +++ b/migration/rdma.c
> @@ -2979,12 +2979,12 @@ static void qio_channel_rdma_set_aio_fd_handler(QIOChannel *ioc,
>      }
>  }
>
> -static int qio_channel_rdma_close(QIOChannel *ioc,
> -                                  Error **errp)
> +static void *qio_channel_rdma_close_thread(void *arg)
>  {
> -    QIOChannelRDMA *rioc = QIO_CHANNEL_RDMA(ioc);
> +    QIOChannelRDMA *rioc = arg;
>      RDMAContext *rdmain, *rdmaout;
> -    trace_qemu_rdma_close();
> +
> +    rcu_register_thread();
>
>      rdmain = rioc->rdmain;
>      if (rdmain) {
> @@ -3009,6 +3009,19 @@ static int qio_channel_rdma_close(QIOChannel *ioc,
>      g_free(rdmain);
>      g_free(rdmaout);
>
> +    rcu_unregister_thread();
> +    return NULL;
> +}
> +
> +static int qio_channel_rdma_close(QIOChannel *ioc,
> +                                  Error **errp)
> +{
> +    QemuThread t;
> +    QIOChannelRDMA *rioc = QIO_CHANNEL_RDMA(ioc);
> +    trace_qemu_rdma_close();
> +
> +    qemu_thread_create(&t, "rdma cleanup", qio_channel_rdma_close_thread,
> +                       rioc, QEMU_THREAD_DETACHED);

I don't think this can be this simple; consider the lock in patch 4:
that lock now means qio_channel_rdma_close() can't be called in
parallel, but with this change it means:

  f->lock
  qemu_thread_create (1)
  !f->lock
  f->lock
  qemu_thread_create
  !f->lock

so we don't really protect the thing you were trying to lock.

Dave

>      return 0;
>  }
>
> --
> 1.8.3.1
>
--
Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK
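[Editor's note] To make the interleaving Dave sketches concrete, here is a minimal standalone illustration (plain pthreads, not QEMU's thread wrappers, and hypothetical names throughout): the mutex serializes only the *creation* of the detached cleanup thread, not the cleanup work itself, so the cleanups from two close() calls can still run concurrently.

```c
#include <pthread.h>
#include <stdio.h>
#include <unistd.h>

static pthread_mutex_t f_lock = PTHREAD_MUTEX_INITIALIZER;

/* The worker runs after its creator has already dropped f_lock, so
 * nothing stops two workers from touching the same state at once. */
static void *cleanup_worker(void *arg)
{
    printf("cleanup for caller %ld\n", (long)arg);
    return NULL;
}

static void close_channel(long caller_id)
{
    pthread_t t;
    pthread_attr_t attr;

    pthread_attr_init(&attr);
    pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED);

    pthread_mutex_lock(&f_lock);                  /*  f->lock */
    pthread_create(&t, &attr, cleanup_worker, (void *)caller_id);
    pthread_mutex_unlock(&f_lock);                /* !f->lock */
}

int main(void)
{
    close_channel(1);
    close_channel(2);   /* both cleanups may now interleave freely */
    sleep(1);           /* give the detached workers time to run */
    return 0;
}
```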
On Thu, May 31, 2018 at 12:50 AM, Dr. David Alan Gilbert <dgilbert@redhat.com> wrote:
> * Lidong Chen (jemmy858585@gmail.com) wrote:
>> ibv_dereg_mr waits for a long time for virtual servers with a large
>> memory size.
>>
>> The test result is:
>>   10GB   326ms
>>   20GB   699ms
>>   30GB  1021ms
>>   40GB  1387ms
>>   50GB  1712ms
>>   60GB  2034ms
>>   70GB  2457ms
>>   80GB  2807ms
>>   90GB  3107ms
>>  100GB  3474ms
>>  110GB  3735ms
>>  120GB  4064ms
>>  130GB  4567ms
>>  140GB  4886ms
>>
>> This causes the guest OS to hang for a while when migration finishes,
>> so create a dedicated thread to release the RDMA resources.
>>
>> Signed-off-by: Lidong Chen <lidongchen@tencent.com>
>> ---
>>  migration/rdma.c | 21 +++++++++++++++++----
>>  1 file changed, 17 insertions(+), 4 deletions(-)
>>
>> diff --git a/migration/rdma.c b/migration/rdma.c
>> index dfa4f77..1b9e261 100644
>> --- a/migration/rdma.c
>> +++ b/migration/rdma.c
>> @@ -2979,12 +2979,12 @@ static void qio_channel_rdma_set_aio_fd_handler(QIOChannel *ioc,
>>      }
>>  }
>>
>> -static int qio_channel_rdma_close(QIOChannel *ioc,
>> -                                  Error **errp)
>> +static void *qio_channel_rdma_close_thread(void *arg)
>>  {
>> -    QIOChannelRDMA *rioc = QIO_CHANNEL_RDMA(ioc);
>> +    QIOChannelRDMA *rioc = arg;
>>      RDMAContext *rdmain, *rdmaout;
>> -    trace_qemu_rdma_close();
>> +
>> +    rcu_register_thread();
>>
>>      rdmain = rioc->rdmain;
>>      if (rdmain) {
>> @@ -3009,6 +3009,19 @@ static int qio_channel_rdma_close(QIOChannel *ioc,
>>      g_free(rdmain);
>>      g_free(rdmaout);
>>
>> +    rcu_unregister_thread();
>> +    return NULL;
>> +}
>> +
>> +static int qio_channel_rdma_close(QIOChannel *ioc,
>> +                                  Error **errp)
>> +{
>> +    QemuThread t;
>> +    QIOChannelRDMA *rioc = QIO_CHANNEL_RDMA(ioc);
>> +    trace_qemu_rdma_close();
>> +
>> +    qemu_thread_create(&t, "rdma cleanup", qio_channel_rdma_close_thread,
>> +                       rioc, QEMU_THREAD_DETACHED);
>
> I don't think this can be this simple; consider the lock in patch 4:
> that lock now means qio_channel_rdma_close() can't be called in
> parallel, but with this change it means:
>
>   f->lock
>   qemu_thread_create (1)
>   !f->lock
>   f->lock
>   qemu_thread_create
>   !f->lock
>
> so we don't really protect the thing you were trying to lock.

Yes, I should not use rioc as the thread arg.

static int qio_channel_rdma_close(QIOChannel *ioc,
                                  Error **errp)
{
    QemuThread t;
    RDMAContext *rdma[2];
    QIOChannelRDMA *rioc = QIO_CHANNEL_RDMA(ioc);

    trace_qemu_rdma_close();
    if (rioc->rdmain || rioc->rdmaout) {
        rdma[0] = rioc->rdmain;
        rdma[1] = rioc->rdmaout;
        qemu_thread_create(&t, "rdma cleanup", qio_channel_rdma_close_thread,
                           rdma, QEMU_THREAD_DETACHED);
        rioc->rdmain = NULL;
        rioc->rdmaout = NULL;
    }
    return 0;
}

>
> Dave
>
>>      return 0;
>>  }
>>
>> --
>> 1.8.3.1
>>
> --
> Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK
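[Editor's note] The thread never shows the reworked worker side of this proposal, so the following is a hypothetical sketch of what it might look like, with qemu_rdma_cleanup() assumed to be the existing per-context teardown helper in migration/rdma.c. One C-lifetime caveat not raised at this point in the thread: rdma[2] above lives on the caller's stack, and a detached thread may run after qio_channel_rdma_close() returns, so a safe version would pass a heap-allocated copy instead.

```c
/* Hypothetical worker matching the rewritten close() above.  The
 * caller would allocate the two-pointer array on the heap, e.g.
 *     RDMAContext **rdma = g_new0(RDMAContext *, 2);
 * because a stack array could be gone before this detached thread
 * gets scheduled. */
static void *qio_channel_rdma_close_thread(void *arg)
{
    RDMAContext **rdma = arg;   /* rdma[0] = rdmain, rdma[1] = rdmaout */
    int i;

    rcu_register_thread();
    for (i = 0; i < 2; i++) {
        if (rdma[i]) {
            qemu_rdma_cleanup(rdma[i]);   /* assumed existing helper */
            g_free(rdma[i]);
        }
    }
    g_free(rdma);               /* the heap array from the caller */
    rcu_unregister_thread();
    return NULL;
}
```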
* 858585 jemmy (jemmy858585@gmail.com) wrote:
> On Thu, May 31, 2018 at 12:50 AM, Dr. David Alan Gilbert
> <dgilbert@redhat.com> wrote:
> > * Lidong Chen (jemmy858585@gmail.com) wrote:
> >> ibv_dereg_mr waits for a long time for virtual servers with a large
> >> memory size.
> >>
> >> The test result is:
> >>   10GB   326ms
> >>   20GB   699ms
> >>   30GB  1021ms
> >>   40GB  1387ms
> >>   50GB  1712ms
> >>   60GB  2034ms
> >>   70GB  2457ms
> >>   80GB  2807ms
> >>   90GB  3107ms
> >>  100GB  3474ms
> >>  110GB  3735ms
> >>  120GB  4064ms
> >>  130GB  4567ms
> >>  140GB  4886ms
> >>
> >> This causes the guest OS to hang for a while when migration finishes,
> >> so create a dedicated thread to release the RDMA resources.
> >>
> >> Signed-off-by: Lidong Chen <lidongchen@tencent.com>
> >> ---
> >>  migration/rdma.c | 21 +++++++++++++++++----
> >>  1 file changed, 17 insertions(+), 4 deletions(-)
> >>
> >> diff --git a/migration/rdma.c b/migration/rdma.c
> >> index dfa4f77..1b9e261 100644
> >> --- a/migration/rdma.c
> >> +++ b/migration/rdma.c
> >> @@ -2979,12 +2979,12 @@ static void qio_channel_rdma_set_aio_fd_handler(QIOChannel *ioc,
> >>      }
> >>  }
> >>
> >> -static int qio_channel_rdma_close(QIOChannel *ioc,
> >> -                                  Error **errp)
> >> +static void *qio_channel_rdma_close_thread(void *arg)
> >>  {
> >> -    QIOChannelRDMA *rioc = QIO_CHANNEL_RDMA(ioc);
> >> +    QIOChannelRDMA *rioc = arg;
> >>      RDMAContext *rdmain, *rdmaout;
> >> -    trace_qemu_rdma_close();
> >> +
> >> +    rcu_register_thread();
> >>
> >>      rdmain = rioc->rdmain;
> >>      if (rdmain) {
> >> @@ -3009,6 +3009,19 @@ static int qio_channel_rdma_close(QIOChannel *ioc,
> >>      g_free(rdmain);
> >>      g_free(rdmaout);
> >>
> >> +    rcu_unregister_thread();
> >> +    return NULL;
> >> +}
> >> +
> >> +static int qio_channel_rdma_close(QIOChannel *ioc,
> >> +                                  Error **errp)
> >> +{
> >> +    QemuThread t;
> >> +    QIOChannelRDMA *rioc = QIO_CHANNEL_RDMA(ioc);
> >> +    trace_qemu_rdma_close();
> >> +
> >> +    qemu_thread_create(&t, "rdma cleanup", qio_channel_rdma_close_thread,
> >> +                       rioc, QEMU_THREAD_DETACHED);
> >
> > I don't think this can be this simple; consider the lock in patch 4:
> > that lock now means qio_channel_rdma_close() can't be called in
> > parallel, but with this change it means:
> >
> >   f->lock
> >   qemu_thread_create (1)
> >   !f->lock
> >   f->lock
> >   qemu_thread_create
> >   !f->lock
> >
> > so we don't really protect the thing you were trying to lock.
>
> Yes, I should not use rioc as the thread arg.
>
> static int qio_channel_rdma_close(QIOChannel *ioc,
>                                   Error **errp)
> {
>     QemuThread t;
>     RDMAContext *rdma[2];
>     QIOChannelRDMA *rioc = QIO_CHANNEL_RDMA(ioc);
>
>     trace_qemu_rdma_close();
>     if (rioc->rdmain || rioc->rdmaout) {
>         rdma[0] = rioc->rdmain;
>         rdma[1] = rioc->rdmaout;
>         qemu_thread_create(&t, "rdma cleanup", qio_channel_rdma_close_thread,
>                            rdma, QEMU_THREAD_DETACHED);
>         rioc->rdmain = NULL;
>         rioc->rdmaout = NULL;

Is it safe to close both directions at once?
For example, if you get the close from the return path thread, might the
main thread still be using its QEMUFile in the opposite direction? It'll
call close a little bit later.

Dave

>     }
>     return 0;
> }
>
> >
> > Dave
> >
> >>      return 0;
> >>  }
> >>
> >> --
> >> 1.8.3.1
> >>
> > --
> > Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK
--
Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK
On Thu, May 31, 2018 at 6:55 PM, Dr. David Alan Gilbert <dgilbert@redhat.com> wrote:
> * 858585 jemmy (jemmy858585@gmail.com) wrote:
>> On Thu, May 31, 2018 at 12:50 AM, Dr. David Alan Gilbert
>> <dgilbert@redhat.com> wrote:
>> > * Lidong Chen (jemmy858585@gmail.com) wrote:
>> >> ibv_dereg_mr waits for a long time for virtual servers with a large
>> >> memory size.
>> >>
>> >> The test result is:
>> >>   10GB   326ms
>> >>   20GB   699ms
>> >>   30GB  1021ms
>> >>   40GB  1387ms
>> >>   50GB  1712ms
>> >>   60GB  2034ms
>> >>   70GB  2457ms
>> >>   80GB  2807ms
>> >>   90GB  3107ms
>> >>  100GB  3474ms
>> >>  110GB  3735ms
>> >>  120GB  4064ms
>> >>  130GB  4567ms
>> >>  140GB  4886ms
>> >>
>> >> This causes the guest OS to hang for a while when migration finishes,
>> >> so create a dedicated thread to release the RDMA resources.
>> >>
>> >> Signed-off-by: Lidong Chen <lidongchen@tencent.com>
>> >> ---
>> >>  migration/rdma.c | 21 +++++++++++++++++----
>> >>  1 file changed, 17 insertions(+), 4 deletions(-)
>> >>
>> >> diff --git a/migration/rdma.c b/migration/rdma.c
>> >> index dfa4f77..1b9e261 100644
>> >> --- a/migration/rdma.c
>> >> +++ b/migration/rdma.c
>> >> @@ -2979,12 +2979,12 @@ static void qio_channel_rdma_set_aio_fd_handler(QIOChannel *ioc,
>> >>      }
>> >>  }
>> >>
>> >> -static int qio_channel_rdma_close(QIOChannel *ioc,
>> >> -                                  Error **errp)
>> >> +static void *qio_channel_rdma_close_thread(void *arg)
>> >>  {
>> >> -    QIOChannelRDMA *rioc = QIO_CHANNEL_RDMA(ioc);
>> >> +    QIOChannelRDMA *rioc = arg;
>> >>      RDMAContext *rdmain, *rdmaout;
>> >> -    trace_qemu_rdma_close();
>> >> +
>> >> +    rcu_register_thread();
>> >>
>> >>      rdmain = rioc->rdmain;
>> >>      if (rdmain) {
>> >> @@ -3009,6 +3009,19 @@ static int qio_channel_rdma_close(QIOChannel *ioc,
>> >>      g_free(rdmain);
>> >>      g_free(rdmaout);
>> >>
>> >> +    rcu_unregister_thread();
>> >> +    return NULL;
>> >> +}
>> >> +
>> >> +static int qio_channel_rdma_close(QIOChannel *ioc,
>> >> +                                  Error **errp)
>> >> +{
>> >> +    QemuThread t;
>> >> +    QIOChannelRDMA *rioc = QIO_CHANNEL_RDMA(ioc);
>> >> +    trace_qemu_rdma_close();
>> >> +
>> >> +    qemu_thread_create(&t, "rdma cleanup", qio_channel_rdma_close_thread,
>> >> +                       rioc, QEMU_THREAD_DETACHED);
>> >
>> > I don't think this can be this simple; consider the lock in patch 4:
>> > that lock now means qio_channel_rdma_close() can't be called in
>> > parallel, but with this change it means:
>> >
>> >   f->lock
>> >   qemu_thread_create (1)
>> >   !f->lock
>> >   f->lock
>> >   qemu_thread_create
>> >   !f->lock
>> >
>> > so we don't really protect the thing you were trying to lock.
>>
>> Yes, I should not use rioc as the thread arg.
>>
>> static int qio_channel_rdma_close(QIOChannel *ioc,
>>                                   Error **errp)
>> {
>>     QemuThread t;
>>     RDMAContext *rdma[2];
>>     QIOChannelRDMA *rioc = QIO_CHANNEL_RDMA(ioc);
>>
>>     trace_qemu_rdma_close();
>>     if (rioc->rdmain || rioc->rdmaout) {
>>         rdma[0] = rioc->rdmain;
>>         rdma[1] = rioc->rdmaout;
>>         qemu_thread_create(&t, "rdma cleanup", qio_channel_rdma_close_thread,
>>                            rdma, QEMU_THREAD_DETACHED);
>>         rioc->rdmain = NULL;
>>         rioc->rdmaout = NULL;
>
> Is it safe to close both directions at once?
> For example, if you get the close from the return path thread, might the
> main thread still be using its QEMUFile in the opposite direction? It'll
> call close a little bit later.

I use RCU to protect this.

qio_channel_rdma_close_thread calls synchronize_rcu; it waits until no
other thread is still accessing rdmain and rdmaout.

And if the return path closes the QEMUFile, the migration thread's
QEMUFile will be set to an error soon after, because the QIOChannel is
closed.

QIOChannelSocket also works this way.

>
> Dave
>
>>     }
>>     return 0;
>> }
>>
>> >
>> > Dave
>> >
>> >>      return 0;
>> >>  }
>> >>
>> >> --
>> >> 1.8.3.1
>> >>
>> > --
>> > Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK
> --
> Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK
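[Editor's note] The RCU discipline Lidong is describing follows QEMU's usual read-side/update-side pattern. The sketch below is illustrative rather than code from the patch: the types are assumed to be those in migration/rdma.c, the error value is arbitrary, and it compiles only in-tree against QEMU's rcu.h/atomic.h. Readers pin the context inside an RCU read-side critical section; the cleanup side unpublishes the pointer and then uses synchronize_rcu() to wait out all readers before freeing.

```c
#include "qemu/rcu.h"      /* rcu_read_lock(), synchronize_rcu() */
#include "qemu/atomic.h"   /* atomic_rcu_read(), atomic_rcu_set() */

/* Reader side (e.g. a writev handler): while we hold the read lock,
 * the cleanup thread's synchronize_rcu() must wait for us. */
static ssize_t rdma_op(QIOChannelRDMA *rioc)
{
    RDMAContext *rdma;

    rcu_read_lock();
    rdma = atomic_rcu_read(&rioc->rdmaout);
    if (!rdma) {
        rcu_read_unlock();
        return -EIO;       /* channel already torn down */
    }
    /* ... use rdma safely here ... */
    rcu_read_unlock();
    return 0;
}

/* Updater side (the cleanup thread): unpublish, wait, then free. */
static void rdma_teardown(QIOChannelRDMA *rioc)
{
    RDMAContext *rdmain = rioc->rdmain;

    atomic_rcu_set(&rioc->rdmain, NULL);
    synchronize_rcu();     /* all in-flight readers have finished */
    g_free(rdmain);
}
```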
```diff
diff --git a/migration/rdma.c b/migration/rdma.c
index dfa4f77..1b9e261 100644
--- a/migration/rdma.c
+++ b/migration/rdma.c
@@ -2979,12 +2979,12 @@ static void qio_channel_rdma_set_aio_fd_handler(QIOChannel *ioc,
     }
 }
 
-static int qio_channel_rdma_close(QIOChannel *ioc,
-                                  Error **errp)
+static void *qio_channel_rdma_close_thread(void *arg)
 {
-    QIOChannelRDMA *rioc = QIO_CHANNEL_RDMA(ioc);
+    QIOChannelRDMA *rioc = arg;
     RDMAContext *rdmain, *rdmaout;
-    trace_qemu_rdma_close();
+
+    rcu_register_thread();
 
     rdmain = rioc->rdmain;
     if (rdmain) {
@@ -3009,6 +3009,19 @@ static int qio_channel_rdma_close(QIOChannel *ioc,
     g_free(rdmain);
     g_free(rdmaout);
 
+    rcu_unregister_thread();
+    return NULL;
+}
+
+static int qio_channel_rdma_close(QIOChannel *ioc,
+                                  Error **errp)
+{
+    QemuThread t;
+    QIOChannelRDMA *rioc = QIO_CHANNEL_RDMA(ioc);
+    trace_qemu_rdma_close();
+
+    qemu_thread_create(&t, "rdma cleanup", qio_channel_rdma_close_thread,
+                       rioc, QEMU_THREAD_DETACHED);
     return 0;
 }
```
ibv_dereg_mr waits for a long time for virtual servers with a large
memory size.

The test result is:

  10GB   326ms
  20GB   699ms
  30GB  1021ms
  40GB  1387ms
  50GB  1712ms
  60GB  2034ms
  70GB  2457ms
  80GB  2807ms
  90GB  3107ms
 100GB  3474ms
 110GB  3735ms
 120GB  4064ms
 130GB  4567ms
 140GB  4886ms

This causes the guest OS to hang for a while when migration finishes,
so create a dedicated thread to release the RDMA resources.

Signed-off-by: Lidong Chen <lidongchen@tencent.com>
---
 migration/rdma.c | 21 +++++++++++++++++----
 1 file changed, 17 insertions(+), 4 deletions(-)
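[Editor's note] Per-size deregistration costs like the table above can be measured with a small libibverbs harness. This is a hypothetical sketch, not the benchmark actually used for the commit message: it takes the first device, omits all error handling, and times a single ibv_dereg_mr() call for one buffer size.

```c
#include <infiniband/verbs.h>
#include <stdio.h>
#include <stdlib.h>
#include <time.h>

int main(void)
{
    struct ibv_device **devs = ibv_get_device_list(NULL);
    struct ibv_context *ctx = ibv_open_device(devs[0]);
    struct ibv_pd *pd = ibv_alloc_pd(ctx);
    size_t len = 10UL << 30;            /* 10GB; vary per table row */
    void *buf = malloc(len);
    struct timespec t0, t1;

    /* Registration pins the pages; deregistration unpins them, which
     * is the slow part being measured. */
    struct ibv_mr *mr = ibv_reg_mr(pd, buf, len,
                                   IBV_ACCESS_LOCAL_WRITE |
                                   IBV_ACCESS_REMOTE_READ |
                                   IBV_ACCESS_REMOTE_WRITE);

    clock_gettime(CLOCK_MONOTONIC, &t0);
    ibv_dereg_mr(mr);                   /* the call being measured */
    clock_gettime(CLOCK_MONOTONIC, &t1);

    printf("%zuGB: %ldms\n", len >> 30,
           (t1.tv_sec - t0.tv_sec) * 1000 +
           (t1.tv_nsec - t0.tv_nsec) / 1000000);

    ibv_dealloc_pd(pd);
    ibv_close_device(ctx);
    ibv_free_device_list(devs);
    free(buf);
    return 0;
}
```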