Message ID | 20220802063907.18882-7-quintela@redhat.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | Migration: Transmit and detect zero pages in the multifd threads | expand |
On Tue, 2022-08-02 at 08:39 +0200, Juan Quintela wrote: > Use of flags with respect to locking was incensistant. For the > sending side: > - it was set to 0 with mutex held on the multifd channel. > - MULTIFD_FLAG_SYNC was set with mutex held on the migration thread. > - Everything else was done without the mutex held on the multifd channel. > > On the reception side, it is not used on the migration thread, only on > the multifd channels threads. > > So we move it to the multifd channels thread only variables, and we > introduce a new bool sync_needed on the send side to pass that information. > > Signed-off-by: Juan Quintela <quintela@redhat.com> > --- > migration/multifd.h | 10 ++++++---- > migration/multifd.c | 23 +++++++++++++---------- > 2 files changed, 19 insertions(+), 14 deletions(-) > > diff --git a/migration/multifd.h b/migration/multifd.h > index 36f899c56f..a67cefc0a2 100644 > --- a/migration/multifd.h > +++ b/migration/multifd.h > @@ -98,12 +98,12 @@ typedef struct { Just noticed having no name in 'typedef struct' line makes it harder to understand what is going on. MultiFDSendParams > bool running; > /* should this thread finish */ > bool quit; > - /* multifd flags for each packet */ > - uint32_t flags; > /* global number of generated multifd packets */ > uint64_t packet_num; > /* How many bytes have we sent on the last packet */ > uint64_t sent_bytes; > + /* Do we need to do an iteration sync */ > + bool sync_needed; > /* thread has work to do */ > int pending_job; > /* array of pages to sent. > @@ -117,6 +117,8 @@ typedef struct { > > /* pointer to the packet */ > MultiFDPacket_t *packet; > + /* multifd flags for each packet */ > + uint32_t flags; > /* size of the next packet that contains pages */ > uint32_t next_packet_size; > /* packets sent through this channel */ MultiFDRecvParams > @@ -163,8 +165,6 @@ typedef struct { > bool running; > /* should this thread finish */ > bool quit; > - /* multifd flags for each packet */ > - uint32_t flags; > /* global number of generated multifd packets */ > uint64_t packet_num; > > @@ -172,6 +172,8 @@ typedef struct { > > /* pointer to the packet */ > MultiFDPacket_t *packet; > + /* multifd flags for each packet */ > + uint32_t flags; > /* size of the next packet that contains pages */ > uint32_t next_packet_size; > /* packets sent through this channel */ So, IIUC, the struct member flags got moved down (same struct) to an area described as thread-local, meaning it does not need locking. Interesting, I haven't noticed this different areas in the same struct. > diff --git a/migration/multifd.c b/migration/multifd.c > index e25b529235..09a40a9135 100644 > --- a/migration/multifd.c > +++ b/migration/multifd.c > @@ -602,7 +602,7 @@ int multifd_send_sync_main(QEMUFile *f) > } > > p->packet_num = multifd_send_state->packet_num++; > - p->flags |= MULTIFD_FLAG_SYNC; > + p->sync_needed = true; > p->pending_job++; > qemu_mutex_unlock(&p->mutex); > qemu_sem_post(&p->sem); > @@ -658,7 +658,11 @@ static void *multifd_send_thread(void *opaque) > > if (p->pending_job) { > uint64_t packet_num = p->packet_num; > - uint32_t flags = p->flags; > + p->flags = 0; > + if (p->sync_needed) { > + p->flags |= MULTIFD_FLAG_SYNC; > + p->sync_needed = false; > + } Any particular reason why doing p->flags = 0, then p->flags |= MULTIFD_FLAG_SYNC ? [1] Couldn't it be done without the |= , since it's already being set to zero before? (becoming "p->flags = MULTIFD_FLAG_SYNC" ) > p->normal_num = 0; > > if (use_zero_copy_send) { > @@ -680,14 +684,13 @@ static void *multifd_send_thread(void *opaque) > } > } > multifd_send_fill_packet(p); > - p->flags = 0; > p->num_packets++; > p->total_normal_pages += p->normal_num; > p->pages->num = 0; > p->pages->block = NULL; > qemu_mutex_unlock(&p->mutex); > > - trace_multifd_send(p->id, packet_num, p->normal_num, flags, > + trace_multifd_send(p->id, packet_num, p->normal_num, p->flags, > p->next_packet_size); > > if (use_zero_copy_send) { > @@ -715,7 +718,7 @@ static void *multifd_send_thread(void *opaque) > p->pending_job--; > qemu_mutex_unlock(&p->mutex); > > - if (flags & MULTIFD_FLAG_SYNC) { > + if (p->flags & MULTIFD_FLAG_SYNC) { > qemu_sem_post(&p->sem_sync); > } > qemu_sem_post(&multifd_send_state->channels_ready); IIUC it uses p->sync_needed to keep the sync info, instead of the previous flags local var, and thus it can set p->flags = 0 earlier. Seems to not change any behavior AFAICS. > @@ -1090,7 +1093,7 @@ static void *multifd_recv_thread(void *opaque) > rcu_register_thread(); > > while (true) { > - uint32_t flags; > + bool sync_needed = false; > > if (p->quit) { > break; > @@ -1112,11 +1115,11 @@ static void *multifd_recv_thread(void *opaque) > break; > } > > - flags = p->flags; > + trace_multifd_recv(p->id, p->packet_num, p->normal_num, p->flags, > + p->next_packet_size); > + sync_needed = p->flags & MULTIFD_FLAG_SYNC; > /* recv methods don't know how to handle the SYNC flag */ > p->flags &= ~MULTIFD_FLAG_SYNC; > - trace_multifd_recv(p->id, p->packet_num, p->normal_num, flags, > - p->next_packet_size); > p->num_packets++; > p->total_normal_pages += p->normal_num; > qemu_mutex_unlock(&p->mutex); > @@ -1128,7 +1131,7 @@ static void *multifd_recv_thread(void *opaque) > } > } > > - if (flags & MULTIFD_FLAG_SYNC) { > + if (sync_needed) { > qemu_sem_post(&multifd_recv_state->sem_sync); > qemu_sem_wait(&p->sem_sync); > } Ok, IIUC this part should have the same behavior as before, but using a bool instead of an u32. Other than question [1], LGTM. FWIW: Reviewed-by: Leonardo Bras <leobras@redhat.com>
Leonardo Brás <leobras@redhat.com> wrote: > On Tue, 2022-08-02 at 08:39 +0200, Juan Quintela wrote: >> Use of flags with respect to locking was incensistant. For the >> sending side: >> - it was set to 0 with mutex held on the multifd channel. >> - MULTIFD_FLAG_SYNC was set with mutex held on the migration thread. >> - Everything else was done without the mutex held on the multifd channel. >> >> On the reception side, it is not used on the migration thread, only on >> the multifd channels threads. >> >> So we move it to the multifd channels thread only variables, and we >> introduce a new bool sync_needed on the send side to pass that information. >> >> Signed-off-by: Juan Quintela <quintela@redhat.com> >> --- >> migration/multifd.h | 10 ++++++---- >> migration/multifd.c | 23 +++++++++++++---------- >> 2 files changed, 19 insertions(+), 14 deletions(-) >> >> diff --git a/migration/multifd.h b/migration/multifd.h >> index 36f899c56f..a67cefc0a2 100644 >> --- a/migration/multifd.h >> +++ b/migration/multifd.h >> @@ -98,12 +98,12 @@ typedef struct { > > Just noticed having no name in 'typedef struct' line makes it harder to > understand what is going on. It is common idiom in QEMU. The principal reason is that if you don't want anyone to use "struct MultiFDSendParams" but MultiFDSendParams, the best way to achieve that is to do it this way. >> @@ -172,6 +172,8 @@ typedef struct { >> >> /* pointer to the packet */ >> MultiFDPacket_t *packet; >> + /* multifd flags for each packet */ >> + uint32_t flags; >> /* size of the next packet that contains pages */ >> uint32_t next_packet_size; >> /* packets sent through this channel */ > > So, IIUC, the struct member flags got moved down (same struct) to an area > described as thread-local, meaning it does not need locking. > > Interesting, I haven't noticed this different areas in the same struct. It has changed in the last two weeks or so in upstream (it has been on this patchset for several months.) > >> diff --git a/migration/multifd.c b/migration/multifd.c >> index e25b529235..09a40a9135 100644 >> --- a/migration/multifd.c >> +++ b/migration/multifd.c >> @@ -602,7 +602,7 @@ int multifd_send_sync_main(QEMUFile *f) >> } >> >> p->packet_num = multifd_send_state->packet_num++; >> - p->flags |= MULTIFD_FLAG_SYNC; >> + p->sync_needed = true; >> p->pending_job++; >> qemu_mutex_unlock(&p->mutex); >> qemu_sem_post(&p->sem); >> @@ -658,7 +658,11 @@ static void *multifd_send_thread(void *opaque) >> >> if (p->pending_job) { >> uint64_t packet_num = p->packet_num; >> - uint32_t flags = p->flags; >> + p->flags = 0; >> + if (p->sync_needed) { >> + p->flags |= MULTIFD_FLAG_SYNC; >> + p->sync_needed = false; >> + } > > Any particular reason why doing p->flags = 0, then p->flags |= MULTIFD_FLAG_SYNC > ? It is a bitmap field, and if there is anything on the future, we need to set it. I agree that when there is only one flag, it seems "weird". > [1] Couldn't it be done without the |= , since it's already being set to zero > before? (becoming "p->flags = MULTIFD_FLAG_SYNC" ) As said, easier to modify later, and also easier if we want to setup a flag by default. I agree that it is a matter of style/taste. >> p->normal_num = 0; >> >> if (use_zero_copy_send) { >> @@ -680,14 +684,13 @@ static void *multifd_send_thread(void *opaque) >> } >> } >> multifd_send_fill_packet(p); >> - p->flags = 0; >> p->num_packets++; >> p->total_normal_pages += p->normal_num; >> p->pages->num = 0; >> p->pages->block = NULL; >> qemu_mutex_unlock(&p->mutex); >> >> - trace_multifd_send(p->id, packet_num, p->normal_num, flags, >> + trace_multifd_send(p->id, packet_num, p->normal_num, p->flags, >> p->next_packet_size); >> >> if (use_zero_copy_send) { >> @@ -715,7 +718,7 @@ static void *multifd_send_thread(void *opaque) >> p->pending_job--; >> qemu_mutex_unlock(&p->mutex); >> >> - if (flags & MULTIFD_FLAG_SYNC) { >> + if (p->flags & MULTIFD_FLAG_SYNC) { >> qemu_sem_post(&p->sem_sync); >> } >> qemu_sem_post(&multifd_send_state->channels_ready); > > IIUC it uses p->sync_needed to keep the sync info, instead of the previous flags > local var, and thus it can set p->flags = 0 earlier. Seems to not change any > behavior AFAICS. The protection of the global flags was being wrong. That is the reason that I decided to change it to the sync_needed. The problem was that at some point we were still sending a packet (that shouldn't have the SYNC flag enabled), but we received a multifd_main_sync() and it got enabled anyways. The easier way that I found te fix it was this way. Problem was difficult to detect, that is the reason that I change it this way. >> - if (flags & MULTIFD_FLAG_SYNC) { >> + if (sync_needed) { >> qemu_sem_post(&multifd_recv_state->sem_sync); >> qemu_sem_wait(&p->sem_sync); >> } > > Ok, IIUC this part should have the same behavior as before, but using a bool > instead of an u32. I changed it to make sure that we only checked the flags at the beggining of the function, with the lock taken. > > FWIW: > Reviewed-by: Leonardo Bras <leobras@redhat.com> Thanks, Juan.
On Fri, Aug 19, 2022 at 7:03 AM Juan Quintela <quintela@redhat.com> wrote: > > Leonardo Brás <leobras@redhat.com> wrote: > > On Tue, 2022-08-02 at 08:39 +0200, Juan Quintela wrote: > >> Use of flags with respect to locking was incensistant. For the > >> sending side: > >> - it was set to 0 with mutex held on the multifd channel. > >> - MULTIFD_FLAG_SYNC was set with mutex held on the migration thread. > >> - Everything else was done without the mutex held on the multifd channel. > >> > >> On the reception side, it is not used on the migration thread, only on > >> the multifd channels threads. > >> > >> So we move it to the multifd channels thread only variables, and we > >> introduce a new bool sync_needed on the send side to pass that information. > >> > >> Signed-off-by: Juan Quintela <quintela@redhat.com> > >> --- > >> migration/multifd.h | 10 ++++++---- > >> migration/multifd.c | 23 +++++++++++++---------- > >> 2 files changed, 19 insertions(+), 14 deletions(-) > >> > >> diff --git a/migration/multifd.h b/migration/multifd.h > >> index 36f899c56f..a67cefc0a2 100644 > >> --- a/migration/multifd.h > >> +++ b/migration/multifd.h > >> @@ -98,12 +98,12 @@ typedef struct { > > > > Just noticed having no name in 'typedef struct' line makes it harder to > > understand what is going on. > > It is common idiom in QEMU. The principal reason is that if you don't > want anyone to use "struct MultiFDSendParams" but MultiFDSendParams, the > best way to achieve that is to do it this way. I agree, but a comment after the typedef could help reviewing. Something like typedef struct { /* MultiFDSendParams */ ... } MultiFDSendParams Becomes this in diff: diff --git a/migration/multifd.h b/migration/multifd.h index 134e6a7f19..93bb3a7f4a 100644 --- a/migration/multifd.h +++ b/migration/multifd.h @@ -90,6 +90,7 @@ typedef struct { /* MultiFDSendParams */ [...] > > >> @@ -172,6 +172,8 @@ typedef struct { > >> > >> /* pointer to the packet */ > >> MultiFDPacket_t *packet; > >> + /* multifd flags for each packet */ > >> + uint32_t flags; > >> /* size of the next packet that contains pages */ > >> uint32_t next_packet_size; > >> /* packets sent through this channel */ > > > > So, IIUC, the struct member flags got moved down (same struct) to an area > > described as thread-local, meaning it does not need locking. > > > > Interesting, I haven't noticed this different areas in the same struct. > > It has changed in the last two weeks or so in upstream (it has been on > this patchset for several months.) Nice :) > > > > > >> diff --git a/migration/multifd.c b/migration/multifd.c > >> index e25b529235..09a40a9135 100644 > >> --- a/migration/multifd.c > >> +++ b/migration/multifd.c > >> @@ -602,7 +602,7 @@ int multifd_send_sync_main(QEMUFile *f) > >> } > >> > >> p->packet_num = multifd_send_state->packet_num++; > >> - p->flags |= MULTIFD_FLAG_SYNC; > >> + p->sync_needed = true; > >> p->pending_job++; > >> qemu_mutex_unlock(&p->mutex); > >> qemu_sem_post(&p->sem); > >> @@ -658,7 +658,11 @@ static void *multifd_send_thread(void *opaque) > >> > >> if (p->pending_job) { > >> uint64_t packet_num = p->packet_num; > >> - uint32_t flags = p->flags; > >> + p->flags = 0; > >> + if (p->sync_needed) { > >> + p->flags |= MULTIFD_FLAG_SYNC; > >> + p->sync_needed = false; > >> + } > > > > Any particular reason why doing p->flags = 0, then p->flags |= MULTIFD_FLAG_SYNC > > ? > > It is a bitmap field, and if there is anything on the future, we need to > set it. I agree that when there is only one flag, it seems "weird". > > > [1] Couldn't it be done without the |= , since it's already being set to zero > > before? (becoming "p->flags = MULTIFD_FLAG_SYNC" ) > > As said, easier to modify later, and also easier if we want to setup a > flag by default. Yeah, I agree. It makes sense now. Thanks > > I agree that it is a matter of style/taste. > > >> p->normal_num = 0; > >> > >> if (use_zero_copy_send) { > >> @@ -680,14 +684,13 @@ static void *multifd_send_thread(void *opaque) > >> } > >> } > >> multifd_send_fill_packet(p); > >> - p->flags = 0; > >> p->num_packets++; > >> p->total_normal_pages += p->normal_num; > >> p->pages->num = 0; > >> p->pages->block = NULL; > >> qemu_mutex_unlock(&p->mutex); > >> > >> - trace_multifd_send(p->id, packet_num, p->normal_num, flags, > >> + trace_multifd_send(p->id, packet_num, p->normal_num, p->flags, > >> p->next_packet_size); > >> > >> if (use_zero_copy_send) { > >> @@ -715,7 +718,7 @@ static void *multifd_send_thread(void *opaque) > >> p->pending_job--; > >> qemu_mutex_unlock(&p->mutex); > >> > >> - if (flags & MULTIFD_FLAG_SYNC) { > >> + if (p->flags & MULTIFD_FLAG_SYNC) { > >> qemu_sem_post(&p->sem_sync); > >> } > >> qemu_sem_post(&multifd_send_state->channels_ready); > > > > IIUC it uses p->sync_needed to keep the sync info, instead of the previous flags > > local var, and thus it can set p->flags = 0 earlier. Seems to not change any > > behavior AFAICS. > > The protection of the global flags was being wrong. That is the reason > that I decided to change it to the sync_needed. > > The problem was that at some point we were still sending a packet (that > shouldn't have the SYNC flag enabled), but we received a > multifd_main_sync() and it got enabled anyways. The easier way that I > found te fix it was this way. > > Problem was difficult to detect, that is the reason that I change it > this way. Oh, I see. > > >> - if (flags & MULTIFD_FLAG_SYNC) { > >> + if (sync_needed) { > >> qemu_sem_post(&multifd_recv_state->sem_sync); > >> qemu_sem_wait(&p->sem_sync); > >> } > > > > Ok, IIUC this part should have the same behavior as before, but using a bool > > instead of an u32. > > I changed it to make sure that we only checked the flags at the > beggining of the function, with the lock taken. Thanks for sharing! Best regards, Leo > > > > > FWIW: > > Reviewed-by: Leonardo Bras <leobras@redhat.com> > > Thanks, Juan. >
Leonardo Bras Soares Passos <leobras@redhat.com> wrote: > On Fri, Aug 19, 2022 at 7:03 AM Juan Quintela <quintela@redhat.com> wrote: >> >> Leonardo Brás <leobras@redhat.com> wrote: >> > On Tue, 2022-08-02 at 08:39 +0200, Juan Quintela wrote: >> >> Use of flags with respect to locking was incensistant. For the >> >> sending side: >> >> - it was set to 0 with mutex held on the multifd channel. >> >> - MULTIFD_FLAG_SYNC was set with mutex held on the migration thread. >> >> - Everything else was done without the mutex held on the multifd channel. >> >> >> >> On the reception side, it is not used on the migration thread, only on >> >> the multifd channels threads. >> >> >> >> So we move it to the multifd channels thread only variables, and we >> >> introduce a new bool sync_needed on the send side to pass that information. >> >> >> >> Signed-off-by: Juan Quintela <quintela@redhat.com> >> >> --- >> >> migration/multifd.h | 10 ++++++---- >> >> migration/multifd.c | 23 +++++++++++++---------- >> >> 2 files changed, 19 insertions(+), 14 deletions(-) >> >> >> >> diff --git a/migration/multifd.h b/migration/multifd.h >> >> index 36f899c56f..a67cefc0a2 100644 >> >> --- a/migration/multifd.h >> >> +++ b/migration/multifd.h >> >> @@ -98,12 +98,12 @@ typedef struct { >> > >> > Just noticed having no name in 'typedef struct' line makes it harder to >> > understand what is going on. >> >> It is common idiom in QEMU. The principal reason is that if you don't >> want anyone to use "struct MultiFDSendParams" but MultiFDSendParams, the >> best way to achieve that is to do it this way. > > I agree, but a comment after the typedef could help reviewing. Something like > > typedef struct { /* MultiFDSendParams */ > ... > } MultiFDSendParams You have a point here. Not putting a comment, putting the real thing. Thanks, Juan.
diff --git a/migration/multifd.h b/migration/multifd.h index 36f899c56f..a67cefc0a2 100644 --- a/migration/multifd.h +++ b/migration/multifd.h @@ -98,12 +98,12 @@ typedef struct { bool running; /* should this thread finish */ bool quit; - /* multifd flags for each packet */ - uint32_t flags; /* global number of generated multifd packets */ uint64_t packet_num; /* How many bytes have we sent on the last packet */ uint64_t sent_bytes; + /* Do we need to do an iteration sync */ + bool sync_needed; /* thread has work to do */ int pending_job; /* array of pages to sent. @@ -117,6 +117,8 @@ typedef struct { /* pointer to the packet */ MultiFDPacket_t *packet; + /* multifd flags for each packet */ + uint32_t flags; /* size of the next packet that contains pages */ uint32_t next_packet_size; /* packets sent through this channel */ @@ -163,8 +165,6 @@ typedef struct { bool running; /* should this thread finish */ bool quit; - /* multifd flags for each packet */ - uint32_t flags; /* global number of generated multifd packets */ uint64_t packet_num; @@ -172,6 +172,8 @@ typedef struct { /* pointer to the packet */ MultiFDPacket_t *packet; + /* multifd flags for each packet */ + uint32_t flags; /* size of the next packet that contains pages */ uint32_t next_packet_size; /* packets sent through this channel */ diff --git a/migration/multifd.c b/migration/multifd.c index e25b529235..09a40a9135 100644 --- a/migration/multifd.c +++ b/migration/multifd.c @@ -602,7 +602,7 @@ int multifd_send_sync_main(QEMUFile *f) } p->packet_num = multifd_send_state->packet_num++; - p->flags |= MULTIFD_FLAG_SYNC; + p->sync_needed = true; p->pending_job++; qemu_mutex_unlock(&p->mutex); qemu_sem_post(&p->sem); @@ -658,7 +658,11 @@ static void *multifd_send_thread(void *opaque) if (p->pending_job) { uint64_t packet_num = p->packet_num; - uint32_t flags = p->flags; + p->flags = 0; + if (p->sync_needed) { + p->flags |= MULTIFD_FLAG_SYNC; + p->sync_needed = false; + } p->normal_num = 0; if (use_zero_copy_send) { @@ -680,14 +684,13 @@ static void *multifd_send_thread(void *opaque) } } multifd_send_fill_packet(p); - p->flags = 0; p->num_packets++; p->total_normal_pages += p->normal_num; p->pages->num = 0; p->pages->block = NULL; qemu_mutex_unlock(&p->mutex); - trace_multifd_send(p->id, packet_num, p->normal_num, flags, + trace_multifd_send(p->id, packet_num, p->normal_num, p->flags, p->next_packet_size); if (use_zero_copy_send) { @@ -715,7 +718,7 @@ static void *multifd_send_thread(void *opaque) p->pending_job--; qemu_mutex_unlock(&p->mutex); - if (flags & MULTIFD_FLAG_SYNC) { + if (p->flags & MULTIFD_FLAG_SYNC) { qemu_sem_post(&p->sem_sync); } qemu_sem_post(&multifd_send_state->channels_ready); @@ -1090,7 +1093,7 @@ static void *multifd_recv_thread(void *opaque) rcu_register_thread(); while (true) { - uint32_t flags; + bool sync_needed = false; if (p->quit) { break; @@ -1112,11 +1115,11 @@ static void *multifd_recv_thread(void *opaque) break; } - flags = p->flags; + trace_multifd_recv(p->id, p->packet_num, p->normal_num, p->flags, + p->next_packet_size); + sync_needed = p->flags & MULTIFD_FLAG_SYNC; /* recv methods don't know how to handle the SYNC flag */ p->flags &= ~MULTIFD_FLAG_SYNC; - trace_multifd_recv(p->id, p->packet_num, p->normal_num, flags, - p->next_packet_size); p->num_packets++; p->total_normal_pages += p->normal_num; qemu_mutex_unlock(&p->mutex); @@ -1128,7 +1131,7 @@ static void *multifd_recv_thread(void *opaque) } } - if (flags & MULTIFD_FLAG_SYNC) { + if (sync_needed) { qemu_sem_post(&multifd_recv_state->sem_sync); qemu_sem_wait(&p->sem_sync); }
Use of flags with respect to locking was incensistant. For the sending side: - it was set to 0 with mutex held on the multifd channel. - MULTIFD_FLAG_SYNC was set with mutex held on the migration thread. - Everything else was done without the mutex held on the multifd channel. On the reception side, it is not used on the migration thread, only on the multifd channels threads. So we move it to the multifd channels thread only variables, and we introduce a new bool sync_needed on the send side to pass that information. Signed-off-by: Juan Quintela <quintela@redhat.com> --- migration/multifd.h | 10 ++++++---- migration/multifd.c | 23 +++++++++++++---------- 2 files changed, 19 insertions(+), 14 deletions(-)