diff mbox series

[v7,06/12] multifd: Make flags field thread local

Message ID 20220802063907.18882-7-quintela@redhat.com (mailing list archive)
State New, archived
Headers show
Series Migration: Transmit and detect zero pages in the multifd threads | expand

Commit Message

Juan Quintela Aug. 2, 2022, 6:39 a.m. UTC
Use of flags with respect to locking was incensistant.  For the
sending side:
- it was set to 0 with mutex held on the multifd channel.
- MULTIFD_FLAG_SYNC was set with mutex held on the migration thread.
- Everything else was done without the mutex held on the multifd channel.

On the reception side, it is not used on the migration thread, only on
the multifd channels threads.

So we move it to the multifd channels thread only variables, and we
introduce a new bool sync_needed on the send side to pass that information.

Signed-off-by: Juan Quintela <quintela@redhat.com>
---
 migration/multifd.h | 10 ++++++----
 migration/multifd.c | 23 +++++++++++++----------
 2 files changed, 19 insertions(+), 14 deletions(-)

Comments

Leonardo Bras Aug. 11, 2022, 9:04 a.m. UTC | #1
On Tue, 2022-08-02 at 08:39 +0200, Juan Quintela wrote:
> Use of flags with respect to locking was incensistant.  For the
> sending side:
> - it was set to 0 with mutex held on the multifd channel.
> - MULTIFD_FLAG_SYNC was set with mutex held on the migration thread.
> - Everything else was done without the mutex held on the multifd channel.
> 
> On the reception side, it is not used on the migration thread, only on
> the multifd channels threads.
> 
> So we move it to the multifd channels thread only variables, and we
> introduce a new bool sync_needed on the send side to pass that information.
> 
> Signed-off-by: Juan Quintela <quintela@redhat.com>
> ---
>  migration/multifd.h | 10 ++++++----
>  migration/multifd.c | 23 +++++++++++++----------
>  2 files changed, 19 insertions(+), 14 deletions(-)
> 
> diff --git a/migration/multifd.h b/migration/multifd.h
> index 36f899c56f..a67cefc0a2 100644
> --- a/migration/multifd.h
> +++ b/migration/multifd.h
> @@ -98,12 +98,12 @@ typedef struct {

Just noticed having no name in 'typedef struct' line makes it harder to
understand what is going on. 

MultiFDSendParams

>      bool running;
>      /* should this thread finish */
>      bool quit;
> -    /* multifd flags for each packet */
> -    uint32_t flags;
>      /* global number of generated multifd packets */
>      uint64_t packet_num;
>      /* How many bytes have we sent on the last packet */
>      uint64_t sent_bytes;
> +    /* Do we need to do an iteration sync */
> +    bool sync_needed;
>      /* thread has work to do */
>      int pending_job;
>      /* array of pages to sent.
> @@ -117,6 +117,8 @@ typedef struct {
>  
>      /* pointer to the packet */
>      MultiFDPacket_t *packet;
> +    /* multifd flags for each packet */
> +    uint32_t flags;
>      /* size of the next packet that contains pages */
>      uint32_t next_packet_size;
>      /* packets sent through this channel */

MultiFDRecvParams

> @@ -163,8 +165,6 @@ typedef struct {
>      bool running;
>      /* should this thread finish */
>      bool quit;
> -    /* multifd flags for each packet */
> -    uint32_t flags;
>      /* global number of generated multifd packets */
>      uint64_t packet_num;
>  
> @@ -172,6 +172,8 @@ typedef struct {
>  
>      /* pointer to the packet */
>      MultiFDPacket_t *packet;
> +    /* multifd flags for each packet */
> +    uint32_t flags;
>      /* size of the next packet that contains pages */
>      uint32_t next_packet_size;
>      /* packets sent through this channel */

So, IIUC, the struct member flags got moved down (same struct) to an area
described as thread-local, meaning it does not need locking. 

Interesting, I haven't noticed this different areas in the same struct.

> diff --git a/migration/multifd.c b/migration/multifd.c
> index e25b529235..09a40a9135 100644
> --- a/migration/multifd.c
> +++ b/migration/multifd.c
> @@ -602,7 +602,7 @@ int multifd_send_sync_main(QEMUFile *f)
>          }
>  
>          p->packet_num = multifd_send_state->packet_num++;
> -        p->flags |= MULTIFD_FLAG_SYNC;
> +        p->sync_needed = true;
>          p->pending_job++;
>          qemu_mutex_unlock(&p->mutex);
>          qemu_sem_post(&p->sem);
> @@ -658,7 +658,11 @@ static void *multifd_send_thread(void *opaque)
>  
>          if (p->pending_job) {
>              uint64_t packet_num = p->packet_num;
> -            uint32_t flags = p->flags;
> +            p->flags = 0;
> +            if (p->sync_needed) {
> +                p->flags |= MULTIFD_FLAG_SYNC;
> +                p->sync_needed = false;
> +            }

Any particular reason why doing p->flags = 0, then p->flags |= MULTIFD_FLAG_SYNC
?

[1] Couldn't it be done without the |= , since it's already being set to zero
before? (becoming "p->flags = MULTIFD_FLAG_SYNC" )


>              p->normal_num = 0;
>  
>              if (use_zero_copy_send) {
> @@ -680,14 +684,13 @@ static void *multifd_send_thread(void *opaque)
>                  }
>              }
>              multifd_send_fill_packet(p);
> -            p->flags = 0;
>              p->num_packets++;
>              p->total_normal_pages += p->normal_num;
>              p->pages->num = 0;
>              p->pages->block = NULL;
>              qemu_mutex_unlock(&p->mutex);
>  
> -            trace_multifd_send(p->id, packet_num, p->normal_num, flags,
> +            trace_multifd_send(p->id, packet_num, p->normal_num, p->flags,
>                                 p->next_packet_size);
>  
>              if (use_zero_copy_send) {
> @@ -715,7 +718,7 @@ static void *multifd_send_thread(void *opaque)
>              p->pending_job--;
>              qemu_mutex_unlock(&p->mutex);
>  
> -            if (flags & MULTIFD_FLAG_SYNC) {
> +            if (p->flags & MULTIFD_FLAG_SYNC) {
>                  qemu_sem_post(&p->sem_sync);
>              }
>              qemu_sem_post(&multifd_send_state->channels_ready);

IIUC it uses p->sync_needed to keep the sync info, instead of the previous flags
local var, and thus it can set p->flags = 0 earlier. Seems to not change any
behavior AFAICS.



> @@ -1090,7 +1093,7 @@ static void *multifd_recv_thread(void *opaque)
>      rcu_register_thread();
>  
>      while (true) {
> -        uint32_t flags;
> +        bool sync_needed = false;
>  
>          if (p->quit) {
>              break;
> @@ -1112,11 +1115,11 @@ static void *multifd_recv_thread(void *opaque)
>              break;
>          }
>  
> -        flags = p->flags;
> +        trace_multifd_recv(p->id, p->packet_num, p->normal_num, p->flags,
> +                           p->next_packet_size);
> +        sync_needed = p->flags & MULTIFD_FLAG_SYNC;
>          /* recv methods don't know how to handle the SYNC flag */
>          p->flags &= ~MULTIFD_FLAG_SYNC;
> -        trace_multifd_recv(p->id, p->packet_num, p->normal_num, flags,
> -                           p->next_packet_size);
>          p->num_packets++;
>          p->total_normal_pages += p->normal_num;
>          qemu_mutex_unlock(&p->mutex);
> @@ -1128,7 +1131,7 @@ static void *multifd_recv_thread(void *opaque)
>              }
>          }
>  
> -        if (flags & MULTIFD_FLAG_SYNC) {
> +        if (sync_needed) {
>              qemu_sem_post(&multifd_recv_state->sem_sync);
>              qemu_sem_wait(&p->sem_sync);
>          }

Ok, IIUC this part should have the same behavior as before, but using a bool
instead of an u32.

Other than question [1], LGTM. 

FWIW:
Reviewed-by: Leonardo Bras <leobras@redhat.com>
Juan Quintela Aug. 19, 2022, 10:03 a.m. UTC | #2
Leonardo Brás <leobras@redhat.com> wrote:
> On Tue, 2022-08-02 at 08:39 +0200, Juan Quintela wrote:
>> Use of flags with respect to locking was incensistant.  For the
>> sending side:
>> - it was set to 0 with mutex held on the multifd channel.
>> - MULTIFD_FLAG_SYNC was set with mutex held on the migration thread.
>> - Everything else was done without the mutex held on the multifd channel.
>> 
>> On the reception side, it is not used on the migration thread, only on
>> the multifd channels threads.
>> 
>> So we move it to the multifd channels thread only variables, and we
>> introduce a new bool sync_needed on the send side to pass that information.
>> 
>> Signed-off-by: Juan Quintela <quintela@redhat.com>
>> ---
>>  migration/multifd.h | 10 ++++++----
>>  migration/multifd.c | 23 +++++++++++++----------
>>  2 files changed, 19 insertions(+), 14 deletions(-)
>> 
>> diff --git a/migration/multifd.h b/migration/multifd.h
>> index 36f899c56f..a67cefc0a2 100644
>> --- a/migration/multifd.h
>> +++ b/migration/multifd.h
>> @@ -98,12 +98,12 @@ typedef struct {
>
> Just noticed having no name in 'typedef struct' line makes it harder to
> understand what is going on. 

It is common idiom in QEMU.  The principal reason is that if you don't
want anyone to use "struct MultiFDSendParams" but MultiFDSendParams, the
best way to achieve that is to do it this way.

>> @@ -172,6 +172,8 @@ typedef struct {
>>  
>>      /* pointer to the packet */
>>      MultiFDPacket_t *packet;
>> +    /* multifd flags for each packet */
>> +    uint32_t flags;
>>      /* size of the next packet that contains pages */
>>      uint32_t next_packet_size;
>>      /* packets sent through this channel */
>
> So, IIUC, the struct member flags got moved down (same struct) to an area
> described as thread-local, meaning it does not need locking. 
>
> Interesting, I haven't noticed this different areas in the same struct.

It has changed in the last two weeks or so in upstream (it has been on
this patchset for several months.)


>
>> diff --git a/migration/multifd.c b/migration/multifd.c
>> index e25b529235..09a40a9135 100644
>> --- a/migration/multifd.c
>> +++ b/migration/multifd.c
>> @@ -602,7 +602,7 @@ int multifd_send_sync_main(QEMUFile *f)
>>          }
>>  
>>          p->packet_num = multifd_send_state->packet_num++;
>> -        p->flags |= MULTIFD_FLAG_SYNC;
>> +        p->sync_needed = true;
>>          p->pending_job++;
>>          qemu_mutex_unlock(&p->mutex);
>>          qemu_sem_post(&p->sem);
>> @@ -658,7 +658,11 @@ static void *multifd_send_thread(void *opaque)
>>  
>>          if (p->pending_job) {
>>              uint64_t packet_num = p->packet_num;
>> -            uint32_t flags = p->flags;
>> +            p->flags = 0;
>> +            if (p->sync_needed) {
>> +                p->flags |= MULTIFD_FLAG_SYNC;
>> +                p->sync_needed = false;
>> +            }
>
> Any particular reason why doing p->flags = 0, then p->flags |= MULTIFD_FLAG_SYNC
> ?

It is a bitmap field, and if there is anything on the future, we need to
set it.  I agree that when there is only one flag, it seems "weird".

> [1] Couldn't it be done without the |= , since it's already being set to zero
> before? (becoming "p->flags = MULTIFD_FLAG_SYNC" )

As said, easier to modify later, and also easier if we want to setup a
flag by default.

I agree that it is a matter of style/taste.

>>              p->normal_num = 0;
>>  
>>              if (use_zero_copy_send) {
>> @@ -680,14 +684,13 @@ static void *multifd_send_thread(void *opaque)
>>                  }
>>              }
>>              multifd_send_fill_packet(p);
>> -            p->flags = 0;
>>              p->num_packets++;
>>              p->total_normal_pages += p->normal_num;
>>              p->pages->num = 0;
>>              p->pages->block = NULL;
>>              qemu_mutex_unlock(&p->mutex);
>>  
>> -            trace_multifd_send(p->id, packet_num, p->normal_num, flags,
>> +            trace_multifd_send(p->id, packet_num, p->normal_num, p->flags,
>>                                 p->next_packet_size);
>>  
>>              if (use_zero_copy_send) {
>> @@ -715,7 +718,7 @@ static void *multifd_send_thread(void *opaque)
>>              p->pending_job--;
>>              qemu_mutex_unlock(&p->mutex);
>>  
>> -            if (flags & MULTIFD_FLAG_SYNC) {
>> +            if (p->flags & MULTIFD_FLAG_SYNC) {
>>                  qemu_sem_post(&p->sem_sync);
>>              }
>>              qemu_sem_post(&multifd_send_state->channels_ready);
>
> IIUC it uses p->sync_needed to keep the sync info, instead of the previous flags
> local var, and thus it can set p->flags = 0 earlier. Seems to not change any
> behavior AFAICS.

The protection of the global flags was being wrong.  That is the reason
that I decided to change it to the sync_needed.

The problem was that at some point we were still sending a packet (that
shouldn't have the SYNC flag enabled), but we received a
multifd_main_sync() and it got enabled anyways.  The easier way that I
found te fix it was this way.

Problem was difficult to detect, that is the reason that I change it
this way.

>> -        if (flags & MULTIFD_FLAG_SYNC) {
>> +        if (sync_needed) {
>>              qemu_sem_post(&multifd_recv_state->sem_sync);
>>              qemu_sem_wait(&p->sem_sync);
>>          }
>
> Ok, IIUC this part should have the same behavior as before, but using a bool
> instead of an u32.

I changed it to make sure that we only checked the flags at the
beggining of the function, with the lock taken.

>
> FWIW:
> Reviewed-by: Leonardo Bras <leobras@redhat.com>

Thanks, Juan.
Leonardo Bras Aug. 20, 2022, 7:24 a.m. UTC | #3
On Fri, Aug 19, 2022 at 7:03 AM Juan Quintela <quintela@redhat.com> wrote:
>
> Leonardo Brás <leobras@redhat.com> wrote:
> > On Tue, 2022-08-02 at 08:39 +0200, Juan Quintela wrote:
> >> Use of flags with respect to locking was incensistant.  For the
> >> sending side:
> >> - it was set to 0 with mutex held on the multifd channel.
> >> - MULTIFD_FLAG_SYNC was set with mutex held on the migration thread.
> >> - Everything else was done without the mutex held on the multifd channel.
> >>
> >> On the reception side, it is not used on the migration thread, only on
> >> the multifd channels threads.
> >>
> >> So we move it to the multifd channels thread only variables, and we
> >> introduce a new bool sync_needed on the send side to pass that information.
> >>
> >> Signed-off-by: Juan Quintela <quintela@redhat.com>
> >> ---
> >>  migration/multifd.h | 10 ++++++----
> >>  migration/multifd.c | 23 +++++++++++++----------
> >>  2 files changed, 19 insertions(+), 14 deletions(-)
> >>
> >> diff --git a/migration/multifd.h b/migration/multifd.h
> >> index 36f899c56f..a67cefc0a2 100644
> >> --- a/migration/multifd.h
> >> +++ b/migration/multifd.h
> >> @@ -98,12 +98,12 @@ typedef struct {
> >
> > Just noticed having no name in 'typedef struct' line makes it harder to
> > understand what is going on.
>
> It is common idiom in QEMU.  The principal reason is that if you don't
> want anyone to use "struct MultiFDSendParams" but MultiFDSendParams, the
> best way to achieve that is to do it this way.

I agree, but a comment after the typedef could help reviewing. Something like

typedef struct { /* MultiFDSendParams */
...
} MultiFDSendParams

Becomes this in diff:

diff --git a/migration/multifd.h b/migration/multifd.h
index 134e6a7f19..93bb3a7f4a 100644
--- a/migration/multifd.h
+++ b/migration/multifd.h
@@ -90,6 +90,7 @@ typedef struct { /* MultiFDSendParams */
[...]


>
> >> @@ -172,6 +172,8 @@ typedef struct {
> >>
> >>      /* pointer to the packet */
> >>      MultiFDPacket_t *packet;
> >> +    /* multifd flags for each packet */
> >> +    uint32_t flags;
> >>      /* size of the next packet that contains pages */
> >>      uint32_t next_packet_size;
> >>      /* packets sent through this channel */
> >
> > So, IIUC, the struct member flags got moved down (same struct) to an area
> > described as thread-local, meaning it does not need locking.
> >
> > Interesting, I haven't noticed this different areas in the same struct.
>
> It has changed in the last two weeks or so in upstream (it has been on
> this patchset for several months.)

Nice :)

>
>
> >
> >> diff --git a/migration/multifd.c b/migration/multifd.c
> >> index e25b529235..09a40a9135 100644
> >> --- a/migration/multifd.c
> >> +++ b/migration/multifd.c
> >> @@ -602,7 +602,7 @@ int multifd_send_sync_main(QEMUFile *f)
> >>          }
> >>
> >>          p->packet_num = multifd_send_state->packet_num++;
> >> -        p->flags |= MULTIFD_FLAG_SYNC;
> >> +        p->sync_needed = true;
> >>          p->pending_job++;
> >>          qemu_mutex_unlock(&p->mutex);
> >>          qemu_sem_post(&p->sem);
> >> @@ -658,7 +658,11 @@ static void *multifd_send_thread(void *opaque)
> >>
> >>          if (p->pending_job) {
> >>              uint64_t packet_num = p->packet_num;
> >> -            uint32_t flags = p->flags;
> >> +            p->flags = 0;
> >> +            if (p->sync_needed) {
> >> +                p->flags |= MULTIFD_FLAG_SYNC;
> >> +                p->sync_needed = false;
> >> +            }
> >
> > Any particular reason why doing p->flags = 0, then p->flags |= MULTIFD_FLAG_SYNC
> > ?
>
> It is a bitmap field, and if there is anything on the future, we need to
> set it.  I agree that when there is only one flag, it seems "weird".
>
> > [1] Couldn't it be done without the |= , since it's already being set to zero
> > before? (becoming "p->flags = MULTIFD_FLAG_SYNC" )
>
> As said, easier to modify later, and also easier if we want to setup a
> flag by default.

Yeah, I agree. It makes sense now.

Thanks

>
> I agree that it is a matter of style/taste.
>
> >>              p->normal_num = 0;
> >>
> >>              if (use_zero_copy_send) {
> >> @@ -680,14 +684,13 @@ static void *multifd_send_thread(void *opaque)
> >>                  }
> >>              }
> >>              multifd_send_fill_packet(p);
> >> -            p->flags = 0;
> >>              p->num_packets++;
> >>              p->total_normal_pages += p->normal_num;
> >>              p->pages->num = 0;
> >>              p->pages->block = NULL;
> >>              qemu_mutex_unlock(&p->mutex);
> >>
> >> -            trace_multifd_send(p->id, packet_num, p->normal_num, flags,
> >> +            trace_multifd_send(p->id, packet_num, p->normal_num, p->flags,
> >>                                 p->next_packet_size);
> >>
> >>              if (use_zero_copy_send) {
> >> @@ -715,7 +718,7 @@ static void *multifd_send_thread(void *opaque)
> >>              p->pending_job--;
> >>              qemu_mutex_unlock(&p->mutex);
> >>
> >> -            if (flags & MULTIFD_FLAG_SYNC) {
> >> +            if (p->flags & MULTIFD_FLAG_SYNC) {
> >>                  qemu_sem_post(&p->sem_sync);
> >>              }
> >>              qemu_sem_post(&multifd_send_state->channels_ready);
> >
> > IIUC it uses p->sync_needed to keep the sync info, instead of the previous flags
> > local var, and thus it can set p->flags = 0 earlier. Seems to not change any
> > behavior AFAICS.
>
> The protection of the global flags was being wrong.  That is the reason
> that I decided to change it to the sync_needed.
>
> The problem was that at some point we were still sending a packet (that
> shouldn't have the SYNC flag enabled), but we received a
> multifd_main_sync() and it got enabled anyways.  The easier way that I
> found te fix it was this way.
>
> Problem was difficult to detect, that is the reason that I change it
> this way.

Oh, I see.

>
> >> -        if (flags & MULTIFD_FLAG_SYNC) {
> >> +        if (sync_needed) {
> >>              qemu_sem_post(&multifd_recv_state->sem_sync);
> >>              qemu_sem_wait(&p->sem_sync);
> >>          }
> >
> > Ok, IIUC this part should have the same behavior as before, but using a bool
> > instead of an u32.
>
> I changed it to make sure that we only checked the flags at the
> beggining of the function, with the lock taken.

Thanks for sharing!

Best regards,
Leo

>
> >
> > FWIW:
> > Reviewed-by: Leonardo Bras <leobras@redhat.com>
>
> Thanks, Juan.
>
Juan Quintela Aug. 23, 2022, 1 p.m. UTC | #4
Leonardo Bras Soares Passos <leobras@redhat.com> wrote:
> On Fri, Aug 19, 2022 at 7:03 AM Juan Quintela <quintela@redhat.com> wrote:
>>
>> Leonardo Brás <leobras@redhat.com> wrote:
>> > On Tue, 2022-08-02 at 08:39 +0200, Juan Quintela wrote:
>> >> Use of flags with respect to locking was incensistant.  For the
>> >> sending side:
>> >> - it was set to 0 with mutex held on the multifd channel.
>> >> - MULTIFD_FLAG_SYNC was set with mutex held on the migration thread.
>> >> - Everything else was done without the mutex held on the multifd channel.
>> >>
>> >> On the reception side, it is not used on the migration thread, only on
>> >> the multifd channels threads.
>> >>
>> >> So we move it to the multifd channels thread only variables, and we
>> >> introduce a new bool sync_needed on the send side to pass that information.
>> >>
>> >> Signed-off-by: Juan Quintela <quintela@redhat.com>
>> >> ---
>> >>  migration/multifd.h | 10 ++++++----
>> >>  migration/multifd.c | 23 +++++++++++++----------
>> >>  2 files changed, 19 insertions(+), 14 deletions(-)
>> >>
>> >> diff --git a/migration/multifd.h b/migration/multifd.h
>> >> index 36f899c56f..a67cefc0a2 100644
>> >> --- a/migration/multifd.h
>> >> +++ b/migration/multifd.h
>> >> @@ -98,12 +98,12 @@ typedef struct {
>> >
>> > Just noticed having no name in 'typedef struct' line makes it harder to
>> > understand what is going on.
>>
>> It is common idiom in QEMU.  The principal reason is that if you don't
>> want anyone to use "struct MultiFDSendParams" but MultiFDSendParams, the
>> best way to achieve that is to do it this way.
>
> I agree, but a comment after the typedef could help reviewing. Something like
>
> typedef struct { /* MultiFDSendParams */
> ...
> } MultiFDSendParams

You have a point here.  Not putting a comment, putting the real thing.

Thanks, Juan.
diff mbox series

Patch

diff --git a/migration/multifd.h b/migration/multifd.h
index 36f899c56f..a67cefc0a2 100644
--- a/migration/multifd.h
+++ b/migration/multifd.h
@@ -98,12 +98,12 @@  typedef struct {
     bool running;
     /* should this thread finish */
     bool quit;
-    /* multifd flags for each packet */
-    uint32_t flags;
     /* global number of generated multifd packets */
     uint64_t packet_num;
     /* How many bytes have we sent on the last packet */
     uint64_t sent_bytes;
+    /* Do we need to do an iteration sync */
+    bool sync_needed;
     /* thread has work to do */
     int pending_job;
     /* array of pages to sent.
@@ -117,6 +117,8 @@  typedef struct {
 
     /* pointer to the packet */
     MultiFDPacket_t *packet;
+    /* multifd flags for each packet */
+    uint32_t flags;
     /* size of the next packet that contains pages */
     uint32_t next_packet_size;
     /* packets sent through this channel */
@@ -163,8 +165,6 @@  typedef struct {
     bool running;
     /* should this thread finish */
     bool quit;
-    /* multifd flags for each packet */
-    uint32_t flags;
     /* global number of generated multifd packets */
     uint64_t packet_num;
 
@@ -172,6 +172,8 @@  typedef struct {
 
     /* pointer to the packet */
     MultiFDPacket_t *packet;
+    /* multifd flags for each packet */
+    uint32_t flags;
     /* size of the next packet that contains pages */
     uint32_t next_packet_size;
     /* packets sent through this channel */
diff --git a/migration/multifd.c b/migration/multifd.c
index e25b529235..09a40a9135 100644
--- a/migration/multifd.c
+++ b/migration/multifd.c
@@ -602,7 +602,7 @@  int multifd_send_sync_main(QEMUFile *f)
         }
 
         p->packet_num = multifd_send_state->packet_num++;
-        p->flags |= MULTIFD_FLAG_SYNC;
+        p->sync_needed = true;
         p->pending_job++;
         qemu_mutex_unlock(&p->mutex);
         qemu_sem_post(&p->sem);
@@ -658,7 +658,11 @@  static void *multifd_send_thread(void *opaque)
 
         if (p->pending_job) {
             uint64_t packet_num = p->packet_num;
-            uint32_t flags = p->flags;
+            p->flags = 0;
+            if (p->sync_needed) {
+                p->flags |= MULTIFD_FLAG_SYNC;
+                p->sync_needed = false;
+            }
             p->normal_num = 0;
 
             if (use_zero_copy_send) {
@@ -680,14 +684,13 @@  static void *multifd_send_thread(void *opaque)
                 }
             }
             multifd_send_fill_packet(p);
-            p->flags = 0;
             p->num_packets++;
             p->total_normal_pages += p->normal_num;
             p->pages->num = 0;
             p->pages->block = NULL;
             qemu_mutex_unlock(&p->mutex);
 
-            trace_multifd_send(p->id, packet_num, p->normal_num, flags,
+            trace_multifd_send(p->id, packet_num, p->normal_num, p->flags,
                                p->next_packet_size);
 
             if (use_zero_copy_send) {
@@ -715,7 +718,7 @@  static void *multifd_send_thread(void *opaque)
             p->pending_job--;
             qemu_mutex_unlock(&p->mutex);
 
-            if (flags & MULTIFD_FLAG_SYNC) {
+            if (p->flags & MULTIFD_FLAG_SYNC) {
                 qemu_sem_post(&p->sem_sync);
             }
             qemu_sem_post(&multifd_send_state->channels_ready);
@@ -1090,7 +1093,7 @@  static void *multifd_recv_thread(void *opaque)
     rcu_register_thread();
 
     while (true) {
-        uint32_t flags;
+        bool sync_needed = false;
 
         if (p->quit) {
             break;
@@ -1112,11 +1115,11 @@  static void *multifd_recv_thread(void *opaque)
             break;
         }
 
-        flags = p->flags;
+        trace_multifd_recv(p->id, p->packet_num, p->normal_num, p->flags,
+                           p->next_packet_size);
+        sync_needed = p->flags & MULTIFD_FLAG_SYNC;
         /* recv methods don't know how to handle the SYNC flag */
         p->flags &= ~MULTIFD_FLAG_SYNC;
-        trace_multifd_recv(p->id, p->packet_num, p->normal_num, flags,
-                           p->next_packet_size);
         p->num_packets++;
         p->total_normal_pages += p->normal_num;
         qemu_mutex_unlock(&p->mutex);
@@ -1128,7 +1131,7 @@  static void *multifd_recv_thread(void *opaque)
             }
         }
 
-        if (flags & MULTIFD_FLAG_SYNC) {
+        if (sync_needed) {
             qemu_sem_post(&multifd_recv_state->sem_sync);
             qemu_sem_wait(&p->sem_sync);
         }