diff mbox series

migration: Fix qmp_query_migrate mbps value

Message ID 20240219194457.26923-1-farosas@suse.de (mailing list archive)
State New, archived
Headers show
Series migration: Fix qmp_query_migrate mbps value | expand

Commit Message

Fabiano Rosas Feb. 19, 2024, 7:44 p.m. UTC
The QMP command query_migrate might see incorrect throughput numbers
if it runs after we've set the migration completion status but before
migration_calculate_complete() has updated s->total_time and s->mbps.

The migration status would show COMPLETED, but the throughput value
would be the one from the last iteration and not the one from the
whole migration. This will usually be a larger value due to the time
period being smaller (one iteration).

Move migration_calculate_complete() earlier so that the status
MIGRATION_STATUS_COMPLETED is only emitted after the final counters
update.

Signed-off-by: Fabiano Rosas <farosas@suse.de>
---
CI run: https://gitlab.com/farosas/qemu/-/pipelines/1182405776
---
 migration/migration.c | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

Comments

Peter Xu Feb. 21, 2024, 2:52 a.m. UTC | #1
On Mon, Feb 19, 2024 at 04:44:57PM -0300, Fabiano Rosas wrote:
> The QMP command query_migrate might see incorrect throughput numbers
> if it runs after we've set the migration completion status but before
> migration_calculate_complete() has updated s->total_time and s->mbps.
> 
> The migration status would show COMPLETED, but the throughput value
> would be the one from the last iteration and not the one from the
> whole migration. This will usually be a larger value due to the time
> period being smaller (one iteration).
> 
> Move migration_calculate_complete() earlier so that the status
> MIGRATION_STATUS_COMPLETED is only emitted after the final counters
> update.
> 
> Signed-off-by: Fabiano Rosas <farosas@suse.de>
> ---
> CI run: https://gitlab.com/farosas/qemu/-/pipelines/1182405776
> ---
>  migration/migration.c | 10 ++++++----
>  1 file changed, 6 insertions(+), 4 deletions(-)
> 
> diff --git a/migration/migration.c b/migration/migration.c
> index ab21de2cad..7486d59da0 100644
> --- a/migration/migration.c
> +++ b/migration/migration.c
> @@ -102,6 +102,7 @@ static int migration_maybe_pause(MigrationState *s,
>                                   int new_state);
>  static void migrate_fd_cancel(MigrationState *s);
>  static bool close_return_path_on_source(MigrationState *s);
> +static void migration_calculate_complete(MigrationState *s);
>  
>  static void migration_downtime_start(MigrationState *s)
>  {
> @@ -2746,6 +2747,7 @@ static void migration_completion(MigrationState *s)
>          migrate_set_state(&s->state, MIGRATION_STATUS_ACTIVE,
>                            MIGRATION_STATUS_COLO);
>      } else {
> +        migration_calculate_complete(s);
>          migrate_set_state(&s->state, current_active_state,
>                            MIGRATION_STATUS_COMPLETED);
>      }
> @@ -2784,6 +2786,7 @@ static void bg_migration_completion(MigrationState *s)
>          goto fail;
>      }
>  
> +    migration_calculate_complete(s);
>      migrate_set_state(&s->state, current_active_state,
>                        MIGRATION_STATUS_COMPLETED);
>      return;
> @@ -2993,12 +2996,15 @@ static void migration_calculate_complete(MigrationState *s)
>      int64_t end_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
>      int64_t transfer_time;
>  
> +    /* QMP could read from these concurrently */
> +    bql_lock();
>      migration_downtime_end(s);
>      s->total_time = end_time - s->start_time;
>      transfer_time = s->total_time - s->setup_time;
>      if (transfer_time) {
>          s->mbps = ((double) bytes * 8.0) / transfer_time / 1000;
>      }
> +    bql_unlock();

The lock is not needed?

AFAIU that was needed because of things like runstate_set() rather than
setting of these fields.

See migration_update_counters() where it also updates mbps without holding
a lock.

Other than that it looks all good.

>  }
>  
>  static void update_iteration_initial_status(MigrationState *s)
> @@ -3145,7 +3151,6 @@ static void migration_iteration_finish(MigrationState *s)
>      bql_lock();
>      switch (s->state) {
>      case MIGRATION_STATUS_COMPLETED:
> -        migration_calculate_complete(s);
>          runstate_set(RUN_STATE_POSTMIGRATE);
>          break;
>      case MIGRATION_STATUS_COLO:
> @@ -3189,9 +3194,6 @@ static void bg_migration_iteration_finish(MigrationState *s)
>      bql_lock();
>      switch (s->state) {
>      case MIGRATION_STATUS_COMPLETED:
> -        migration_calculate_complete(s);
> -        break;
> -
>      case MIGRATION_STATUS_ACTIVE:
>      case MIGRATION_STATUS_FAILED:
>      case MIGRATION_STATUS_CANCELLED:
> -- 
> 2.35.3
>
Fabiano Rosas Feb. 21, 2024, 12:56 p.m. UTC | #2
Peter Xu <peterx@redhat.com> writes:

> On Mon, Feb 19, 2024 at 04:44:57PM -0300, Fabiano Rosas wrote:
>> The QMP command query_migrate might see incorrect throughput numbers
>> if it runs after we've set the migration completion status but before
>> migration_calculate_complete() has updated s->total_time and s->mbps.
>> 
>> The migration status would show COMPLETED, but the throughput value
>> would be the one from the last iteration and not the one from the
>> whole migration. This will usually be a larger value due to the time
>> period being smaller (one iteration).
>> 
>> Move migration_calculate_complete() earlier so that the status
>> MIGRATION_STATUS_COMPLETED is only emitted after the final counters
>> update.
>> 
>> Signed-off-by: Fabiano Rosas <farosas@suse.de>
>> ---
>> CI run: https://gitlab.com/farosas/qemu/-/pipelines/1182405776
>> ---
>>  migration/migration.c | 10 ++++++----
>>  1 file changed, 6 insertions(+), 4 deletions(-)
>> 
>> diff --git a/migration/migration.c b/migration/migration.c
>> index ab21de2cad..7486d59da0 100644
>> --- a/migration/migration.c
>> +++ b/migration/migration.c
>> @@ -102,6 +102,7 @@ static int migration_maybe_pause(MigrationState *s,
>>                                   int new_state);
>>  static void migrate_fd_cancel(MigrationState *s);
>>  static bool close_return_path_on_source(MigrationState *s);
>> +static void migration_calculate_complete(MigrationState *s);
>>  
>>  static void migration_downtime_start(MigrationState *s)
>>  {
>> @@ -2746,6 +2747,7 @@ static void migration_completion(MigrationState *s)
>>          migrate_set_state(&s->state, MIGRATION_STATUS_ACTIVE,
>>                            MIGRATION_STATUS_COLO);
>>      } else {
>> +        migration_calculate_complete(s);
>>          migrate_set_state(&s->state, current_active_state,
>>                            MIGRATION_STATUS_COMPLETED);
>>      }
>> @@ -2784,6 +2786,7 @@ static void bg_migration_completion(MigrationState *s)
>>          goto fail;
>>      }
>>  
>> +    migration_calculate_complete(s);
>>      migrate_set_state(&s->state, current_active_state,
>>                        MIGRATION_STATUS_COMPLETED);
>>      return;
>> @@ -2993,12 +2996,15 @@ static void migration_calculate_complete(MigrationState *s)
>>      int64_t end_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
>>      int64_t transfer_time;
>>  
>> +    /* QMP could read from these concurrently */
>> +    bql_lock();
>>      migration_downtime_end(s);
>>      s->total_time = end_time - s->start_time;
>>      transfer_time = s->total_time - s->setup_time;
>>      if (transfer_time) {
>>          s->mbps = ((double) bytes * 8.0) / transfer_time / 1000;
>>      }
>> +    bql_unlock();
>
> The lock is not needed?
>
> AFAIU that was needed because of things like runstate_set() rather than
> setting of these fields.
>

Don't we need to keep the total_time and mbps update atomic? Otherwise
query-migrate might see (say) total_time=0 and mbps=<correct value> or
total_time=<correct value> and mbps=<previous value>.

Also, what orders s->mbps update before the s->state update? I'd say we
should probably hold the lock around the whole total_time,mbps,state
update.

I'm not entirely sure, what do you think?

> See migration_update_counters() where it also updates mbps without holding
> a lock.

Here it might be less important since it's the middle of the migration,
there will proabably be more than one query-migrate which would see the
correct values.

>
> Other than that it looks all good.
>
>>  }
>>  
>>  static void update_iteration_initial_status(MigrationState *s)
>> @@ -3145,7 +3151,6 @@ static void migration_iteration_finish(MigrationState *s)
>>      bql_lock();
>>      switch (s->state) {
>>      case MIGRATION_STATUS_COMPLETED:
>> -        migration_calculate_complete(s);
>>          runstate_set(RUN_STATE_POSTMIGRATE);
>>          break;
>>      case MIGRATION_STATUS_COLO:
>> @@ -3189,9 +3194,6 @@ static void bg_migration_iteration_finish(MigrationState *s)
>>      bql_lock();
>>      switch (s->state) {
>>      case MIGRATION_STATUS_COMPLETED:
>> -        migration_calculate_complete(s);
>> -        break;
>> -
>>      case MIGRATION_STATUS_ACTIVE:
>>      case MIGRATION_STATUS_FAILED:
>>      case MIGRATION_STATUS_CANCELLED:
>> -- 
>> 2.35.3
>>
Peter Xu Feb. 22, 2024, 9:40 a.m. UTC | #3
On Wed, Feb 21, 2024 at 09:56:36AM -0300, Fabiano Rosas wrote:
> Peter Xu <peterx@redhat.com> writes:
> 
> > On Mon, Feb 19, 2024 at 04:44:57PM -0300, Fabiano Rosas wrote:
> >> The QMP command query_migrate might see incorrect throughput numbers
> >> if it runs after we've set the migration completion status but before
> >> migration_calculate_complete() has updated s->total_time and s->mbps.
> >> 
> >> The migration status would show COMPLETED, but the throughput value
> >> would be the one from the last iteration and not the one from the
> >> whole migration. This will usually be a larger value due to the time
> >> period being smaller (one iteration).
> >> 
> >> Move migration_calculate_complete() earlier so that the status
> >> MIGRATION_STATUS_COMPLETED is only emitted after the final counters
> >> update.
> >> 
> >> Signed-off-by: Fabiano Rosas <farosas@suse.de>
> >> ---
> >> CI run: https://gitlab.com/farosas/qemu/-/pipelines/1182405776
> >> ---
> >>  migration/migration.c | 10 ++++++----
> >>  1 file changed, 6 insertions(+), 4 deletions(-)
> >> 
> >> diff --git a/migration/migration.c b/migration/migration.c
> >> index ab21de2cad..7486d59da0 100644
> >> --- a/migration/migration.c
> >> +++ b/migration/migration.c
> >> @@ -102,6 +102,7 @@ static int migration_maybe_pause(MigrationState *s,
> >>                                   int new_state);
> >>  static void migrate_fd_cancel(MigrationState *s);
> >>  static bool close_return_path_on_source(MigrationState *s);
> >> +static void migration_calculate_complete(MigrationState *s);
> >>  
> >>  static void migration_downtime_start(MigrationState *s)
> >>  {
> >> @@ -2746,6 +2747,7 @@ static void migration_completion(MigrationState *s)
> >>          migrate_set_state(&s->state, MIGRATION_STATUS_ACTIVE,
> >>                            MIGRATION_STATUS_COLO);
> >>      } else {
> >> +        migration_calculate_complete(s);
> >>          migrate_set_state(&s->state, current_active_state,
> >>                            MIGRATION_STATUS_COMPLETED);
> >>      }
> >> @@ -2784,6 +2786,7 @@ static void bg_migration_completion(MigrationState *s)
> >>          goto fail;
> >>      }
> >>  
> >> +    migration_calculate_complete(s);
> >>      migrate_set_state(&s->state, current_active_state,
> >>                        MIGRATION_STATUS_COMPLETED);
> >>      return;
> >> @@ -2993,12 +2996,15 @@ static void migration_calculate_complete(MigrationState *s)
> >>      int64_t end_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
> >>      int64_t transfer_time;
> >>  
> >> +    /* QMP could read from these concurrently */
> >> +    bql_lock();
> >>      migration_downtime_end(s);
> >>      s->total_time = end_time - s->start_time;
> >>      transfer_time = s->total_time - s->setup_time;
> >>      if (transfer_time) {
> >>          s->mbps = ((double) bytes * 8.0) / transfer_time / 1000;
> >>      }
> >> +    bql_unlock();
> >
> > The lock is not needed?
> >
> > AFAIU that was needed because of things like runstate_set() rather than
> > setting of these fields.
> >
> 
> Don't we need to keep the total_time and mbps update atomic? Otherwise
> query-migrate might see (say) total_time=0 and mbps=<correct value> or
> total_time=<correct value> and mbps=<previous value>.

I thought it wasn't a major concern, but what you said makes sense; taking
it one more time doesn't really hurt after all to provide such benefit.

> 
> Also, what orders s->mbps update before the s->state update? I'd say we
> should probably hold the lock around the whole total_time,mbps,state
> update.

IMHO that's fine; mutex unlock implies a RELEASE.  See atomic.rst:

- ``pthread_mutex_lock`` has acquire semantics, ``pthread_mutex_unlock`` has
  release semantics and synchronizes with a ``pthread_mutex_lock`` for the
  same mutex.

> 
> I'm not entirely sure, what do you think?
> 
> > See migration_update_counters() where it also updates mbps without holding
> > a lock.
> 
> Here it might be less important since it's the middle of the migration,
> there will proabably be more than one query-migrate which would see the
> correct values.

Yep.  I queued this.

Thanks,
Peter Xu Feb. 22, 2024, 1:29 p.m. UTC | #4
On Thu, Feb 22, 2024 at 05:40:41PM +0800, Peter Xu wrote:
> On Wed, Feb 21, 2024 at 09:56:36AM -0300, Fabiano Rosas wrote:
> > Peter Xu <peterx@redhat.com> writes:
> > 
> > > On Mon, Feb 19, 2024 at 04:44:57PM -0300, Fabiano Rosas wrote:
> > >> The QMP command query_migrate might see incorrect throughput numbers
> > >> if it runs after we've set the migration completion status but before
> > >> migration_calculate_complete() has updated s->total_time and s->mbps.
> > >> 
> > >> The migration status would show COMPLETED, but the throughput value
> > >> would be the one from the last iteration and not the one from the
> > >> whole migration. This will usually be a larger value due to the time
> > >> period being smaller (one iteration).
> > >> 
> > >> Move migration_calculate_complete() earlier so that the status
> > >> MIGRATION_STATUS_COMPLETED is only emitted after the final counters
> > >> update.
> > >> 
> > >> Signed-off-by: Fabiano Rosas <farosas@suse.de>
> > >> ---
> > >> CI run: https://gitlab.com/farosas/qemu/-/pipelines/1182405776
> > >> ---
> > >>  migration/migration.c | 10 ++++++----
> > >>  1 file changed, 6 insertions(+), 4 deletions(-)
> > >> 
> > >> diff --git a/migration/migration.c b/migration/migration.c
> > >> index ab21de2cad..7486d59da0 100644
> > >> --- a/migration/migration.c
> > >> +++ b/migration/migration.c
> > >> @@ -102,6 +102,7 @@ static int migration_maybe_pause(MigrationState *s,
> > >>                                   int new_state);
> > >>  static void migrate_fd_cancel(MigrationState *s);
> > >>  static bool close_return_path_on_source(MigrationState *s);
> > >> +static void migration_calculate_complete(MigrationState *s);
> > >>  
> > >>  static void migration_downtime_start(MigrationState *s)
> > >>  {
> > >> @@ -2746,6 +2747,7 @@ static void migration_completion(MigrationState *s)
> > >>          migrate_set_state(&s->state, MIGRATION_STATUS_ACTIVE,
> > >>                            MIGRATION_STATUS_COLO);
> > >>      } else {
> > >> +        migration_calculate_complete(s);
> > >>          migrate_set_state(&s->state, current_active_state,
> > >>                            MIGRATION_STATUS_COMPLETED);
> > >>      }
> > >> @@ -2784,6 +2786,7 @@ static void bg_migration_completion(MigrationState *s)
> > >>          goto fail;
> > >>      }
> > >>  
> > >> +    migration_calculate_complete(s);
> > >>      migrate_set_state(&s->state, current_active_state,
> > >>                        MIGRATION_STATUS_COMPLETED);
> > >>      return;
> > >> @@ -2993,12 +2996,15 @@ static void migration_calculate_complete(MigrationState *s)
> > >>      int64_t end_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
> > >>      int64_t transfer_time;
> > >>  
> > >> +    /* QMP could read from these concurrently */
> > >> +    bql_lock();
> > >>      migration_downtime_end(s);
> > >>      s->total_time = end_time - s->start_time;
> > >>      transfer_time = s->total_time - s->setup_time;
> > >>      if (transfer_time) {
> > >>          s->mbps = ((double) bytes * 8.0) / transfer_time / 1000;
> > >>      }
> > >> +    bql_unlock();
> > >
> > > The lock is not needed?
> > >
> > > AFAIU that was needed because of things like runstate_set() rather than
> > > setting of these fields.
> > >
> > 
> > Don't we need to keep the total_time and mbps update atomic? Otherwise
> > query-migrate might see (say) total_time=0 and mbps=<correct value> or
> > total_time=<correct value> and mbps=<previous value>.
> 
> I thought it wasn't a major concern, but what you said makes sense; taking
> it one more time doesn't really hurt after all to provide such benefit.
> 
> > 
> > Also, what orders s->mbps update before the s->state update? I'd say we
> > should probably hold the lock around the whole total_time,mbps,state
> > update.
> 
> IMHO that's fine; mutex unlock implies a RELEASE.  See atomic.rst:
> 
> - ``pthread_mutex_lock`` has acquire semantics, ``pthread_mutex_unlock`` has
>   release semantics and synchronizes with a ``pthread_mutex_lock`` for the
>   same mutex.

Hmm perhaps I wrote too soon.. it should only guarantee the ordering of the
update on the lock variable itself v.s. any previous R&Ws, nothing else.
Only if the other side uses bql_lock() will it guarantee proper ordering.

Put them in bql should work, but I hesitate such use to start using bql
to protect state updates.

How about we drop the lock, but use an explicit smp_mb_release()?  We may
also want to use smb_load_acquire() in fill_source_migration_info() to use
on reading &s->state (all will need some comment).  To me, making sure the
total mbps is valid seems more important; while the other races are less
harmful, and may not be a major concern?

PS: logically I think smp_mb_release() is not needed either, because state
is updated using qatomic_cmpxchg(), which implies a full __ATOMIC_SEQ_CST.

> 
> > 
> > I'm not entirely sure, what do you think?
> > 
> > > See migration_update_counters() where it also updates mbps without holding
> > > a lock.
> > 
> > Here it might be less important since it's the middle of the migration,
> > there will proabably be more than one query-migrate which would see the
> > correct values.
> 
> Yep.  I queued this.
> 
> Thanks,
> 
> -- 
> Peter Xu
Fabiano Rosas Feb. 22, 2024, 1:49 p.m. UTC | #5
Peter Xu <peterx@redhat.com> writes:

> On Thu, Feb 22, 2024 at 05:40:41PM +0800, Peter Xu wrote:
>> On Wed, Feb 21, 2024 at 09:56:36AM -0300, Fabiano Rosas wrote:
>> > Peter Xu <peterx@redhat.com> writes:
>> > 
>> > > On Mon, Feb 19, 2024 at 04:44:57PM -0300, Fabiano Rosas wrote:
>> > >> The QMP command query_migrate might see incorrect throughput numbers
>> > >> if it runs after we've set the migration completion status but before
>> > >> migration_calculate_complete() has updated s->total_time and s->mbps.
>> > >> 
>> > >> The migration status would show COMPLETED, but the throughput value
>> > >> would be the one from the last iteration and not the one from the
>> > >> whole migration. This will usually be a larger value due to the time
>> > >> period being smaller (one iteration).
>> > >> 
>> > >> Move migration_calculate_complete() earlier so that the status
>> > >> MIGRATION_STATUS_COMPLETED is only emitted after the final counters
>> > >> update.
>> > >> 
>> > >> Signed-off-by: Fabiano Rosas <farosas@suse.de>
>> > >> ---
>> > >> CI run: https://gitlab.com/farosas/qemu/-/pipelines/1182405776
>> > >> ---
>> > >>  migration/migration.c | 10 ++++++----
>> > >>  1 file changed, 6 insertions(+), 4 deletions(-)
>> > >> 
>> > >> diff --git a/migration/migration.c b/migration/migration.c
>> > >> index ab21de2cad..7486d59da0 100644
>> > >> --- a/migration/migration.c
>> > >> +++ b/migration/migration.c
>> > >> @@ -102,6 +102,7 @@ static int migration_maybe_pause(MigrationState *s,
>> > >>                                   int new_state);
>> > >>  static void migrate_fd_cancel(MigrationState *s);
>> > >>  static bool close_return_path_on_source(MigrationState *s);
>> > >> +static void migration_calculate_complete(MigrationState *s);
>> > >>  
>> > >>  static void migration_downtime_start(MigrationState *s)
>> > >>  {
>> > >> @@ -2746,6 +2747,7 @@ static void migration_completion(MigrationState *s)
>> > >>          migrate_set_state(&s->state, MIGRATION_STATUS_ACTIVE,
>> > >>                            MIGRATION_STATUS_COLO);
>> > >>      } else {
>> > >> +        migration_calculate_complete(s);
>> > >>          migrate_set_state(&s->state, current_active_state,
>> > >>                            MIGRATION_STATUS_COMPLETED);
>> > >>      }
>> > >> @@ -2784,6 +2786,7 @@ static void bg_migration_completion(MigrationState *s)
>> > >>          goto fail;
>> > >>      }
>> > >>  
>> > >> +    migration_calculate_complete(s);
>> > >>      migrate_set_state(&s->state, current_active_state,
>> > >>                        MIGRATION_STATUS_COMPLETED);
>> > >>      return;
>> > >> @@ -2993,12 +2996,15 @@ static void migration_calculate_complete(MigrationState *s)
>> > >>      int64_t end_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
>> > >>      int64_t transfer_time;
>> > >>  
>> > >> +    /* QMP could read from these concurrently */
>> > >> +    bql_lock();
>> > >>      migration_downtime_end(s);
>> > >>      s->total_time = end_time - s->start_time;
>> > >>      transfer_time = s->total_time - s->setup_time;
>> > >>      if (transfer_time) {
>> > >>          s->mbps = ((double) bytes * 8.0) / transfer_time / 1000;
>> > >>      }
>> > >> +    bql_unlock();
>> > >
>> > > The lock is not needed?
>> > >
>> > > AFAIU that was needed because of things like runstate_set() rather than
>> > > setting of these fields.
>> > >
>> > 
>> > Don't we need to keep the total_time and mbps update atomic? Otherwise
>> > query-migrate might see (say) total_time=0 and mbps=<correct value> or
>> > total_time=<correct value> and mbps=<previous value>.
>> 
>> I thought it wasn't a major concern, but what you said makes sense; taking
>> it one more time doesn't really hurt after all to provide such benefit.
>> 
>> > 
>> > Also, what orders s->mbps update before the s->state update? I'd say we
>> > should probably hold the lock around the whole total_time,mbps,state
>> > update.
>> 
>> IMHO that's fine; mutex unlock implies a RELEASE.  See atomic.rst:
>> 
>> - ``pthread_mutex_lock`` has acquire semantics, ``pthread_mutex_unlock`` has
>>   release semantics and synchronizes with a ``pthread_mutex_lock`` for the
>>   same mutex.
>
> Hmm perhaps I wrote too soon.. it should only guarantee the ordering of the
> update on the lock variable itself v.s. any previous R&Ws, nothing else.
> Only if the other side uses bql_lock() will it guarantee proper ordering.
>
> Put them in bql should work, but I hesitate such use to start using bql
> to protect state updates.

Well, on the other hand that's a major use-case of the BQL: protecting
state that's used by QMP.

>
> How about we drop the lock, but use an explicit smp_mb_release()?  We may
> also want to use smb_load_acquire() in fill_source_migration_info() to use
> on reading &s->state (all will need some comment).  To me, making sure the
> total mbps is valid seems more important; while the other races are less
> harmful, and may not be a major concern?

That more closely reflects the problem we're trying to solve, which is
just an ordering one. However, the QMP code already holds the BQL, we
could just take benefit of that instead of adding more complex
synchronization primitives.

May I suggest we keep it simple and move that last migrate_set_state
into the BQL as well?

>
> PS: logically I think smp_mb_release() is not needed either, because state
> is updated using qatomic_cmpxchg(), which implies a full __ATOMIC_SEQ_CST.
>
>> 
>> > 
>> > I'm not entirely sure, what do you think?
>> > 
>> > > See migration_update_counters() where it also updates mbps without holding
>> > > a lock.
>> > 
>> > Here it might be less important since it's the middle of the migration,
>> > there will proabably be more than one query-migrate which would see the
>> > correct values.
>> 
>> Yep.  I queued this.
>> 
>> Thanks,
>> 
>> -- 
>> Peter Xu
Peter Xu Feb. 23, 2024, 12:08 a.m. UTC | #6
On Thu, Feb 22, 2024 at 10:49:12AM -0300, Fabiano Rosas wrote:
> Peter Xu <peterx@redhat.com> writes:
> 
> > On Thu, Feb 22, 2024 at 05:40:41PM +0800, Peter Xu wrote:
> >> On Wed, Feb 21, 2024 at 09:56:36AM -0300, Fabiano Rosas wrote:
> >> > Peter Xu <peterx@redhat.com> writes:
> >> > 
> >> > > On Mon, Feb 19, 2024 at 04:44:57PM -0300, Fabiano Rosas wrote:
> >> > >> The QMP command query_migrate might see incorrect throughput numbers
> >> > >> if it runs after we've set the migration completion status but before
> >> > >> migration_calculate_complete() has updated s->total_time and s->mbps.
> >> > >> 
> >> > >> The migration status would show COMPLETED, but the throughput value
> >> > >> would be the one from the last iteration and not the one from the
> >> > >> whole migration. This will usually be a larger value due to the time
> >> > >> period being smaller (one iteration).
> >> > >> 
> >> > >> Move migration_calculate_complete() earlier so that the status
> >> > >> MIGRATION_STATUS_COMPLETED is only emitted after the final counters
> >> > >> update.
> >> > >> 
> >> > >> Signed-off-by: Fabiano Rosas <farosas@suse.de>
> >> > >> ---
> >> > >> CI run: https://gitlab.com/farosas/qemu/-/pipelines/1182405776
> >> > >> ---
> >> > >>  migration/migration.c | 10 ++++++----
> >> > >>  1 file changed, 6 insertions(+), 4 deletions(-)
> >> > >> 
> >> > >> diff --git a/migration/migration.c b/migration/migration.c
> >> > >> index ab21de2cad..7486d59da0 100644
> >> > >> --- a/migration/migration.c
> >> > >> +++ b/migration/migration.c
> >> > >> @@ -102,6 +102,7 @@ static int migration_maybe_pause(MigrationState *s,
> >> > >>                                   int new_state);
> >> > >>  static void migrate_fd_cancel(MigrationState *s);
> >> > >>  static bool close_return_path_on_source(MigrationState *s);
> >> > >> +static void migration_calculate_complete(MigrationState *s);
> >> > >>  
> >> > >>  static void migration_downtime_start(MigrationState *s)
> >> > >>  {
> >> > >> @@ -2746,6 +2747,7 @@ static void migration_completion(MigrationState *s)
> >> > >>          migrate_set_state(&s->state, MIGRATION_STATUS_ACTIVE,
> >> > >>                            MIGRATION_STATUS_COLO);
> >> > >>      } else {
> >> > >> +        migration_calculate_complete(s);
> >> > >>          migrate_set_state(&s->state, current_active_state,
> >> > >>                            MIGRATION_STATUS_COMPLETED);
> >> > >>      }
> >> > >> @@ -2784,6 +2786,7 @@ static void bg_migration_completion(MigrationState *s)
> >> > >>          goto fail;
> >> > >>      }
> >> > >>  
> >> > >> +    migration_calculate_complete(s);
> >> > >>      migrate_set_state(&s->state, current_active_state,
> >> > >>                        MIGRATION_STATUS_COMPLETED);
> >> > >>      return;
> >> > >> @@ -2993,12 +2996,15 @@ static void migration_calculate_complete(MigrationState *s)
> >> > >>      int64_t end_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
> >> > >>      int64_t transfer_time;
> >> > >>  
> >> > >> +    /* QMP could read from these concurrently */
> >> > >> +    bql_lock();
> >> > >>      migration_downtime_end(s);
> >> > >>      s->total_time = end_time - s->start_time;
> >> > >>      transfer_time = s->total_time - s->setup_time;
> >> > >>      if (transfer_time) {
> >> > >>          s->mbps = ((double) bytes * 8.0) / transfer_time / 1000;
> >> > >>      }
> >> > >> +    bql_unlock();
> >> > >
> >> > > The lock is not needed?
> >> > >
> >> > > AFAIU that was needed because of things like runstate_set() rather than
> >> > > setting of these fields.
> >> > >
> >> > 
> >> > Don't we need to keep the total_time and mbps update atomic? Otherwise
> >> > query-migrate might see (say) total_time=0 and mbps=<correct value> or
> >> > total_time=<correct value> and mbps=<previous value>.
> >> 
> >> I thought it wasn't a major concern, but what you said makes sense; taking
> >> it one more time doesn't really hurt after all to provide such benefit.
> >> 
> >> > 
> >> > Also, what orders s->mbps update before the s->state update? I'd say we
> >> > should probably hold the lock around the whole total_time,mbps,state
> >> > update.
> >> 
> >> IMHO that's fine; mutex unlock implies a RELEASE.  See atomic.rst:
> >> 
> >> - ``pthread_mutex_lock`` has acquire semantics, ``pthread_mutex_unlock`` has
> >>   release semantics and synchronizes with a ``pthread_mutex_lock`` for the
> >>   same mutex.
> >
> > Hmm perhaps I wrote too soon.. it should only guarantee the ordering of the
> > update on the lock variable itself v.s. any previous R&Ws, nothing else.
> > Only if the other side uses bql_lock() will it guarantee proper ordering.
> >
> > Put them in bql should work, but I hesitate such use to start using bql
> > to protect state updates.
> 
> Well, on the other hand that's a major use-case of the BQL: protecting
> state that's used by QMP.
> 
> >
> > How about we drop the lock, but use an explicit smp_mb_release()?  We may
> > also want to use smb_load_acquire() in fill_source_migration_info() to use
> > on reading &s->state (all will need some comment).  To me, making sure the
> > total mbps is valid seems more important; while the other races are less
> > harmful, and may not be a major concern?
> 
> That more closely reflects the problem we're trying to solve, which is
> just an ordering one. However, the QMP code already holds the BQL, we
> could just take benefit of that instead of adding more complex
> synchronization primitives.
> 
> May I suggest we keep it simple and move that last migrate_set_state
> into the BQL as well?

It's okay to me, but then let's also extend the comment a little bit on the
two exact requirements we're persuing (atomicity of updating fields,
ordering of state update v.s. mbps)?

We can also rename migration_calculate_complete() to something like
migration_completion_finalize()?  Then move the state update into it.
Fabiano Rosas Feb. 23, 2024, 12:39 p.m. UTC | #7
Peter Xu <peterx@redhat.com> writes:

> On Thu, Feb 22, 2024 at 10:49:12AM -0300, Fabiano Rosas wrote:
>> Peter Xu <peterx@redhat.com> writes:
>> 
>> > On Thu, Feb 22, 2024 at 05:40:41PM +0800, Peter Xu wrote:
>> >> On Wed, Feb 21, 2024 at 09:56:36AM -0300, Fabiano Rosas wrote:
>> >> > Peter Xu <peterx@redhat.com> writes:
>> >> > 
>> >> > > On Mon, Feb 19, 2024 at 04:44:57PM -0300, Fabiano Rosas wrote:
>> >> > >> The QMP command query_migrate might see incorrect throughput numbers
>> >> > >> if it runs after we've set the migration completion status but before
>> >> > >> migration_calculate_complete() has updated s->total_time and s->mbps.
>> >> > >> 
>> >> > >> The migration status would show COMPLETED, but the throughput value
>> >> > >> would be the one from the last iteration and not the one from the
>> >> > >> whole migration. This will usually be a larger value due to the time
>> >> > >> period being smaller (one iteration).
>> >> > >> 
>> >> > >> Move migration_calculate_complete() earlier so that the status
>> >> > >> MIGRATION_STATUS_COMPLETED is only emitted after the final counters
>> >> > >> update.
>> >> > >> 
>> >> > >> Signed-off-by: Fabiano Rosas <farosas@suse.de>
>> >> > >> ---
>> >> > >> CI run: https://gitlab.com/farosas/qemu/-/pipelines/1182405776
>> >> > >> ---
>> >> > >>  migration/migration.c | 10 ++++++----
>> >> > >>  1 file changed, 6 insertions(+), 4 deletions(-)
>> >> > >> 
>> >> > >> diff --git a/migration/migration.c b/migration/migration.c
>> >> > >> index ab21de2cad..7486d59da0 100644
>> >> > >> --- a/migration/migration.c
>> >> > >> +++ b/migration/migration.c
>> >> > >> @@ -102,6 +102,7 @@ static int migration_maybe_pause(MigrationState *s,
>> >> > >>                                   int new_state);
>> >> > >>  static void migrate_fd_cancel(MigrationState *s);
>> >> > >>  static bool close_return_path_on_source(MigrationState *s);
>> >> > >> +static void migration_calculate_complete(MigrationState *s);
>> >> > >>  
>> >> > >>  static void migration_downtime_start(MigrationState *s)
>> >> > >>  {
>> >> > >> @@ -2746,6 +2747,7 @@ static void migration_completion(MigrationState *s)
>> >> > >>          migrate_set_state(&s->state, MIGRATION_STATUS_ACTIVE,
>> >> > >>                            MIGRATION_STATUS_COLO);
>> >> > >>      } else {
>> >> > >> +        migration_calculate_complete(s);
>> >> > >>          migrate_set_state(&s->state, current_active_state,
>> >> > >>                            MIGRATION_STATUS_COMPLETED);
>> >> > >>      }
>> >> > >> @@ -2784,6 +2786,7 @@ static void bg_migration_completion(MigrationState *s)
>> >> > >>          goto fail;
>> >> > >>      }
>> >> > >>  
>> >> > >> +    migration_calculate_complete(s);
>> >> > >>      migrate_set_state(&s->state, current_active_state,
>> >> > >>                        MIGRATION_STATUS_COMPLETED);
>> >> > >>      return;
>> >> > >> @@ -2993,12 +2996,15 @@ static void migration_calculate_complete(MigrationState *s)
>> >> > >>      int64_t end_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
>> >> > >>      int64_t transfer_time;
>> >> > >>  
>> >> > >> +    /* QMP could read from these concurrently */
>> >> > >> +    bql_lock();
>> >> > >>      migration_downtime_end(s);
>> >> > >>      s->total_time = end_time - s->start_time;
>> >> > >>      transfer_time = s->total_time - s->setup_time;
>> >> > >>      if (transfer_time) {
>> >> > >>          s->mbps = ((double) bytes * 8.0) / transfer_time / 1000;
>> >> > >>      }
>> >> > >> +    bql_unlock();
>> >> > >
>> >> > > The lock is not needed?
>> >> > >
>> >> > > AFAIU that was needed because of things like runstate_set() rather than
>> >> > > setting of these fields.
>> >> > >
>> >> > 
>> >> > Don't we need to keep the total_time and mbps update atomic? Otherwise
>> >> > query-migrate might see (say) total_time=0 and mbps=<correct value> or
>> >> > total_time=<correct value> and mbps=<previous value>.
>> >> 
>> >> I thought it wasn't a major concern, but what you said makes sense; taking
>> >> it one more time doesn't really hurt after all to provide such benefit.
>> >> 
>> >> > 
>> >> > Also, what orders s->mbps update before the s->state update? I'd say we
>> >> > should probably hold the lock around the whole total_time,mbps,state
>> >> > update.
>> >> 
>> >> IMHO that's fine; mutex unlock implies a RELEASE.  See atomic.rst:
>> >> 
>> >> - ``pthread_mutex_lock`` has acquire semantics, ``pthread_mutex_unlock`` has
>> >>   release semantics and synchronizes with a ``pthread_mutex_lock`` for the
>> >>   same mutex.
>> >
>> > Hmm perhaps I wrote too soon.. it should only guarantee the ordering of the
>> > update on the lock variable itself v.s. any previous R&Ws, nothing else.
>> > Only if the other side uses bql_lock() will it guarantee proper ordering.
>> >
>> > Put them in bql should work, but I hesitate such use to start using bql
>> > to protect state updates.
>> 
>> Well, on the other hand that's a major use-case of the BQL: protecting
>> state that's used by QMP.
>> 
>> >
>> > How about we drop the lock, but use an explicit smp_mb_release()?  We may
>> > also want to use smb_load_acquire() in fill_source_migration_info() to use
>> > on reading &s->state (all will need some comment).  To me, making sure the
>> > total mbps is valid seems more important; while the other races are less
>> > harmful, and may not be a major concern?
>> 
>> That more closely reflects the problem we're trying to solve, which is
>> just an ordering one. However, the QMP code already holds the BQL, we
>> could just take benefit of that instead of adding more complex
>> synchronization primitives.
>> 
>> May I suggest we keep it simple and move that last migrate_set_state
>> into the BQL as well?
>
> It's okay to me, but then let's also extend the comment a little bit on the
> two exact requirements we're persuing (atomicity of updating fields,
> ordering of state update v.s. mbps)?

Ok, I'll respin with these changes.

>
> We can also rename migration_calculate_complete() to something like
> migration_completion_finalize()?  Then move the state update into it.

I've been planning to merge migration_completion() and
migration_iteration_finish(). It's too unintuitive to do the completion
routine deep inside migration_iteration_run(). AFAICS those are all tail
calls, so we could bring migration_completion() up into the
migration_thread top level.

So if you'll allow me I think I'll refrain from moving the state into
migration_calculate_complete() for now.
Peter Xu Feb. 26, 2024, 1:44 a.m. UTC | #8
On Fri, Feb 23, 2024 at 09:39:12AM -0300, Fabiano Rosas wrote:
> I've been planning to merge migration_completion() and
> migration_iteration_finish(). It's too unintuitive to do the completion
> routine deep inside migration_iteration_run(). AFAICS those are all tail
> calls, so we could bring migration_completion() up into the
> migration_thread top level.
> 
> So if you'll allow me I think I'll refrain from moving the state into
> migration_calculate_complete() for now.

Yep go ahead, I'll read the patches.
diff mbox series

Patch

diff --git a/migration/migration.c b/migration/migration.c
index ab21de2cad..7486d59da0 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -102,6 +102,7 @@  static int migration_maybe_pause(MigrationState *s,
                                  int new_state);
 static void migrate_fd_cancel(MigrationState *s);
 static bool close_return_path_on_source(MigrationState *s);
+static void migration_calculate_complete(MigrationState *s);
 
 static void migration_downtime_start(MigrationState *s)
 {
@@ -2746,6 +2747,7 @@  static void migration_completion(MigrationState *s)
         migrate_set_state(&s->state, MIGRATION_STATUS_ACTIVE,
                           MIGRATION_STATUS_COLO);
     } else {
+        migration_calculate_complete(s);
         migrate_set_state(&s->state, current_active_state,
                           MIGRATION_STATUS_COMPLETED);
     }
@@ -2784,6 +2786,7 @@  static void bg_migration_completion(MigrationState *s)
         goto fail;
     }
 
+    migration_calculate_complete(s);
     migrate_set_state(&s->state, current_active_state,
                       MIGRATION_STATUS_COMPLETED);
     return;
@@ -2993,12 +2996,15 @@  static void migration_calculate_complete(MigrationState *s)
     int64_t end_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
     int64_t transfer_time;
 
+    /* QMP could read from these concurrently */
+    bql_lock();
     migration_downtime_end(s);
     s->total_time = end_time - s->start_time;
     transfer_time = s->total_time - s->setup_time;
     if (transfer_time) {
         s->mbps = ((double) bytes * 8.0) / transfer_time / 1000;
     }
+    bql_unlock();
 }
 
 static void update_iteration_initial_status(MigrationState *s)
@@ -3145,7 +3151,6 @@  static void migration_iteration_finish(MigrationState *s)
     bql_lock();
     switch (s->state) {
     case MIGRATION_STATUS_COMPLETED:
-        migration_calculate_complete(s);
         runstate_set(RUN_STATE_POSTMIGRATE);
         break;
     case MIGRATION_STATUS_COLO:
@@ -3189,9 +3194,6 @@  static void bg_migration_iteration_finish(MigrationState *s)
     bql_lock();
     switch (s->state) {
     case MIGRATION_STATUS_COMPLETED:
-        migration_calculate_complete(s);
-        break;
-
     case MIGRATION_STATUS_ACTIVE:
     case MIGRATION_STATUS_FAILED:
     case MIGRATION_STATUS_CANCELLED: