diff mbox

[v4,4/4] migration: use the free page hint feature from balloon

Message ID 1520426065-40265-5-git-send-email-wei.w.wang@intel.com (mailing list archive)
State New, archived
Headers show

Commit Message

Wei Wang March 7, 2018, 12:34 p.m. UTC
Start the free page optimization after the migration bitmap is
synchronized. This can't be used in the stop&copy phase since the guest
is paused. Make sure the guest reporting has stopped before
synchronizing the migration dirty bitmap. Currently, the optimization is
added to precopy only.

Signed-off-by: Wei Wang <wei.w.wang@intel.com>
CC: Dr. David Alan Gilbert <dgilbert@redhat.com>
CC: Juan Quintela <quintela@redhat.com>
CC: Michael S. Tsirkin <mst@redhat.com>
---
 migration/ram.c | 19 ++++++++++++++++++-
 1 file changed, 18 insertions(+), 1 deletion(-)

Comments

Michael S. Tsirkin March 13, 2018, 4:35 p.m. UTC | #1
On Wed, Mar 07, 2018 at 08:34:25PM +0800, Wei Wang wrote:
> Start the free page optimization after the migration bitmap is
> synchronized. This can't be used in the stop&copy phase since the guest
> is paused. Make sure the guest reporting has stopped before
> synchronizing the migration dirty bitmap. Currently, the optimization is
> added to precopy only.
> 
> Signed-off-by: Wei Wang <wei.w.wang@intel.com>
> CC: Dr. David Alan Gilbert <dgilbert@redhat.com>
> CC: Juan Quintela <quintela@redhat.com>
> CC: Michael S. Tsirkin <mst@redhat.com>
> ---
>  migration/ram.c | 19 ++++++++++++++++++-
>  1 file changed, 18 insertions(+), 1 deletion(-)
> 
> diff --git a/migration/ram.c b/migration/ram.c
> index e172798..7b4c9b1 100644
> --- a/migration/ram.c
> +++ b/migration/ram.c
> @@ -51,6 +51,8 @@
>  #include "qemu/rcu_queue.h"
>  #include "migration/colo.h"
>  #include "migration/block.h"
> +#include "sysemu/balloon.h"
> +#include "sysemu/sysemu.h"
>  
>  /***********************************************************/
>  /* ram save/restore */
> @@ -208,6 +210,8 @@ struct RAMState {
>      uint32_t last_version;
>      /* We are in the first round */
>      bool ram_bulk_stage;
> +    /* The free pages optimization feature is supported */
> +    bool free_page_support;
>      /* How many times we have dirty too many pages */
>      int dirty_rate_high_cnt;
>      /* these variables are used for bitmap sync */
> @@ -775,7 +779,7 @@ unsigned long migration_bitmap_find_dirty(RAMState *rs, RAMBlock *rb,
>      unsigned long *bitmap = rb->bmap;
>      unsigned long next;
>  
> -    if (rs->ram_bulk_stage && start > 0) {
> +    if (rs->ram_bulk_stage && start > 0 && !rs->free_page_support) {
>          next = start + 1;
>      } else {
>          next = find_next_bit(bitmap, size, start);
> @@ -833,6 +837,10 @@ static void migration_bitmap_sync(RAMState *rs)
>      int64_t end_time;
>      uint64_t bytes_xfer_now;
>  
> +    if (rs->free_page_support) {
> +        balloon_free_page_stop();
> +    }
> +
>      ram_counters.dirty_sync_count++;
>  
>      if (!rs->time_last_bitmap_sync) {
> @@ -899,6 +907,10 @@ static void migration_bitmap_sync(RAMState *rs)
>      if (migrate_use_events()) {
>          qapi_event_send_migration_pass(ram_counters.dirty_sync_count, NULL);
>      }
> +
> +    if (rs->free_page_support && runstate_is_running()) {
> +        balloon_free_page_start();
> +    }
>  }

I think some of these conditions should go into
balloon_free_page_start/stop.

Checking runstate is generally problematic unless you
also handle run state change notifiers as it can
be manipulated from QMP.

>  
>  /**
> @@ -1656,6 +1668,8 @@ static void ram_state_reset(RAMState *rs)
>      rs->last_page = 0;
>      rs->last_version = ram_list.version;
>      rs->ram_bulk_stage = true;
> +    rs->free_page_support = balloon_free_page_support() &
> +                            !migration_in_postcopy();

Probably &&?

>  }
>  
>  #define MAX_WAIT 50 /* ms, half buffered_file limit */
> @@ -2330,6 +2344,9 @@ static int ram_save_iterate(QEMUFile *f, void *opaque)
>  
>      ret = qemu_file_get_error(f);
>      if (ret < 0) {
> +        if (rs->free_page_support) {
> +            balloon_free_page_stop();
> +        }
>          return ret;
>      }
>  
> -- 
> 1.8.3.1
Wei Wang March 14, 2018, 2:41 a.m. UTC | #2
On 03/14/2018 12:35 AM, Michael S. Tsirkin wrote:
> On Wed, Mar 07, 2018 at 08:34:25PM +0800, Wei Wang wrote:
>> Start the free page optimization after the migration bitmap is
>> synchronized. This can't be used in the stop&copy phase since the guest
>> is paused. Make sure the guest reporting has stopped before
>> synchronizing the migration dirty bitmap. Currently, the optimization is
>> added to precopy only.
>>
>> Signed-off-by: Wei Wang <wei.w.wang@intel.com>
>> CC: Dr. David Alan Gilbert <dgilbert@redhat.com>
>> CC: Juan Quintela <quintela@redhat.com>
>> CC: Michael S. Tsirkin <mst@redhat.com>
>> ---
>>   migration/ram.c | 19 ++++++++++++++++++-
>>   1 file changed, 18 insertions(+), 1 deletion(-)
>>
>> diff --git a/migration/ram.c b/migration/ram.c
>> index e172798..7b4c9b1 100644
>> --- a/migration/ram.c
>> +++ b/migration/ram.c
>> @@ -51,6 +51,8 @@
>>   #include "qemu/rcu_queue.h"
>>   #include "migration/colo.h"
>>   #include "migration/block.h"
>> +#include "sysemu/balloon.h"
>> +#include "sysemu/sysemu.h"
>>   
>>   /***********************************************************/
>>   /* ram save/restore */
>> @@ -208,6 +210,8 @@ struct RAMState {
>>       uint32_t last_version;
>>       /* We are in the first round */
>>       bool ram_bulk_stage;
>> +    /* The free pages optimization feature is supported */
>> +    bool free_page_support;
>>       /* How many times we have dirty too many pages */
>>       int dirty_rate_high_cnt;
>>       /* these variables are used for bitmap sync */
>> @@ -775,7 +779,7 @@ unsigned long migration_bitmap_find_dirty(RAMState *rs, RAMBlock *rb,
>>       unsigned long *bitmap = rb->bmap;
>>       unsigned long next;
>>   
>> -    if (rs->ram_bulk_stage && start > 0) {
>> +    if (rs->ram_bulk_stage && start > 0 && !rs->free_page_support) {
>>           next = start + 1;
>>       } else {
>>           next = find_next_bit(bitmap, size, start);
>> @@ -833,6 +837,10 @@ static void migration_bitmap_sync(RAMState *rs)
>>       int64_t end_time;
>>       uint64_t bytes_xfer_now;
>>   
>> +    if (rs->free_page_support) {
>> +        balloon_free_page_stop();
>> +    }
>> +
>>       ram_counters.dirty_sync_count++;
>>   
>>       if (!rs->time_last_bitmap_sync) {
>> @@ -899,6 +907,10 @@ static void migration_bitmap_sync(RAMState *rs)
>>       if (migrate_use_events()) {
>>           qapi_event_send_migration_pass(ram_counters.dirty_sync_count, NULL);
>>       }
>> +
>> +    if (rs->free_page_support && runstate_is_running()) {
>> +        balloon_free_page_start();
>> +    }
>>   }
> I think some of these conditions should go into
> balloon_free_page_start/stop.
>
> Checking runstate is generally problematic unless you
> also handle run state change notifiers as it can
> be manipulated from QMP.

How about moving the check of runstate to 
virtio_balloon_poll_free_page_hints:

while (dev->free_page_report_status < FREE_PAGE_REPORT_S_STOP && 
runstate_is_running()) {
...
}

In this case, I think we won't need a notifier - if the run state is 
changed by qmp, the optimization thread will just exit.


>>   
>>   /**
>> @@ -1656,6 +1668,8 @@ static void ram_state_reset(RAMState *rs)
>>       rs->last_page = 0;
>>       rs->last_version = ram_list.version;
>>       rs->ram_bulk_stage = true;
>> +    rs->free_page_support = balloon_free_page_support() &
>> +                            !migration_in_postcopy();
> Probably &&?
>

OK, will use &&. (Both work well here actually, since all of the values 
here are boolean)


Best,
Wei
Michael S. Tsirkin March 14, 2018, 2:51 a.m. UTC | #3
On Wed, Mar 14, 2018 at 10:41:36AM +0800, Wei Wang wrote:
> On 03/14/2018 12:35 AM, Michael S. Tsirkin wrote:
> > On Wed, Mar 07, 2018 at 08:34:25PM +0800, Wei Wang wrote:
> > > Start the free page optimization after the migration bitmap is
> > > synchronized. This can't be used in the stop&copy phase since the guest
> > > is paused. Make sure the guest reporting has stopped before
> > > synchronizing the migration dirty bitmap. Currently, the optimization is
> > > added to precopy only.
> > > 
> > > Signed-off-by: Wei Wang <wei.w.wang@intel.com>
> > > CC: Dr. David Alan Gilbert <dgilbert@redhat.com>
> > > CC: Juan Quintela <quintela@redhat.com>
> > > CC: Michael S. Tsirkin <mst@redhat.com>
> > > ---
> > >   migration/ram.c | 19 ++++++++++++++++++-
> > >   1 file changed, 18 insertions(+), 1 deletion(-)
> > > 
> > > diff --git a/migration/ram.c b/migration/ram.c
> > > index e172798..7b4c9b1 100644
> > > --- a/migration/ram.c
> > > +++ b/migration/ram.c
> > > @@ -51,6 +51,8 @@
> > >   #include "qemu/rcu_queue.h"
> > >   #include "migration/colo.h"
> > >   #include "migration/block.h"
> > > +#include "sysemu/balloon.h"
> > > +#include "sysemu/sysemu.h"
> > >   /***********************************************************/
> > >   /* ram save/restore */
> > > @@ -208,6 +210,8 @@ struct RAMState {
> > >       uint32_t last_version;
> > >       /* We are in the first round */
> > >       bool ram_bulk_stage;
> > > +    /* The free pages optimization feature is supported */
> > > +    bool free_page_support;
> > >       /* How many times we have dirty too many pages */
> > >       int dirty_rate_high_cnt;
> > >       /* these variables are used for bitmap sync */
> > > @@ -775,7 +779,7 @@ unsigned long migration_bitmap_find_dirty(RAMState *rs, RAMBlock *rb,
> > >       unsigned long *bitmap = rb->bmap;
> > >       unsigned long next;
> > > -    if (rs->ram_bulk_stage && start > 0) {
> > > +    if (rs->ram_bulk_stage && start > 0 && !rs->free_page_support) {
> > >           next = start + 1;
> > >       } else {
> > >           next = find_next_bit(bitmap, size, start);
> > > @@ -833,6 +837,10 @@ static void migration_bitmap_sync(RAMState *rs)
> > >       int64_t end_time;
> > >       uint64_t bytes_xfer_now;
> > > +    if (rs->free_page_support) {
> > > +        balloon_free_page_stop();
> > > +    }
> > > +
> > >       ram_counters.dirty_sync_count++;
> > >       if (!rs->time_last_bitmap_sync) {
> > > @@ -899,6 +907,10 @@ static void migration_bitmap_sync(RAMState *rs)
> > >       if (migrate_use_events()) {
> > >           qapi_event_send_migration_pass(ram_counters.dirty_sync_count, NULL);
> > >       }
> > > +
> > > +    if (rs->free_page_support && runstate_is_running()) {
> > > +        balloon_free_page_start();
> > > +    }
> > >   }
> > I think some of these conditions should go into
> > balloon_free_page_start/stop.
> > 
> > Checking runstate is generally problematic unless you
> > also handle run state change notifiers as it can
> > be manipulated from QMP.
> 
> How about moving the check of runstate to
> virtio_balloon_poll_free_page_hints:
> 
> while (dev->free_page_report_status < FREE_PAGE_REPORT_S_STOP &&
> runstate_is_running()) {
> ...
> }

Hard to tell on the outset. E.g. why is just stop affected?  Pls add
comments explaining what happens if VM is not running when start or stop
is called.


> In this case, I think we won't need a notifier - if the run state is changed
> by qmp, the optimization thread will just exit.

But you need to wake it up and notify the guest presumably?

> 
> > >   /**
> > > @@ -1656,6 +1668,8 @@ static void ram_state_reset(RAMState *rs)
> > >       rs->last_page = 0;
> > >       rs->last_version = ram_list.version;
> > >       rs->ram_bulk_stage = true;
> > > +    rs->free_page_support = balloon_free_page_support() &
> > > +                            !migration_in_postcopy();
> > Probably &&?
> > 
> 
> OK, will use &&. (Both work well here actually, since all of the values here
> are boolean)
> 
> 
> Best,
> Wei
Wei Wang March 14, 2018, 6:50 a.m. UTC | #4
On 03/14/2018 10:51 AM, Michael S. Tsirkin wrote:
> On Wed, Mar 14, 2018 at 10:41:36AM +0800, Wei Wang wrote:
>> On 03/14/2018 12:35 AM, Michael S. Tsirkin wrote:
>>> On Wed, Mar 07, 2018 at 08:34:25PM +0800, Wei Wang wrote:
>>>> Start the free page optimization after the migration bitmap is
>>>> synchronized. This can't be used in the stop&copy phase since the guest
>>>> is paused. Make sure the guest reporting has stopped before
>>>> synchronizing the migration dirty bitmap. Currently, the optimization is
>>>> added to precopy only.
>>>>
>>>> Signed-off-by: Wei Wang <wei.w.wang@intel.com>
>>>> CC: Dr. David Alan Gilbert <dgilbert@redhat.com>
>>>> CC: Juan Quintela <quintela@redhat.com>
>>>> CC: Michael S. Tsirkin <mst@redhat.com>
>>>> ---
>>>>    migration/ram.c | 19 ++++++++++++++++++-
>>>>    1 file changed, 18 insertions(+), 1 deletion(-)
>>>>
>>>> diff --git a/migration/ram.c b/migration/ram.c
>>>> index e172798..7b4c9b1 100644
>>>> --- a/migration/ram.c
>>>> +++ b/migration/ram.c
>>>> @@ -51,6 +51,8 @@
>>>>    #include "qemu/rcu_queue.h"
>>>>    #include "migration/colo.h"
>>>>    #include "migration/block.h"
>>>> +#include "sysemu/balloon.h"
>>>> +#include "sysemu/sysemu.h"
>>>>    /***********************************************************/
>>>>    /* ram save/restore */
>>>> @@ -208,6 +210,8 @@ struct RAMState {
>>>>        uint32_t last_version;
>>>>        /* We are in the first round */
>>>>        bool ram_bulk_stage;
>>>> +    /* The free pages optimization feature is supported */
>>>> +    bool free_page_support;
>>>>        /* How many times we have dirty too many pages */
>>>>        int dirty_rate_high_cnt;
>>>>        /* these variables are used for bitmap sync */
>>>> @@ -775,7 +779,7 @@ unsigned long migration_bitmap_find_dirty(RAMState *rs, RAMBlock *rb,
>>>>        unsigned long *bitmap = rb->bmap;
>>>>        unsigned long next;
>>>> -    if (rs->ram_bulk_stage && start > 0) {
>>>> +    if (rs->ram_bulk_stage && start > 0 && !rs->free_page_support) {
>>>>            next = start + 1;
>>>>        } else {
>>>>            next = find_next_bit(bitmap, size, start);
>>>> @@ -833,6 +837,10 @@ static void migration_bitmap_sync(RAMState *rs)
>>>>        int64_t end_time;
>>>>        uint64_t bytes_xfer_now;
>>>> +    if (rs->free_page_support) {
>>>> +        balloon_free_page_stop();
>>>> +    }
>>>> +
>>>>        ram_counters.dirty_sync_count++;
>>>>        if (!rs->time_last_bitmap_sync) {
>>>> @@ -899,6 +907,10 @@ static void migration_bitmap_sync(RAMState *rs)
>>>>        if (migrate_use_events()) {
>>>>            qapi_event_send_migration_pass(ram_counters.dirty_sync_count, NULL);
>>>>        }
>>>> +
>>>> +    if (rs->free_page_support && runstate_is_running()) {
>>>> +        balloon_free_page_start();
>>>> +    }
>>>>    }
>>> I think some of these conditions should go into
>>> balloon_free_page_start/stop.
>>>
>>> Checking runstate is generally problematic unless you
>>> also handle run state change notifiers as it can
>>> be manipulated from QMP.
>> How about moving the check of runstate to
>> virtio_balloon_poll_free_page_hints:
>>
>> while (dev->free_page_report_status < FREE_PAGE_REPORT_S_STOP &&
>> runstate_is_running()) {
>> ...
>> }
> Hard to tell on the outset. E.g. why is just stop affected?  Pls add
> comments explaining what happens if VM is not running when start or stop
> is called.
>
>
>> In this case, I think we won't need a notifier - if the run state is changed
>> by qmp, the optimization thread will just exit.
> But you need to wake it up and notify the guest presumably?
>


I think it's not necessary to wake it up, because when the VM is not 
running, there will be no hints reported to the vq, and the optimization 
thread exits. (there is no issue in that case)
Probably we can add a notifier which calls 
virtio_balloon_free_page_stop() when qmp wakes up the VM.

Best,
Wei
Michael S. Tsirkin March 14, 2018, 2:45 p.m. UTC | #5
On Wed, Mar 14, 2018 at 02:50:44PM +0800, Wei Wang wrote:
> On 03/14/2018 10:51 AM, Michael S. Tsirkin wrote:
> > On Wed, Mar 14, 2018 at 10:41:36AM +0800, Wei Wang wrote:
> > > On 03/14/2018 12:35 AM, Michael S. Tsirkin wrote:
> > > > On Wed, Mar 07, 2018 at 08:34:25PM +0800, Wei Wang wrote:
> > > > > Start the free page optimization after the migration bitmap is
> > > > > synchronized. This can't be used in the stop&copy phase since the guest
> > > > > is paused. Make sure the guest reporting has stopped before
> > > > > synchronizing the migration dirty bitmap. Currently, the optimization is
> > > > > added to precopy only.
> > > > > 
> > > > > Signed-off-by: Wei Wang <wei.w.wang@intel.com>
> > > > > CC: Dr. David Alan Gilbert <dgilbert@redhat.com>
> > > > > CC: Juan Quintela <quintela@redhat.com>
> > > > > CC: Michael S. Tsirkin <mst@redhat.com>
> > > > > ---
> > > > >    migration/ram.c | 19 ++++++++++++++++++-
> > > > >    1 file changed, 18 insertions(+), 1 deletion(-)
> > > > > 
> > > > > diff --git a/migration/ram.c b/migration/ram.c
> > > > > index e172798..7b4c9b1 100644
> > > > > --- a/migration/ram.c
> > > > > +++ b/migration/ram.c
> > > > > @@ -51,6 +51,8 @@
> > > > >    #include "qemu/rcu_queue.h"
> > > > >    #include "migration/colo.h"
> > > > >    #include "migration/block.h"
> > > > > +#include "sysemu/balloon.h"
> > > > > +#include "sysemu/sysemu.h"
> > > > >    /***********************************************************/
> > > > >    /* ram save/restore */
> > > > > @@ -208,6 +210,8 @@ struct RAMState {
> > > > >        uint32_t last_version;
> > > > >        /* We are in the first round */
> > > > >        bool ram_bulk_stage;
> > > > > +    /* The free pages optimization feature is supported */
> > > > > +    bool free_page_support;
> > > > >        /* How many times we have dirty too many pages */
> > > > >        int dirty_rate_high_cnt;
> > > > >        /* these variables are used for bitmap sync */
> > > > > @@ -775,7 +779,7 @@ unsigned long migration_bitmap_find_dirty(RAMState *rs, RAMBlock *rb,
> > > > >        unsigned long *bitmap = rb->bmap;
> > > > >        unsigned long next;
> > > > > -    if (rs->ram_bulk_stage && start > 0) {
> > > > > +    if (rs->ram_bulk_stage && start > 0 && !rs->free_page_support) {
> > > > >            next = start + 1;
> > > > >        } else {
> > > > >            next = find_next_bit(bitmap, size, start);
> > > > > @@ -833,6 +837,10 @@ static void migration_bitmap_sync(RAMState *rs)
> > > > >        int64_t end_time;
> > > > >        uint64_t bytes_xfer_now;
> > > > > +    if (rs->free_page_support) {
> > > > > +        balloon_free_page_stop();
> > > > > +    }
> > > > > +
> > > > >        ram_counters.dirty_sync_count++;
> > > > >        if (!rs->time_last_bitmap_sync) {
> > > > > @@ -899,6 +907,10 @@ static void migration_bitmap_sync(RAMState *rs)
> > > > >        if (migrate_use_events()) {
> > > > >            qapi_event_send_migration_pass(ram_counters.dirty_sync_count, NULL);
> > > > >        }
> > > > > +
> > > > > +    if (rs->free_page_support && runstate_is_running()) {
> > > > > +        balloon_free_page_start();
> > > > > +    }
> > > > >    }
> > > > I think some of these conditions should go into
> > > > balloon_free_page_start/stop.
> > > > 
> > > > Checking runstate is generally problematic unless you
> > > > also handle run state change notifiers as it can
> > > > be manipulated from QMP.
> > > How about moving the check of runstate to
> > > virtio_balloon_poll_free_page_hints:
> > > 
> > > while (dev->free_page_report_status < FREE_PAGE_REPORT_S_STOP &&
> > > runstate_is_running()) {
> > > ...
> > > }
> > Hard to tell on the outset. E.g. why is just stop affected?  Pls add
> > comments explaining what happens if VM is not running when start or stop
> > is called.
> > 
> > 
> > > In this case, I think we won't need a notifier - if the run state is changed
> > > by qmp, the optimization thread will just exit.
> > But you need to wake it up and notify the guest presumably?
> > 
> 
> 
> I think it's not necessary to wake it up, because when the VM is not
> running, there will be no hints reported to the vq, and the optimization
> thread exits. (there is no issue in that case)
> Probably we can add a notifier which calls virtio_balloon_free_page_stop()
> when qmp wakes up the VM.
> 
> Best,
> Wei

set_status callback is invoked so you can use that maybe.

Might be a good idea to handle a bunch of other corner
cases e.g. if guest driver is loaded when migration
is already in progress.
Dr. David Alan Gilbert March 14, 2018, 7:49 p.m. UTC | #6
* Wei Wang (wei.w.wang@intel.com) wrote:
> Start the free page optimization after the migration bitmap is
> synchronized. This can't be used in the stop&copy phase since the guest
> is paused. Make sure the guest reporting has stopped before
> synchronizing the migration dirty bitmap. Currently, the optimization is
> added to precopy only.
> 
> Signed-off-by: Wei Wang <wei.w.wang@intel.com>
> CC: Dr. David Alan Gilbert <dgilbert@redhat.com>
> CC: Juan Quintela <quintela@redhat.com>
> CC: Michael S. Tsirkin <mst@redhat.com>
> ---
>  migration/ram.c | 19 ++++++++++++++++++-
>  1 file changed, 18 insertions(+), 1 deletion(-)
> 
> diff --git a/migration/ram.c b/migration/ram.c
> index e172798..7b4c9b1 100644
> --- a/migration/ram.c
> +++ b/migration/ram.c
> @@ -51,6 +51,8 @@
>  #include "qemu/rcu_queue.h"
>  #include "migration/colo.h"
>  #include "migration/block.h"
> +#include "sysemu/balloon.h"
> +#include "sysemu/sysemu.h"
>  
>  /***********************************************************/
>  /* ram save/restore */
> @@ -208,6 +210,8 @@ struct RAMState {
>      uint32_t last_version;
>      /* We are in the first round */
>      bool ram_bulk_stage;
> +    /* The free pages optimization feature is supported */
> +    bool free_page_support;
>      /* How many times we have dirty too many pages */
>      int dirty_rate_high_cnt;
>      /* these variables are used for bitmap sync */
> @@ -775,7 +779,7 @@ unsigned long migration_bitmap_find_dirty(RAMState *rs, RAMBlock *rb,
>      unsigned long *bitmap = rb->bmap;
>      unsigned long next;
>  
> -    if (rs->ram_bulk_stage && start > 0) {
> +    if (rs->ram_bulk_stage && start > 0 && !rs->free_page_support) {
>          next = start + 1;

An easier thing is just to clear the ram_bulk_stage flag (and if you're
doing it in the middle of the migration you need to reset some of the
pointers; see postcopy_start for an example).

>      } else {
>          next = find_next_bit(bitmap, size, start);
> @@ -833,6 +837,10 @@ static void migration_bitmap_sync(RAMState *rs)
>      int64_t end_time;
>      uint64_t bytes_xfer_now;
>  
> +    if (rs->free_page_support) {
> +        balloon_free_page_stop();

Does balloon_free_page_stop cause it to immediately stop, or does it
just ask nicely?   Could a slow guest keep pumping advice to us even
when it was stopped?

> +    }
> +
>      ram_counters.dirty_sync_count++;
>  
>      if (!rs->time_last_bitmap_sync) {
> @@ -899,6 +907,10 @@ static void migration_bitmap_sync(RAMState *rs)
>      if (migrate_use_events()) {
>          qapi_event_send_migration_pass(ram_counters.dirty_sync_count, NULL);
>      }
> +
> +    if (rs->free_page_support && runstate_is_running()) {
> +        balloon_free_page_start();
> +    }
>  }
>  
>  /**
> @@ -1656,6 +1668,8 @@ static void ram_state_reset(RAMState *rs)
>      rs->last_page = 0;
>      rs->last_version = ram_list.version;
>      rs->ram_bulk_stage = true;
> +    rs->free_page_support = balloon_free_page_support() &
> +                            !migration_in_postcopy();

That's probably the wrong test for postcopy; I think it'll always
be false there.  Using migrate_postcopy_ram() tells you whether
postcopy-ram is enabled; although not necessarily in use at that
point.

Dave

>  }
>  
>  #define MAX_WAIT 50 /* ms, half buffered_file limit */
> @@ -2330,6 +2344,9 @@ static int ram_save_iterate(QEMUFile *f, void *opaque)
>  
>      ret = qemu_file_get_error(f);
>      if (ret < 0) {
> +        if (rs->free_page_support) {
> +            balloon_free_page_stop();
> +        }
>          return ret;
>      }
>  
> -- 
> 1.8.3.1
> 
--
Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK
Wei Wang March 16, 2018, 11:20 a.m. UTC | #7
On 03/15/2018 03:49 AM, Dr. David Alan Gilbert wrote:
> * Wei Wang (wei.w.wang@intel.com) wrote:
>> Start the free page optimization after the migration bitmap is
>> synchronized. This can't be used in the stop&copy phase since the guest
>> is paused. Make sure the guest reporting has stopped before
>> synchronizing the migration dirty bitmap. Currently, the optimization is
>> added to precopy only.
>>
>> Signed-off-by: Wei Wang <wei.w.wang@intel.com>
>> CC: Dr. David Alan Gilbert <dgilbert@redhat.com>
>> CC: Juan Quintela <quintela@redhat.com>
>> CC: Michael S. Tsirkin <mst@redhat.com>
>> ---
>>   migration/ram.c | 19 ++++++++++++++++++-
>>   1 file changed, 18 insertions(+), 1 deletion(-)
>>
>> diff --git a/migration/ram.c b/migration/ram.c
>> index e172798..7b4c9b1 100644
>> --- a/migration/ram.c
>> +++ b/migration/ram.c
>> @@ -51,6 +51,8 @@
>>   #include "qemu/rcu_queue.h"
>>   #include "migration/colo.h"
>>   #include "migration/block.h"
>> +#include "sysemu/balloon.h"
>> +#include "sysemu/sysemu.h"
>>   
>>   /***********************************************************/
>>   /* ram save/restore */
>> @@ -208,6 +210,8 @@ struct RAMState {
>>       uint32_t last_version;
>>       /* We are in the first round */
>>       bool ram_bulk_stage;
>> +    /* The free pages optimization feature is supported */
>> +    bool free_page_support;
>>       /* How many times we have dirty too many pages */
>>       int dirty_rate_high_cnt;
>>       /* these variables are used for bitmap sync */
>> @@ -775,7 +779,7 @@ unsigned long migration_bitmap_find_dirty(RAMState *rs, RAMBlock *rb,
>>       unsigned long *bitmap = rb->bmap;
>>       unsigned long next;
>>   
>> -    if (rs->ram_bulk_stage && start > 0) {
>> +    if (rs->ram_bulk_stage && start > 0 && !rs->free_page_support) {
>>           next = start + 1;
> An easier thing is just to clear the ram_bulk_stage flag (and if you're
> doing it in the middle of the migration you need to reset some of the
> pointers; see postcopy_start for an example).
>
>>       } else {
>>           next = find_next_bit(bitmap, size, start);
>> @@ -833,6 +837,10 @@ static void migration_bitmap_sync(RAMState *rs)
>>       int64_t end_time;
>>       uint64_t bytes_xfer_now;
>>   
>> +    if (rs->free_page_support) {
>> +        balloon_free_page_stop();
> Does balloon_free_page_stop cause it to immediately stop, or does it
> just ask nicely?   Could a slow guest keep pumping advice to us even
> when it was stopped?
>

Yes, balloon_free_page_stop will cause the optimization thread to exit 
immediately. It doesn't rely on anything from the guest.
The guest won't keep reporting, since before the optimization thread 
exits, it sends a stop sign to the guest to stop reporting (but not 
waiting for any ACKs as that's not needed actually).

I also applied other comments in the new version, please have check v5 
patches. Thanks.

Best,
Wei
diff mbox

Patch

diff --git a/migration/ram.c b/migration/ram.c
index e172798..7b4c9b1 100644
--- a/migration/ram.c
+++ b/migration/ram.c
@@ -51,6 +51,8 @@ 
 #include "qemu/rcu_queue.h"
 #include "migration/colo.h"
 #include "migration/block.h"
+#include "sysemu/balloon.h"
+#include "sysemu/sysemu.h"
 
 /***********************************************************/
 /* ram save/restore */
@@ -208,6 +210,8 @@  struct RAMState {
     uint32_t last_version;
     /* We are in the first round */
     bool ram_bulk_stage;
+    /* The free pages optimization feature is supported */
+    bool free_page_support;
     /* How many times we have dirty too many pages */
     int dirty_rate_high_cnt;
     /* these variables are used for bitmap sync */
@@ -775,7 +779,7 @@  unsigned long migration_bitmap_find_dirty(RAMState *rs, RAMBlock *rb,
     unsigned long *bitmap = rb->bmap;
     unsigned long next;
 
-    if (rs->ram_bulk_stage && start > 0) {
+    if (rs->ram_bulk_stage && start > 0 && !rs->free_page_support) {
         next = start + 1;
     } else {
         next = find_next_bit(bitmap, size, start);
@@ -833,6 +837,10 @@  static void migration_bitmap_sync(RAMState *rs)
     int64_t end_time;
     uint64_t bytes_xfer_now;
 
+    if (rs->free_page_support) {
+        balloon_free_page_stop();
+    }
+
     ram_counters.dirty_sync_count++;
 
     if (!rs->time_last_bitmap_sync) {
@@ -899,6 +907,10 @@  static void migration_bitmap_sync(RAMState *rs)
     if (migrate_use_events()) {
         qapi_event_send_migration_pass(ram_counters.dirty_sync_count, NULL);
     }
+
+    if (rs->free_page_support && runstate_is_running()) {
+        balloon_free_page_start();
+    }
 }
 
 /**
@@ -1656,6 +1668,8 @@  static void ram_state_reset(RAMState *rs)
     rs->last_page = 0;
     rs->last_version = ram_list.version;
     rs->ram_bulk_stage = true;
+    rs->free_page_support = balloon_free_page_support() &
+                            !migration_in_postcopy();
 }
 
 #define MAX_WAIT 50 /* ms, half buffered_file limit */
@@ -2330,6 +2344,9 @@  static int ram_save_iterate(QEMUFile *f, void *opaque)
 
     ret = qemu_file_get_error(f);
     if (ret < 0) {
+        if (rs->free_page_support) {
+            balloon_free_page_stop();
+        }
         return ret;
     }