diff mbox series

migration: Do not perform RAMBlock dirty sync during the first iteration

Message ID ad543bac0eb9e7113eaec266add58c19f9f6eda0.1730973055.git.yong.huang@smartx.com (mailing list archive)
State New
Headers show
Series migration: Do not perform RAMBlock dirty sync during the first iteration | expand

Commit Message

Yong Huang Nov. 7, 2024, 9:56 a.m. UTC
From: Hyman Huang <yong.huang@smartx.com>

The first iteration's RAMBlock dirty sync can be omitted because QEMU
always initializes the RAMBlock's bmap to all 1s by default.

Signed-off-by: Hyman Huang <yong.huang@smartx.com>
---
 migration/cpu-throttle.c |  2 +-
 migration/ram.c          | 19 ++++++++++++++++---
 2 files changed, 17 insertions(+), 4 deletions(-)

Comments

Peter Xu Nov. 7, 2024, 4:28 p.m. UTC | #1
On Thu, Nov 07, 2024 at 05:56:50PM +0800, yong.huang@smartx.com wrote:
> From: Hyman Huang <yong.huang@smartx.com>
> 
> The first iteration's RAMBlock dirty sync can be omitted because QEMU
> always initializes the RAMBlock's bmap to all 1s by default.
> 
> Signed-off-by: Hyman Huang <yong.huang@smartx.com>
> ---
>  migration/cpu-throttle.c |  2 +-
>  migration/ram.c          | 19 ++++++++++++++++---
>  2 files changed, 17 insertions(+), 4 deletions(-)
> 
> diff --git a/migration/cpu-throttle.c b/migration/cpu-throttle.c
> index 5179019e33..674dc2004e 100644
> --- a/migration/cpu-throttle.c
> +++ b/migration/cpu-throttle.c
> @@ -141,7 +141,7 @@ void cpu_throttle_dirty_sync_timer_tick(void *opaque)
>       * effect on guest performance, therefore omit it to avoid
>       * paying extra for the sync penalty.
>       */
> -    if (sync_cnt <= 1) {
> +    if (!sync_cnt) {
>          goto end;
>      }
>  
> diff --git a/migration/ram.c b/migration/ram.c
> index 05ff9eb328..a0123eb93e 100644
> --- a/migration/ram.c
> +++ b/migration/ram.c
> @@ -2718,7 +2718,7 @@ static void ram_list_init_bitmaps(void)
>  {
>      MigrationState *ms = migrate_get_current();
>      RAMBlock *block;
> -    unsigned long pages;
> +    unsigned long pages, clear_bmap_pages;
>      uint8_t shift;
>  
>      /* Skip setting bitmap if there is no RAM */
> @@ -2736,6 +2736,7 @@ static void ram_list_init_bitmaps(void)
>  
>          RAMBLOCK_FOREACH_NOT_IGNORED(block) {
>              pages = block->max_length >> TARGET_PAGE_BITS;
> +            clear_bmap_pages = clear_bmap_size(pages, shift);
>              /*
>               * The initial dirty bitmap for migration must be set with all
>               * ones to make sure we'll migrate every guest RAM page to
> @@ -2751,7 +2752,12 @@ static void ram_list_init_bitmaps(void)
>                  block->file_bmap = bitmap_new(pages);
>              }
>              block->clear_bmap_shift = shift;
> -            block->clear_bmap = bitmap_new(clear_bmap_size(pages, shift));
> +            block->clear_bmap = bitmap_new(clear_bmap_pages);
> +            /*
> +             * Set clear_bmap to 1 unconditionally, as we always set bmap
> +             * to all 1s by default.
> +             */
> +            bitmap_set(block->clear_bmap, 0, clear_bmap_pages);
>          }
>      }
>  }
> @@ -2771,6 +2777,7 @@ static void migration_bitmap_clear_discarded_pages(RAMState *rs)
>  
>  static bool ram_init_bitmaps(RAMState *rs, Error **errp)
>  {
> +    Error *local_err = NULL;
>      bool ret = true;
>  
>      qemu_mutex_lock_ramlist();
> @@ -2783,7 +2790,13 @@ static bool ram_init_bitmaps(RAMState *rs, Error **errp)
>              if (!ret) {
>                  goto out_unlock;
>              }
> -            migration_bitmap_sync_precopy(false);
> +            /*
> +             * Bypass the RAMBlock dirty sync and still publish the
> +             * notification.

Hmm.. Why should QEMU notify AFTER_BITMAP_SYNC if the sync didn't happen?

> +             */
> +            if (precopy_notify(PRECOPY_NOTIFY_AFTER_BITMAP_SYNC, &local_err)) {
> +                error_report_err(local_err);
> +            }
>          }
>      }
>  out_unlock:
> -- 
> 2.27.0
>
Yong Huang Nov. 8, 2024, 6:03 a.m. UTC | #2
On Fri, Nov 8, 2024 at 12:28 AM Peter Xu <peterx@redhat.com> wrote:

> On Thu, Nov 07, 2024 at 05:56:50PM +0800, yong.huang@smartx.com wrote:
> > From: Hyman Huang <yong.huang@smartx.com>
> >
> > The first iteration's RAMBlock dirty sync can be omitted because QEMU
> > always initializes the RAMBlock's bmap to all 1s by default.
> >
> > Signed-off-by: Hyman Huang <yong.huang@smartx.com>
> > ---
> >  migration/cpu-throttle.c |  2 +-
> >  migration/ram.c          | 19 ++++++++++++++++---
> >  2 files changed, 17 insertions(+), 4 deletions(-)
> >
> > diff --git a/migration/cpu-throttle.c b/migration/cpu-throttle.c
> > index 5179019e33..674dc2004e 100644
> > --- a/migration/cpu-throttle.c
> > +++ b/migration/cpu-throttle.c
> > @@ -141,7 +141,7 @@ void cpu_throttle_dirty_sync_timer_tick(void *opaque)
> >       * effect on guest performance, therefore omit it to avoid
> >       * paying extra for the sync penalty.
> >       */
> > -    if (sync_cnt <= 1) {
> > +    if (!sync_cnt) {
> >          goto end;
> >      }
> >
> > diff --git a/migration/ram.c b/migration/ram.c
> > index 05ff9eb328..a0123eb93e 100644
> > --- a/migration/ram.c
> > +++ b/migration/ram.c
> > @@ -2718,7 +2718,7 @@ static void ram_list_init_bitmaps(void)
> >  {
> >      MigrationState *ms = migrate_get_current();
> >      RAMBlock *block;
> > -    unsigned long pages;
> > +    unsigned long pages, clear_bmap_pages;
> >      uint8_t shift;
> >
> >      /* Skip setting bitmap if there is no RAM */
> > @@ -2736,6 +2736,7 @@ static void ram_list_init_bitmaps(void)
> >
> >          RAMBLOCK_FOREACH_NOT_IGNORED(block) {
> >              pages = block->max_length >> TARGET_PAGE_BITS;
> > +            clear_bmap_pages = clear_bmap_size(pages, shift);
> >              /*
> >               * The initial dirty bitmap for migration must be set with
> all
> >               * ones to make sure we'll migrate every guest RAM page to
> > @@ -2751,7 +2752,12 @@ static void ram_list_init_bitmaps(void)
> >                  block->file_bmap = bitmap_new(pages);
> >              }
> >              block->clear_bmap_shift = shift;
> > -            block->clear_bmap = bitmap_new(clear_bmap_size(pages,
> shift));
> > +            block->clear_bmap = bitmap_new(clear_bmap_pages);
> > +            /*
> > +             * Set clear_bmap to 1 unconditionally, as we always set
> bmap
> > +             * to all 1s by default.
> > +             */
> > +            bitmap_set(block->clear_bmap, 0, clear_bmap_pages);
> >          }
> >      }
> >  }
> > @@ -2771,6 +2777,7 @@ static void
> migration_bitmap_clear_discarded_pages(RAMState *rs)
> >
> >  static bool ram_init_bitmaps(RAMState *rs, Error **errp)
> >  {
> > +    Error *local_err = NULL;
> >      bool ret = true;
> >
> >      qemu_mutex_lock_ramlist();
> > @@ -2783,7 +2790,13 @@ static bool ram_init_bitmaps(RAMState *rs, Error
> **errp)
> >              if (!ret) {
> >                  goto out_unlock;
> >              }
> > -            migration_bitmap_sync_precopy(false);
> > +            /*
> > +             * Bypass the RAMBlock dirty sync and still publish the
> > +             * notification.
>
> Hmm.. Why should QEMU notify AFTER_BITMAP_SYNC if the sync didn't happen?
>

Indeed, logically, we should not make the notification.

Some features, like VIRTIO_BALLOON_F_FREE_PAGE_HINT, use this notification
to indirectly detect whether the RAMBlock's bmap has been updated. This
allows the
free page optimization to begin clearing parts of the bitmap that contain
free pages.

virtio_balloon_free_page_hint_notify
......
    switch (pnd->reason) {
    case PRECOPY_NOTIFY_BEFORE_BITMAP_SYNC:
        virtio_balloon_free_page_stop(dev);
        break;
    case PRECOPY_NOTIFY_AFTER_BITMAP_SYNC:
        if (vdev->vm_running) {
            virtio_balloon_free_page_start(dev);
            break;
        }

The free page optimization may miss the first time window to execute if we
don't
send out a notification after starting the migration and initializing the
bmap with all 1s.

May we change the old behavior of optimization?


> > +             */
> > +            if (precopy_notify(PRECOPY_NOTIFY_AFTER_BITMAP_SYNC,
> &local_err)) {
> > +                error_report_err(local_err);
> > +            }
> >          }
> >      }
> >  out_unlock:
> > --
> > 2.27.0
> >
>
> --
> Peter Xu
>
>
Peter Xu Nov. 8, 2024, 1:50 p.m. UTC | #3
On Fri, Nov 08, 2024 at 02:03:47PM +0800, Yong Huang wrote:
> On Fri, Nov 8, 2024 at 12:28 AM Peter Xu <peterx@redhat.com> wrote:
> 
> > On Thu, Nov 07, 2024 at 05:56:50PM +0800, yong.huang@smartx.com wrote:
> > > From: Hyman Huang <yong.huang@smartx.com>
> > >
> > > The first iteration's RAMBlock dirty sync can be omitted because QEMU
> > > always initializes the RAMBlock's bmap to all 1s by default.
> > >
> > > Signed-off-by: Hyman Huang <yong.huang@smartx.com>
> > > ---
> > >  migration/cpu-throttle.c |  2 +-
> > >  migration/ram.c          | 19 ++++++++++++++++---
> > >  2 files changed, 17 insertions(+), 4 deletions(-)
> > >
> > > diff --git a/migration/cpu-throttle.c b/migration/cpu-throttle.c
> > > index 5179019e33..674dc2004e 100644
> > > --- a/migration/cpu-throttle.c
> > > +++ b/migration/cpu-throttle.c
> > > @@ -141,7 +141,7 @@ void cpu_throttle_dirty_sync_timer_tick(void *opaque)
> > >       * effect on guest performance, therefore omit it to avoid
> > >       * paying extra for the sync penalty.
> > >       */
> > > -    if (sync_cnt <= 1) {
> > > +    if (!sync_cnt) {
> > >          goto end;
> > >      }
> > >
> > > diff --git a/migration/ram.c b/migration/ram.c
> > > index 05ff9eb328..a0123eb93e 100644
> > > --- a/migration/ram.c
> > > +++ b/migration/ram.c
> > > @@ -2718,7 +2718,7 @@ static void ram_list_init_bitmaps(void)
> > >  {
> > >      MigrationState *ms = migrate_get_current();
> > >      RAMBlock *block;
> > > -    unsigned long pages;
> > > +    unsigned long pages, clear_bmap_pages;
> > >      uint8_t shift;
> > >
> > >      /* Skip setting bitmap if there is no RAM */
> > > @@ -2736,6 +2736,7 @@ static void ram_list_init_bitmaps(void)
> > >
> > >          RAMBLOCK_FOREACH_NOT_IGNORED(block) {
> > >              pages = block->max_length >> TARGET_PAGE_BITS;
> > > +            clear_bmap_pages = clear_bmap_size(pages, shift);
> > >              /*
> > >               * The initial dirty bitmap for migration must be set with
> > all
> > >               * ones to make sure we'll migrate every guest RAM page to
> > > @@ -2751,7 +2752,12 @@ static void ram_list_init_bitmaps(void)
> > >                  block->file_bmap = bitmap_new(pages);
> > >              }
> > >              block->clear_bmap_shift = shift;
> > > -            block->clear_bmap = bitmap_new(clear_bmap_size(pages,
> > shift));
> > > +            block->clear_bmap = bitmap_new(clear_bmap_pages);
> > > +            /*
> > > +             * Set clear_bmap to 1 unconditionally, as we always set
> > bmap
> > > +             * to all 1s by default.
> > > +             */
> > > +            bitmap_set(block->clear_bmap, 0, clear_bmap_pages);
> > >          }
> > >      }
> > >  }
> > > @@ -2771,6 +2777,7 @@ static void
> > migration_bitmap_clear_discarded_pages(RAMState *rs)
> > >
> > >  static bool ram_init_bitmaps(RAMState *rs, Error **errp)
> > >  {
> > > +    Error *local_err = NULL;
> > >      bool ret = true;
> > >
> > >      qemu_mutex_lock_ramlist();
> > > @@ -2783,7 +2790,13 @@ static bool ram_init_bitmaps(RAMState *rs, Error
> > **errp)
> > >              if (!ret) {
> > >                  goto out_unlock;
> > >              }
> > > -            migration_bitmap_sync_precopy(false);
> > > +            /*
> > > +             * Bypass the RAMBlock dirty sync and still publish the
> > > +             * notification.
> >
> > Hmm.. Why should QEMU notify AFTER_BITMAP_SYNC if the sync didn't happen?
> >
> 
> Indeed, logically, we should not make the notification.
> 
> Some features, like VIRTIO_BALLOON_F_FREE_PAGE_HINT, use this notification
> to indirectly detect whether the RAMBlock's bmap has been updated. This
> allows the
> free page optimization to begin clearing parts of the bitmap that contain
> free pages.
> 
> virtio_balloon_free_page_hint_notify
> ......
>     switch (pnd->reason) {
>     case PRECOPY_NOTIFY_BEFORE_BITMAP_SYNC:
>         virtio_balloon_free_page_stop(dev);
>         break;
>     case PRECOPY_NOTIFY_AFTER_BITMAP_SYNC:
>         if (vdev->vm_running) {
>             virtio_balloon_free_page_start(dev);
>             break;
>         }
> 
> The free page optimization may miss the first time window to execute if we
> don't
> send out a notification after starting the migration and initializing the
> bmap with all 1s.
> 
> May we change the old behavior of optimization?

I see.

It looks like an abuse to me so far to use AFTER_BITMAP_SYNC as start of
free page hinting.  There's no guarantee a sync is needed when start
migration.

I think we may want a pre-requisite patch to enable free page hinting
during PRECOPY_NOTIFY_SETUP too, like PRECOPY_NOTIFY_AFTER_BITMAP_SYNC.
That patch (if you agree) will need to copy David Hildenbrand and Wei Wang
(original author).

Thanks,
diff mbox series

Patch

diff --git a/migration/cpu-throttle.c b/migration/cpu-throttle.c
index 5179019e33..674dc2004e 100644
--- a/migration/cpu-throttle.c
+++ b/migration/cpu-throttle.c
@@ -141,7 +141,7 @@  void cpu_throttle_dirty_sync_timer_tick(void *opaque)
      * effect on guest performance, therefore omit it to avoid
      * paying extra for the sync penalty.
      */
-    if (sync_cnt <= 1) {
+    if (!sync_cnt) {
         goto end;
     }
 
diff --git a/migration/ram.c b/migration/ram.c
index 05ff9eb328..a0123eb93e 100644
--- a/migration/ram.c
+++ b/migration/ram.c
@@ -2718,7 +2718,7 @@  static void ram_list_init_bitmaps(void)
 {
     MigrationState *ms = migrate_get_current();
     RAMBlock *block;
-    unsigned long pages;
+    unsigned long pages, clear_bmap_pages;
     uint8_t shift;
 
     /* Skip setting bitmap if there is no RAM */
@@ -2736,6 +2736,7 @@  static void ram_list_init_bitmaps(void)
 
         RAMBLOCK_FOREACH_NOT_IGNORED(block) {
             pages = block->max_length >> TARGET_PAGE_BITS;
+            clear_bmap_pages = clear_bmap_size(pages, shift);
             /*
              * The initial dirty bitmap for migration must be set with all
              * ones to make sure we'll migrate every guest RAM page to
@@ -2751,7 +2752,12 @@  static void ram_list_init_bitmaps(void)
                 block->file_bmap = bitmap_new(pages);
             }
             block->clear_bmap_shift = shift;
-            block->clear_bmap = bitmap_new(clear_bmap_size(pages, shift));
+            block->clear_bmap = bitmap_new(clear_bmap_pages);
+            /*
+             * Set clear_bmap to 1 unconditionally, as we always set bmap
+             * to all 1s by default.
+             */
+            bitmap_set(block->clear_bmap, 0, clear_bmap_pages);
         }
     }
 }
@@ -2771,6 +2777,7 @@  static void migration_bitmap_clear_discarded_pages(RAMState *rs)
 
 static bool ram_init_bitmaps(RAMState *rs, Error **errp)
 {
+    Error *local_err = NULL;
     bool ret = true;
 
     qemu_mutex_lock_ramlist();
@@ -2783,7 +2790,13 @@  static bool ram_init_bitmaps(RAMState *rs, Error **errp)
             if (!ret) {
                 goto out_unlock;
             }
-            migration_bitmap_sync_precopy(false);
+            /*
+             * Bypass the RAMBlock dirty sync and still publish the
+             * notification.
+             */
+            if (precopy_notify(PRECOPY_NOTIFY_AFTER_BITMAP_SYNC, &local_err)) {
+                error_report_err(local_err);
+            }
         }
     }
 out_unlock: