diff mbox series

[v4,3/7] migration/multifd: Implement ram_save_target_page_multifd to handle multifd version of MigrationOps::ram_save_target_page.

Message ID 20240301022829.3390548-4-hao.xiang@bytedance.com (mailing list archive)
State New, archived
Headers show
Series Introduce multifd zero page checking. | expand

Commit Message

Hao Xiang March 1, 2024, 2:28 a.m. UTC
1. Add a dedicated handler for MigrationOps::ram_save_target_page in
multifd live migration.
2. Refactor ram_save_target_page_legacy so that the legacy and multifd
handlers don't have internal functions calling into each other.

Signed-off-by: Hao Xiang <hao.xiang@bytedance.com>
Reviewed-by: Fabiano Rosas <farosas@suse.de>
Message-Id: <20240226195654.934709-4-hao.xiang@bytedance.com>
---
 migration/ram.c | 43 ++++++++++++++++++++++++++++++-------------
 1 file changed, 30 insertions(+), 13 deletions(-)

Comments

Peter Xu March 4, 2024, 7:46 a.m. UTC | #1
On Fri, Mar 01, 2024 at 02:28:25AM +0000, Hao Xiang wrote:
> 1. Add a dedicated handler for MigrationOps::ram_save_target_page in
> multifd live migration.
> 2. Refactor ram_save_target_page_legacy so that the legacy and multifd
> handlers don't have internal functions calling into each other.
> 
> Signed-off-by: Hao Xiang <hao.xiang@bytedance.com>
> Reviewed-by: Fabiano Rosas <farosas@suse.de>
> Message-Id: <20240226195654.934709-4-hao.xiang@bytedance.com>
> ---
>  migration/ram.c | 43 ++++++++++++++++++++++++++++++-------------
>  1 file changed, 30 insertions(+), 13 deletions(-)
> 
> diff --git a/migration/ram.c b/migration/ram.c
> index e1fa229acf..f9d6ea65cc 100644
> --- a/migration/ram.c
> +++ b/migration/ram.c
> @@ -1122,10 +1122,6 @@ static int save_zero_page(RAMState *rs, PageSearchStatus *pss,
>      QEMUFile *file = pss->pss_channel;
>      int len = 0;
>  
> -    if (migrate_zero_page_detection() == ZERO_PAGE_DETECTION_NONE) {
> -        return 0;
> -    }

We need to keep this to disable zero-page-detect on !multifd?

> -
>      if (!buffer_is_zero(p, TARGET_PAGE_SIZE)) {
>          return 0;
>      }
> @@ -2045,7 +2041,6 @@ static bool save_compress_page(RAMState *rs, PageSearchStatus *pss,
>   */
>  static int ram_save_target_page_legacy(RAMState *rs, PageSearchStatus *pss)
>  {
> -    RAMBlock *block = pss->block;
>      ram_addr_t offset = ((ram_addr_t)pss->page) << TARGET_PAGE_BITS;
>      int res;
>  
> @@ -2061,17 +2056,34 @@ static int ram_save_target_page_legacy(RAMState *rs, PageSearchStatus *pss)
>          return 1;
>      }
>  
> +    return ram_save_page(rs, pss);
> +}
> +
> +/**
> + * ram_save_target_page_multifd: send one target page to multifd workers
> + *
> + * Returns 1 if the page was queued, -1 otherwise.
> + *
> + * @rs: current RAM state
> + * @pss: data about the page we want to send
> + */
> +static int ram_save_target_page_multifd(RAMState *rs, PageSearchStatus *pss)
> +{
> +    RAMBlock *block = pss->block;
> +    ram_addr_t offset = ((ram_addr_t)pss->page) << TARGET_PAGE_BITS;
> +
>      /*
> -     * Do not use multifd in postcopy as one whole host page should be
> -     * placed.  Meanwhile postcopy requires atomic update of pages, so even
> -     * if host page size == guest page size the dest guest during run may
> -     * still see partially copied pages which is data corruption.
> +     * Backward compatibility support. While using multifd live

We can also avoid mentioning "compatibility support" here - it's a
parameter, user can legally set it to anything.

> +     * migration, we still need to handle zero page checking on the
> +     * migration main thread.
>       */
> -    if (migrate_multifd() && !migration_in_postcopy()) {
> -        return ram_save_multifd_page(block, offset);
> +    if (migrate_zero_page_detection() == ZERO_PAGE_DETECTION_LEGACY) {
> +        if (save_zero_page(rs, pss, offset)) {
> +            return 1;
> +        }
>      }
>  
> -    return ram_save_page(rs, pss);
> +    return ram_save_multifd_page(block, offset);
>  }
>  
>  /* Should be called before sending a host page */
> @@ -2983,7 +2995,12 @@ static int ram_save_setup(QEMUFile *f, void *opaque)
>      }
>  
>      migration_ops = g_malloc0(sizeof(MigrationOps));
> -    migration_ops->ram_save_target_page = ram_save_target_page_legacy;
> +
> +    if (migrate_multifd()) {
> +        migration_ops->ram_save_target_page = ram_save_target_page_multifd;
> +    } else {
> +        migration_ops->ram_save_target_page = ram_save_target_page_legacy;
> +    }
>  
>      bql_unlock();
>      ret = multifd_send_sync_main();
> -- 
> 2.30.2
>
Hao Xiang March 9, 2024, 2:06 a.m. UTC | #2
> 
> On Sun, Mar 3, 2024 at 11:46 PM Peter Xu <peterx@redhat.com> wrote:
> 
> > 
> > On Fri, Mar 01, 2024 at 02:28:25AM +0000, Hao Xiang wrote:
> > 
> >  1. Add a dedicated handler for MigrationOps::ram_save_target_page in
> > 
> >  multifd live migration.
> > 
> >  2. Refactor ram_save_target_page_legacy so that the legacy and multifd
> > 
> >  handlers don't have internal functions calling into each other.
> > 
> >  Signed-off-by: Hao Xiang <hao.xiang@bytedance.com>
> > 
> >  Reviewed-by: Fabiano Rosas <farosas@suse.de>
> > 
> >  Message-Id: <20240226195654.934709-4-hao.xiang@bytedance.com>
> > 
> >  ---
> > 
> >  migration/ram.c | 43 ++++++++++++++++++++++++++++++-------------
> > 
> >  1 file changed, 30 insertions(+), 13 deletions(-)
> > 
> >  diff --git a/migration/ram.c b/migration/ram.c
> > 
> >  index e1fa229acf..f9d6ea65cc 100644
> > 
> >  --- a/migration/ram.c
> > 
> >  +++ b/migration/ram.c
> > 
> >  @@ -1122,10 +1122,6 @@ static int save_zero_page(RAMState *rs, PageSearchStatus *pss,
> > 
> >  QEMUFile *file = pss->pss_channel;
> > 
> >  int len = 0;
> > 
> >  - if (migrate_zero_page_detection() == ZERO_PAGE_DETECTION_NONE) {
> > 
> >  - return 0;
> > 
> >  - }
> > 
> >  We need to keep this to disable zero-page-detect on !multifd?

So if multifd is enabled, the new parameter takes effect. If multifd is not enabled, zero page checking will always be done in the main thread, which is exactly the behavior it is now. I thought legacy migration is a deprecated feature so I am trying to not add new stuff to it.

> > 
> >  -
> > 
> >  if (!buffer_is_zero(p, TARGET_PAGE_SIZE)) {
> > 
> >  return 0;
> > 
> >  }
> > 
> >  @@ -2045,7 +2041,6 @@ static bool save_compress_page(RAMState *rs, PageSearchStatus *pss,
> > 
> >  */
> > 
> >  static int ram_save_target_page_legacy(RAMState *rs, PageSearchStatus *pss)
> > 
> >  {
> > 
> >  - RAMBlock *block = pss->block;
> > 
> >  ram_addr_t offset = ((ram_addr_t)pss->page) << TARGET_PAGE_BITS;
> > 
> >  int res;
> > 
> >  @@ -2061,17 +2056,34 @@ static int ram_save_target_page_legacy(RAMState *rs, PageSearchStatus *pss)
> > 
> >  return 1;
> > 
> >  }
> > 
> >  + return ram_save_page(rs, pss);
> > 
> >  +}
> > 
> >  +
> > 
> >  +/**
> > 
> >  + * ram_save_target_page_multifd: send one target page to multifd workers
> > 
> >  + *
> > 
> >  + * Returns 1 if the page was queued, -1 otherwise.
> > 
> >  + *
> > 
> >  + * @rs: current RAM state
> > 
> >  + * @pss: data about the page we want to send
> > 
> >  + */
> > 
> >  +static int ram_save_target_page_multifd(RAMState *rs, PageSearchStatus *pss)
> > 
> >  +{
> > 
> >  + RAMBlock *block = pss->block;
> > 
> >  + ram_addr_t offset = ((ram_addr_t)pss->page) << TARGET_PAGE_BITS;
> > 
> >  +
> > 
> >  /*
> > 
> >  - * Do not use multifd in postcopy as one whole host page should be
> > 
> >  - * placed. Meanwhile postcopy requires atomic update of pages, so even
> > 
> >  - * if host page size == guest page size the dest guest during run may
> > 
> >  - * still see partially copied pages which is data corruption.
> > 
> >  + * Backward compatibility support. While using multifd live
> > 
> >  We can also avoid mentioning "compatibility support" here - it's a
> > 
> >  parameter, user can legally set it to anything.

Will drop that.

> > 
> >  + * migration, we still need to handle zero page checking on the
> > 
> >  + * migration main thread.
> > 
> >  */
> > 
> >  - if (migrate_multifd() && !migration_in_postcopy()) {
> > 
> >  - return ram_save_multifd_page(block, offset);
> > 
> >  + if (migrate_zero_page_detection() == ZERO_PAGE_DETECTION_LEGACY) {
> > 
> >  + if (save_zero_page(rs, pss, offset)) {
> > 
> >  + return 1;
> > 
> >  + }
> > 
> >  }
> > 
> >  - return ram_save_page(rs, pss);
> > 
> >  + return ram_save_multifd_page(block, offset);
> > 
> >  }
> > 
> >  /* Should be called before sending a host page */
> > 
> >  @@ -2983,7 +2995,12 @@ static int ram_save_setup(QEMUFile *f, void *opaque)
> > 
> >  }
> > 
> >  migration_ops = g_malloc0(sizeof(MigrationOps));
> > 
> >  - migration_ops->ram_save_target_page = ram_save_target_page_legacy;
> > 
> >  +
> > 
> >  + if (migrate_multifd()) {
> > 
> >  + migration_ops->ram_save_target_page = ram_save_target_page_multifd;
> > 
> >  + } else {
> > 
> >  + migration_ops->ram_save_target_page = ram_save_target_page_legacy;
> > 
> >  + }
> > 
> >  bql_unlock();
> > 
> >  ret = multifd_send_sync_main();
> > 
> >  --
> > 
> >  2.30.2
> > 
> >  --
> > 
> >  Peter Xu
> >
>
Peter Xu March 11, 2024, 1:20 p.m. UTC | #3
On Sat, Mar 09, 2024 at 02:06:33AM +0000, hao.xiang@linux.dev wrote:
> > >  @@ -1122,10 +1122,6 @@ static int save_zero_page(RAMState *rs, PageSearchStatus *pss,
> > >  QEMUFile *file = pss->pss_channel;
> > >  int len = 0;
> > >
> > >  - if (migrate_zero_page_detection() == ZERO_PAGE_DETECTION_NONE) {
> > >  - return 0;
> > >  - }
> > > 
> > >  We need to keep this to disable zero-page-detect on !multifd?
> 
> So if multifd is enabled, the new parameter takes effect. If multifd is
> not enabled, zero page checking will always be done in the main thread,
> which is exactly the behavior it is now. I thought legacy migration is a
> deprecated feature so I am trying to not add new stuff to it.

There's no plan to deprecate legacy migrations, I think there was a plan to
make multifd the default, but I don't yet think it all thorougly yet, and
even if it happens it doesn't mean we'll remove legacy migration code.

When repost please still make sure this parameter works for both multifd
and !multifd.

Thanks,
Hao Xiang March 11, 2024, 6:02 p.m. UTC | #4
March 11, 2024 at 6:20 AM, "Peter Xu" <peterx@redhat.com> wrote:



> 
> On Sat, Mar 09, 2024 at 02:06:33AM +0000, hao.xiang@linux.dev wrote:
> 
> > 
> > > @@ -1122,10 +1122,6 @@ static int save_zero_page(RAMState *rs, PageSearchStatus *pss,
> > 
> >  > QEMUFile *file = pss->pss_channel;
> > 
> >  > int len = 0;
> > 
> >  >
> > 
> >  > - if (migrate_zero_page_detection() == ZERO_PAGE_DETECTION_NONE) {
> > 
> >  > - return 0;
> > 
> >  > - }
> > 
> >  > 
> > 
> >  > We need to keep this to disable zero-page-detect on !multifd?
> > 
> >  
> > 
> >  So if multifd is enabled, the new parameter takes effect. If multifd is
> > 
> >  not enabled, zero page checking will always be done in the main thread,
> > 
> >  which is exactly the behavior it is now. I thought legacy migration is a
> > 
> >  deprecated feature so I am trying to not add new stuff to it.
> > 
> 
> There's no plan to deprecate legacy migrations, I think there was a plan to
> 
> make multifd the default, but I don't yet think it all thorougly yet, and
> 
> even if it happens it doesn't mean we'll remove legacy migration code.
> 
> When repost please still make sure this parameter works for both multifd
> 
> and !multifd.
> 
> Thanks,
> 
> -- 
> 
> Peter Xu


Sure. Fixed the issue now and reposted a new patchset.

>
diff mbox series

Patch

diff --git a/migration/ram.c b/migration/ram.c
index e1fa229acf..f9d6ea65cc 100644
--- a/migration/ram.c
+++ b/migration/ram.c
@@ -1122,10 +1122,6 @@  static int save_zero_page(RAMState *rs, PageSearchStatus *pss,
     QEMUFile *file = pss->pss_channel;
     int len = 0;
 
-    if (migrate_zero_page_detection() == ZERO_PAGE_DETECTION_NONE) {
-        return 0;
-    }
-
     if (!buffer_is_zero(p, TARGET_PAGE_SIZE)) {
         return 0;
     }
@@ -2045,7 +2041,6 @@  static bool save_compress_page(RAMState *rs, PageSearchStatus *pss,
  */
 static int ram_save_target_page_legacy(RAMState *rs, PageSearchStatus *pss)
 {
-    RAMBlock *block = pss->block;
     ram_addr_t offset = ((ram_addr_t)pss->page) << TARGET_PAGE_BITS;
     int res;
 
@@ -2061,17 +2056,34 @@  static int ram_save_target_page_legacy(RAMState *rs, PageSearchStatus *pss)
         return 1;
     }
 
+    return ram_save_page(rs, pss);
+}
+
+/**
+ * ram_save_target_page_multifd: send one target page to multifd workers
+ *
+ * Returns 1 if the page was queued, -1 otherwise.
+ *
+ * @rs: current RAM state
+ * @pss: data about the page we want to send
+ */
+static int ram_save_target_page_multifd(RAMState *rs, PageSearchStatus *pss)
+{
+    RAMBlock *block = pss->block;
+    ram_addr_t offset = ((ram_addr_t)pss->page) << TARGET_PAGE_BITS;
+
     /*
-     * Do not use multifd in postcopy as one whole host page should be
-     * placed.  Meanwhile postcopy requires atomic update of pages, so even
-     * if host page size == guest page size the dest guest during run may
-     * still see partially copied pages which is data corruption.
+     * Backward compatibility support. While using multifd live
+     * migration, we still need to handle zero page checking on the
+     * migration main thread.
      */
-    if (migrate_multifd() && !migration_in_postcopy()) {
-        return ram_save_multifd_page(block, offset);
+    if (migrate_zero_page_detection() == ZERO_PAGE_DETECTION_LEGACY) {
+        if (save_zero_page(rs, pss, offset)) {
+            return 1;
+        }
     }
 
-    return ram_save_page(rs, pss);
+    return ram_save_multifd_page(block, offset);
 }
 
 /* Should be called before sending a host page */
@@ -2983,7 +2995,12 @@  static int ram_save_setup(QEMUFile *f, void *opaque)
     }
 
     migration_ops = g_malloc0(sizeof(MigrationOps));
-    migration_ops->ram_save_target_page = ram_save_target_page_legacy;
+
+    if (migrate_multifd()) {
+        migration_ops->ram_save_target_page = ram_save_target_page_multifd;
+    } else {
+        migration_ops->ram_save_target_page = ram_save_target_page_legacy;
+    }
 
     bql_unlock();
     ret = multifd_send_sync_main();