
[v2,04/20] So we use multifd to transmit zero pages.

Message ID 20231114054032.1192027-5-hao.xiang@bytedance.com (mailing list archive)
State New, archived
Series Use Intel DSA accelerator to offload zero page checking in multifd live migration.

Commit Message

Hao Xiang Nov. 14, 2023, 5:40 a.m. UTC
From: Juan Quintela <quintela@redhat.com>

Signed-off-by: Juan Quintela <quintela@redhat.com>
Reviewed-by: Leonardo Bras <leobras@redhat.com>
---
 migration/multifd.c |  7 ++++---
 migration/options.c | 13 +++++++------
 migration/ram.c     | 45 ++++++++++++++++++++++++++++++++++++++-------
 qapi/migration.json |  1 -
 4 files changed, 49 insertions(+), 17 deletions(-)

Comments

Fabiano Rosas Nov. 16, 2023, 3:14 p.m. UTC | #1
Hao Xiang <hao.xiang@bytedance.com> writes:

> From: Juan Quintela <quintela@redhat.com>
>
> Signed-off-by: Juan Quintela <quintela@redhat.com>
> Reviewed-by: Leonardo Bras <leobras@redhat.com>
> ---
>  migration/multifd.c |  7 ++++---
>  migration/options.c | 13 +++++++------
>  migration/ram.c     | 45 ++++++++++++++++++++++++++++++++++++++-------
>  qapi/migration.json |  1 -
>  4 files changed, 49 insertions(+), 17 deletions(-)
>
> diff --git a/migration/multifd.c b/migration/multifd.c
> index 1b994790d5..1198ffde9c 100644
> --- a/migration/multifd.c
> +++ b/migration/multifd.c
> @@ -13,6 +13,7 @@
>  #include "qemu/osdep.h"
>  #include "qemu/cutils.h"
>  #include "qemu/rcu.h"
> +#include "qemu/cutils.h"
>  #include "exec/target_page.h"
>  #include "sysemu/sysemu.h"
>  #include "exec/ramblock.h"
> @@ -459,7 +460,6 @@ static int multifd_send_pages(QEMUFile *f)
>      p->packet_num = multifd_send_state->packet_num++;
>      multifd_send_state->pages = p->pages;
>      p->pages = pages;
> -
>      qemu_mutex_unlock(&p->mutex);
>      qemu_sem_post(&p->sem);
>  
> @@ -684,7 +684,7 @@ static void *multifd_send_thread(void *opaque)
>      MigrationThread *thread = NULL;
>      Error *local_err = NULL;
>      /* qemu older than 8.2 don't understand zero page on multifd channel */
> -    bool use_zero_page = !migrate_use_main_zero_page();
> +    bool use_multifd_zero_page = !migrate_use_main_zero_page();
>      int ret = 0;
>      bool use_zero_copy_send = migrate_zero_copy_send();
>  
> @@ -713,6 +713,7 @@ static void *multifd_send_thread(void *opaque)
>              RAMBlock *rb = p->pages->block;
>              uint64_t packet_num = p->packet_num;
>              uint32_t flags;
> +
>              p->normal_num = 0;
>              p->zero_num = 0;
>  
> @@ -724,7 +725,7 @@ static void *multifd_send_thread(void *opaque)
>  
>              for (int i = 0; i < p->pages->num; i++) {
>                  uint64_t offset = p->pages->offset[i];
> -                if (use_zero_page &&
> +                if (use_multifd_zero_page &&

We could have a new function in multifd_ops for zero page
handling. We're already considering an accelerator for the compression
method in the other series[1] and in this series we're adding an
accelerator for zero page checking. It's about time we make the
multifd_ops generic instead of only compression/no compression.

1- [PATCH v2 0/4] Live Migration Acceleration with IAA Compression
https://lore.kernel.org/r/20231109154638.488213-1-yuan1.liu@intel.com
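
For instance, something along these lines (rough sketch only, the hook
name is made up; the default implementation would keep today's
buffer_is_zero() scan, and an accelerated backend could override it):

typedef struct {
    /* ... existing hooks: send_setup, send_cleanup, send_prepare ... */
    void (*zero_page_detect)(MultiFDSendParams *p);
} MultiFDMethods;

/* Default CPU implementation, same logic as the loop below. */
static void multifd_zero_page_detect_cpu(MultiFDSendParams *p)
{
    RAMBlock *rb = p->pages->block;

    for (int i = 0; i < p->pages->num; i++) {
        uint64_t offset = p->pages->offset[i];

        if (buffer_is_zero(rb->host + offset, p->page_size)) {
            p->zero[p->zero_num++] = offset;
        } else {
            p->normal[p->normal_num++] = offset;
        }
    }
}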

>                      buffer_is_zero(rb->host + offset, p->page_size)) {
>                      p->zero[p->zero_num] = offset;
>                      p->zero_num++;
> diff --git a/migration/options.c b/migration/options.c
> index 00c0c4a0d6..97d121d4d7 100644
> --- a/migration/options.c
> +++ b/migration/options.c
> @@ -195,6 +195,7 @@ Property migration_properties[] = {
>      DEFINE_PROP_MIG_CAP("x-block", MIGRATION_CAPABILITY_BLOCK),
>      DEFINE_PROP_MIG_CAP("x-return-path", MIGRATION_CAPABILITY_RETURN_PATH),
>      DEFINE_PROP_MIG_CAP("x-multifd", MIGRATION_CAPABILITY_MULTIFD),
> +    DEFINE_PROP_MIG_CAP("x-main-zero-page", MIGRATION_CAPABILITY_MAIN_ZERO_PAGE),
>      DEFINE_PROP_MIG_CAP("x-background-snapshot",
>              MIGRATION_CAPABILITY_BACKGROUND_SNAPSHOT),
>  #ifdef CONFIG_LINUX
> @@ -288,13 +289,9 @@ bool migrate_multifd(void)
>  
>  bool migrate_use_main_zero_page(void)
>  {
> -    //MigrationState *s;
> -
> -    //s = migrate_get_current();
> +    MigrationState *s = migrate_get_current();
>  
> -    // We will enable this when we add the right code.
> -    // return s->enabled_capabilities[MIGRATION_CAPABILITY_MAIN_ZERO_PAGE];
> -    return true;
> +    return s->capabilities[MIGRATION_CAPABILITY_MAIN_ZERO_PAGE];

What happens if we disable main-zero-page while multifd is not enabled?

>  }
>  
>  bool migrate_pause_before_switchover(void)
> @@ -457,6 +454,7 @@ INITIALIZE_MIGRATE_CAPS_SET(check_caps_background_snapshot,
>      MIGRATION_CAPABILITY_LATE_BLOCK_ACTIVATE,
>      MIGRATION_CAPABILITY_RETURN_PATH,
>      MIGRATION_CAPABILITY_MULTIFD,
> +    MIGRATION_CAPABILITY_MAIN_ZERO_PAGE,
>      MIGRATION_CAPABILITY_PAUSE_BEFORE_SWITCHOVER,
>      MIGRATION_CAPABILITY_AUTO_CONVERGE,
>      MIGRATION_CAPABILITY_RELEASE_RAM,
> @@ -534,6 +532,9 @@ bool migrate_caps_check(bool *old_caps, bool *new_caps, Error **errp)
>              error_setg(errp, "Postcopy is not yet compatible with multifd");
>              return false;
>          }
> +        if (new_caps[MIGRATION_CAPABILITY_MAIN_ZERO_PAGE]) {
> +            error_setg(errp, "Postcopy is not yet compatible with main zero copy");
> +        }

Won't this break compatibility for postcopy? A command that used
to work will now have to disable main-zero-page first.

>      }
>  
>      if (new_caps[MIGRATION_CAPABILITY_BACKGROUND_SNAPSHOT]) {
> diff --git a/migration/ram.c b/migration/ram.c
> index 8c7886ab79..f7a42feff2 100644
> --- a/migration/ram.c
> +++ b/migration/ram.c
> @@ -2059,17 +2059,42 @@ static int ram_save_target_page_legacy(RAMState *rs, PageSearchStatus *pss)
>      if (save_zero_page(rs, pss, offset)) {
>          return 1;
>      }
> -
>      /*
> -     * Do not use multifd in postcopy as one whole host page should be
> -     * placed.  Meanwhile postcopy requires atomic update of pages, so even
> -     * if host page size == guest page size the dest guest during run may
> -     * still see partially copied pages which is data corruption.
> +     * Do not use multifd for:
> +     * 1. Compression as the first page in the new block should be posted out
> +     *    before sending the compressed page
> +     * 2. In postcopy as one whole host page should be placed
>       */
> -    if (migrate_multifd() && !migration_in_postcopy()) {
> +    if (!migrate_compress() && migrate_multifd() && !migration_in_postcopy()) {
> +        return ram_save_multifd_page(pss->pss_channel, block, offset);
> +    }

This could go into ram_save_target_page_multifd like so:

if (!migrate_compress() && !migration_in_postcopy() && !migrate_use_main_zero_page()) {
    return ram_save_multifd_page(pss->pss_channel, block, offset);
} else {
    return ram_save_target_page_legacy(rs, pss);
}

> +
> +    return ram_save_page(rs, pss);
> +}
> +
> +/**
> + * ram_save_target_page_multifd: save one target page
> + *
> + * Returns the number of pages written
> + *
> + * @rs: current RAM state
> + * @pss: data about the page we want to send
> + */
> +static int ram_save_target_page_multifd(RAMState *rs, PageSearchStatus *pss)
> +{
> +    RAMBlock *block = pss->block;
> +    ram_addr_t offset = ((ram_addr_t)pss->page) << TARGET_PAGE_BITS;
> +    int res;
> +
> +    if (!migration_in_postcopy()) {
>          return ram_save_multifd_page(pss->pss_channel, block, offset);
>      }
>  
> +    res = save_zero_page(rs, pss, offset);
> +    if (res > 0) {
> +        return res;
> +    }
> +
>      return ram_save_page(rs, pss);
>  }
>  
> @@ -2982,9 +3007,15 @@ static int ram_save_setup(QEMUFile *f, void *opaque)
>      }
>  
>      migration_ops = g_malloc0(sizeof(MigrationOps));
> -    migration_ops->ram_save_target_page = ram_save_target_page_legacy;
> +
> +    if (migrate_multifd() && !migrate_use_main_zero_page()) {
> +        migration_ops->ram_save_target_page = ram_save_target_page_multifd;
> +    } else {
> +        migration_ops->ram_save_target_page = ram_save_target_page_legacy;
> +    }

This should not check main-zero-page. Just have multifd vs. legacy and
have the multifd function defer to _legacy if main-zero-page or
in_postcopy.
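
i.e. roughly (sketch only, reusing the accessors from this patch):

static int ram_save_target_page_multifd(RAMState *rs, PageSearchStatus *pss)
{
    RAMBlock *block = pss->block;
    ram_addr_t offset = ((ram_addr_t)pss->page) << TARGET_PAGE_BITS;

    /* Defer anything multifd cannot handle to the legacy path. */
    if (migration_in_postcopy() || migrate_use_main_zero_page()) {
        return ram_save_target_page_legacy(rs, pss);
    }

    return ram_save_multifd_page(pss->pss_channel, block, offset);
}

and then ram_save_setup() picks the hook purely on migrate_multifd():

    migration_ops->ram_save_target_page = migrate_multifd() ?
        ram_save_target_page_multifd : ram_save_target_page_legacy;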

>  
>      qemu_mutex_unlock_iothread();
> +
>      ret = multifd_send_sync_main(f);
>      qemu_mutex_lock_iothread();
>      if (ret < 0) {
> diff --git a/qapi/migration.json b/qapi/migration.json
> index 09e4393591..9783289bfc 100644
> --- a/qapi/migration.json
> +++ b/qapi/migration.json
> @@ -531,7 +531,6 @@
>  #     and can result in more stable read performance.  Requires KVM
>  #     with accelerator property "dirty-ring-size" set.  (Since 8.1)
>  #
> -#
>  # @main-zero-page: If enabled, the detection of zero pages will be
>  #                  done on the main thread.  Otherwise it is done on
>  #                  the multifd threads.
Hao Xiang Jan. 23, 2024, 4:28 a.m. UTC | #2
On Thu, Nov 16, 2023 at 7:14 AM Fabiano Rosas <farosas@suse.de> wrote:
>
> Hao Xiang <hao.xiang@bytedance.com> writes:
>
> > From: Juan Quintela <quintela@redhat.com>
> >
> > Signed-off-by: Juan Quintela <quintela@redhat.com>
> > Reviewed-by: Leonardo Bras <leobras@redhat.com>
> > ---
> >  migration/multifd.c |  7 ++++---
> >  migration/options.c | 13 +++++++------
> >  migration/ram.c     | 45 ++++++++++++++++++++++++++++++++++++++-------
> >  qapi/migration.json |  1 -
> >  4 files changed, 49 insertions(+), 17 deletions(-)
> >
> > diff --git a/migration/multifd.c b/migration/multifd.c
> > index 1b994790d5..1198ffde9c 100644
> > --- a/migration/multifd.c
> > +++ b/migration/multifd.c
> > @@ -13,6 +13,7 @@
> >  #include "qemu/osdep.h"
> >  #include "qemu/cutils.h"
> >  #include "qemu/rcu.h"
> > +#include "qemu/cutils.h"
> >  #include "exec/target_page.h"
> >  #include "sysemu/sysemu.h"
> >  #include "exec/ramblock.h"
> > @@ -459,7 +460,6 @@ static int multifd_send_pages(QEMUFile *f)
> >      p->packet_num = multifd_send_state->packet_num++;
> >      multifd_send_state->pages = p->pages;
> >      p->pages = pages;
> > -
> >      qemu_mutex_unlock(&p->mutex);
> >      qemu_sem_post(&p->sem);
> >
> > @@ -684,7 +684,7 @@ static void *multifd_send_thread(void *opaque)
> >      MigrationThread *thread = NULL;
> >      Error *local_err = NULL;
> >      /* qemu older than 8.2 don't understand zero page on multifd channel */
> > -    bool use_zero_page = !migrate_use_main_zero_page();
> > +    bool use_multifd_zero_page = !migrate_use_main_zero_page();
> >      int ret = 0;
> >      bool use_zero_copy_send = migrate_zero_copy_send();
> >
> > @@ -713,6 +713,7 @@ static void *multifd_send_thread(void *opaque)
> >              RAMBlock *rb = p->pages->block;
> >              uint64_t packet_num = p->packet_num;
> >              uint32_t flags;
> > +
> >              p->normal_num = 0;
> >              p->zero_num = 0;
> >
> > @@ -724,7 +725,7 @@ static void *multifd_send_thread(void *opaque)
> >
> >              for (int i = 0; i < p->pages->num; i++) {
> >                  uint64_t offset = p->pages->offset[i];
> > -                if (use_zero_page &&
> > +                if (use_multifd_zero_page &&
>
> We could have a new function in multifd_ops for zero page
> handling. We're already considering an accelerator for the compression
> method in the other series[1] and in this series we're adding an
> accelerator for zero page checking. It's about time we make the
> multifd_ops generic instead of only compression/no compression.

Sorry I overlooked this email earlier.
I will extend the multifd_ops interface and add a new API for zero
page checking.

>
> 1- [PATCH v2 0/4] Live Migration Acceleration with IAA Compression
> https://lore.kernel.org/r/20231109154638.488213-1-yuan1.liu@intel.com
>
> >                      buffer_is_zero(rb->host + offset, p->page_size)) {
> >                      p->zero[p->zero_num] = offset;
> >                      p->zero_num++;
> > diff --git a/migration/options.c b/migration/options.c
> > index 00c0c4a0d6..97d121d4d7 100644
> > --- a/migration/options.c
> > +++ b/migration/options.c
> > @@ -195,6 +195,7 @@ Property migration_properties[] = {
> >      DEFINE_PROP_MIG_CAP("x-block", MIGRATION_CAPABILITY_BLOCK),
> >      DEFINE_PROP_MIG_CAP("x-return-path", MIGRATION_CAPABILITY_RETURN_PATH),
> >      DEFINE_PROP_MIG_CAP("x-multifd", MIGRATION_CAPABILITY_MULTIFD),
> > +    DEFINE_PROP_MIG_CAP("x-main-zero-page", MIGRATION_CAPABILITY_MAIN_ZERO_PAGE),
> >      DEFINE_PROP_MIG_CAP("x-background-snapshot",
> >              MIGRATION_CAPABILITY_BACKGROUND_SNAPSHOT),
> >  #ifdef CONFIG_LINUX
> > @@ -288,13 +289,9 @@ bool migrate_multifd(void)
> >
> >  bool migrate_use_main_zero_page(void)
> >  {
> > -    //MigrationState *s;
> > -
> > -    //s = migrate_get_current();
> > +    MigrationState *s = migrate_get_current();
> >
> > -    // We will enable this when we add the right code.
> > -    // return s->enabled_capabilities[MIGRATION_CAPABILITY_MAIN_ZERO_PAGE];
> > -    return true;
> > +    return s->capabilities[MIGRATION_CAPABILITY_MAIN_ZERO_PAGE];
>
> What happens if we disable main-zero-page while multifd is not enabled?

In ram.c
...
if (migrate_multifd() && !migrate_use_main_zero_page()) {
    migration_ops->ram_save_target_page = ram_save_target_page_multifd;
} else {
    migration_ops->ram_save_target_page = ram_save_target_page_legacy;
}
...

So if main-zero-page is disabled and multifd is also disabled, it will
take the "else" path, which is the legacy path:
ram_save_target_page_legacy() will do zero page checking from the main
thread.

>
> >  }
> >
> >  bool migrate_pause_before_switchover(void)
> > @@ -457,6 +454,7 @@ INITIALIZE_MIGRATE_CAPS_SET(check_caps_background_snapshot,
> >      MIGRATION_CAPABILITY_LATE_BLOCK_ACTIVATE,
> >      MIGRATION_CAPABILITY_RETURN_PATH,
> >      MIGRATION_CAPABILITY_MULTIFD,
> > +    MIGRATION_CAPABILITY_MAIN_ZERO_PAGE,
> >      MIGRATION_CAPABILITY_PAUSE_BEFORE_SWITCHOVER,
> >      MIGRATION_CAPABILITY_AUTO_CONVERGE,
> >      MIGRATION_CAPABILITY_RELEASE_RAM,
> > @@ -534,6 +532,9 @@ bool migrate_caps_check(bool *old_caps, bool *new_caps, Error **errp)
> >              error_setg(errp, "Postcopy is not yet compatible with multifd");
> >              return false;
> >          }
> > +        if (new_caps[MIGRATION_CAPABILITY_MAIN_ZERO_PAGE]) {
> > +            error_setg(errp, "Postcopy is not yet compatible with main zero copy");
> > +        }
>
> Won't this break compatibility for postcopy? A command that used
> to work will now have to disable main-zero-page first.

main-zero-page is disabled by default.

>
> >      }
> >
> >      if (new_caps[MIGRATION_CAPABILITY_BACKGROUND_SNAPSHOT]) {
> > diff --git a/migration/ram.c b/migration/ram.c
> > index 8c7886ab79..f7a42feff2 100644
> > --- a/migration/ram.c
> > +++ b/migration/ram.c
> > @@ -2059,17 +2059,42 @@ static int ram_save_target_page_legacy(RAMState *rs, PageSearchStatus *pss)
> >      if (save_zero_page(rs, pss, offset)) {
> >          return 1;
> >      }
> > -
> >      /*
> > -     * Do not use multifd in postcopy as one whole host page should be
> > -     * placed.  Meanwhile postcopy requires atomic update of pages, so even
> > -     * if host page size == guest page size the dest guest during run may
> > -     * still see partially copied pages which is data corruption.
> > +     * Do not use multifd for:
> > +     * 1. Compression as the first page in the new block should be posted out
> > +     *    before sending the compressed page
> > +     * 2. In postcopy as one whole host page should be placed
> >       */
> > -    if (migrate_multifd() && !migration_in_postcopy()) {
> > +    if (!migrate_compress() && migrate_multifd() && !migration_in_postcopy()) {
> > +        return ram_save_multifd_page(pss->pss_channel, block, offset);
> > +    }
>
> This could go into ram_save_target_page_multifd like so:
>
> if (!migrate_compress() && !migration_in_postcopy() && !migrate_use_main_zero_page()) {
>     return ram_save_multifd_page(pss->pss_channel, block, offset);
> } else {
>     return ram_save_target_page_legacy(rs, pss);
> }
>
> > +
> > +    return ram_save_page(rs, pss);
> > +}
> > +
> > +/**
> > + * ram_save_target_page_multifd: save one target page
> > + *
> > + * Returns the number of pages written
> > + *
> > + * @rs: current RAM state
> > + * @pss: data about the page we want to send
> > + */
> > +static int ram_save_target_page_multifd(RAMState *rs, PageSearchStatus *pss)
> > +{
> > +    RAMBlock *block = pss->block;
> > +    ram_addr_t offset = ((ram_addr_t)pss->page) << TARGET_PAGE_BITS;
> > +    int res;
> > +
> > +    if (!migration_in_postcopy()) {
> >          return ram_save_multifd_page(pss->pss_channel, block, offset);
> >      }
> >
> > +    res = save_zero_page(rs, pss, offset);
> > +    if (res > 0) {
> > +        return res;
> > +    }
> > +
> >      return ram_save_page(rs, pss);
> >  }
> >
> > @@ -2982,9 +3007,15 @@ static int ram_save_setup(QEMUFile *f, void *opaque)
> >      }
> >
> >      migration_ops = g_malloc0(sizeof(MigrationOps));
> > -    migration_ops->ram_save_target_page = ram_save_target_page_legacy;
> > +
> > +    if (migrate_multifd() && !migrate_use_main_zero_page()) {
> > +        migration_ops->ram_save_target_page = ram_save_target_page_multifd;
> > +    } else {
> > +        migration_ops->ram_save_target_page = ram_save_target_page_legacy;
> > +    }
>
> This should not check main-zero-page. Just have multifd vs. legacy and
> have the multifd function defer to _legacy if main-zero-page or
> in_postcopy.

I noticed that ram_save_target_page_legacy and
ram_save_target_page_multifd have a lot of overlap and are quite
confusing. I can refactor this path and take in your comments here.

1) Remove ram_save_multifd_page() call from
ram_save_target_page_legacy(). ram_save_multifd_page() will only be
called in ram_save_target_page_multifd().
2) Remove save_zero_page() and ram_save_page() from
ram_save_target_page_multifd().
3) Postcopy will always go with the ram_save_target_page_legacy() path.
4) Legacy compression will always go with the
ram_save_target_page_legacy() path.
5) Call ram_save_target_page_legacy() from within
ram_save_target_page_multifd() if postcopy or legacy compression.

Hao Xiang Jan. 25, 2024, 9:55 p.m. UTC | #3
On Mon, Jan 22, 2024 at 8:28 PM Hao Xiang <hao.xiang@bytedance.com> wrote:
>
> I noticed that ram_save_target_page_legacy and
> ram_save_target_page_multifd have a lot of overlap and are quite
> confusing. I can refactor this path and take in your comments here.
>
> 1) Remove ram_save_multifd_page() call from
> ram_save_target_page_legacy(). ram_save_multifd_page() will only be
> called in ram_save_target_page_multifd().
> 2) Remove save_zero_page() and ram_save_page() from
> ram_save_target_page_multifd().
> 3) Postcopy will always go with the ram_save_target_page_legacy() path.
> 4) Legacy compression will always go with the
> ram_save_target_page_legacy() path.
> 5) Call ram_save_target_page_legacy() from within
> ram_save_target_page_multifd() if postcopy or legacy compression.
>

Hi Fabiano,
So I spent some time reading the
ram_save_target_page_legacy/ram_save_target_page_multifd code path
Juan wrote and here is my current understanding:
1) Multifd and legacy compression are not compatible.
2) Multifd and postcopy are not compatible.
The compatibility checks are implemented in migrate_caps_check(). So
there is really no need to handle a lot of the complexity in Juan's
code.

I think what we can do is (a rough sketch follows the list):
1) If multifd is enabled, use ram_save_target_page_multifd().
Otherwise, use ram_save_target_page_legacy().
2) In ram_save_target_page_legacy(), we don't need the special path to
call ram_save_multifd_page(). That can be handled by
ram_save_target_page_multifd() alone.
3) In ram_save_target_page_multifd(), we assert that neither legacy
compression nor postcopy is enabled.
4) We do need backward compatibility support for the main zero page
checking case in multifd. So in ram_save_target_page_multifd(), we
call save_zero_page() if migrate_multifd_zero_page() is false.
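
Something like this (sketch only; migrate_multifd_zero_page() here
stands for the accessor of the new capability we discussed in the
other message, the exact name may change in the posted series):

static int ram_save_target_page_multifd(RAMState *rs, PageSearchStatus *pss)
{
    RAMBlock *block = pss->block;
    ram_addr_t offset = ((ram_addr_t)pss->page) << TARGET_PAGE_BITS;

    /* 3) migrate_caps_check() already rejects these together with multifd */
    assert(!migrate_compress());
    assert(!migration_in_postcopy());

    /* 4) keep zero page detection on the main thread when requested */
    if (!migrate_multifd_zero_page() && save_zero_page(rs, pss, offset)) {
        return 1;
    }

    return ram_save_multifd_page(pss->pss_channel, block, offset);
}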

Fabiano Rosas Jan. 25, 2024, 11:14 p.m. UTC | #4
Hao Xiang <hao.xiang@bytedance.com> writes:

> Hi Fabiano,
> So I spent some time reading the
> ram_save_target_page_legacy/ram_save_target_page_multifd code path
> Juan wrote and here is my current understanding:
> 1) Multifd and legacy compression are not compatible.
> 2) Multifd and postcopy are not compatible.
> The compatibility checks are implemented in migrate_caps_check(). So
> there is really no need to handle a lot of the complexity in Juan's
> code.
>
> I think what we can do is:
> 1) If multifd is enabled, use ram_save_target_page_multifd().
> Otherwise, use ram_save_target_page_legacy().
> 2) In ram_save_target_page_legacy(), we don't need the special path to
> call ram_save_multifd_page(). That can be handled by
> ram_save_target_page_multifd() alone.
> 3) In ram_save_target_page_multifd(), we assert that neither legacy
> compression nor postcopy is enabled.
> 4) We do need backward compatibility support for the main zero page
> checking case in multifd. So in ram_save_target_page_multifd(), we
> call save_zero_page() if migrate_multifd_zero_page() is false.
>

Sounds good. Could you apply those changes and the capability we
discussed in the other message and send a separate series?  I haven't
found the time to work on this yet.
Hao Xiang Jan. 25, 2024, 11:46 p.m. UTC | #5
On Thu, Jan 25, 2024 at 3:14 PM Fabiano Rosas <farosas@suse.de> wrote:
>
> Sounds good. Could you apply those changes and the capability we
> discussed in the other message and send a separate series?  I haven't
> found the time to work on this yet.
>

Sure, I will send out a separate series for multifd zero page checking.

Patch

diff --git a/migration/multifd.c b/migration/multifd.c
index 1b994790d5..1198ffde9c 100644
--- a/migration/multifd.c
+++ b/migration/multifd.c
@@ -13,6 +13,7 @@ 
 #include "qemu/osdep.h"
 #include "qemu/cutils.h"
 #include "qemu/rcu.h"
+#include "qemu/cutils.h"
 #include "exec/target_page.h"
 #include "sysemu/sysemu.h"
 #include "exec/ramblock.h"
@@ -459,7 +460,6 @@  static int multifd_send_pages(QEMUFile *f)
     p->packet_num = multifd_send_state->packet_num++;
     multifd_send_state->pages = p->pages;
     p->pages = pages;
-
     qemu_mutex_unlock(&p->mutex);
     qemu_sem_post(&p->sem);
 
@@ -684,7 +684,7 @@  static void *multifd_send_thread(void *opaque)
     MigrationThread *thread = NULL;
     Error *local_err = NULL;
     /* qemu older than 8.2 don't understand zero page on multifd channel */
-    bool use_zero_page = !migrate_use_main_zero_page();
+    bool use_multifd_zero_page = !migrate_use_main_zero_page();
     int ret = 0;
     bool use_zero_copy_send = migrate_zero_copy_send();
 
@@ -713,6 +713,7 @@  static void *multifd_send_thread(void *opaque)
             RAMBlock *rb = p->pages->block;
             uint64_t packet_num = p->packet_num;
             uint32_t flags;
+
             p->normal_num = 0;
             p->zero_num = 0;
 
@@ -724,7 +725,7 @@  static void *multifd_send_thread(void *opaque)
 
             for (int i = 0; i < p->pages->num; i++) {
                 uint64_t offset = p->pages->offset[i];
-                if (use_zero_page &&
+                if (use_multifd_zero_page &&
                     buffer_is_zero(rb->host + offset, p->page_size)) {
                     p->zero[p->zero_num] = offset;
                     p->zero_num++;
diff --git a/migration/options.c b/migration/options.c
index 00c0c4a0d6..97d121d4d7 100644
--- a/migration/options.c
+++ b/migration/options.c
@@ -195,6 +195,7 @@  Property migration_properties[] = {
     DEFINE_PROP_MIG_CAP("x-block", MIGRATION_CAPABILITY_BLOCK),
     DEFINE_PROP_MIG_CAP("x-return-path", MIGRATION_CAPABILITY_RETURN_PATH),
     DEFINE_PROP_MIG_CAP("x-multifd", MIGRATION_CAPABILITY_MULTIFD),
+    DEFINE_PROP_MIG_CAP("x-main-zero-page", MIGRATION_CAPABILITY_MAIN_ZERO_PAGE),
     DEFINE_PROP_MIG_CAP("x-background-snapshot",
             MIGRATION_CAPABILITY_BACKGROUND_SNAPSHOT),
 #ifdef CONFIG_LINUX
@@ -288,13 +289,9 @@  bool migrate_multifd(void)
 
 bool migrate_use_main_zero_page(void)
 {
-    //MigrationState *s;
-
-    //s = migrate_get_current();
+    MigrationState *s = migrate_get_current();
 
-    // We will enable this when we add the right code.
-    // return s->enabled_capabilities[MIGRATION_CAPABILITY_MAIN_ZERO_PAGE];
-    return true;
+    return s->capabilities[MIGRATION_CAPABILITY_MAIN_ZERO_PAGE];
 }
 
 bool migrate_pause_before_switchover(void)
@@ -457,6 +454,7 @@  INITIALIZE_MIGRATE_CAPS_SET(check_caps_background_snapshot,
     MIGRATION_CAPABILITY_LATE_BLOCK_ACTIVATE,
     MIGRATION_CAPABILITY_RETURN_PATH,
     MIGRATION_CAPABILITY_MULTIFD,
+    MIGRATION_CAPABILITY_MAIN_ZERO_PAGE,
     MIGRATION_CAPABILITY_PAUSE_BEFORE_SWITCHOVER,
     MIGRATION_CAPABILITY_AUTO_CONVERGE,
     MIGRATION_CAPABILITY_RELEASE_RAM,
@@ -534,6 +532,9 @@  bool migrate_caps_check(bool *old_caps, bool *new_caps, Error **errp)
             error_setg(errp, "Postcopy is not yet compatible with multifd");
             return false;
         }
+        if (new_caps[MIGRATION_CAPABILITY_MAIN_ZERO_PAGE]) {
+            error_setg(errp, "Postcopy is not yet compatible with main zero copy");
+        }
     }
 
     if (new_caps[MIGRATION_CAPABILITY_BACKGROUND_SNAPSHOT]) {
diff --git a/migration/ram.c b/migration/ram.c
index 8c7886ab79..f7a42feff2 100644
--- a/migration/ram.c
+++ b/migration/ram.c
@@ -2059,17 +2059,42 @@  static int ram_save_target_page_legacy(RAMState *rs, PageSearchStatus *pss)
     if (save_zero_page(rs, pss, offset)) {
         return 1;
     }
-
     /*
-     * Do not use multifd in postcopy as one whole host page should be
-     * placed.  Meanwhile postcopy requires atomic update of pages, so even
-     * if host page size == guest page size the dest guest during run may
-     * still see partially copied pages which is data corruption.
+     * Do not use multifd for:
+     * 1. Compression as the first page in the new block should be posted out
+     *    before sending the compressed page
+     * 2. In postcopy as one whole host page should be placed
      */
-    if (migrate_multifd() && !migration_in_postcopy()) {
+    if (!migrate_compress() && migrate_multifd() && !migration_in_postcopy()) {
+        return ram_save_multifd_page(pss->pss_channel, block, offset);
+    }
+
+    return ram_save_page(rs, pss);
+}
+
+/**
+ * ram_save_target_page_multifd: save one target page
+ *
+ * Returns the number of pages written
+ *
+ * @rs: current RAM state
+ * @pss: data about the page we want to send
+ */
+static int ram_save_target_page_multifd(RAMState *rs, PageSearchStatus *pss)
+{
+    RAMBlock *block = pss->block;
+    ram_addr_t offset = ((ram_addr_t)pss->page) << TARGET_PAGE_BITS;
+    int res;
+
+    if (!migration_in_postcopy()) {
         return ram_save_multifd_page(pss->pss_channel, block, offset);
     }
 
+    res = save_zero_page(rs, pss, offset);
+    if (res > 0) {
+        return res;
+    }
+
     return ram_save_page(rs, pss);
 }
 
@@ -2982,9 +3007,15 @@  static int ram_save_setup(QEMUFile *f, void *opaque)
     }
 
     migration_ops = g_malloc0(sizeof(MigrationOps));
-    migration_ops->ram_save_target_page = ram_save_target_page_legacy;
+
+    if (migrate_multifd() && !migrate_use_main_zero_page()) {
+        migration_ops->ram_save_target_page = ram_save_target_page_multifd;
+    } else {
+        migration_ops->ram_save_target_page = ram_save_target_page_legacy;
+    }
 
     qemu_mutex_unlock_iothread();
+
     ret = multifd_send_sync_main(f);
     qemu_mutex_lock_iothread();
     if (ret < 0) {
diff --git a/qapi/migration.json b/qapi/migration.json
index 09e4393591..9783289bfc 100644
--- a/qapi/migration.json
+++ b/qapi/migration.json
@@ -531,7 +531,6 @@ 
 #     and can result in more stable read performance.  Requires KVM
 #     with accelerator property "dirty-ring-size" set.  (Since 8.1)
 #
-#
 # @main-zero-page: If enabled, the detection of zero pages will be
 #                  done on the main thread.  Otherwise it is done on
 #                  the multifd threads.