diff mbox series

[RFC,12/21] migration: Introduce page size for-migration-only

Message ID 20230117220914.2062125-13-peterx@redhat.com (mailing list archive)
State New, archived
Headers show
Series migration: Support hugetlb doublemaps | expand

Commit Message

Peter Xu Jan. 17, 2023, 10:09 p.m. UTC
Migration may not want to recognize memory chunks in page size of the host
only, but sometimes we may want to recognize the memory in smaller chunks
if e.g. they're doubly mapped as both huge and small.

In those cases we'll prefer to assume the memory page size is always mapped
small (qemu_real_host_page_size) and we'll do things just like when the
pages were only mapped small.

Let's do this to be prepared for postcopy double-mapping for hugetlbfs.

Signed-off-by: Peter Xu <peterx@redhat.com>
---
 migration/migration.c    |  6 ++++--
 migration/postcopy-ram.c | 16 +++++++++-------
 migration/ram.c          | 29 ++++++++++++++++++++++-------
 migration/ram.h          |  1 +
 4 files changed, 36 insertions(+), 16 deletions(-)

Comments

Dr. David Alan Gilbert Jan. 24, 2023, 1:20 p.m. UTC | #1
* Peter Xu (peterx@redhat.com) wrote:
> Migration may not want to recognize memory chunks in page size of the host
> only, but sometimes we may want to recognize the memory in smaller chunks
> if e.g. they're doubly mapped as both huge and small.
> 
> In those cases we'll prefer to assume the memory page size is always mapped
> small (qemu_real_host_page_size) and we'll do things just like when the
> pages was only smally mapped.
> 
> Let's do this to be prepared of postcopy double-mapping for hugetlbfs.
> 
> Signed-off-by: Peter Xu <peterx@redhat.com>
> ---
>  migration/migration.c    |  6 ++++--
>  migration/postcopy-ram.c | 16 +++++++++-------
>  migration/ram.c          | 29 ++++++++++++++++++++++-------
>  migration/ram.h          |  1 +
>  4 files changed, 36 insertions(+), 16 deletions(-)
> 
> diff --git a/migration/migration.c b/migration/migration.c
> index b174f2af92..f6fe474fc3 100644
> --- a/migration/migration.c
> +++ b/migration/migration.c
> @@ -408,7 +408,7 @@ int migrate_send_rp_message_req_pages(MigrationIncomingState *mis,
>  {
>      uint8_t bufc[12 + 1 + 255]; /* start (8), len (4), rbname up to 256 */
>      size_t msglen = 12; /* start + len */
> -    size_t len = qemu_ram_pagesize(rb);
> +    size_t len = migration_ram_pagesize(rb);
>      enum mig_rp_message_type msg_type;
>      const char *rbname;
>      int rbname_len;
> @@ -443,8 +443,10 @@ int migrate_send_rp_message_req_pages(MigrationIncomingState *mis,
>  int migrate_send_rp_req_pages(MigrationIncomingState *mis,
>                                RAMBlock *rb, ram_addr_t start, uint64_t haddr)
>  {
> -    void *aligned = (void *)(uintptr_t)ROUND_DOWN(haddr, qemu_ram_pagesize(rb));
>      bool received = false;
> +    void *aligned;
> +
> +    aligned = (void *)(uintptr_t)ROUND_DOWN(haddr, migration_ram_pagesize(rb));
>  
>      WITH_QEMU_LOCK_GUARD(&mis->page_request_mutex) {
>          received = ramblock_recv_bitmap_test_byte_offset(rb, start);
> diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c
> index 2c86bfc091..acae1dc6ae 100644
> --- a/migration/postcopy-ram.c
> +++ b/migration/postcopy-ram.c
> @@ -694,7 +694,7 @@ int postcopy_wake_shared(struct PostCopyFD *pcfd,
>                           uint64_t client_addr,
>                           RAMBlock *rb)
>  {
> -    size_t pagesize = qemu_ram_pagesize(rb);
> +    size_t pagesize = migration_ram_pagesize(rb);
>      struct uffdio_range range;
>      int ret;
>      trace_postcopy_wake_shared(client_addr, qemu_ram_get_idstr(rb));
> @@ -712,7 +712,9 @@ int postcopy_wake_shared(struct PostCopyFD *pcfd,
>  static int postcopy_request_page(MigrationIncomingState *mis, RAMBlock *rb,
>                                   ram_addr_t start, uint64_t haddr)
>  {
> -    void *aligned = (void *)(uintptr_t)ROUND_DOWN(haddr, qemu_ram_pagesize(rb));
> +    void *aligned;
> +
> +    aligned = (void *)(uintptr_t)ROUND_DOWN(haddr, migration_ram_pagesize(rb));
>  
>      /*
>       * Discarded pages (via RamDiscardManager) are never migrated. On unlikely
> @@ -722,7 +724,7 @@ static int postcopy_request_page(MigrationIncomingState *mis, RAMBlock *rb,
>       * Checking a single bit is sufficient to handle pagesize > TPS as either
>       * all relevant bits are set or not.
>       */
> -    assert(QEMU_IS_ALIGNED(start, qemu_ram_pagesize(rb)));
> +    assert(QEMU_IS_ALIGNED(start, migration_ram_pagesize(rb)));
>      if (ramblock_page_is_discarded(rb, start)) {
>          bool received = ramblock_recv_bitmap_test_byte_offset(rb, start);
>  
> @@ -740,7 +742,7 @@ static int postcopy_request_page(MigrationIncomingState *mis, RAMBlock *rb,
>  int postcopy_request_shared_page(struct PostCopyFD *pcfd, RAMBlock *rb,
>                                   uint64_t client_addr, uint64_t rb_offset)
>  {
> -    uint64_t aligned_rbo = ROUND_DOWN(rb_offset, qemu_ram_pagesize(rb));
> +    uint64_t aligned_rbo = ROUND_DOWN(rb_offset, migration_ram_pagesize(rb));
>      MigrationIncomingState *mis = migration_incoming_get_current();
>  
>      trace_postcopy_request_shared_page(pcfd->idstr, qemu_ram_get_idstr(rb),
> @@ -1020,7 +1022,7 @@ static void *postcopy_ram_fault_thread(void *opaque)
>                  break;
>              }
>  
> -            rb_offset = ROUND_DOWN(rb_offset, qemu_ram_pagesize(rb));
> +            rb_offset = ROUND_DOWN(rb_offset, migration_ram_pagesize(rb));
>              trace_postcopy_ram_fault_thread_request(msg.arg.pagefault.address,
>                                                  qemu_ram_get_idstr(rb),
>                                                  rb_offset,
> @@ -1281,7 +1283,7 @@ int postcopy_notify_shared_wake(RAMBlock *rb, uint64_t offset)
>  int postcopy_place_page(MigrationIncomingState *mis, void *host, void *from,
>                          RAMBlock *rb)
>  {
> -    size_t pagesize = qemu_ram_pagesize(rb);
> +    size_t pagesize = migration_ram_pagesize(rb);
>  
>      /* copy also acks to the kernel waking the stalled thread up
>       * TODO: We can inhibit that ack and only do it if it was requested
> @@ -1308,7 +1310,7 @@ int postcopy_place_page(MigrationIncomingState *mis, void *host, void *from,
>  int postcopy_place_page_zero(MigrationIncomingState *mis, void *host,
>                               RAMBlock *rb)
>  {
> -    size_t pagesize = qemu_ram_pagesize(rb);
> +    size_t pagesize = migration_ram_pagesize(rb);
>      trace_postcopy_place_page_zero(host);
>  
>      /* Normal RAMBlocks can zero a page using UFFDIO_ZEROPAGE
> diff --git a/migration/ram.c b/migration/ram.c
> index 334309f1c6..945c6477fd 100644
> --- a/migration/ram.c
> +++ b/migration/ram.c
> @@ -121,6 +121,20 @@ static struct {
>      uint8_t *decoded_buf;
>  } XBZRLE;
>  
> +/* Get the page size we should use for migration purpose. */
> +size_t migration_ram_pagesize(RAMBlock *block)
> +{
> +    /*
> +     * When hugetlb doublemap is enabled, we should always use the smallest
> +     * page for migration.
> +     */
> +    if (migrate_hugetlb_doublemap()) {
> +        return qemu_real_host_page_size();
> +    }
> +
> +    return qemu_ram_pagesize(block);
> +}
> +
>  static void XBZRLE_cache_lock(void)
>  {
>      if (migrate_use_xbzrle()) {
> @@ -1049,7 +1063,7 @@ bool ramblock_page_is_discarded(RAMBlock *rb, ram_addr_t start)
>          MemoryRegionSection section = {
>              .mr = rb->mr,
>              .offset_within_region = start,
> -            .size = int128_make64(qemu_ram_pagesize(rb)),
> +            .size = int128_make64(migration_ram_pagesize(rb)),
>          };
>  
>          return !ram_discard_manager_is_populated(rdm, &section);
> @@ -2152,7 +2166,7 @@ int ram_save_queue_pages(const char *rbname, ram_addr_t start, ram_addr_t len)
>       */
>      if (postcopy_preempt_active()) {
>          ram_addr_t page_start = start >> TARGET_PAGE_BITS;
> -        size_t page_size = qemu_ram_pagesize(ramblock);
> +        size_t page_size = migration_ram_pagesize(ramblock);
>          PageSearchStatus *pss = &ram_state->pss[RAM_CHANNEL_POSTCOPY];
>          int ret = 0;
>  
> @@ -2316,7 +2330,7 @@ static int ram_save_target_page(RAMState *rs, PageSearchStatus *pss)
>  static void pss_host_page_prepare(PageSearchStatus *pss)
>  {
>      /* How many guest pages are there in one host page? */
> -    size_t guest_pfns = qemu_ram_pagesize(pss->block) >> TARGET_PAGE_BITS;
> +    size_t guest_pfns = migration_ram_pagesize(pss->block) >> TARGET_PAGE_BITS;
>  
>      pss->host_page_sending = true;
>      pss->host_page_start = ROUND_DOWN(pss->page, guest_pfns);
> @@ -2425,7 +2439,7 @@ static int ram_save_host_page(RAMState *rs, PageSearchStatus *pss)
>      bool page_dirty, preempt_active = postcopy_preempt_active();
>      int tmppages, pages = 0;
>      size_t pagesize_bits =
> -        qemu_ram_pagesize(pss->block) >> TARGET_PAGE_BITS;
> +        migration_ram_pagesize(pss->block) >> TARGET_PAGE_BITS;
>      unsigned long start_page = pss->page;
>      int res;
>  
> @@ -3518,7 +3532,7 @@ static void *host_page_from_ram_block_offset(RAMBlock *block,
>  {
>      /* Note: Explicitly no check against offset_in_ramblock(). */
>      return (void *)QEMU_ALIGN_DOWN((uintptr_t)(block->host + offset),
> -                                   block->page_size);
> +                                   migration_ram_pagesize(block));
>  }
>  
>  static ram_addr_t host_page_offset_from_ram_block_offset(RAMBlock *block,
> @@ -3970,7 +3984,8 @@ int ram_load_postcopy(QEMUFile *f, int channel)
>                  break;
>              }
>              tmp_page->target_pages++;
> -            matches_target_page_size = block->page_size == TARGET_PAGE_SIZE;
> +            matches_target_page_size =
> +                migration_ram_pagesize(block) == TARGET_PAGE_SIZE;
>              /*
>               * Postcopy requires that we place whole host pages atomically;
>               * these may be huge pages for RAMBlocks that are backed by

Hmm do you really want this change?

Dave

> @@ -4005,7 +4020,7 @@ int ram_load_postcopy(QEMUFile *f, int channel)
>               * page
>               */
>              if (tmp_page->target_pages ==
> -                (block->page_size / TARGET_PAGE_SIZE)) {
> +                (migration_ram_pagesize(block) / TARGET_PAGE_SIZE)) {
>                  place_needed = true;
>              }
>              place_source = tmp_page->tmp_huge_page;
> diff --git a/migration/ram.h b/migration/ram.h
> index 81cbb0947c..162b3e7cb8 100644
> --- a/migration/ram.h
> +++ b/migration/ram.h
> @@ -68,6 +68,7 @@ bool ramblock_is_ignored(RAMBlock *block);
>          if (!qemu_ram_is_migratable(block)) {} else
>  
>  int xbzrle_cache_resize(uint64_t new_size, Error **errp);
> +size_t migration_ram_pagesize(RAMBlock *block);
>  uint64_t ram_bytes_remaining(void);
>  uint64_t ram_bytes_total(void);
>  void mig_throttle_counter_reset(void);
> -- 
> 2.37.3
>
Peter Xu Jan. 24, 2023, 9:36 p.m. UTC | #2
On Tue, Jan 24, 2023 at 01:20:37PM +0000, Dr. David Alan Gilbert wrote:
> > @@ -3970,7 +3984,8 @@ int ram_load_postcopy(QEMUFile *f, int channel)
> >                  break;
> >              }
> >              tmp_page->target_pages++;
> > -            matches_target_page_size = block->page_size == TARGET_PAGE_SIZE;
> > +            matches_target_page_size =
> > +                migration_ram_pagesize(block) == TARGET_PAGE_SIZE;
> >              /*
> >               * Postcopy requires that we place whole host pages atomically;
> >               * these may be huge pages for RAMBlocks that are backed by
> 
> Hmm do you really want this change?

Yes that's intended.  I want to reuse the same logic here when receiving
small pages from huge pages, just like when we're receiving small pages on
non-hugetlb mappings.

matches_target_page_size mainly affects two things:

  1) For a small zero page, whether we want to pre-set the page_buffer, or
     simply use postcopy_place_page_zero():
  
        case RAM_SAVE_FLAG_ZERO:
            ch = qemu_get_byte(f);
            /*
             * Can skip to set page_buffer when
             * this is a zero page and (block->page_size == TARGET_PAGE_SIZE).
             */
            if (ch || !matches_target_page_size) {
                memset(page_buffer, ch, TARGET_PAGE_SIZE);
            }

  2) For normal page, whether we need to use a page buffer or we can
     directly reuse the page buffer in QEMUFile:

            if (!matches_target_page_size) {
                /* For huge pages, we always use temporary buffer */
                qemu_get_buffer(f, page_buffer, TARGET_PAGE_SIZE);
            } else {
                /*
                 * For small pages that matches target page size, we
                 * avoid the qemu_file copy.  Instead we directly use
                 * the buffer of QEMUFile to place the page.  Note: we
                 * cannot do any QEMUFile operation before using that
                 * buffer to make sure the buffer is valid when
                 * placing the page.
                 */
                qemu_get_buffer_in_place(f, (uint8_t **)&place_source,
                                         TARGET_PAGE_SIZE);
            }

Here:

I want 1) to reuse postcopy_place_page_zero().  For the doublemap case,
it'll reuse postcopy_tmp_zero_page() (because qemu_ram_is_uf_zeroable()
will return false for such a ramblock).

I want 2) to reuse qemu_get_buffer_in_place(), so we avoid a copy process
for the small page which is faster (even if it's hugetlb backed, now we can
reuse the qemufile buffer safely).

Thanks,
Peter Xu Jan. 24, 2023, 10:03 p.m. UTC | #3
On Tue, Jan 24, 2023 at 04:36:20PM -0500, Peter Xu wrote:
> On Tue, Jan 24, 2023 at 01:20:37PM +0000, Dr. David Alan Gilbert wrote:
> > > @@ -3970,7 +3984,8 @@ int ram_load_postcopy(QEMUFile *f, int channel)
> > >                  break;
> > >              }
> > >              tmp_page->target_pages++;
> > > -            matches_target_page_size = block->page_size == TARGET_PAGE_SIZE;
> > > +            matches_target_page_size =
> > > +                migration_ram_pagesize(block) == TARGET_PAGE_SIZE;
> > >              /*
> > >               * Postcopy requires that we place whole host pages atomically;
> > >               * these may be huge pages for RAMBlocks that are backed by
> > 
> > Hmm do you really want this change?
> 
> Yes that's intended.  I want to reuse the same logic here when receiving
> small pages from huge pages, just like when we're receiving small pages on
> non-hugetlb mappings.
> 
> matches_target_page_size majorly affects two things:
> 
>   1) For a small zero page, whether we want to pre-set the page_buffer, or
>      simply use postcopy_place_page_zero():
>   
>         case RAM_SAVE_FLAG_ZERO:
>             ch = qemu_get_byte(f);
>             /*
>              * Can skip to set page_buffer when
>              * this is a zero page and (block->page_size == TARGET_PAGE_SIZE).
>              */
>             if (ch || !matches_target_page_size) {
>                 memset(page_buffer, ch, TARGET_PAGE_SIZE);
>             }
> 
>   2) For normal page, whether we need to use a page buffer or we can
>      directly reuse the page buffer in QEMUFile:
> 
>             if (!matches_target_page_size) {
>                 /* For huge pages, we always use temporary buffer */
>                 qemu_get_buffer(f, page_buffer, TARGET_PAGE_SIZE);
>             } else {
>                 /*
>                  * For small pages that matches target page size, we
>                  * avoid the qemu_file copy.  Instead we directly use
>                  * the buffer of QEMUFile to place the page.  Note: we
>                  * cannot do any QEMUFile operation before using that
>                  * buffer to make sure the buffer is valid when
>                  * placing the page.
>                  */
>                 qemu_get_buffer_in_place(f, (uint8_t **)&place_source,
>                                          TARGET_PAGE_SIZE);
>             }
> 
> Here:
> 
> I want 1) to reuse postcopy_place_page_zero().  For the doublemap case,
> it'll reuse postcopy_tmp_zero_page() (because qemu_ram_is_uf_zeroable()
> will return false for such a ramblock).
> 
> I want 2) to reuse qemu_get_buffer_in_place(), so we avoid a copy process
> for the small page which is faster (even if it's hugetlb backed, now we can
> reuse the qemufile buffer safely).

While at it, one more thing worth mentioning is that I don't actually know
whether the original code is always correct when target and host small
psizes don't match.  This is the original line:

  matches_target_page_size = block->page_size == TARGET_PAGE_SIZE;

The problem is we're comparing block page size against target page size,
however block page size should be in host page size granule:

  RAMBlock *qemu_ram_alloc_internal()
  {
    new_block->page_size = qemu_real_host_page_size();

IOW, I am not sure whether postcopy will run at all in that case.  For
example, when we run an Alpha emulator upon x86_64, we can have target
psize 8K while host psize 4K.

The migration protocol should be TARGET_PAGE_SIZE based.  It means, for
postcopy when receiving a single page for Alpha VM being migrated, maybe we
should call UFFDIO_COPY (or UFFDIO_CONTINUE; doesn't matter here) twice
because one guest page contains two host pages.

I'm not sure whether I get all these right.. if so, we have two options:

  a) Forbid postcopy as a whole when detecting qemu_real_host_page_size()
     != TARGET_PAGE_SIZE.

  b) Implement postcopy for that case

I'd go with a) even if it's an issue, because it means no one has migrated
such a setup with postcopy in the past N years, which suggests that
b) may not be worth implementing.
Juan Quintela Jan. 30, 2023, 5:17 a.m. UTC | #4
Peter Xu <peterx@redhat.com> wrote:
> Migration may not want to recognize memory chunks in page size of the host
> only, but sometimes we may want to recognize the memory in smaller chunks
> if e.g. they're doubly mapped as both huge and small.
>
> In those cases we'll prefer to assume the memory page size is always mapped
> small (qemu_real_host_page_size) and we'll do things just like when the
> pages was only smally mapped.
>
> Let's do this to be prepared of postcopy double-mapping for hugetlbfs.
>
> Signed-off-by: Peter Xu <peterx@redhat.com>


Reviewed-by: Juan Quintela <quintela@redhat.com>


> ---
>  migration/migration.c    |  6 ++++--
>  migration/postcopy-ram.c | 16 +++++++++-------
>  migration/ram.c          | 29 ++++++++++++++++++++++-------
>  migration/ram.h          |  1 +
>  4 files changed, 36 insertions(+), 16 deletions(-)
>
> diff --git a/migration/migration.c b/migration/migration.c
> index b174f2af92..f6fe474fc3 100644
> --- a/migration/migration.c
> +++ b/migration/migration.c
> @@ -408,7 +408,7 @@ int migrate_send_rp_message_req_pages(MigrationIncomingState *mis,
>  {
>      uint8_t bufc[12 + 1 + 255]; /* start (8), len (4), rbname up to 256 */
>      size_t msglen = 12; /* start + len */
> -    size_t len = qemu_ram_pagesize(rb);
> +    size_t len = migration_ram_pagesize(rb);
>      enum mig_rp_message_type msg_type;
>      const char *rbname;
>      int rbname_len;
> @@ -443,8 +443,10 @@ int migrate_send_rp_message_req_pages(MigrationIncomingState *mis,
>  int migrate_send_rp_req_pages(MigrationIncomingState *mis,
>                                RAMBlock *rb, ram_addr_t start, uint64_t haddr)
>  {
> -    void *aligned = (void *)(uintptr_t)ROUND_DOWN(haddr, qemu_ram_pagesize(rb));
>      bool received = false;
> +    void *aligned;
> +
> +    aligned = (void *)(uintptr_t)ROUND_DOWN(haddr, migration_ram_pagesize(rb));

I am trying to ensure that all new code declares variables at first use, and
this goes in the wrong direction.  As this happens more than once in this
patch, can we change the macro (or create another macro) so that it also
does the cast?
diff mbox series

Patch

diff --git a/migration/migration.c b/migration/migration.c
index b174f2af92..f6fe474fc3 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -408,7 +408,7 @@  int migrate_send_rp_message_req_pages(MigrationIncomingState *mis,
 {
     uint8_t bufc[12 + 1 + 255]; /* start (8), len (4), rbname up to 256 */
     size_t msglen = 12; /* start + len */
-    size_t len = qemu_ram_pagesize(rb);
+    size_t len = migration_ram_pagesize(rb);
     enum mig_rp_message_type msg_type;
     const char *rbname;
     int rbname_len;
@@ -443,8 +443,10 @@  int migrate_send_rp_message_req_pages(MigrationIncomingState *mis,
 int migrate_send_rp_req_pages(MigrationIncomingState *mis,
                               RAMBlock *rb, ram_addr_t start, uint64_t haddr)
 {
-    void *aligned = (void *)(uintptr_t)ROUND_DOWN(haddr, qemu_ram_pagesize(rb));
     bool received = false;
+    void *aligned;
+
+    aligned = (void *)(uintptr_t)ROUND_DOWN(haddr, migration_ram_pagesize(rb));
 
     WITH_QEMU_LOCK_GUARD(&mis->page_request_mutex) {
         received = ramblock_recv_bitmap_test_byte_offset(rb, start);
diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c
index 2c86bfc091..acae1dc6ae 100644
--- a/migration/postcopy-ram.c
+++ b/migration/postcopy-ram.c
@@ -694,7 +694,7 @@  int postcopy_wake_shared(struct PostCopyFD *pcfd,
                          uint64_t client_addr,
                          RAMBlock *rb)
 {
-    size_t pagesize = qemu_ram_pagesize(rb);
+    size_t pagesize = migration_ram_pagesize(rb);
     struct uffdio_range range;
     int ret;
     trace_postcopy_wake_shared(client_addr, qemu_ram_get_idstr(rb));
@@ -712,7 +712,9 @@  int postcopy_wake_shared(struct PostCopyFD *pcfd,
 static int postcopy_request_page(MigrationIncomingState *mis, RAMBlock *rb,
                                  ram_addr_t start, uint64_t haddr)
 {
-    void *aligned = (void *)(uintptr_t)ROUND_DOWN(haddr, qemu_ram_pagesize(rb));
+    void *aligned;
+
+    aligned = (void *)(uintptr_t)ROUND_DOWN(haddr, migration_ram_pagesize(rb));
 
     /*
      * Discarded pages (via RamDiscardManager) are never migrated. On unlikely
@@ -722,7 +724,7 @@  static int postcopy_request_page(MigrationIncomingState *mis, RAMBlock *rb,
      * Checking a single bit is sufficient to handle pagesize > TPS as either
      * all relevant bits are set or not.
      */
-    assert(QEMU_IS_ALIGNED(start, qemu_ram_pagesize(rb)));
+    assert(QEMU_IS_ALIGNED(start, migration_ram_pagesize(rb)));
     if (ramblock_page_is_discarded(rb, start)) {
         bool received = ramblock_recv_bitmap_test_byte_offset(rb, start);
 
@@ -740,7 +742,7 @@  static int postcopy_request_page(MigrationIncomingState *mis, RAMBlock *rb,
 int postcopy_request_shared_page(struct PostCopyFD *pcfd, RAMBlock *rb,
                                  uint64_t client_addr, uint64_t rb_offset)
 {
-    uint64_t aligned_rbo = ROUND_DOWN(rb_offset, qemu_ram_pagesize(rb));
+    uint64_t aligned_rbo = ROUND_DOWN(rb_offset, migration_ram_pagesize(rb));
     MigrationIncomingState *mis = migration_incoming_get_current();
 
     trace_postcopy_request_shared_page(pcfd->idstr, qemu_ram_get_idstr(rb),
@@ -1020,7 +1022,7 @@  static void *postcopy_ram_fault_thread(void *opaque)
                 break;
             }
 
-            rb_offset = ROUND_DOWN(rb_offset, qemu_ram_pagesize(rb));
+            rb_offset = ROUND_DOWN(rb_offset, migration_ram_pagesize(rb));
             trace_postcopy_ram_fault_thread_request(msg.arg.pagefault.address,
                                                 qemu_ram_get_idstr(rb),
                                                 rb_offset,
@@ -1281,7 +1283,7 @@  int postcopy_notify_shared_wake(RAMBlock *rb, uint64_t offset)
 int postcopy_place_page(MigrationIncomingState *mis, void *host, void *from,
                         RAMBlock *rb)
 {
-    size_t pagesize = qemu_ram_pagesize(rb);
+    size_t pagesize = migration_ram_pagesize(rb);
 
     /* copy also acks to the kernel waking the stalled thread up
      * TODO: We can inhibit that ack and only do it if it was requested
@@ -1308,7 +1310,7 @@  int postcopy_place_page(MigrationIncomingState *mis, void *host, void *from,
 int postcopy_place_page_zero(MigrationIncomingState *mis, void *host,
                              RAMBlock *rb)
 {
-    size_t pagesize = qemu_ram_pagesize(rb);
+    size_t pagesize = migration_ram_pagesize(rb);
     trace_postcopy_place_page_zero(host);
 
     /* Normal RAMBlocks can zero a page using UFFDIO_ZEROPAGE
diff --git a/migration/ram.c b/migration/ram.c
index 334309f1c6..945c6477fd 100644
--- a/migration/ram.c
+++ b/migration/ram.c
@@ -121,6 +121,20 @@  static struct {
     uint8_t *decoded_buf;
 } XBZRLE;
 
+/* Get the page size we should use for migration purpose. */
+size_t migration_ram_pagesize(RAMBlock *block)
+{
+    /*
+     * When hugetlb doublemap is enabled, we should always use the smallest
+     * page for migration.
+     */
+    if (migrate_hugetlb_doublemap()) {
+        return qemu_real_host_page_size();
+    }
+
+    return qemu_ram_pagesize(block);
+}
+
 static void XBZRLE_cache_lock(void)
 {
     if (migrate_use_xbzrle()) {
@@ -1049,7 +1063,7 @@  bool ramblock_page_is_discarded(RAMBlock *rb, ram_addr_t start)
         MemoryRegionSection section = {
             .mr = rb->mr,
             .offset_within_region = start,
-            .size = int128_make64(qemu_ram_pagesize(rb)),
+            .size = int128_make64(migration_ram_pagesize(rb)),
         };
 
         return !ram_discard_manager_is_populated(rdm, &section);
@@ -2152,7 +2166,7 @@  int ram_save_queue_pages(const char *rbname, ram_addr_t start, ram_addr_t len)
      */
     if (postcopy_preempt_active()) {
         ram_addr_t page_start = start >> TARGET_PAGE_BITS;
-        size_t page_size = qemu_ram_pagesize(ramblock);
+        size_t page_size = migration_ram_pagesize(ramblock);
         PageSearchStatus *pss = &ram_state->pss[RAM_CHANNEL_POSTCOPY];
         int ret = 0;
 
@@ -2316,7 +2330,7 @@  static int ram_save_target_page(RAMState *rs, PageSearchStatus *pss)
 static void pss_host_page_prepare(PageSearchStatus *pss)
 {
     /* How many guest pages are there in one host page? */
-    size_t guest_pfns = qemu_ram_pagesize(pss->block) >> TARGET_PAGE_BITS;
+    size_t guest_pfns = migration_ram_pagesize(pss->block) >> TARGET_PAGE_BITS;
 
     pss->host_page_sending = true;
     pss->host_page_start = ROUND_DOWN(pss->page, guest_pfns);
@@ -2425,7 +2439,7 @@  static int ram_save_host_page(RAMState *rs, PageSearchStatus *pss)
     bool page_dirty, preempt_active = postcopy_preempt_active();
     int tmppages, pages = 0;
     size_t pagesize_bits =
-        qemu_ram_pagesize(pss->block) >> TARGET_PAGE_BITS;
+        migration_ram_pagesize(pss->block) >> TARGET_PAGE_BITS;
     unsigned long start_page = pss->page;
     int res;
 
@@ -3518,7 +3532,7 @@  static void *host_page_from_ram_block_offset(RAMBlock *block,
 {
     /* Note: Explicitly no check against offset_in_ramblock(). */
     return (void *)QEMU_ALIGN_DOWN((uintptr_t)(block->host + offset),
-                                   block->page_size);
+                                   migration_ram_pagesize(block));
 }
 
 static ram_addr_t host_page_offset_from_ram_block_offset(RAMBlock *block,
@@ -3970,7 +3984,8 @@  int ram_load_postcopy(QEMUFile *f, int channel)
                 break;
             }
             tmp_page->target_pages++;
-            matches_target_page_size = block->page_size == TARGET_PAGE_SIZE;
+            matches_target_page_size =
+                migration_ram_pagesize(block) == TARGET_PAGE_SIZE;
             /*
              * Postcopy requires that we place whole host pages atomically;
              * these may be huge pages for RAMBlocks that are backed by
@@ -4005,7 +4020,7 @@  int ram_load_postcopy(QEMUFile *f, int channel)
              * page
              */
             if (tmp_page->target_pages ==
-                (block->page_size / TARGET_PAGE_SIZE)) {
+                (migration_ram_pagesize(block) / TARGET_PAGE_SIZE)) {
                 place_needed = true;
             }
             place_source = tmp_page->tmp_huge_page;
diff --git a/migration/ram.h b/migration/ram.h
index 81cbb0947c..162b3e7cb8 100644
--- a/migration/ram.h
+++ b/migration/ram.h
@@ -68,6 +68,7 @@  bool ramblock_is_ignored(RAMBlock *block);
         if (!qemu_ram_is_migratable(block)) {} else
 
 int xbzrle_cache_resize(uint64_t new_size, Error **errp);
+size_t migration_ram_pagesize(RAMBlock *block);
 uint64_t ram_bytes_remaining(void);
 uint64_t ram_bytes_total(void);
 void mig_throttle_counter_reset(void);