
[RFC,05/15] migration: Simplify unqueue_page()

Message ID 20220119080929.39485-6-peterx@redhat.com (mailing list archive)
State New, archived
Series migration: Postcopy Preemption

Commit Message

Peter Xu Jan. 19, 2022, 8:09 a.m. UTC
This patch simplifies unqueue_page() on both sides: the function itself, and its caller.

Firstly, once unqueue_page() returns a block, we will definitely send a whole huge page (see the ram_save_huge_page() call - it never exits before it finishes sending that huge page), so unqueue_page() does not need to step through the request in small page units when huge pages are enabled on the ramblock.  IOW, only the first small (e.g. 4K) page of a request can ever matter; on the 2nd+ unqueue we would only find that the whole huge page has already been sent anyway.  Stepping in huge page units removes a lot of redundant unqueue_page() loops - e.g. with 2MB huge pages and 4K target pages, a queued request that used to take 512 unqueue_page() calls now takes one.

Meanwhile, drop the dirty check.  Calling test_bit() every time to skip over clean pages is not helpful, since ram_save_host_page() already does that, and in a faster way (see commit ba1b7c812c ("migration/ram: Optimize ram_save_host_page()", 2021-05-13)).  So the check is not necessary here either.

Drop the two tracepoints along the way - based on the above analysis it is very likely that no one is really using them.

Signed-off-by: Peter Xu <peterx@redhat.com>
---
 migration/ram.c        | 34 ++++++++--------------------------
 migration/trace-events |  2 --
 2 files changed, 8 insertions(+), 28 deletions(-)
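
For reference, below is a minimal sketch of unqueue_page() as it looks with this patch applied, assembled from the hunks at the bottom of this page. The lines the patch does not touch (queue locking, freeing the consumed entry) are summarized as comments and are assumptions about the surrounding context, not verbatim tree contents:

========
static RAMBlock *unqueue_page(RAMState *rs, ram_addr_t *offset)
{
    struct RAMSrcPageRequest *entry;
    RAMBlock *block = NULL;
    size_t page_size;

    if (!postcopy_has_request(rs)) {
        return NULL;
    }

    /* ... take the page request queue lock (elided) ... */

    entry = QSIMPLEQ_FIRST(&rs->src_page_requests);
    block = entry->rb;
    *offset = entry->offset;
    page_size = qemu_ram_pagesize(block);
    /* Each page request should only be multiple page size of the ramblock */
    assert((entry->len % page_size) == 0);

    if (entry->len > page_size) {
        /* Consume one ramblock-sized page - i.e. a whole huge page when
         * huge pages are in use - rather than one TARGET_PAGE_SIZE step. */
        entry->len -= page_size;
        entry->offset += page_size;
    } else {
        /* Request fully consumed: drop it from the queue. */
        memory_region_unref(block->mr);
        QSIMPLEQ_REMOVE_HEAD(&rs->src_page_requests, next_req);
        /* ... free the entry and account the urgent request (elided) ... */
    }

    return block;
}
========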

Comments

Dr. David Alan Gilbert Jan. 19, 2022, 4:36 p.m. UTC | #1
* Peter Xu (peterx@redhat.com) wrote:
> [...]

Yes, OK

Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>

Although:
  a) You might like to keep a trace in get_queued_page just to see
what's getting unqueued
  b) I think originally it was a useful diagnostic to find out when we
were getting a lot of queue requests for pages that were already sent.

Dave


Peter Xu Jan. 20, 2022, 2:23 a.m. UTC | #2
On Wed, Jan 19, 2022 at 04:36:50PM +0000, Dr. David Alan Gilbert wrote:
> * Peter Xu (peterx@redhat.com) wrote:
> > [...]
> 
> Yes, OK
> 
> Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
> 
> Although:
>   a) You might like to keep a trace in get_queued_page just to see
> what's getting unqueued
>   b) I think originally it was a useful diagnostic to find out when we
> were getting a lot of queue requests for pages that were already sent.

Ah, that makes sense.  How about I keep the test_bit but remove the loop?  I
can make both a) and b) into one tracepoint:

========
diff --git a/migration/ram.c b/migration/ram.c
index 0df15ff663..02f36fa6d5 100644
--- a/migration/ram.c
+++ b/migration/ram.c
@@ -1572,6 +1572,9 @@ static RAMBlock *unqueue_page(RAMState *rs, ram_addr_t *offset)
         migration_consume_urgent_request();
     }
 
+    trace_unqueue_page(block->idstr, *offset,
+                       test_bit((*offset >> TARGET_PAGE_BITS), block->bmap));
+
     return block;
 }
 
diff --git a/migration/trace-events b/migration/trace-events
index 3a9b3567ae..efa3a95f81 100644
--- a/migration/trace-events
+++ b/migration/trace-events
@@ -110,6 +110,7 @@ ram_save_iterate_big_wait(uint64_t milliconds, int iterations) "big wait: %" PRI
 ram_load_complete(int ret, uint64_t seq_iter) "exit_code %d seq iteration %" PRIu64
 ram_write_tracking_ramblock_start(const char *block_id, size_t page_size, void *addr, size_t length) "%s: page_size: %zu addr: %p length: %zu"
 ram_write_tracking_ramblock_stop(const char *block_id, size_t page_size, void *addr, size_t length) "%s: page_size: %zu addr: %p length: %zu"
+unqueue_page(char *block, uint64_t offset, bool dirty) "ramblock '%s' offset 0x%"PRIx64" dirty %d"
 
 # multifd.c
 multifd_new_send_channel_async(uint8_t id) "channel %d"
========

Thanks,
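
With the combined tracepoint, point a) is covered by the event itself (which block/offset is being unqueued) and point b) by the dirty flag: a 0 there marks a queued page that was already sent. For illustration, the call site again, with hypothetical output values in the comment:

========
/* In unqueue_page(), just before returning the block: */
trace_unqueue_page(block->idstr, *offset,
                   test_bit((*offset >> TARGET_PAGE_BITS), block->bmap));

/* Example trace lines (hypothetical values; exact prefix depends on the
 * trace backend):
 *   unqueue_page ramblock 'pc.ram' offset 0x7ea000 dirty 1   <- will be sent
 *   unqueue_page ramblock 'pc.ram' offset 0x7ea000 dirty 0   <- already sent
 */
========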
Dr. David Alan Gilbert Jan. 25, 2022, 11:01 a.m. UTC | #3
* Peter Xu (peterx@redhat.com) wrote:
> On Wed, Jan 19, 2022 at 04:36:50PM +0000, Dr. David Alan Gilbert wrote:
> > * Peter Xu (peterx@redhat.com) wrote:
> > > [...]
> > 
> > Yes, OK
> > 
> > Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
> > 
> > Although:
> >   a) You might like to keep a trace in get_queued_page just to see
> > what's getting unqueued
> >   b) I think originally it was a useful diagnostic to find out when we
> > were getting a lot of queue requests for pages that were already sent.
> 
> Ah, that makes sense.  How about I keep the test_bit but remove the loop?  I
> can make both a) and b) into one tracepoint:

Yes, I think that's fine.


Juan Quintela Jan. 27, 2022, 9:41 a.m. UTC | #4
Peter Xu <peterx@redhat.com> wrote:
> [...]

Reviewed-by: Juan Quintela <quintela@redhat.com>

queued.

I added the extra tracepoint that you posted later.

Patch

diff --git a/migration/ram.c b/migration/ram.c
index dc6ba041fa..0df15ff663 100644
--- a/migration/ram.c
+++ b/migration/ram.c
@@ -1541,6 +1541,7 @@  static RAMBlock *unqueue_page(RAMState *rs, ram_addr_t *offset)
 {
     struct RAMSrcPageRequest *entry;
     RAMBlock *block = NULL;
+    size_t page_size;
 
     if (!postcopy_has_request(rs)) {
         return NULL;
@@ -1557,10 +1558,13 @@  static RAMBlock *unqueue_page(RAMState *rs, ram_addr_t *offset)
     entry = QSIMPLEQ_FIRST(&rs->src_page_requests);
     block = entry->rb;
     *offset = entry->offset;
+    page_size = qemu_ram_pagesize(block);
+    /* Each page request should only be multiple page size of the ramblock */
+    assert((entry->len % page_size) == 0);
 
-    if (entry->len > TARGET_PAGE_SIZE) {
-        entry->len -= TARGET_PAGE_SIZE;
-        entry->offset += TARGET_PAGE_SIZE;
+    if (entry->len > page_size) {
+        entry->len -= page_size;
+        entry->offset += page_size;
     } else {
         memory_region_unref(block->mr);
         QSIMPLEQ_REMOVE_HEAD(&rs->src_page_requests, next_req);
@@ -1942,30 +1946,8 @@  static bool get_queued_page(RAMState *rs, PageSearchStatus *pss)
 {
     RAMBlock  *block;
     ram_addr_t offset;
-    bool dirty;
 
-    do {
-        block = unqueue_page(rs, &offset);
-        /*
-         * We're sending this page, and since it's postcopy nothing else
-         * will dirty it, and we must make sure it doesn't get sent again
-         * even if this queue request was received after the background
-         * search already sent it.
-         */
-        if (block) {
-            unsigned long page;
-
-            page = offset >> TARGET_PAGE_BITS;
-            dirty = test_bit(page, block->bmap);
-            if (!dirty) {
-                trace_get_queued_page_not_dirty(block->idstr, (uint64_t)offset,
-                                                page);
-            } else {
-                trace_get_queued_page(block->idstr, (uint64_t)offset, page);
-            }
-        }
-
-    } while (block && !dirty);
+    block = unqueue_page(rs, &offset);
 
     if (!block) {
         /*
diff --git a/migration/trace-events b/migration/trace-events
index e165687af2..3a9b3567ae 100644
--- a/migration/trace-events
+++ b/migration/trace-events
@@ -85,8 +85,6 @@  put_qlist_end(const char *field_name, const char *vmsd_name) "%s(%s)"
 qemu_file_fclose(void) ""
 
 # ram.c
-get_queued_page(const char *block_name, uint64_t tmp_offset, unsigned long page_abs) "%s/0x%" PRIx64 " page_abs=0x%lx"
-get_queued_page_not_dirty(const char *block_name, uint64_t tmp_offset, unsigned long page_abs) "%s/0x%" PRIx64 " page_abs=0x%lx"
 migration_bitmap_sync_start(void) ""
 migration_bitmap_sync_end(uint64_t dirty_pages) "dirty_pages %" PRIu64
 migration_bitmap_clear_dirty(char *str, uint64_t start, uint64_t size, unsigned long page) "rb %s start 0x%"PRIx64" size 0x%"PRIx64" page 0x%lx"
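
After the patch, the queue-handling path of get_queued_page() reduces to a single unqueue_page() call. A simplified sketch of the resulting shape (the code paths not shown in the hunks above are elided and summarized as assumptions in comments):

========
static bool get_queued_page(RAMState *rs, PageSearchStatus *pss)
{
    RAMBlock  *block;
    ram_addr_t offset;

    /* The old do/while loop and its per-page test_bit() are gone:
     * ram_save_host_page() already skips clean pages, and a huge-page
     * request is now consumed in one call instead of one call per
     * target page. */
    block = unqueue_page(rs, &offset);

    if (!block) {
        /* No queued request pending: the function falls back to other
         * sources (unchanged by this patch, elided here). */
    }

    /* ... fill in pss from (block, offset) and return whether a queued
     * page was found (elided) ... */
    return block != NULL;  /* simplified */
}
========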