diff mbox series

[v4,08/10] Reduce the PVM stop time during Checkpoint

Message ID 1616639091-28279-9-git-send-email-lei.rao@intel.com (mailing list archive)
State New, archived
Headers show
Series Fixed some bugs and optimized some codes for COLO | expand

Commit Message

Rao, Lei March 25, 2021, 2:24 a.m. UTC
From: "Rao, Lei" <lei.rao@intel.com>

When flushing memory from ram cache to ram during every checkpoint
on secondary VM, we can copy contiguous chunks of memory instead of
4096 bytes at a time to reduce the time of VM stop during checkpoint.

Signed-off-by: Lei Rao <lei.rao@intel.com>
---
 migration/ram.c | 45 ++++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 42 insertions(+), 3 deletions(-)

Comments

Dr. David Alan Gilbert March 29, 2021, 12:03 p.m. UTC | #1
* leirao (lei.rao@intel.com) wrote:
> From: "Rao, Lei" <lei.rao@intel.com>
> 
> When flushing memory from ram cache to ram during every checkpoint
> on secondary VM, we can copy contiguous chunks of memory instead of
> 4096 bytes at a time to reduce the time of VM stop during checkpoint.
> 
> Signed-off-by: Lei Rao <lei.rao@intel.com>

A minor comment below, but :

Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>

> ---
>  migration/ram.c | 45 ++++++++++++++++++++++++++++++++++++++++++---
>  1 file changed, 42 insertions(+), 3 deletions(-)
> 
> diff --git a/migration/ram.c b/migration/ram.c
> index c69a8e0..a258466 100644
> --- a/migration/ram.c
> +++ b/migration/ram.c
> @@ -822,6 +822,39 @@ unsigned long migration_bitmap_find_dirty(RAMState *rs, RAMBlock *rb,
>      return next;
>  }
>  
> +/*
> + * colo_bitmap_find_dirty: find contiguous dirty pages from start
> + *
> + * Returns the page offset within memory region of the start of the contiguous
> + * dirty pages
> + *
> + * @rs: current RAM state
> + * @rb: RAMBlock where to search for dirty pages
> + * @start: page where we start the search
> + * @num: the number of contiguous dirty pages
> + */
> +static inline
> +unsigned long colo_bitmap_find_dirty(RAMState *rs, RAMBlock *rb,
> +                                     unsigned long start, unsigned long *num)
> +{
> +    unsigned long size = rb->used_length >> TARGET_PAGE_BITS;
> +    unsigned long *bitmap = rb->bmap;
> +    unsigned long first, next;

It might be better to add the 
       *num = 0

here, which means this function always writes num

> +    if (ramblock_is_ignored(rb)) {
> +        return size;
> +    }
> +
> +    first = find_next_bit(bitmap, size, start);
> +    if (first >= size) {
> +        return first;
> +    }
> +    next = find_next_zero_bit(bitmap, size, first + 1);
> +    assert(next >= first);
> +    *num = next - first;
> +    return first;
> +}
> +
>  static inline bool migration_bitmap_clear_dirty(RAMState *rs,
>                                                  RAMBlock *rb,
>                                                  unsigned long page)
> @@ -3666,6 +3699,8 @@ void colo_flush_ram_cache(void)
>      void *dst_host;
>      void *src_host;
>      unsigned long offset = 0;
> +    unsigned long num = 0;

that could move inside the while loop.

> +    unsigned long i = 0;

This line could move inside the 'else' clause below that uses it.

>      memory_global_dirty_log_sync();
>      WITH_RCU_READ_LOCK_GUARD() {
> @@ -3679,19 +3714,23 @@ void colo_flush_ram_cache(void)
>          block = QLIST_FIRST_RCU(&ram_list.blocks);
>  
>          while (block) {
> -            offset = migration_bitmap_find_dirty(ram_state, block, offset);
> +            offset = colo_bitmap_find_dirty(ram_state, block, offset, &num);
>  
>              if (((ram_addr_t)offset) << TARGET_PAGE_BITS
>                  >= block->used_length) {
>                  offset = 0;
> +                num = 0;
>                  block = QLIST_NEXT_RCU(block, next);
>              } else {
> -                migration_bitmap_clear_dirty(ram_state, block, offset);
> +                for (i = 0; i < num; i++) {
> +                    migration_bitmap_clear_dirty(ram_state, block, offset + i);
> +                }
>                  dst_host = block->host
>                           + (((ram_addr_t)offset) << TARGET_PAGE_BITS);
>                  src_host = block->colo_cache
>                           + (((ram_addr_t)offset) << TARGET_PAGE_BITS);
> -                memcpy(dst_host, src_host, TARGET_PAGE_SIZE);
> +                memcpy(dst_host, src_host, TARGET_PAGE_SIZE * num);
> +                offset += num;

I was initially confused as to why the old code didn't have an offset++
but I guess that means it just checked the bit a second time that was
just cleared.

Dave


>              }
>          }
>      }
> -- 
> 1.8.3.1
>
diff mbox series

Patch

diff --git a/migration/ram.c b/migration/ram.c
index c69a8e0..a258466 100644
--- a/migration/ram.c
+++ b/migration/ram.c
@@ -822,6 +822,39 @@  unsigned long migration_bitmap_find_dirty(RAMState *rs, RAMBlock *rb,
     return next;
 }
 
+/*
+ * colo_bitmap_find_dirty: find contiguous dirty pages from start
+ *
+ * Returns the page offset within memory region of the start of the contiguous
+ * dirty pages
+ *
+ * @rs: current RAM state
+ * @rb: RAMBlock where to search for dirty pages
+ * @start: page where we start the search
+ * @num: the number of contiguous dirty pages
+ */
+static inline
+unsigned long colo_bitmap_find_dirty(RAMState *rs, RAMBlock *rb,
+                                     unsigned long start, unsigned long *num)
+{
+    unsigned long size = rb->used_length >> TARGET_PAGE_BITS;
+    unsigned long *bitmap = rb->bmap;
+    unsigned long first, next;
+
+    if (ramblock_is_ignored(rb)) {
+        return size;
+    }
+
+    first = find_next_bit(bitmap, size, start);
+    if (first >= size) {
+        return first;
+    }
+    next = find_next_zero_bit(bitmap, size, first + 1);
+    assert(next >= first);
+    *num = next - first;
+    return first;
+}
+
 static inline bool migration_bitmap_clear_dirty(RAMState *rs,
                                                 RAMBlock *rb,
                                                 unsigned long page)
@@ -3666,6 +3699,8 @@  void colo_flush_ram_cache(void)
     void *dst_host;
     void *src_host;
     unsigned long offset = 0;
+    unsigned long num = 0;
+    unsigned long i = 0;
 
     memory_global_dirty_log_sync();
     WITH_RCU_READ_LOCK_GUARD() {
@@ -3679,19 +3714,23 @@  void colo_flush_ram_cache(void)
         block = QLIST_FIRST_RCU(&ram_list.blocks);
 
         while (block) {
-            offset = migration_bitmap_find_dirty(ram_state, block, offset);
+            offset = colo_bitmap_find_dirty(ram_state, block, offset, &num);
 
             if (((ram_addr_t)offset) << TARGET_PAGE_BITS
                 >= block->used_length) {
                 offset = 0;
+                num = 0;
                 block = QLIST_NEXT_RCU(block, next);
             } else {
-                migration_bitmap_clear_dirty(ram_state, block, offset);
+                for (i = 0; i < num; i++) {
+                    migration_bitmap_clear_dirty(ram_state, block, offset + i);
+                }
                 dst_host = block->host
                          + (((ram_addr_t)offset) << TARGET_PAGE_BITS);
                 src_host = block->colo_cache
                          + (((ram_addr_t)offset) << TARGET_PAGE_BITS);
-                memcpy(dst_host, src_host, TARGET_PAGE_SIZE);
+                memcpy(dst_host, src_host, TARGET_PAGE_SIZE * num);
+                offset += num;
             }
         }
     }