diff mbox series

[v20210601,09/38] tools/guest: prepare to allocate arrays once

Message ID 20210601161118.18986-10-olaf@aepfle.de (mailing list archive)
State Superseded
Headers show
Series leftover from 2020 | expand

Commit Message

Olaf Hering June 1, 2021, 4:10 p.m. UTC
The hotpath 'send_dirty_pages' is supposed to do just one thing: sending.
The other end 'handle_page_data' is supposed to do just receiving.

But instead both do other costly work like memory allocations and data moving.
Do the allocations once, the array sizes are a compiletime constant.
Avoid unneeded copying of data by receiving data directly into mapped guest memory.

This patch is just prepartion, subsequent changes will populate the arrays.

Once all changes are applied, migration of a busy HVM domU changes like that:

Without this series, from sr650 to sr950 (xen-4.15.20201027T173911.16a20963b3 xen_testing):
2020-10-29 10:23:10.711+0000: xc: show_transfer_rate: 23663128 bytes + 2879563 pages in 55.324905335 sec, 203 MiB/sec: Internal error
2020-10-29 10:23:35.115+0000: xc: show_transfer_rate: 16829632 bytes + 2097552 pages in 24.401179720 sec, 335 MiB/sec: Internal error
2020-10-29 10:23:59.436+0000: xc: show_transfer_rate: 16829032 bytes + 2097478 pages in 24.319025928 sec, 336 MiB/sec: Internal error
2020-10-29 10:24:23.844+0000: xc: show_transfer_rate: 16829024 bytes + 2097477 pages in 24.406992500 sec, 335 MiB/sec: Internal error
2020-10-29 10:24:48.292+0000: xc: show_transfer_rate: 16828912 bytes + 2097463 pages in 24.446489027 sec, 335 MiB/sec: Internal error
2020-10-29 10:25:01.816+0000: xc: show_transfer_rate: 16836080 bytes + 2098356 pages in 13.447091818 sec, 609 MiB/sec: Internal error

With this series, from sr650 to sr950 (xen-4.15.20201027T173911.16a20963b3 xen_unstable):
2020-10-28 21:26:05.074+0000: xc: show_transfer_rate: 23663128 bytes + 2879563 pages in 52.564054368 sec, 213 MiB/sec: Internal error
2020-10-28 21:26:23.527+0000: xc: show_transfer_rate: 16830040 bytes + 2097603 pages in 18.450592015 sec, 444 MiB/sec: Internal error
2020-10-28 21:26:41.926+0000: xc: show_transfer_rate: 16830944 bytes + 2097717 pages in 18.397862306 sec, 445 MiB/sec: Internal error
2020-10-28 21:27:00.339+0000: xc: show_transfer_rate: 16829176 bytes + 2097498 pages in 18.411973339 sec, 445 MiB/sec: Internal error
2020-10-28 21:27:18.643+0000: xc: show_transfer_rate: 16828592 bytes + 2097425 pages in 18.303326695 sec, 447 MiB/sec: Internal error
2020-10-28 21:27:26.289+0000: xc: show_transfer_rate: 16835952 bytes + 2098342 pages in 7.579846749 sec, 1081 MiB/sec: Internal error

Signed-off-by: Olaf Hering <olaf@aepfle.de>
---
 tools/libs/saverestore/common.h  | 8 ++++++++
 tools/libs/saverestore/restore.c | 8 ++++++++
 tools/libs/saverestore/save.c    | 4 +++-
 3 files changed, 19 insertions(+), 1 deletion(-)

Comments

Jürgen Groß June 2, 2021, 7:29 a.m. UTC | #1
On 01.06.21 18:10, Olaf Hering wrote:
> The hotpath 'send_dirty_pages' is supposed to do just one thing: sending.
> The other end 'handle_page_data' is supposed to do just receiving.
> 
> But instead both do other costly work like memory allocations and data moving.
> Do the allocations once, the array sizes are a compiletime constant.
> Avoid unneeded copying of data by receiving data directly into mapped guest memory.
> 
> This patch is just prepartion, subsequent changes will populate the arrays.
> 
> Once all changes are applied, migration of a busy HVM domU changes like 
that:
> 
> Without this series, from sr650 to sr950 (xen-4.15.20201027T173911.16a20963b3 xen_testing):
> 2020-10-29 10:23:10.711+0000: xc: show_transfer_rate: 23663128 bytes + 2879563 pages in 55.324905335 sec, 203 MiB/sec: Internal error
> 2020-10-29 10:23:35.115+0000: xc: show_transfer_rate: 16829632 bytes + 2097552 pages in 24.401179720 sec, 335 MiB/sec: Internal error
> 2020-10-29 10:23:59.436+0000: xc: show_transfer_rate: 16829032 bytes + 2097478 pages in 24.319025928 sec, 336 MiB/sec: Internal error
> 2020-10-29 10:24:23.844+0000: xc: show_transfer_rate: 16829024 bytes + 2097477 pages in 24.406992500 sec, 335 MiB/sec: Internal error
> 2020-10-29 10:24:48.292+0000: xc: show_transfer_rate: 16828912 bytes + 2097463 pages in 24.446489027 sec, 335 MiB/sec: Internal error
> 2020-10-29 10:25:01.816+0000: xc: show_transfer_rate: 16836080 bytes + 2098356 pages in 13.447091818 sec, 609 MiB/sec: Internal error
> 
> With this series, from sr650 to sr950 (xen-4.15.20201027T173911.16a20963b3 xen_unstable):
> 2020-10-28 21:26:05.074+0000: xc: show_transfer_rate: 23663128 bytes + 2879563 pages in 52.564054368 sec, 213 MiB/sec: Internal error
> 2020-10-28 21:26:23.527+0000: xc: show_transfer_rate: 16830040 bytes + 2097603 pages in 18.450592015 sec, 444 MiB/sec: Internal error
> 2020-10-28 21:26:41.926+0000: xc: show_transfer_rate: 16830944 bytes + 2097717 pages in 18.397862306 sec, 445 MiB/sec: Internal error
> 2020-10-28 21:27:00.339+0000: xc: show_transfer_rate: 16829176 bytes + 2097498 pages in 18.411973339 sec, 445 MiB/sec: Internal error
> 2020-10-28 21:27:18.643+0000: xc: show_transfer_rate: 16828592 bytes + 2097425 pages in 18.303326695 sec, 447 MiB/sec: Internal error
> 2020-10-28 21:27:26.289+0000: xc: show_transfer_rate: 16835952 bytes + 2098342 pages in 7.579846749 sec, 1081 MiB/sec: Internal error
> 
> Signed-off-by: Olaf Hering <olaf@aepfle.de>
> ---
>   tools/libs/saverestore/common.h  | 8 ++++++++
>   tools/libs/saverestore/restore.c | 8 ++++++++
>   tools/libs/saverestore/save.c    | 4 +++-
>   3 files changed, 19 insertions(+), 1 deletion(-)
> 
> diff --git a/tools/libs/saverestore/common.h b/tools/libs/saverestore/common.h
> index f5fe23caad..80b2e878aa 100644
> --- a/tools/libs/saverestore/common.h
> +++ b/tools/libs/saverestore/common.h
> @@ -223,6 +223,12 @@ static inline int update_blob(struct xc_sr_blob *blob,
>       return 0;
>   }
>   
> +struct xc_sr_save_arrays {
> +};
> +
> +struct xc_sr_restore_arrays {
> +};

Can you please add the mfns/pfns arrays to above types, as ...

> +
>   struct xc_sr_context
>   {
>       xc_interface *xch;
> @@ -260,6 +266,7 @@ struct xc_sr_context
>               unsigned long *deferred_pages;
>               unsigned long nr_deferred_pages;
>               xc_hypercall_buffer_t dirty_bitmap_hbuf;
> +            struct xc_sr_save_arrays *m;
>           } save;
>   
>           struct /* Restore data. */
> @@ -311,6 +318,7 @@ struct xc_sr_context
>   
>               /* Sender has invoked verify mode on the stream. */
>               bool verify;
> +            struct xc_sr_restore_arrays *m;
>           } restore;
>       };
>   
> diff --git a/tools/libs/saverestore/restore.c b/tools/libs/saverestore/restore.c
> index 700f9e74b5..a6cf9ee41c 100644
> --- a/tools/libs/saverestore/restore.c
> +++ b/tools/libs/saverestore/restore.c
> @@ -739,6 +739,13 @@ static int setup(struct xc_sr_context *ctx)
>       }
>       ctx->restore.allocated_rec_num = DEFAULT_BUF_RECORDS;
>   
> +    ctx->restore.m = malloc(sizeof(*ctx->restore.m));
> +    if ( !ctx->restore.m ) {

... this case might trigger without the full series applied, due to
allocating zero bytes (same for the save side below).


Juergen
Olaf Hering June 2, 2021, 12:03 p.m. UTC | #2
Am Wed, 2 Jun 2021 09:29:08 +0200
schrieb Juergen Gross <jgross@suse.com>:

> > +    ctx->restore.m = malloc(sizeof(*ctx->restore.m));
> > +    if ( !ctx->restore.m ) {  
> 
> ... this case might trigger without the full series applied, due to
> allocating zero bytes (same for the save side below).

Such bisection point with a libc that returns NULL would be just bad luck.

See git-bisect(1) "Avoiding testing a commit" how to deal with it, in the unlikely case it actually triggers.


Olaf
Jürgen Groß June 2, 2021, 12:09 p.m. UTC | #3
On 02.06.21 14:03, Olaf Hering wrote:
> Am Wed, 2 Jun 2021 09:29:08 +0200
> schrieb Juergen Gross <jgross@suse.com>:
> 
>>> +    ctx->restore.m = malloc(sizeof(*ctx->restore.m));
>>> +    if ( !ctx->restore.m ) {
>>
>> ... this case might trigger without the full series applied, due to
>> allocating zero bytes (same for the save side below).
> 
> Such bisection point with a libc that returns NULL would be just bad luck.

Sure, but sending a patch which is known to break bisecting is bad
behavior. You could even add a dummy element (with a comment indicating
its purpose) which could be removed when the first "real" structure
element is being added.

> See git-bisect(1) "Avoiding testing a commit" how to deal with it, in the unlikely case it actually triggers.

It can be avoided, yes, but you need to search for the reason the
failure occurred first. And this debugging effort should be avoided
if possible.


Juergen
diff mbox series

Patch

diff --git a/tools/libs/saverestore/common.h b/tools/libs/saverestore/common.h
index f5fe23caad..80b2e878aa 100644
--- a/tools/libs/saverestore/common.h
+++ b/tools/libs/saverestore/common.h
@@ -223,6 +223,12 @@  static inline int update_blob(struct xc_sr_blob *blob,
     return 0;
 }
 
+struct xc_sr_save_arrays {
+};
+
+struct xc_sr_restore_arrays {
+};
+
 struct xc_sr_context
 {
     xc_interface *xch;
@@ -260,6 +266,7 @@  struct xc_sr_context
             unsigned long *deferred_pages;
             unsigned long nr_deferred_pages;
             xc_hypercall_buffer_t dirty_bitmap_hbuf;
+            struct xc_sr_save_arrays *m;
         } save;
 
         struct /* Restore data. */
@@ -311,6 +318,7 @@  struct xc_sr_context
 
             /* Sender has invoked verify mode on the stream. */
             bool verify;
+            struct xc_sr_restore_arrays *m;
         } restore;
     };
 
diff --git a/tools/libs/saverestore/restore.c b/tools/libs/saverestore/restore.c
index 700f9e74b5..a6cf9ee41c 100644
--- a/tools/libs/saverestore/restore.c
+++ b/tools/libs/saverestore/restore.c
@@ -739,6 +739,13 @@  static int setup(struct xc_sr_context *ctx)
     }
     ctx->restore.allocated_rec_num = DEFAULT_BUF_RECORDS;
 
+    ctx->restore.m = malloc(sizeof(*ctx->restore.m));
+    if ( !ctx->restore.m ) {
+        ERROR("Unable to allocate memory for arrays");
+        rc = -1;
+        goto err;
+    }
+
  err:
     return rc;
 }
@@ -757,6 +764,7 @@  static void cleanup(struct xc_sr_context *ctx)
         xc_hypercall_buffer_free_pages(
             xch, dirty_bitmap, NRPAGES(bitmap_size(ctx->restore.p2m_size)));
 
+    free(ctx->restore.m);
     free(ctx->restore.buffered_records);
     free(ctx->restore.populated_pfns);
 
diff --git a/tools/libs/saverestore/save.c b/tools/libs/saverestore/save.c
index 760ca04a84..1662e3ee50 100644
--- a/tools/libs/saverestore/save.c
+++ b/tools/libs/saverestore/save.c
@@ -853,8 +853,9 @@  static int setup(struct xc_sr_context *ctx)
     ctx->save.batch_pfns = malloc(MAX_BATCH_SIZE *
                                   sizeof(*ctx->save.batch_pfns));
     ctx->save.deferred_pages = bitmap_alloc(ctx->save.p2m_size);
+    ctx->save.m = malloc(sizeof(*ctx->save.m));
 
-    if ( !ctx->save.batch_pfns || !dirty_bitmap || !ctx->save.deferred_pages )
+    if ( !ctx->save.m || !ctx->save.batch_pfns || !dirty_bitmap || !ctx->save.deferred_pages )
     {
         ERROR("Unable to allocate memory for dirty bitmaps, batch pfns and"
               " deferred pages");
@@ -886,6 +887,7 @@  static void cleanup(struct xc_sr_context *ctx)
                                    NRPAGES(bitmap_size(ctx->save.p2m_size)));
     free(ctx->save.deferred_pages);
     free(ctx->save.batch_pfns);
+    free(ctx->save.m);
 }
 
 /*