
[RESEND,3/4] migration/rdma: destination: create the return path after the first accept

Message ID: 20210520081148.17001-3-lizhijian@cn.fujitsu.com (mailing list archive)
State: New, archived
Series: [RESEND,1/4] migration/rdma: cleanup rdma in rdma_start_incoming_migration error path

Commit Message

Li Zhijian May 20, 2021, 8:11 a.m. UTC
destination side:
$ build/qemu-system-x86_64 -enable-kvm -netdev tap,id=hn0,script=/etc/qemu-ifup,downscript=/etc/qemu-ifdown -device e1000,netdev=hn0,mac=50:52:54:00:11:22 -boot c -drive if=none,file=./Fedora-rdma-server-migration.qcow2,id=drive-virtio-disk0 -device virtio-blk-pci,bus=pci.0,addr=0x4,drive=drive-virtio-disk0,id=virtio-disk0 -m 2048 -smp 2 -device piix3-usb-uhci -device usb-tablet -monitor stdio -vga qxl -spice streaming-video=filter,port=5902,disable-ticketing -incoming rdma:192.168.1.10:8888
(qemu) migrate_set_capability postcopy-ram on
(qemu)
dest_init RDMA Device opened: kernel name rocep1s0f0 uverbs device name uverbs0, infiniband_verbs class device path /sys/class/infiniband_verbs/uverbs0, infiniband class device path /sys/class/infiniband/rocep1s0f0, transport: (2) Ethernet
Segmentation fault (core dumped)

 (gdb) bt
 #0  qemu_rdma_accept (rdma=0x0) at ../migration/rdma.c:3272
 #1  rdma_accept_incoming_migration (opaque=0x0) at ../migration/rdma.c:3986
 #2  0x0000563c9e51f02a in aio_dispatch_handler (ctx=ctx@entry=0x563ca0606010, node=0x563ca12b2150) at ../util/aio-posix.c:329
 #3  0x0000563c9e51f752 in aio_dispatch_handlers (ctx=0x563ca0606010) at ../util/aio-posix.c:372
 #4  aio_dispatch (ctx=0x563ca0606010) at ../util/aio-posix.c:382
 #5  0x0000563c9e4f4d9e in aio_ctx_dispatch (source=<optimized out>, callback=<optimized out>, user_data=<optimized out>) at ../util/async.c:306
 #6  0x00007fe96ef3fa9f in g_main_context_dispatch () at /lib64/libglib-2.0.so.0
 #7  0x0000563c9e4ffeb8 in glib_pollfds_poll () at ../util/main-loop.c:231
 #8  os_host_main_loop_wait (timeout=12188789) at ../util/main-loop.c:254
 #9  main_loop_wait (nonblocking=nonblocking@entry=0) at ../util/main-loop.c:530
 #10 0x0000563c9e3c7211 in qemu_main_loop () at ../softmmu/runstate.c:725
 #11 0x0000563c9dfd46fe in main (argc=<optimized out>, argv=<optimized out>, envp=<optimized out>) at ../softmmu/main.c:50

The RDMA return path is not created when the incoming side starts, because
migrate_postcopy() is still false at that moment; a NULL return-path rdma
context is then dereferenced if the user enables postcopy afterwards.
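
To make the ordering easier to see, here is a small standalone C model of the
race (not QEMU code; migrate_postcopy(), the monitor command and the contexts
are only mimicked). The listen-time check runs before the user has a chance to
enable postcopy-ram, so the return-path context stays NULL and the first
accept then trips over it:

    #include <stdbool.h>
    #include <stdio.h>

    static bool postcopy_enabled;             /* stands in for migrate_postcopy() */

    typedef struct { const char *name; } Ctx; /* stands in for RDMAContext */

    static Ctx *return_path;                  /* only created if postcopy is on */

    static void start_incoming_listen(void)   /* rdma_start_incoming_migration() */
    {
        static Ctx rp = { "return path" };
        if (postcopy_enabled) {               /* old placement: still false here */
            return_path = &rp;
        }
    }

    static void accept_first_connection(void) /* qemu_rdma_accept() */
    {
        if (postcopy_enabled && return_path == NULL) {
            /* upstream, the backtrace above shows this NULL reaching the
             * accept handler as its opaque pointer */
            printf("postcopy on but return_path is NULL -> would segfault\n");
            return;
        }
        printf("accepted, return path: %s\n",
               return_path ? return_path->name : "not needed");
    }

    int main(void)
    {
        start_incoming_listen();   /* -incoming rdma:192.168.1.10:8888 */
        postcopy_enabled = true;   /* migrate_set_capability postcopy-ram on */
        accept_first_connection(); /* source connects */
        return 0;
    }

Moving the return-path initialization into qemu_rdma_accept(), as done below,
performs the check only after the first connection is accepted, when the
capability can already have been set.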

Signed-off-by: Li Zhijian <lizhijian@cn.fujitsu.com>
---
 migration/rdma.c | 29 ++++++++++++++++++-----------
 1 file changed, 18 insertions(+), 11 deletions(-)

Comments

Zhijian Li (Fujitsu) May 20, 2021, 8:30 a.m. UTC | #1
I should make some changes to this patch, like below:

# git diff
diff --git a/migration/rdma.c b/migration/rdma.c
index 3b228c46ebf..067ea272276 100644
--- a/migration/rdma.c
+++ b/migration/rdma.c
@@ -316,7 +316,7 @@ typedef struct RDMALocalBlocks {
  typedef struct RDMAContext {
      char *host;
      int port;
-    const char *host_port;
+    char *host_port;

      RDMAWorkRequestData wr_data[RDMA_WRID_MAX];

@@ -2393,7 +2393,9 @@ static void qemu_rdma_cleanup(RDMAContext *rdma)
          rdma->channel = NULL;
      }
      g_free(rdma->host);
+    g_free(rdma->host_port);
      rdma->host = NULL;
+    rdma->host_port = NULL;
  }


@@ -2649,7 +2651,7 @@ static void *qemu_rdma_data_init(const char *host_port, Error **errp)
          if (!inet_parse(addr, host_port, NULL)) {
              rdma->port = atoi(addr->port);
              rdma->host = g_strdup(addr->host);
-            rdma->host_port = host_port;
+            rdma->host_port = g_strdup(host_port);
          } else {
              ERROR(errp, "bad RDMA migration address '%s'", host_port);
              g_free(rdma);
@@ -4076,6 +4078,7 @@ err:
      error_propagate(errp, local_err);
      if (rdma) {
          g_free(rdma->host);
+        g_free(rdma->host_port);
      }
      g_free(rdma);
      g_free(rdma_return_path);
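
Presumably the reason for switching host_port to an owned g_strdup() copy is
lifetime: once the return path is created lazily in qemu_rdma_accept(), the
RDMAContext can outlive the caller's host_port string, so the context has to
keep its own copy and release it in qemu_rdma_cleanup(). A minimal sketch of
that ownership pattern (hypothetical names, not the QEMU code):

    #include <glib.h>

    typedef struct {
        char *host_port;                      /* owned copy, freed in cleanup */
    } Ctx;

    static void ctx_init(Ctx *c, const char *host_port)
    {
        c->host_port = g_strdup(host_port);   /* caller's string may go away */
    }

    static void ctx_cleanup(Ctx *c)
    {
        g_free(c->host_port);
        c->host_port = NULL;
    }

    int main(void)
    {
        Ctx c = { NULL };
        ctx_init(&c, "192.168.1.10:8888");
        g_print("host_port: %s\n", c.host_port);
        ctx_cleanup(&c);
        return 0;
    }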


On 20/05/2021 16.11, Li Zhijian wrote:
> [snip: original commit message and patch quoted in full]

Dr. David Alan Gilbert May 24, 2021, 7:08 p.m. UTC | #2
* lizhijian@fujitsu.com (lizhijian@fujitsu.com) wrote:
> I should make some changes to this patch, like below:

Can you resend a version with this flattened into it, please?

Dave

> [snip: follow-up diff and quoted original message]

Patch

diff --git a/migration/rdma.c b/migration/rdma.c
index 651534e825..3b228c46eb 100644
--- a/migration/rdma.c
+++ b/migration/rdma.c
@@ -316,6 +316,7 @@  typedef struct RDMALocalBlocks {
 typedef struct RDMAContext {
     char *host;
     int port;
+    const char *host_port;
 
     RDMAWorkRequestData wr_data[RDMA_WRID_MAX];
 
@@ -2648,6 +2649,7 @@  static void *qemu_rdma_data_init(const char *host_port, Error **errp)
         if (!inet_parse(addr, host_port, NULL)) {
             rdma->port = atoi(addr->port);
             rdma->host = g_strdup(addr->host);
+            rdma->host_port = host_port;
         } else {
             ERROR(errp, "bad RDMA migration address '%s'", host_port);
             g_free(rdma);
@@ -3276,6 +3278,7 @@  static int qemu_rdma_accept(RDMAContext *rdma)
                                             .private_data = &cap,
                                             .private_data_len = sizeof(cap),
                                          };
+    RDMAContext *rdma_return_path = NULL;
     struct rdma_cm_event *cm_event;
     struct ibv_context *verbs;
     int ret = -EINVAL;
@@ -3291,6 +3294,20 @@  static int qemu_rdma_accept(RDMAContext *rdma)
         goto err_rdma_dest_wait;
     }
 
+    /*
+     * initialize the RDMAContext for return path for postcopy after first
+     * connection is accepted.
+     */
+    if (migrate_postcopy() && !rdma->is_return_path) {
+        rdma_return_path = qemu_rdma_data_init(rdma->host_port, NULL);
+        if (rdma_return_path == NULL) {
+            rdma_ack_cm_event(cm_event);
+            goto err_rdma_dest_wait;
+        }
+
+        qemu_rdma_return_path_dest_init(rdma_return_path, rdma);
+    }
+
     memcpy(&cap, cm_event->param.conn.private_data, sizeof(cap));
 
     network_to_caps(&cap);
@@ -3406,6 +3423,7 @@  static int qemu_rdma_accept(RDMAContext *rdma)
 err_rdma_dest_wait:
     rdma->error_state = ret;
     qemu_rdma_cleanup(rdma);
+    g_free(rdma_return_path);
     return ret;
 }
 
@@ -4048,17 +4066,6 @@  void rdma_start_incoming_migration(const char *host_port, Error **errp)
 
     trace_rdma_start_incoming_migration_after_rdma_listen();
 
-    /* initialize the RDMAContext for return path */
-    if (migrate_postcopy()) {
-        rdma_return_path = qemu_rdma_data_init(host_port, &local_err);
-
-        if (rdma_return_path == NULL) {
-            goto cleanup_rdma;
-        }
-
-        qemu_rdma_return_path_dest_init(rdma_return_path, rdma);
-    }
-
     qemu_set_fd_handler(rdma->channel->fd, rdma_accept_incoming_migration,
                         NULL, (void *)(intptr_t)rdma);
     return;