diff mbox series

[v2,4/4] migration/rdma: source: poll cm_event from return path

Message ID 20210525080552.28259-4-lizhijian@cn.fujitsu.com (mailing list archive)
State New, archived
Headers show
Series [v2,1/4] migration/rdma: cleanup rdma in rdma_start_incoming_migration error path | expand

Commit Message

Li, Zhijian May 25, 2021, 8:05 a.m. UTC
The source side always blocks if postcopy is enabled only on the source side.
Users are not able to cancel the migration in this case.

Let the source side have a chance to cancel this migration.

Signed-off-by: Li Zhijian <lizhijian@cn.fujitsu.com>
---
V2: utilize poll to check cm event
---
 migration/rdma.c | 42 ++++++++++++++++++++++++++++++++++++++----
 1 file changed, 38 insertions(+), 4 deletions(-)

Comments

Dr. David Alan Gilbert May 25, 2021, 10:26 a.m. UTC | #1
* Li Zhijian (lizhijian@cn.fujitsu.com) wrote:
> source side always blocks if postcopy is only enabled at source side.
> users are not able to cancel this migration in this case.
> 
> Let source side have chance to cancel this migration
> 
> Signed-off-by: Li Zhijian <lizhijian@cn.fujitsu.com>
> ---
> V2: utilize poll to check cm event
> ---
>  migration/rdma.c | 42 ++++++++++++++++++++++++++++++++++++++----
>  1 file changed, 38 insertions(+), 4 deletions(-)
> 
> diff --git a/migration/rdma.c b/migration/rdma.c
> index d829d08d076..f67e21b4f54 100644
> --- a/migration/rdma.c
> +++ b/migration/rdma.c
> @@ -36,6 +36,7 @@
>  #include <rdma/rdma_cma.h>
>  #include "trace.h"
>  #include "qom/object.h"
> +#include <poll.h>
>  
>  /*
>   * Print and error on both the Monitor and the Log file.
> @@ -2460,7 +2461,36 @@ err_rdma_source_init:
>      return -1;
>  }
>  
> -static int qemu_rdma_connect(RDMAContext *rdma, Error **errp)
> +static int qemu_get_cm_event_timeout(RDMAContext *rdma,
> +                                     struct rdma_cm_event **cm_event,
> +                                     long msec, Error **errp)
> +{
> +    int ret;
> +    struct pollfd poll_fd = {
> +                                .fd = rdma->channel->fd,
> +                                .events = POLLIN,
> +                                .revents = 0
> +                            };
> +
> +    do {
> +        ret = poll(&poll_fd, 1, msec);
> +    } while (ret < 0 && errno == EINTR);
> +
> +    if (ret == 0) {
> +        ERROR(errp, "poll cm event timeout");
> +        return -1;
> +    } else if (ret < 0) {
> +        ERROR(errp, "failed to pull cm event, errno=%i", errno);

Typo: 'pull' should be 'poll' - I can fix that.

> +        return -1;
> +    } else if (poll_fd.revents & POLLIN) {
> +        return rdma_get_cm_event(rdma->channel, cm_event);
> +    } else {
> +        ERROR(errp, "no POLLIN event, revent=%x", poll_fd.revents);
> +        return -1;
> +    }
> +}
> +
> +static int qemu_rdma_connect(RDMAContext *rdma, Error **errp, bool return_path)
>  {
>      RDMACapabilities cap = {
>                                  .version = RDMA_CONTROL_VERSION_CURRENT,
> @@ -2498,7 +2528,11 @@ static int qemu_rdma_connect(RDMAContext *rdma, Error **errp)
>          goto err_rdma_source_connect;
>      }
>  
> -    ret = rdma_get_cm_event(rdma->channel, &cm_event);
> +    if (return_path) {
> +        ret = qemu_get_cm_event_timeout(rdma, &cm_event, 5000, errp);

Fixed timeouts are not a great fix, but I can't think of anything
better; the only alternative would be to register the fd on the main
thread's poll and get it to be called back when the event happens.

But for now;

Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>

> +    } else {
> +        ret = rdma_get_cm_event(rdma->channel, &cm_event);
> +    }
>      if (ret) {
>          perror("rdma_get_cm_event after rdma_connect");
>          ERROR(errp, "connecting to destination!");
> @@ -4111,7 +4145,7 @@ void rdma_start_outgoing_migration(void *opaque,
>      }
>  
>      trace_rdma_start_outgoing_migration_after_rdma_source_init();
> -    ret = qemu_rdma_connect(rdma, errp);
> +    ret = qemu_rdma_connect(rdma, errp, false);
>  
>      if (ret) {
>          goto err;
> @@ -4132,7 +4166,7 @@ void rdma_start_outgoing_migration(void *opaque,
>              goto return_path_err;
>          }
>  
> -        ret = qemu_rdma_connect(rdma_return_path, errp);
> +        ret = qemu_rdma_connect(rdma_return_path, errp, true);
>  
>          if (ret) {
>              goto return_path_err;
> -- 
> 2.30.2
> 
> 
>
diff mbox series

Patch

diff --git a/migration/rdma.c b/migration/rdma.c
index d829d08d076..f67e21b4f54 100644
--- a/migration/rdma.c
+++ b/migration/rdma.c
@@ -36,6 +36,7 @@ 
 #include <rdma/rdma_cma.h>
 #include "trace.h"
 #include "qom/object.h"
+#include <poll.h>
 
 /*
  * Print and error on both the Monitor and the Log file.
@@ -2460,7 +2461,36 @@  err_rdma_source_init:
     return -1;
 }
 
-static int qemu_rdma_connect(RDMAContext *rdma, Error **errp)
+static int qemu_get_cm_event_timeout(RDMAContext *rdma,
+                                     struct rdma_cm_event **cm_event,
+                                     long msec, Error **errp)
+{
+    int ret;
+    struct pollfd poll_fd = {
+                                .fd = rdma->channel->fd,
+                                .events = POLLIN,
+                                .revents = 0
+                            };
+
+    do {
+        ret = poll(&poll_fd, 1, msec);
+    } while (ret < 0 && errno == EINTR);
+
+    if (ret == 0) {
+        ERROR(errp, "poll cm event timeout");
+        return -1;
+    } else if (ret < 0) {
+        ERROR(errp, "failed to pull cm event, errno=%i", errno);
+        return -1;
+    } else if (poll_fd.revents & POLLIN) {
+        return rdma_get_cm_event(rdma->channel, cm_event);
+    } else {
+        ERROR(errp, "no POLLIN event, revent=%x", poll_fd.revents);
+        return -1;
+    }
+}
+
+static int qemu_rdma_connect(RDMAContext *rdma, Error **errp, bool return_path)
 {
     RDMACapabilities cap = {
                                 .version = RDMA_CONTROL_VERSION_CURRENT,
@@ -2498,7 +2528,11 @@  static int qemu_rdma_connect(RDMAContext *rdma, Error **errp)
         goto err_rdma_source_connect;
     }
 
-    ret = rdma_get_cm_event(rdma->channel, &cm_event);
+    if (return_path) {
+        ret = qemu_get_cm_event_timeout(rdma, &cm_event, 5000, errp);
+    } else {
+        ret = rdma_get_cm_event(rdma->channel, &cm_event);
+    }
     if (ret) {
         perror("rdma_get_cm_event after rdma_connect");
         ERROR(errp, "connecting to destination!");
@@ -4111,7 +4145,7 @@  void rdma_start_outgoing_migration(void *opaque,
     }
 
     trace_rdma_start_outgoing_migration_after_rdma_source_init();
-    ret = qemu_rdma_connect(rdma, errp);
+    ret = qemu_rdma_connect(rdma, errp, false);
 
     if (ret) {
         goto err;
@@ -4132,7 +4166,7 @@  void rdma_start_outgoing_migration(void *opaque,
             goto return_path_err;
         }
 
-        ret = qemu_rdma_connect(rdma_return_path, errp);
+        ret = qemu_rdma_connect(rdma_return_path, errp, true);
 
         if (ret) {
             goto return_path_err;