diff mbox series

[RESEND,4/4] migration/rdma: source: get accept cm_event from return path in non-block mode

Message ID 20210520081148.17001-4-lizhijian@cn.fujitsu.com (mailing list archive)
State New, archived
Headers show
Series [RESEND,1/4] migration/rdma: cleanup rmda in rdma_start_incoming_migration error path | expand

Commit Message

Li Zhijian May 20, 2021, 8:11 a.m. UTC
The source side always blocks if postcopy is only enabled on the source side.
Users are not able to cancel the migration in this case.

Here we try to get the cm_event every 100ms until timeout.

Signed-off-by: Li Zhijian <lizhijian@cn.fujitsu.com>
---
 migration/rdma.c | 59 ++++++++++++++++++++++++++++++++++++++++++++----
 1 file changed, 55 insertions(+), 4 deletions(-)
diff mbox series

Patch

diff --git a/migration/rdma.c b/migration/rdma.c
index 3b228c46eb..181ad03849 100644
--- a/migration/rdma.c
+++ b/migration/rdma.c
@@ -2458,7 +2458,54 @@  err_rdma_source_init:
     return -1;
 }
 
-static int qemu_rdma_connect(RDMAContext *rdma, Error **errp)
+#define RDMA_GET_EVENT_INTERVAL 100000 /* 100ms */
+static int qemu_get_cm_event_timeout(RDMAContext *rdma,
+                                     struct rdma_cm_event **cm_event,
+                                     long sec, Error **errp)
+{
+    long wait_ns = 0;
+    int ret;
+    int flags = fcntl(rdma->channel->fd, F_GETFL), save_flags;
+
+    if (flags == -1) {
+        perror("failed to get file flags");
+        return flags;
+    }
+    save_flags = flags;
+    flags |= O_NONBLOCK;
+    ret = fcntl(rdma->channel->fd, F_SETFL, flags);
+    if (ret) {
+        perror("failed to set file flags nonblocking");
+        return ret;
+    }
+
+retry:
+    ret = rdma_get_cm_event(rdma->channel, cm_event);
+    if (ret && errno == EAGAIN) {
+        if (wait_ns < sec * 1000000) {
+            perror("rdma_get_cm_event after rdma_connect");
+            wait_ns += RDMA_GET_EVENT_INTERVAL;
+            usleep(RDMA_GET_EVENT_INTERVAL);
+            goto retry;
+        }
+    }
+    if (ret) {
+        perror("rdma_get_cm_event after rdma_connect");
+        ERROR(errp, "connecting to destination!");
+        return ret;
+    }
+
+    /* restore flags */
+    ret = fcntl(rdma->channel->fd, F_SETFL, save_flags);
+    if (ret) {
+        rdma_ack_cm_event(*cm_event);
+        perror("failed to restore file flags");
+    }
+
+    return ret;
+}
+
+static int qemu_rdma_connect(RDMAContext *rdma, Error **errp, bool return_path)
 {
     RDMACapabilities cap = {
                                 .version = RDMA_CONTROL_VERSION_CURRENT,
@@ -2496,7 +2543,11 @@  static int qemu_rdma_connect(RDMAContext *rdma, Error **errp)
         goto err_rdma_source_connect;
     }
 
-    ret = rdma_get_cm_event(rdma->channel, &cm_event);
+    if (return_path) {
+        ret = qemu_get_cm_event_timeout(rdma, &cm_event, 2, errp);
+    } else {
+        ret = rdma_get_cm_event(rdma->channel, &cm_event);
+    }
     if (ret) {
         perror("rdma_get_cm_event after rdma_connect");
         ERROR(errp, "connecting to destination!");
@@ -4108,7 +4159,7 @@  void rdma_start_outgoing_migration(void *opaque,
     }
 
     trace_rdma_start_outgoing_migration_after_rdma_source_init();
-    ret = qemu_rdma_connect(rdma, errp);
+    ret = qemu_rdma_connect(rdma, errp, false);
 
     if (ret) {
         goto err;
@@ -4129,7 +4180,7 @@  void rdma_start_outgoing_migration(void *opaque,
             goto return_path_err;
         }
 
-        ret = qemu_rdma_connect(rdma_return_path, errp);
+        ret = qemu_rdma_connect(rdma_return_path, errp, true);
 
         if (ret) {
             goto return_path_err;