The source side always blocks if postcopy is enabled only on the source side,
and users are not able to cancel the migration in that case.

Here we try to get the cm_event every 100ms until the timeout expires.

Signed-off-by: Li Zhijian <lizhij...@cn.fujitsu.com>
---
 migration/rdma.c | 59 ++++++++++++++++++++++++++++++++++++++++++++----
 1 file changed, 55 insertions(+), 4 deletions(-)

diff --git a/migration/rdma.c b/migration/rdma.c
index 3b228c46eb..181ad03849 100644
--- a/migration/rdma.c
+++ b/migration/rdma.c
@@ -2458,7 +2458,54 @@ err_rdma_source_init:
     return -1;
 }
 
-static int qemu_rdma_connect(RDMAContext *rdma, Error **errp)
+#define RDMA_GET_EVENT_INTERVAL 100000 /* 100ms */
+static int qemu_get_cm_event_timeout(RDMAContext *rdma,
+                                     struct rdma_cm_event **cm_event,
+                                     long sec, Error **errp)
+{
+    long wait_ns = 0;
+    int ret;
+    int flags = fcntl(rdma->channel->fd, F_GETFL), save_flags;
+
+    if (flags == -1) {
+        perror("failed to get file flags");
+        return flags;
+    }
+    save_flags = flags;
+    flags |= O_NONBLOCK;
+    ret = fcntl(rdma->channel->fd, F_SETFL, flags);
+    if (ret) {
+        perror("failed to set file flags nonblocking");
+        return ret;
+    }
+
+retry:
+    ret = rdma_get_cm_event(rdma->channel, cm_event);
+    if (ret && errno == EAGAIN) {
+        if (wait_ns < sec * 1000000) {
+            perror("rdma_get_cm_event after rdma_connect");
+            wait_ns += RDMA_GET_EVENT_INTERVAL;
+            usleep(RDMA_GET_EVENT_INTERVAL);
+            goto retry;
+        }
+    }
+    if (ret) {
+        perror("rdma_get_cm_event after rdma_connect");
+        ERROR(errp, "connecting to destination!");
+        return ret;
+    }
+
+    /* restore flags */
+    ret = fcntl(rdma->channel->fd, F_SETFL, save_flags);
+    if (ret) {
+        rdma_ack_cm_event(*cm_event);
+        perror("failed to restore file flags");
+    }
+
+    return ret;
+}
+
+static int qemu_rdma_connect(RDMAContext *rdma, Error **errp, bool return_path)
 {
     RDMACapabilities cap = {
                                 .version = RDMA_CONTROL_VERSION_CURRENT,
@@ -2496,7 +2543,11 @@ static int qemu_rdma_connect(RDMAContext *rdma, Error **errp)
         goto err_rdma_source_connect;
     }
 
-    ret = rdma_get_cm_event(rdma->channel, &cm_event);
+    if (return_path) {
+        ret = qemu_get_cm_event_timeout(rdma, &cm_event, 2, errp);
+    } else {
+        ret = rdma_get_cm_event(rdma->channel, &cm_event);
+    }
     if (ret) {
         perror("rdma_get_cm_event after rdma_connect");
         ERROR(errp, "connecting to destination!");
@@ -4108,7 +4159,7 @@ void rdma_start_outgoing_migration(void *opaque,
     }
 
     trace_rdma_start_outgoing_migration_after_rdma_source_init();
-    ret = qemu_rdma_connect(rdma, errp);
+    ret = qemu_rdma_connect(rdma, errp, false);
 
     if (ret) {
         goto err;
@@ -4129,7 +4180,7 @@ void rdma_start_outgoing_migration(void *opaque,
             goto return_path_err;
         }
 
-        ret = qemu_rdma_connect(rdma_return_path, errp);
+        ret = qemu_rdma_connect(rdma_return_path, errp, true);
 
         if (ret) {
             goto return_path_err;
-- 
2.30.2




Reply via email to