The source side always blocks if postcopy is only enabled at the source side. Users are not able to cancel the migration in this case.
Here we try to get the cm_event every 100ms until the timeout expires. Signed-off-by: Li Zhijian <lizhij...@cn.fujitsu.com> --- migration/rdma.c | 59 ++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 55 insertions(+), 4 deletions(-) diff --git a/migration/rdma.c b/migration/rdma.c index 3b228c46eb..181ad03849 100644 --- a/migration/rdma.c +++ b/migration/rdma.c @@ -2458,7 +2458,54 @@ err_rdma_source_init: return -1; } -static int qemu_rdma_connect(RDMAContext *rdma, Error **errp) +#define RDMA_GET_EVENT_INTERVAL 100000 /* 100ms */ +static int qemu_get_cm_event_timeout(RDMAContext *rdma, + struct rdma_cm_event **cm_event, + long sec, Error **errp) +{ + long wait_ns = 0; + int ret; + int flags = fcntl(rdma->channel->fd, F_GETFL), save_flags; + + if (flags == -1) { + perror("failed to get file flags"); + return flags; + } + save_flags = flags; + flags |= O_NONBLOCK; + ret = fcntl(rdma->channel->fd, F_SETFL, flags); + if (ret) { + perror("failed to set file flags nonblocking"); + return ret; + } + +retry: + ret = rdma_get_cm_event(rdma->channel, cm_event); + if (ret && errno == EAGAIN) { + if (wait_ns < sec * 1000000) { + perror("rdma_get_cm_event after rdma_connect"); + wait_ns += RDMA_GET_EVENT_INTERVAL; + usleep(RDMA_GET_EVENT_INTERVAL); + goto retry; + } + } + if (ret) { + perror("rdma_get_cm_event after rdma_connect"); + ERROR(errp, "connecting to destination!"); + return ret; + } + + /* restore flags */ + ret = fcntl(rdma->channel->fd, F_SETFL, save_flags); + if (ret) { + rdma_ack_cm_event(*cm_event); + perror("failed to restore file flags"); + } + + return ret; +} + +static int qemu_rdma_connect(RDMAContext *rdma, Error **errp, bool return_path) { RDMACapabilities cap = { .version = RDMA_CONTROL_VERSION_CURRENT, @@ -2496,7 +2543,11 @@ static int qemu_rdma_connect(RDMAContext *rdma, Error **errp) goto err_rdma_source_connect; } - ret = rdma_get_cm_event(rdma->channel, &cm_event); + if (return_path) { + ret = qemu_get_cm_event_timeout(rdma, &cm_event, 2, errp); 
+ } else { + ret = rdma_get_cm_event(rdma->channel, &cm_event); + } if (ret) { perror("rdma_get_cm_event after rdma_connect"); ERROR(errp, "connecting to destination!"); @@ -4108,7 +4159,7 @@ void rdma_start_outgoing_migration(void *opaque, } trace_rdma_start_outgoing_migration_after_rdma_source_init(); - ret = qemu_rdma_connect(rdma, errp); + ret = qemu_rdma_connect(rdma, errp, false); if (ret) { goto err; @@ -4129,7 +4180,7 @@ void rdma_start_outgoing_migration(void *opaque, goto return_path_err; } - ret = qemu_rdma_connect(rdma_return_path, errp); + ret = qemu_rdma_connect(rdma_return_path, errp, true); if (ret) { goto return_path_err; -- 2.30.2