When we 'migrate_cancel' a multifd migration, live_migration thread may hung forever at some points, because of multifd_send_thread has already exit for socket error: 1. multifd_send_pages may hung at qemu_sem_wait(&multifd_send_state-> channels_ready) 2. multifd_send_sync_main my hung at qemu_sem_wait(&multifd_send_state-> sem_sync)
Signed-off-by: Ivan Ren <ivan...@tencent.com> --- migration/ram.c | 23 +++++++++++++++++++---- 1 file changed, 19 insertions(+), 4 deletions(-) diff --git a/migration/ram.c b/migration/ram.c index f8908286c2..e4eb9c441f 100644 --- a/migration/ram.c +++ b/migration/ram.c @@ -1097,7 +1097,11 @@ static void *multifd_send_thread(void *opaque) { MultiFDSendParams *p = opaque; Error *local_err = NULL; - int ret; + int ret = 0; + + uint32_t used = 0; + uint64_t packet_num = 0; + uint32_t flags = 0; trace_multifd_send_thread_start(p->id); rcu_register_thread(); @@ -1113,9 +1117,9 @@ static void *multifd_send_thread(void *opaque) qemu_mutex_lock(&p->mutex); if (p->pending_job) { - uint32_t used = p->pages->used; - uint64_t packet_num = p->packet_num; - uint32_t flags = p->flags; + used = p->pages->used; + packet_num = p->packet_num; + flags = p->flags; p->next_packet_size = used * qemu_target_page_size(); multifd_send_fill_packet(p); @@ -1164,6 +1168,17 @@ out: multifd_send_terminate_threads(local_err); } + /* + * Error happen, I will exit, but I can't just leave, tell + * who pay attention to me. + */ + if (ret != 0) { + if (flags & MULTIFD_FLAG_SYNC) { + qemu_sem_post(&multifd_send_state->sem_sync); + } + qemu_sem_post(&multifd_send_state->channels_ready); + } + qemu_mutex_lock(&p->mutex); p->running = false; qemu_mutex_unlock(&p->mutex); -- 2.17.2 (Apple Git-113)