From: Alexander Duyck <alexanderdu...@fb.com> When I run Multi-process QEMU with an e1000 as the remote device and SMP enabled, I see the combination lock up and become unresponsive. The QEMU build is a fairly standard x86_64-softmmu setup. After doing some digging, I tracked the lockup down to what appears to be a race with the mpqemu-link msg_send and msg_receive functions and the reacquisition of the lock.
I am assuming the issue is some sort of lock inversion though I haven't identified exactly what the other lock involved is yet. For now removing the logic to unlock the iothread and then reacquire the lock seems to resolve the issue. I am assuming the releasing of the lock was some form of optimization but I am not certain so I am submitting this as an RFC. Signed-off-by: Alexander Duyck <alexanderdu...@fb.com> --- hw/remote/mpqemu-link.c | 25 ------------------------- 1 file changed, 25 deletions(-) diff --git a/hw/remote/mpqemu-link.c b/hw/remote/mpqemu-link.c index 9bd98e82197e..3e7818f54a63 100644 --- a/hw/remote/mpqemu-link.c +++ b/hw/remote/mpqemu-link.c @@ -33,7 +33,6 @@ */ bool mpqemu_msg_send(MPQemuMsg *msg, QIOChannel *ioc, Error **errp) { - bool iolock = qemu_mutex_iothread_locked(); bool iothread = qemu_in_iothread(); struct iovec send[2] = {}; int *fds = NULL; @@ -57,16 +56,6 @@ bool mpqemu_msg_send(MPQemuMsg *msg, QIOChannel *ioc, Error **errp) */ assert(qemu_in_coroutine() || !iothread); - /* - * Skip unlocking/locking iothread lock when the IOThread is running - * in co-routine context. Co-routine context is asserted above - * for IOThread case. - * Also skip lock handling while in a co-routine in the main context. - */ - if (iolock && !iothread && !qemu_in_coroutine()) { - qemu_mutex_unlock_iothread(); - } - if (!qio_channel_writev_full_all(ioc, send, G_N_ELEMENTS(send), fds, nfds, 0, errp)) { ret = true; @@ -74,11 +63,6 @@ bool mpqemu_msg_send(MPQemuMsg *msg, QIOChannel *ioc, Error **errp) trace_mpqemu_send_io_error(msg->cmd, msg->size, nfds); } - if (iolock && !iothread && !qemu_in_coroutine()) { - /* See above comment why skip locking here. 
*/ - qemu_mutex_lock_iothread(); - } - return ret; } @@ -96,7 +80,6 @@ static ssize_t mpqemu_read(QIOChannel *ioc, void *buf, size_t len, int **fds, size_t *nfds, Error **errp) { struct iovec iov = { .iov_base = buf, .iov_len = len }; - bool iolock = qemu_mutex_iothread_locked(); bool iothread = qemu_in_iothread(); int ret = -1; @@ -106,16 +89,8 @@ static ssize_t mpqemu_read(QIOChannel *ioc, void *buf, size_t len, int **fds, */ assert(qemu_in_coroutine() || !iothread); - if (iolock && !iothread && !qemu_in_coroutine()) { - qemu_mutex_unlock_iothread(); - } - ret = qio_channel_readv_full_all_eof(ioc, &iov, 1, fds, nfds, errp); - if (iolock && !iothread && !qemu_in_coroutine()) { - qemu_mutex_lock_iothread(); - } - return (ret <= 0) ? ret : iov.iov_len; }