Hello All,
I am facing a lock situation between main-loop thread 1 and vcpu thread
4 when doing a qmp snapshot. QEMU is running on 6.0.x, checked the
upstream code and did not see any big change since between. Guest is a
Windows 10 VM. Unfortunately, I could not get into the windows vm or
reproduce the issue by myself. No iothread is used here, native aio only.
From the code,
-> AIO_WAIT_WHILE(NULL, bdrv_drain_all_poll());
--> aio_poll(qemu_get_aio_context(), true);
Mainloop mutex is locked when start snapshot in thread 1, vcpu released
thread lock when address_space_rw and try to get thread lock again in
prepare_mmio_access.
It seems main loop thread is stuck at aio_poll with blocking, but I can
not figure out what the addr=4275044592 belongs to from mmio read.
I do not quite understand what really happens here, either block jobs
never drained out or maybe a block io read from vcpu and cause a
deadlock? I hope domain experts here could help figure out the root
cause, thanks in advance and let me know if need any further information.
Regards,
Liang
(gdb) thread 1
[Switching to thread 1 (Thread 0x7f9ebcf96040 (LWP 358660))]
#0 0x00007f9ec6eb4ac6 in __ppoll (fds=0x562dda80bc90, nfds=2,
timeout=<optimized out>, timeout@entry=0x0, sigmask=sigmask@entry=0x0)
at ../sysdeps/unix/sysv/linux/ppoll.c:44
44 ../sysdeps/unix/sysv/linux/ppoll.c: No such file or directory.
(gdb) bt
#0 0x00007f9ec6eb4ac6 in __ppoll (fds=0x562dda80bc90, nfds=2,
timeout=<optimized out>, timeout@entry=0x0, sigmask=sigmask@entry=0x0)
at ../sysdeps/unix/sysv/linux/ppoll.c:44
#1 0x0000562dd7f5a409 in ppoll (__ss=0x0, __timeout=0x0,
__nfds=<optimized out>, __fds=<optimized out>) at
/usr/include/x86_64-linux-gnu/bits/poll2.h:77
#2 qemu_poll_ns (fds=<optimized out>, nfds=<optimized out>,
timeout=timeout@entry=-1) at ../../util/qemu-timer.c:336
#3 0x0000562dd7f93de9 in fdmon_poll_wait (ctx=0x562dda193860,
ready_list=0x7ffedaeb3f48, timeout=-1) at ../../util/fdmon-poll.c:80
#4 0x0000562dd7f6d05b in aio_poll (ctx=<optimized out>,
blocking=blocking@entry=true) at ../../util/aio-posix.c:607
#5 0x0000562dd7e67e54 in bdrv_drain_all_begin () at ../../block/io.c:642
#6 bdrv_drain_all_begin () at ../../block/io.c:607
#7 0x0000562dd7e68a6d in bdrv_drain_all () at ../../block/io.c:693
#8 0x0000562dd7e54963 in qmp_transaction
(dev_list=dev_list@entry=0x7ffedaeb4070,
has_props=has_props@entry=false, props=0x562dda803910, props@entry=0x0,
errp=errp@entry=0x7ffedaeb4128)
at ../../blockdev.c:2348
#9 0x0000562dd7e54d5b in blockdev_do_action (errp=0x7ffedaeb4128,
action=0x7ffedaeb4060) at ../../blockdev.c:1055
#10 qmp_blockdev_snapshot_sync (has_device=<optimized out>,
device=<optimized out>, has_node_name=<optimized out>,
node_name=<optimized out>, snapshot_file=<optimized out>,
has_snapshot_node_name=<optimized out>,
snapshot_node_name=0x562dda83c970 "hvd-snapshot", has_format=false,
format=0x0, has_mode=false, mode=NEW_IMAGE_MODE_EXISTING,
errp=0x7ffedaeb4128)
at ../../blockdev.c:1083
#11 0x0000562dd7f0e5aa in qmp_marshal_blockdev_snapshot_sync
(args=<optimized out>, ret=<optimized out>, errp=0x7f9ebc61ae90) at
qapi/qapi-commands-block-core.c:221
#12 0x0000562dd7f5c5db in do_qmp_dispatch_bh (opaque=0x7f9ebc61aea0) at
../../qapi/qmp-dispatch.c:131
#13 0x0000562dd7f5dc27 in aio_bh_call (bh=0x7f9e3000b760) at
../../util/async.c:164
#14 aio_bh_poll (ctx=ctx@entry=0x562dda193860) at ../../util/async.c:164
#15 0x0000562dd7f6ca82 in aio_dispatch (ctx=0x562dda193860) at
../../util/aio-posix.c:381
#16 0x0000562dd7f5da42 in aio_ctx_dispatch (source=<optimized out>,
callback=<optimized out>, user_data=<optimized out>) at
../../util/async.c:306
#17 0x00007f9ec7ade17d in g_main_context_dispatch () from
/lib/x86_64-linux-gnu/libglib-2.0.so.0
#18 0x0000562dd7f4f320 in glib_pollfds_poll () at ../../util/main-loop.c:231
#19 os_host_main_loop_wait (timeout=<optimized out>) at
../../util/main-loop.c:254
#20 main_loop_wait (nonblocking=nonblocking@entry=0) at
../../util/main-loop.c:530
#21 0x0000562dd7d3cfd9 in qemu_main_loop () at ../../softmmu/runstate.c:725
#22 0x0000562dd7b7aa82 in main (argc=<optimized out>, argv=<optimized
out>, envp=<optimized out>) at ../../softmmu/main.c:50
(gdb) thread 4
[Switching to thread 4 (Thread 0x7f9e377fd700 (LWP 358668))]
#0 __lll_lock_wait (futex=futex@entry=0x562dd8337a60
<qemu_global_mutex>, private=0) at lowlevellock.c:52
52 lowlevellock.c: No such file or directory.
(gdb) bt
#0 __lll_lock_wait (futex=futex@entry=0x562dd8337a60
<qemu_global_mutex>, private=0) at lowlevellock.c:52
#1 0x00007f9ec6f9f0a3 in __GI___pthread_mutex_lock
(mutex=mutex@entry=0x562dd8337a60 <qemu_global_mutex>) at
../nptl/pthread_mutex_lock.c:80
#2 0x0000562dd7f667c8 in qemu_mutex_lock_impl (mutex=0x562dd8337a60
<qemu_global_mutex>, file=0x562dd804c76c "../../softmmu/physmem.c",
line=2742) at ../../util/qemu-thread-posix.c:79
#3 0x0000562dd7dca8ce in qemu_mutex_lock_iothread_impl
(file=file@entry=0x562dd804c76c "../../softmmu/physmem.c",
line=line@entry=2742) at ../../softmmu/cpus.c:491
#4 0x0000562dd7da2e91 in prepare_mmio_access (mr=<optimized out>) at
../../softmmu/physmem.c:2742
#5 0x0000562dd7da8bbb in flatview_read_continue
(fv=fv@entry=0x7f9e2827a4c0, addr=addr@entry=4275044592, attrs=...,
ptr=ptr@entry=0x7f9ebcef7028, len=len@entry=4, addr1=<optimized out>,
l=<optimized out>,
mr=0x562dda576f10) at ../../softmmu/physmem.c:2834
#6 0x0000562dd7da8df3 in flatview_read (fv=0x7f9e2827a4c0,
addr=4275044592, attrs=..., buf=0x7f9ebcef7028, len=4) at
../../softmmu/physmem.c:2875
#7 0x0000562dd7da8f1f in address_space_read_full (as=<optimized out>,
addr=<optimized out>, attrs=..., buf=<optimized out>, len=<optimized
out>) at ../../softmmu/physmem.c:2888
#8 0x0000562dd7da9045 in address_space_rw (as=<optimized out>,
addr=<optimized out>, attrs=..., attrs@entry=...,
buf=buf@entry=0x7f9ebcef7028, len=<optimized out>, is_write=<optimized out>)
at ../../softmmu/physmem.c:2916
#9 0x0000562dd7dfd7a8 in kvm_cpu_exec (cpu=cpu@entry=0x7f9ebc42b010) at
../../accel/kvm/kvm-all.c:2516
#10 0x0000562dd7d61c75 in kvm_vcpu_thread_fn
(arg=arg@entry=0x7f9ebc42b010) at ../../accel/kvm/kvm-accel-ops.c:49
#11 0x0000562dd7f66523 in qemu_thread_start (args=<optimized out>) at
../../util/qemu-thread-posix.c:521
#12 0x00007f9ec6f9c609 in start_thread (arg=<optimized out>) at
pthread_create.c:477
#13 0x00007f9ec6ec1163 in clone () at
../sysdeps/unix/sysv/linux/x86_64/clone.S:95
(gdb) print lock
$1 = {lock = {__data = {__lock = 0, __count = 0, __owner = 0, __nusers =
0, __kind = 0, __spins = 0, __elision = 0, __list = {__prev = 0x0,
__next = 0x0}}, __size = '\000' <repeats 39 times>, __align = 0},
initialized = false}
(gdb) p *(pthread_mutex_t *)0x562dd8337a60
$3 = {__data = {__lock = 2, __count = 0, __owner = 358660, __nusers = 1,
__kind = 0, __spins = 0, __elision = 0, __list = {__prev = 0x0, __next =
0x0}},
__size = "\002\000\000\000\000\000\000\000\004y\005\000\001", '\000'
<repeats 26 times>, __align = 2}