On 6/18/21 4:17 PM, Alex Bennée wrote:
>
> Andrey Shinkevich <andrey.shinkev...@huawei.com> writes:
>
>> Dear Shashi,
>>
>> I have applied the version 4 of the series "GICv3 LPI and ITS feature
>> implementation" right after the commit 3e9f48b as before (because the
>> GCCv7.5 is unavailable in the YUM repository for CentOS-7.9).
>>
>> The guest OS still hangs at its start when QEMU is configured with 4 or
>> more vCPUs (with 1 to 3 vCPUs the guest starts and runs OK and the MTTCG
>> works properly):
>
> Does QEMU itself hang? If you attach gdb to QEMU and do:
>
>   thread apply all bt
>
> that should dump the backtrace for all threads. Could you post the backtrace?
>

Thank you, Alex, for your response.

Yes, it is QEMU that hangs.
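For reference, the dump below was captured roughly like this; a minimal sketch, assuming gdb is attached to the already running QEMU by PID (the pidof lookup is only illustrative), with pagination switched off so gdb does not stop at the "Type <return> to continue" prompts:

  $ gdb -p $(pidof qemu-system-aarch64)
  (gdb) set pagination off
  (gdb) thread apply all bt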
The dump of the gdb command 'thr a a bt' is below:

Thread 7 (Thread 0x7f476e489700 (LWP 24967)):
#0  0x00007f477c2bbd19 in syscall () at /lib64/libc.so.6
#1  0x000055747d41a270 in qemu_event_wait (val=<optimized out>, f=<optimized out>) at /home/andy/git/qemu/include/qemu/futex.h:29
#2  0x000055747d41a270 in qemu_event_wait (ev=ev@entry=0x55747e051c28 <rcu_call_ready_event>) at ../util/qemu-thread-posix.c:460
#3  0x000055747d444d78 in call_rcu_thread (opaque=opaque@entry=0x0) at ../util/rcu.c:258
#4  0x000055747d419406 in qemu_thread_start (args=<optimized out>) at ../util/qemu-thread-posix.c:521
#5  0x00007f477c598ea5 in start_thread () at /lib64/libpthread.so.0
#6  0x00007f477c2c19fd in clone () at /lib64/libc.so.6

Thread 6 (Thread 0x7f472ce42700 (LWP 24970)):
#0  0x00007f477c2b6ccd in poll () at /lib64/libc.so.6
#1  0x00007f47805c137c in g_main_context_iterate.isra.19 () at /lib64/libglib-2.0.so.0
#2  0x00007f47805c16ca in g_main_loop_run () at /lib64/libglib-2.0.so.0
#3  0x000055747d29b071 in iothread_run (opaque=opaque@entry=0x55747f85f280) at ../iothread.c:80
#4  0x000055747d419406 in qemu_thread_start (args=<optimized out>) at ../util/qemu-thread-posix.c:521
#5  0x00007f477c598ea5 in start_thread () at /lib64/libpthread.so.0
#6  0x00007f477c2c19fd in clone () at /lib64/libc.so.6

Thread 5 (Thread 0x7f461f9ff700 (LWP 24971)):
#0  0x00007f477c59ca35 in pthread_cond_wait@@GLIBC_2.3.2 () at /lib64/libpthread.so.0
#1  0x000055747d419b1d in qemu_cond_wait_impl (cond=0x55747f916670, mutex=0x55747e04dc00 <qemu_global_mutex>, file=0x55747d5dbe5c "../softmmu/cpus.c", line=417) at ../util/qemu-thread-posix.c:174
#2  0x000055747d20ae36 in qemu_wait_io_event (cpu=cpu@entry=0x55747f8b7920) at ../softmmu/cpus.c:417
#3  0x000055747d18d6a1 in mttcg_cpu_thread_fn (arg=arg@entry=0x55747f8b7920) at ../accel/tcg/tcg-accel-ops-mttcg.c:98
#4  0x000055747d419406 in qemu_thread_start (args=<optimized out>) at ../util/qemu-thread-posix.c:521
#5  0x00007f477c598ea5 in start_thread () at /lib64/libpthread.so.0
#6  0x00007f477c2c19fd in clone () at /lib64/libc.so.6

Thread 4 (Thread 0x7f461f1fe700 (LWP 24972)):
#0  0x00007f477c59ca35 in pthread_cond_wait@@GLIBC_2.3.2 () at /lib64/libpthread.so.0
#1  0x000055747d419b1d in qemu_cond_wait_impl (cond=0x55747f9897e0, mutex=0x55747e04dc00 <qemu_global_mutex>, file=0x55747d5dbe5c "../softmmu/cpus.c", line=417) at ../util/qemu-thread-posix.c:174
#2  0x000055747d20ae36 in qemu_wait_io_event (cpu=cpu@entry=0x55747f924bc0) at ../softmmu/cpus.c:417
#3  0x000055747d18d6a1 in mttcg_cpu_thread_fn (arg=arg@entry=0x55747f924bc0) at ../accel/tcg/tcg-accel-ops-mttcg.c:98
#4  0x000055747d419406 in qemu_thread_start (args=<optimized out>) at ../util/qemu-thread-posix.c:521
#5  0x00007f477c598ea5 in start_thread () at /lib64/libpthread.so.0
#6  0x00007f477c2c19fd in clone () at /lib64/libc.so.6

Thread 3 (Thread 0x7f461e9fd700 (LWP 24973)):
#0  0x00007f477c59ca35 in pthread_cond_wait@@GLIBC_2.3.2 () at /lib64/libpthread.so.0
#1  0x000055747d419b1d in qemu_cond_wait_impl (cond=0x55747f9f5b40, mutex=0x55747e04dc00 <qemu_global_mutex>, file=0x55747d5dbe5c "../softmmu/cpus.c", line=417) at ../util/qemu-thread-posix.c:174
#2  0x000055747d20ae36 in qemu_wait_io_event (cpu=cpu@entry=0x55747f990ba0) at ../softmmu/cpus.c:417
#3  0x000055747d18d6a1 in mttcg_cpu_thread_fn (arg=arg@entry=0x55747f990ba0) at ../accel/tcg/tcg-accel-ops-mttcg.c:98
#4  0x000055747d419406 in qemu_thread_start (args=<optimized out>) at ../util/qemu-thread-posix.c:521
#5  0x00007f477c598ea5 in start_thread () at /lib64/libpthread.so.0
#6  0x00007f477c2c19fd in clone () at /lib64/libc.so.6

Thread 2 (Thread 0x7f461e1fc700 (LWP 24974)):
#0  0x00007f477c59ca35 in pthread_cond_wait@@GLIBC_2.3.2 () at /lib64/libpthread.so.0
#1  0x000055747d419b1d in qemu_cond_wait_impl (cond=0x55747fa626c0, mutex=0x55747e04dc00 <qemu_global_mutex>, file=0x55747d5dbe5c "../softmmu/cpus.c", line=417) at ../util/qemu-thread-posix.c:174
#2  0x000055747d20ae36 in qemu_wait_io_event (cpu=cpu@entry=0x55747f9fcf00) at ../softmmu/cpus.c:417
#3  0x000055747d18d6a1 in mttcg_cpu_thread_fn (arg=arg@entry=0x55747f9fcf00) at ../accel/tcg/tcg-accel-ops-mttcg.c:98
#4  0x000055747d419406 in qemu_thread_start (args=<optimized out>) at ../util/qemu-thread-posix.c:521
#5  0x00007f477c598ea5 in start_thread () at /lib64/libpthread.so.0
#6  0x00007f477c2c19fd in clone () at /lib64/libc.so.6

Thread 1 (Thread 0x7f4781db4d00 (LWP 24957)):
#0  0x00007f477c2b6d8f in ppoll () at /lib64/libc.so.6
#1  0x000055747d431439 in qemu_poll_ns (__ss=0x0, __timeout=0x7ffcc3188330, __nfds=<optimized out>, __fds=<optimized out>) at /usr/include/bits/poll2.h:77
#2  0x000055747d431439 in qemu_poll_ns (fds=<optimized out>, nfds=<optimized out>, timeout=timeout@entry=3792947) at ../util/qemu-timer.c:348
#3  0x000055747d4466ce in main_loop_wait (timeout=<optimized out>) at ../util/main-loop.c:249
#4  0x000055747d4466ce in main_loop_wait (nonblocking=nonblocking@entry=0) at ../util/main-loop.c:530
#5  0x000055747d2695c7 in qemu_main_loop () at ../softmmu/runstate.c:725
#6  0x000055747ccc1bde in main (argc=<optimized out>, argv=<optimized out>, envp=<optimized out>) at ../softmmu/main.c:50
(gdb)

Andrey

...
>>
>>
>> I run QEMU with virt-manager as this:
>>
>> qemu 7311 1 70 19:15 ? 00:00:05
>> /usr/local/bin/qemu-system-aarch64 -name
>> guest=EulerOS-2.8-Rich,debug-threads=on -S -object
>> secret,id=masterKey0,format=raw,file=/var/lib/libvirt/qemu/domain-95-EulerOS-2.8-Rich/master-key.aes
>> -machine virt-6.1,accel=tcg,usb=off,dump-guest-core=off,gic-version=3
>> -cpu max -drive
>> file=/usr/share/AAVMF/AAVMF_CODE.fd,if=pflash,format=raw,unit=0,readonly=on
>> -drive
>> file=/var/lib/libvirt/qemu/nvram/EulerOS-2.8-Rich_VARS.fd,if=pflash,format=raw,unit=1
>> -m 4096 -smp 4,sockets=4,cores=1,threads=1 -uuid
>> c95e0e92-011b-449a-8e3f-b5f0938aaaa7 -display none -no-user-config
>> -nodefaults -chardev socket,id=charmonitor,fd=26,server,nowait -mon
>> chardev=charmonitor,id=monitor,mode=control -rtc base=utc -no-shutdown
>> -boot strict=on -device
>> pcie-root-port,port=0x8,chassis=1,id=pci.1,bus=pcie.0,multifunction=on,addr=0x1
>> -device
>> pcie-root-port,port=0x9,chassis=2,id=pci.2,bus=pcie.0,addr=0x1.0x1
>> -device
>> pcie-root-port,port=0xa,chassis=3,id=pci.3,bus=pcie.0,addr=0x1.0x2
>> -device
>> pcie-root-port,port=0xb,chassis=4,id=pci.4,bus=pcie.0,addr=0x1.0x3
>> -device qemu-xhci,p2=8,p3=8,id=usb,bus=pci.2,addr=0x0 -device
>> virtio-scsi-pci,id=scsi0,bus=pci.3,addr=0x0 -drive
>> file=/var/lib/libvirt/images/EulerOS-2.8-Rich.qcow2,format=qcow2,if=none,id=drive-scsi0-0-0-0
>> -device
>> scsi-hd,bus=scsi0.0,channel=0,scsi-id=0,lun=0,drive=drive-scsi0-0-0-0,id=scsi0-0-0-0,bootindex=1
>> -drive if=none,id=drive-scsi0-0-0-1,readonly=on -device
>> scsi-cd,bus=scsi0.0,channel=0,scsi-id=0,lun=1,drive=drive-scsi0-0-0-1,id=scsi0-0-0-1
>> -netdev tap,fd=28,id=hostnet0 -device
>> virtio-net-pci,netdev=hostnet0,id=net0,mac=52:54:00:f9:e0:69,bus=pci.1,addr=0x0
>> -chardev pty,id=charserial0 -serial chardev:charserial0 -msg timestamp=on
>>
>> The issue is reproducible and persists.
>> 1. Do you think that applying the series results in the dead lock in
>> MTTCG? Or it may be other reason?
>> 2. Which piece of QEMU source code should I investigate to locate the issue?
>>
>> Best regards,
>> Andrey Shinkevich
>>
>> ...
>