Hello PowerPC folks, I have encountered a kdump bug, which I bisected and pinned to commit 174db9e7f775 ("powerpc/pseries/pci: Add support of MSI domains to PHB hotplug").
In that case, using Fedora 36 as the host, the mentioned commit as the guest kernel, and a virtio-block disk, the kdump kernel hangs:

[ 0.000000] Kernel command line: elfcorehdr=0x22c00000 no_timer_check net.ifnames=0 console=tty0 console=hvc0,115200n8 irqpoll maxcpus=1 noirqdistrib reset_devices cgroup_disable=memory numa=off udev.children-max=2 ehea.use_mcs=0 panic=10 kvm_cma_resv_ratio=0 transparent_hugepage=never novmcoredd hugetlb_cma=0
...
[ 7.763260] virtio_blk virtio2: 32/0/0 default/read/poll queues
[ 7.771391] virtio_blk virtio2: [vda] 20971520 512-byte logical blocks (10.7 GB/10.0 GiB)
[ 68.398234] systemd-udevd[187]: virtio2: Worker [190] processing SEQNUM=1193 is taking a long time
[ 188.398258] systemd-udevd[187]: virtio2: Worker [190] processing SEQNUM=1193 killed

During my tests, I found that in very rare cases kdump can succeed (I guess it may depend on the CPU id). If I use either maxcpus=2 or a scsi-disk, kdump also succeeds. Before the mentioned commit, kdump also succeeds. The attachment contains the XML to reproduce the bug. Do you have any ideas? Thanks
<domain type="kvm">
  <!-- Guest identity: KVM domain "rhel9"; 16777216 KiB (16 GiB) of memory and 16 static vCPUs. -->
  <name>rhel9</name>
  <uuid>6266c1c1-1e74-4046-b959-33d94877b387</uuid>
  <metadata>
    <libosinfo:libosinfo xmlns:libosinfo="http://libosinfo.org/xmlns/libvirt/domain/1.0">
      <libosinfo:os id="http://redhat.com/rhel/8-unknown"/>
    </libosinfo:libosinfo>
  </metadata>
  <memory unit="KiB">16777216</memory>
  <currentMemory unit="KiB">16777216</currentMemory>
  <vcpu placement="static">16</vcpu>

  <!-- Platform: ppc64le pseries machine with a POWER9 CPU model, booting from the hard disk. -->
  <os>
    <type arch="ppc64le" machine="pseries-rhel8.6.0">hvm</type>
    <boot dev="hd"/>
  </os>
  <cpu mode="custom" match="exact" check="none">
    <model fallback="forbid">POWER9</model>
  </cpu>
  <clock offset="utc"/>

  <!-- Lifecycle actions for power-off, reboot and crash events. -->
  <on_poweroff>destroy</on_poweroff>
  <on_reboot>restart</on_reboot>
  <on_crash>destroy</on_crash>

  <devices>
    <emulator>/usr/libexec/qemu-kvm</emulator>

    <!-- Disk: qcow2 image attached on the virtio bus as vda, PCI slot 0x04. -->
    <disk type="file" device="disk">
      <driver name="qemu" type="qcow2"/>
      <source file="/var/lib/libvirt/images/rhel-guest-image-9.1-20220701.0.ppc64le.qcow2"/>
      <target dev="vda" bus="virtio"/>
      <address type="pci" domain="0x0000" bus="0x00" slot="0x04" function="0x0"/>
    </disk>

    <!-- Controllers: USB (qemu-xhci), the PCI root (spapr-pci-host-bridge) and virtio-serial. -->
    <controller type="usb" index="0" model="qemu-xhci" ports="15">
      <address type="pci" domain="0x0000" bus="0x00" slot="0x02" function="0x0"/>
    </controller>
    <controller type="pci" index="0" model="pci-root">
      <model name="spapr-pci-host-bridge"/>
      <target index="0"/>
    </controller>
    <controller type="virtio-serial" index="0">
      <address type="pci" domain="0x0000" bus="0x00" slot="0x03" function="0x0"/>
    </controller>

    <!-- Network: virtio NIC on the "default" network, PCI slot 0x01. -->
    <interface type="network">
      <mac address="52:54:00:74:c9:50"/>
      <source network="default"/>
      <model type="virtio"/>
      <address type="pci" domain="0x0000" bus="0x00" slot="0x01" function="0x0"/>
    </interface>

    <!-- Serial and console: both are pty-backed and use the same spapr-vio address (reg 0x30000000). -->
    <serial type="pty">
      <target type="spapr-vio-serial" port="0">
        <model name="spapr-vty"/>
      </target>
      <address type="spapr-vio" reg="0x30000000"/>
    </serial>
    <console type="pty">
      <target type="serial" port="0"/>
      <address type="spapr-vio" reg="0x30000000"/>
    </console>

    <!-- Guest agent channel carried over the virtio-serial controller. -->
    <channel type="unix">
      <target type="virtio" name="org.qemu.guest_agent.0"/>
      <address type="virtio-serial" controller="0" bus="0" port="1"/>
    </channel>

    <!-- USB input devices. -->
    <input type="tablet" bus="usb">
      <address type="usb" bus="0" port="1"/>
    </input>
    <input type="keyboard" bus="usb">
      <address type="usb" bus="0" port="2"/>
    </input>

    <!-- Emulated TPM 2.0 on the spapr-vio bus. -->
    <tpm model="tpm-spapr">
      <backend type="emulator" version="2.0"/>
      <address type="spapr-vio" reg="0x00004000"/>
    </tpm>

    <!-- Display: VNC with an automatically assigned port, VGA video, no audio backend. -->
    <graphics type="vnc" port="-1" autoport="yes">
      <listen type="address"/>
    </graphics>
    <audio id="1" type="none"/>
    <video>
      <model type="vga" vram="16384" heads="1" primary="yes"/>
      <address type="pci" domain="0x0000" bus="0x00" slot="0x07" function="0x0"/>
    </video>

    <!-- Remaining virtio PCI devices: memory balloon (slot 0x05) and RNG fed from /dev/urandom (slot 0x06). -->
    <memballoon model="virtio">
      <address type="pci" domain="0x0000" bus="0x00" slot="0x05" function="0x0"/>
    </memballoon>
    <rng model="virtio">
      <backend model="random">/dev/urandom</backend>
      <address type="pci" domain="0x0000" bus="0x00" slot="0x06" function="0x0"/>
    </rng>

    <!-- pseries panic device. -->
    <panic model="pseries"/>
  </devices>
</domain>