Public bug reported: Hello,
Please backport the following upstream commit that fixes recurrent QMP timeouts: https://gitlab.com/qemu-project/qemu/-/commit/effd60c878176bcaf97fa7ce2b12d04bb8ead6f7 This has been fixed in Noble and newer releases. Here is a reproducer to help identify the fix. Details on its usage are in the comments #!/usr/bin/env python3 ############################################################################## # Reproducer script for QEMU hang in snapshot at boot # # Requires: `qemu.qmp` python package # # Fix: effd60c878176bcaf97fa7ce2b12d04bb8ead6f7 # # Description: # # Linux appears to write _something_ to the UEFI variables at boot. If a qmp # # transaction is started during the write operation it can deadlock qemu. # ############################################################################## ############################## # Expected backtrace # # [...] # # aio_poll # # [...] # # qmp_blockdev_snapshot_sync # # [...] # # aio_poll # # [...] # # pflash_write # # [...] # ############################## ######################## Setup ######################### # $ pip install qemu.qmp # $ cp /usr/share/OVMF/OVMF_VARS_4M.fd /tmp/vars.fd # $ wget https://github.com/cirros-dev/cirros/releases/download/0.6.3/cirros-0.6.3-x86_64-rootfs.img.gz # $ gunzip cirros-0.6.3-x86_64-rootfs.img.gz # $ qemu-img convert -f raw -O qcow2 cirros-0.6.3-x86_64-rootfs.img /tmp/disk.qcow2.ref # $ rm -f cirros-0.6.3-x86_64-rootfs.img ######################################################## import asyncio import logging import os import subprocess from qemu.qmp import QMPClient SOCKET = '/tmp/qmp-deadlock.sock' FW = '/usr/share/OVMF/OVMF_CODE_4M.fd' DISK = '/tmp/disk.qcow2' SNAP_FILE = '/tmp/snap.qcow2' FW_VARS = '/tmp/vars.fd' async def spawn_qemu(): blk_args = [ 'driver=qcow2', 'node-name=snap-disk', 'file.driver=file', f'file.filename={DISK}', ] cmd = [ 'qemu-system-x86_64', '-qmp', f'unix:{SOCKET}', '-blockdev', ','.join(blk_args), '-device', 'virtio-blk,drive=snap-disk', '-drive', 
f'if=pflash,format=raw,readonly=on,file={FW}', '-drive', f'if=pflash,format=raw,file={FW_VARS}', '-m', '1G', '-nographic', #'-enable-kvm', ] #print(' '.join(cmd)) return await asyncio.create_subprocess_exec( *cmd, stdin=subprocess.DEVNULL, stdout=subprocess.DEVNULL, ) async def snap_rollback(qmp): await qmp.execute('blockdev-snapshot-sync', { 'node-name': 'snap-disk', 'snapshot-node-name': 'snap', 'snapshot-file': SNAP_FILE, }) with qmp.listener('BLOCK_JOB_READY') as listener: await qmp.execute('block-commit', { 'device': 'snap', 'job-id': 'commit', }) async for event in listener: if event.get('data', {}).get('device') == 'commit': break with qmp.listener('BLOCK_JOB_COMPLETED') as listener: await qmp.execute('block-job-complete', { 'device': 'commit', }) async for event in listener: if event.get('data', {}).get('device') == 'commit': break async def qmp_main(qmp): while True: await asyncio.wait_for(snap_rollback(qmp), timeout=15) async def main(): #logging.basicConfig(level=logging.DEBUG) qmp = QMPClient('test-deadlock') await qmp.start_server(SOCKET) qemu, _ = await asyncio.gather(spawn_qemu(), qmp.accept()) print(f'qemu pid: {qemu.pid}') try: await qmp_main(qmp) except asyncio.TimeoutError: print("QMP timeout, exiting") finally: try: await qmp.disconnect() finally: qemu.kill() await qemu.wait() asyncio.run(main()) A debdiff with the patch will come shortly ** Affects: qemu (Ubuntu) Importance: Undecided Status: Fix Released ** Affects: qemu (Ubuntu Jammy) Importance: Undecided Status: New ** Affects: qemu (Ubuntu Noble) Importance: Undecided Status: Fix Released ** Affects: qemu (Ubuntu Oracular) Importance: Undecided Status: Fix Released ** Affects: qemu (Ubuntu Plucky) Importance: Undecided Status: Fix Released ** Also affects: qemu (Ubuntu Noble) Importance: Undecided Status: New ** Also affects: qemu (Ubuntu Jammy) Importance: Undecided Status: New ** Also affects: qemu (Ubuntu Oracular) Importance: Undecided Status: New ** Also affects: qemu (Ubuntu Plucky) 
Importance: Undecided Status: New ** Changed in: qemu (Ubuntu Noble) Status: New => Fix Released ** Changed in: qemu (Ubuntu Oracular) Status: New => Fix Released ** Changed in: qemu (Ubuntu Plucky) Status: New => Fix Released -- You received this bug notification because you are a member of Ubuntu Bugs, which is subscribed to Ubuntu. https://bugs.launchpad.net/bugs/2091013 Title: Jammy : Please backport upstream fix to qmp timeouts To manage notifications about this bug go to: https://bugs.launchpad.net/ubuntu/+source/qemu/+bug/2091013/+subscriptions -- ubuntu-bugs mailing list ubuntu-bugs@lists.ubuntu.com https://lists.ubuntu.com/mailman/listinfo/ubuntu-bugs