Hi,

Apologies for the wide CC, but I'm stuck trying to work out what is going on. I've had a very frustrating few weeks trying to enable a basic vhost-user test for the GPIO stub device. I've been modelling it on the existing vhost-user-test for virtio-net, but I think I've triggered some sort of subtle race condition that is very hard to pin down amongst all the moving pieces. I believe it comes down to the fact that the GPIO device supports the VHOST_USER_GET_CONFIG message, which subtly breaks the test harness, as can be seen by the following backtrace:
#0  virtio_pci_set_guest_notifiers (d=0x5615571bf620, nvqs=2, assign=false) at ../../hw/virtio/virtio-pci.c:990
#1  0x0000561553a585d5 in vu_gpio_stop (vdev=0x5615571c79c0) at ../../hw/virtio/vhost-user-gpio.c:106
#2  0x0000561553a58958 in vu_gpio_disconnect (dev=0x5615571c79c0) at ../../hw/virtio/vhost-user-gpio.c:215
#3  0x0000561553a589fa in vu_gpio_event (opaque=0x5615571c79c0, event=CHR_EVENT_CLOSED) at ../../hw/virtio/vhost-user-gpio.c:233
#4  0x0000561553bf47ac in chr_be_event (s=0x561556608000, event=CHR_EVENT_CLOSED) at ../../chardev/char.c:61
#5  0x0000561553bf4820 in qemu_chr_be_event (s=0x561556608000, event=CHR_EVENT_CLOSED) at ../../chardev/char.c:81
#6  0x0000561553bf06f9 in tcp_chr_disconnect_locked (chr=0x561556608000) at ../../chardev/char-socket.c:470
#7  0x0000561553bf0768 in tcp_chr_disconnect (chr=0x561556608000) at ../../chardev/char-socket.c:480
#8  0x0000561553bf094c in tcp_chr_hup (channel=0x561556532c00, cond=G_IO_HUP, opaque=0x561556608000) at ../../chardev/char-socket.c:520
#9  0x0000561553ae7ab5 in qio_channel_fd_source_dispatch (source=0x5615567a6c00, callback=0x561553bf091d <tcp_chr_hup>, user_data=0x561556608000) at ../../io/channel-watch.c:84
#10 0x00007f905b47fd6f in g_main_dispatch (context=0x5615571fff00) at ../../../glib/gmain.c:3325
#11 g_main_context_dispatch (context=0x5615571fff00) at ../../../glib/gmain.c:4043
#12 0x00007f905b480118 in g_main_context_iterate (context=0x5615571fff00, block=block@entry=1, dispatch=dispatch@entry=1, self=<optimized out>) at ../../../glib/gmain.c:4119
#13 0x00007f905b48040b in g_main_loop_run (loop=0x561557200170) at ../../../glib/gmain.c:4317

For a not entirely clear reason we dispatch a new run loop and discover the vhost-user socket has been closed down. This triggers the stop above, which then asserts because we haven't had a chance to fully initialise the device yet.
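For reference, the nested loop isn't accidental: vhost_user_read() (frame #14 below) waits for the backend's reply by spinning a private GMainLoop and temporarily re-points the chardev's read handlers at that context, so a HUP on the socket gets dispatched from the middle of realize. It has roughly this shape (a paraphrased sketch from memory, not the exact code in the tree):

    /* paraphrased sketch of vhost_user_read()'s wait-for-reply logic */
    CharBackend *chr = u->user->chr;    /* the vhost-user chardev */
    GMainContext *prev_ctxt = chr->chr->gcontext;
    GMainContext *ctxt = g_main_context_new();
    GMainLoop *loop = g_main_loop_new(ctxt, FALSE);

    /*
     * The chardev's usual context can't be nested safely, so its read
     * handlers are switched over to a private context for the duration
     * of the read ('data' carries the loop/msg state for the callback).
     */
    qemu_chr_be_update_read_handlers(chr->chr, ctxt);
    qemu_chr_fe_add_watch(chr, G_IO_IN | G_IO_HUP, vhost_user_read_cb, &data);

    /*
     * Anything that fires on the socket now, including the
     * CHR_EVENT_CLOSED that leads to frames #0-#8 above, is dispatched
     * from here, i.e. before realize has completed.
     */
    g_main_loop_run(loop);

    /* switch the chardev back to its original context */
    qemu_chr_be_update_read_handlers(chr->chr, prev_ctxt);
    g_main_loop_unref(loop);
    g_main_context_unref(ctxt);

The rest of the backtrace is the realize path that entered this loop: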
#14 0x0000561553a36fc3 in vhost_user_read (dev=0x5615571c7be0, msg=0x7ffcac609df0) at ../../hw/virtio/vhost-user.c:406
#15 0x0000561553a3be39 in vhost_user_get_config (dev=0x5615571c7be0, config=0x5615571c7bd0 "", config_len=8, errp=0x7ffcac60a120) at ../../hw/virtio/vhost-user.c:2304
#16 0x0000561553a344b6 in vhost_dev_get_config (hdev=0x5615571c7be0, config=0x5615571c7bd0 "", config_len=8, errp=0x7ffcac60a120) at ../../hw/virtio/vhost.c:1593
#17 0x0000561553a58c0e in vu_gpio_device_realize (dev=0x5615571c79c0, errp=0x7ffcac60a120) at ../../hw/virtio/vhost-user-gpio.c:283
#18 0x0000561553a2ec5d in virtio_device_realize (dev=0x5615571c79c0, errp=0x7ffcac60a180) at ../../hw/virtio/virtio.c:3623
#19 0x0000561553ad5ee0 in device_set_realized (obj=0x5615571c79c0, value=true, errp=0x7ffcac60a428) at ../../hw/core/qdev.c:531
#20 0x0000561553adf935 in property_set_bool (obj=0x5615571c79c0, v=0x5615571cfbe0, name=0x561553ef77d1 "realized", opaque=0x5615564a4b80, errp=0x7ffcac60a428) at ../../qom/object.c:2273
#21 0x0000561553add97c in object_property_set (obj=0x5615571c79c0, name=0x561553ef77d1 "realized", v=0x5615571cfbe0, errp=0x7ffcac60a428) at ../../qom/object.c:1408
#22 0x0000561553ae1d27 in object_property_set_qobject (obj=0x5615571c79c0, name=0x561553ef77d1 "realized", value=0x5615571cfbc0, errp=0x7ffcac60a428) at ../../qom/qom-qobject.c:28
#23 0x0000561553addce1 in object_property_set_bool (obj=0x5615571c79c0, name=0x561553ef77d1 "realized", value=true, errp=0x7ffcac60a428) at ../../qom/object.c:1477
#24 0x0000561553ad57a8 in qdev_realize (dev=0x5615571c79c0, bus=0x5615571c7938, errp=0x7ffcac60a428) at ../../hw/core/qdev.c:333
#25 0x0000561553a58fe0 in vhost_user_gpio_pci_realize (vpci_dev=0x5615571bf620, errp=0x7ffcac60a428) at ../../hw/virtio/vhost-user-gpio-pci.c:32
#26 0x0000561553679b82 in virtio_pci_realize (pci_dev=0x5615571bf620, errp=0x7ffcac60a428) at ../../hw/virtio/virtio-pci.c:1926
#27 0x000056155357a983 in pci_qdev_realize (qdev=0x5615571bf620, errp=0x7ffcac60a4e0) at ../../hw/pci/pci.c:2191
#28 0x0000561553679f26 in virtio_pci_dc_realize (qdev=0x5615571bf620, errp=0x7ffcac60a4e0) at ../../hw/virtio/virtio-pci.c:2011
#29 0x0000561553ad5ee0 in device_set_realized (obj=0x5615571bf620, value=true, errp=0x7ffcac60a750) at ../../hw/core/qdev.c:531
#30 0x0000561553adf935 in property_set_bool (obj=0x5615571bf620, v=0x5615571c9db0, name=0x561553ef77d1 "realized", opaque=0x5615564a4b80, errp=0x7ffcac60a750) at ../../qom/object.c:2273
#31 0x0000561553add97c in object_property_set (obj=0x5615571bf620, name=0x561553ef77d1 "realized", v=0x5615571c9db0, errp=0x7ffcac60a750) at ../../qom/object.c:1408
#32 0x0000561553ae1d27 in object_property_set_qobject (obj=0x5615571bf620, name=0x561553ef77d1 "realized", value=0x5615571ca040, errp=0x7ffcac60a750) at ../../qom/qom-qobject.c:28
#33 0x0000561553addce1 in object_property_set_bool (obj=0x5615571bf620, name=0x561553ef77d1 "realized", value=true, errp=0x7ffcac60a750) at ../../qom/object.c:1477
#34 0x0000561553ad57a8 in qdev_realize (dev=0x5615571bf620, bus=0x5615569e82e0, errp=0x7ffcac60a750) at ../../hw/core/qdev.c:333
#35 0x00005615532f2523 in qdev_device_add_from_qdict (opts=0x5615571be500, from_json=false, errp=0x7ffcac60a750) at ../../softmmu/qdev-monitor.c:713
#36 0x00005615532f25c1 in qdev_device_add (opts=0x5615564a29d0, errp=0x56155451b300 <error_fatal>) at ../../softmmu/qdev-monitor.c:732
#37 0x00005615532f7b3d in device_init_func (opaque=0x0, opts=0x5615564a29d0, errp=0x56155451b300 <error_fatal>) at ../../softmmu/vl.c:1230
#38 0x0000561553cb965b in qemu_opts_foreach (list=0x561554373400 <qemu_device_opts>, func=0x5615532f7b16 <device_init_func>, opaque=0x0, errp=0x56155451b300 <error_fatal>) at ../../util/qemu-option.c:1135
#39 0x00005615532fb339 in qemu_create_cli_devices () at ../../softmmu/vl.c:2685
#40 0x00005615532fb4c1 in qmp_x_exit_preconfig (errp=0x56155451b300 <error_fatal>) at ../../softmmu/vl.c:2747
#41 0x00005615532fdb84 in qemu_init (argc=27, argv=0x7ffcac60ab78, envp=0x0) at ../../softmmu/vl.c:3776
#42 0x000056155326f0bd in qemu_main (argc=27, argv=0x7ffcac60ab78, envp=0x0) at ../../softmmu/main.c:35
#43 0x000056155326f0f3 in main (argc=27, argv=0x7ffcac60ab78) at ../../softmmu/main.c:45

I first suspected the vhost-user server was the one closing things down, but as far as I can tell it simply hangs after its first attempt to respond to the protocol-features request:

# Start of read-guest-mem tests
subprocess_run_one_test: /aarch64/virt/generic-pcihost/pci-bus-generic/pci-bus/vhost-user-gpio-pci/vhost-user-gpio/vhost-user-gpio-tests/read-guest-mem/memfile/subprocess
virtio_net_register_nodes: dbg
virtio_gpio_register_nodes: dbg
qos_set_machines_devices_available: query machines
qos_set_machines_devices_available: qom-list-types
qemu_chr_be_event: closing char0/(null)
qemu_chr_be_event: closing qtest/(null)
run_one_test: -M virt, -cpu max -device vhost-user-gpio-pci,id=gpio0,addr=04.0,chardev=chr-vhost-user-test
run_one_test: -M virt, -cpu max -device vhost-user-gpio-pci,id=gpio0,addr=04.0,chardev=chr-vhost-user-test
test_server_create_chr: created chr-vhost-user-test unix:/tmp/vhost-test-01rNam/vhost-user-test.sock,server=on,wait=off
vhost_user_test_setup: -M virt, -cpu max -device vhost-user-gpio-pci,id=gpio0,addr=04.0,chardev=chr-vhost-user-test
restart_qemu_or_continue: starting -M virt, -cpu max -device vhost-user-gpio-pci,id=gpio0,addr=04.0,chardev=chr-vhost-user-test -m 256 -object memory-backend-memfd,id=mem,size=256M, -numa node,memdev=mem -chardev socket,id=chr-vhost-user-test,path=/tmp/vhost-test-01rNam/vhost-user-test.sock
chr_event: 1
vu_gpio_get_protocol_features: 0x202
vu_gpio_get_protocol_features: sent 20 byte reply
qemu_chr_be_event: closing chr-vhost-user-test/disconnected:unix:/tmp/vhost-test-01rNam/vhost-user-test.sock
virtio_pci_set_guest_notifiers: assign:false nvqs:2/0
qemu-system-aarch64: ../../hw/virtio/virtio-pci.c:1004: virtio_pci_set_guest_notifiers: Assertion `assign || nvqs == proxy->nvqs_with_notifiers' failed.
qemu_chr_be_event: closing char0/disconnected:unix:/tmp/qtest-740831.qmp

The vu_gpio_get_protocol_features handler does seem to succeed in sending its reply, but everything stalls at that point until the test eventually times out and the whole thing is torn down.
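One thing I notice: the 0x202 in the protocol-features reply includes bit 9, which if I'm reading the spec right is VHOST_USER_PROTOCOL_F_CONFIG, so having seen it QEMU goes on to issue VHOST_USER_GET_CONFIG and sits in vhost_user_read() (frames #14/#15) waiting for an answer. If the test stub's chr_read() switch has no case for that request, nothing will ever reply. Something like the following, modelled on the reply pattern the stub already uses for other requests, might unblock it (entirely hypothetical; it assumes VHOST_USER_GET_CONFIG is added to the stub's request enum and I haven't verified the config payload layout the master expects):

    case VHOST_USER_GET_CONFIG:
        /*
         * Hypothetical handler: echo the request back with the reply
         * flag set and a zero-filled config region, just enough for
         * vhost_user_get_config() (frame #15) to make progress
         * instead of blocking in its nested loop.
         */
        msg.flags |= VHOST_USER_REPLY_MASK;
        p = (uint8_t *) &msg;
        qemu_chr_fe_write_all(chr, p, VHOST_USER_HDR_SIZE + msg.size);
        break;

That still wouldn't explain who actually drops the socket and raises the G_IO_HUP, though.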
Compare this with the vhost-net test, which runs a lot more cleanly:

# Start of read-guest-mem tests
subprocess_run_one_test: /aarch64/virt/generic-pcihost/pci-bus-generic/pci-bus/virtio-net-pci/virtio-net/virtio-net-tests/vhost-user/read-guest-mem/memfile/subprocess
virtio_net_register_nodes: dbg
virtio_gpio_register_nodes: dbg
qos_set_machines_devices_available: query machines
qos_set_machines_devices_available: qom-list-types
qemu_chr_be_event: closing char0/(null)
qemu_chr_be_event: closing qtest/(null)
run_one_test: -M virt, -cpu max -device virtio-net-pci,netdev=hs0,addr=04.0
run_one_test: -M virt, -cpu max -device virtio-net-pci,netdev=hs0,addr=04.0
test_server_create_chr: created chr-vhost-user-test unix:/tmp/vhost-test-jtqo81/vhost-user-test.sock,server=on,wait=off
vhost_user_test_setup: -M virt, -cpu max -device virtio-net-pci,netdev=hs0,addr=04.0
restart_qemu_or_continue: starting -M virt, -cpu max -device virtio-net-pci,netdev=hs0,addr=04.0 -m 256 -object memory-backend-memfd,id=mem,size=256M, -numa node,memdev=mem -chardev socket,id=chr-vhost-user-test,path=/tmp/vhost-test-jtqo81/vhost-user-test.sock -netdev vhost-user,id=hs0,chardev=chr-vhost-user-test,vhostforce=on
chr_event: 1
vu_net_get_protocol_features: 0x42
vu_net_get_protocol_features: sent 20 byte reply
virtio_set_status: 0x55896cab9510/0
virtio_set_status: 0x55896cab9510/0
restart_qemu_or_continue: returned from qtest_start
qos_machine_new: aarch64/virt
qos_driver_new: virtio-net-pci
virtio_set_status: 0x55896cab9510/0
virtio_set_status: 0x55896cab9510/0
virtio_set_status: 0x55896cab9510/1
virtio_set_status: 0x55896cab9510/3
virtio_set_status: 0x55896cab9510/7
virtio_set_started: true
virtio_pci_set_guest_notifiers: assign:true nvqs:2/0
vhost_dev_start: 0x55896c1902c0 0x55896cab9510
vu_net_set_features: 0x40000000
test_read_guest_mem: start
vhost_user_test_cleanup: shutting down
qemu_chr_be_event: closing chr-vhost-user-test/disconnected:unix:/tmp/vhost-test-jtqo81/vhost-user-test.sock
qemu-system-aarch64: qemu_chr_be_event: closing chr-vhost-user-test/(null)
Failed to set msg fds.
qemu-system-aarch64: vhost VQ 0 ring restore failed: -22: Invalid argument (22)
qemu-system-aarch64: Failed to set msg fds.
qemu-system-aarch64: vhost VQ 1 ring restore failed: -22: Invalid argument (22)
virtio_pci_set_guest_notifiers: assign:false nvqs:2/2
virtio_set_status: 0x55896cab9510/7
qemu_chr_be_event: closing chr-vhost-user-test/(null)
qemu_chr_be_event: closing char0/(null)
qemu_chr_be_event: closing qtest/(null)

I'm open to any suggestions on the best way to track down where the chardev is being torn down, and by whom. Could the lifetime of the QMP and vhost-user sockets somehow be limited? The backtrace shows the chardev handling G_IO_HUP, which makes it look as if the vhost-user daemon dropped the connection as soon as it sent the protocol-features reply?

--
Alex Bennée