> 2025年3月24日 22:31,Stefano Garzarella <sgarz...@redhat.com> 写道: > > On Thu, Mar 20, 2025 at 08:21:30PM +0800, Haoqian He wrote: >> >> >>> 2025年3月19日 23:20,Stefano Garzarella <sgarz...@redhat.com> 写道: >>> >>> On Fri, Mar 14, 2025 at 06:15:34AM -0400, Haoqian He wrote: >>>> Live migration should be terminated if the backend crashes before >>>> the migration completes. >>>> >>>> Since the vhost device will be stopped when VM is stopped before >>>> the end of the live migration, current implementation if vhost >>>> backend died, vhost device's set_status() will not return failure, >>>> live migration won't perceive the disconnection between qemu and >>>> vhost backend, inflight io would be submitted in migration target >>>> host, leading to IO error. >>>> >>>> To fix this issue: >>>> 1. Add set_status_ext() which has return value for >>>> VirtioDeviceClass and vhost-user-blk/scsi use the _ext version. >>>> >>>> 2. In set_status_ext(), return failure if the flag `connected` >>>> is false or vhost_dev_stop return failure, which means qemu lost >>>> connection with backend. >>>> >>>> Hence migration_completion() will process failure, terminate >>>> migration and restore VM. >>>> >>>> Signed-off-by: Haoqian He <haoqian...@smartx.com> >>>> --- >>>> hw/block/vhost-user-blk.c | 29 +++++++++++++++------------ >>>> hw/scsi/vhost-scsi-common.c | 13 ++++++------ >>>> hw/scsi/vhost-user-scsi.c | 20 ++++++++++-------- >>>> hw/virtio/virtio.c | 20 +++++++++++++----- >>>> include/hw/virtio/vhost-scsi-common.h | 2 +- >>>> include/hw/virtio/virtio.h | 1 + >>>> 6 files changed, 52 insertions(+), 33 deletions(-) >>>> >>>> diff --git a/hw/block/vhost-user-blk.c b/hw/block/vhost-user-blk.c >>>> index ae42327cf8..4865786c54 100644 >>>> --- a/hw/block/vhost-user-blk.c >>>> +++ b/hw/block/vhost-user-blk.c >>>> @@ -204,7 +204,7 @@ err_host_notifiers: >>>> return ret; >>>> } >>>> >>>> -static void vhost_user_blk_stop(VirtIODevice *vdev) >>>> +static int vhost_user_blk_stop(VirtIODevice *vdev) >>>> { >>>> VHostUserBlk *s = VHOST_USER_BLK(vdev); >>>> BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(vdev))); >>>> @@ -212,26 +212,26 @@ static void vhost_user_blk_stop(VirtIODevice *vdev) >>>> int ret; >>>> >>>> if (!s->started_vu) { >>>> - return; >>>> + return 0; >>>> } >>>> s->started_vu = false; >>>> >>>> if (!k->set_guest_notifiers) { >>>> - return; >>>> + return 0; >>>> } >>>> >>>> - vhost_dev_stop(&s->dev, vdev, true); >>>> + ret = vhost_dev_stop(&s->dev, vdev, true); >>>> >>>> - ret = k->set_guest_notifiers(qbus->parent, s->dev.nvqs, false); >>>> - if (ret < 0) { >>>> + if (k->set_guest_notifiers(qbus->parent, s->dev.nvqs, false) < 0) { >>>> error_report("vhost guest notifier cleanup failed: %d", ret); >>>> - return; >>>> + return -1; >>>> } >>>> >>>> vhost_dev_disable_notifiers(&s->dev, vdev); >>>> + return ret; >>>> } >>>> >>>> -static void vhost_user_blk_set_status(VirtIODevice *vdev, uint8_t status) >>>> +static int vhost_user_blk_set_status(VirtIODevice *vdev, uint8_t status) >>>> { >>>> VHostUserBlk *s = VHOST_USER_BLK(vdev); >>>> bool should_start = virtio_device_should_start(vdev, status); >>>> @@ -239,11 +239,11 @@ static void vhost_user_blk_set_status(VirtIODevice >>>> *vdev, uint8_t status) >>>> int ret; >>>> >>>> if (!s->connected) { >>>> - return; >>>> + return -1; >>>> } >>>> >>>> if (vhost_dev_is_started(&s->dev) == should_start) { >>>> - return; >>>> + return 0; >>>> } >>>> >>>> if (should_start) { >>>> @@ -253,9 +253,12 @@ static void vhost_user_blk_set_status(VirtIODevice >>>> *vdev, uint8_t status) >>>> qemu_chr_fe_disconnect(&s->chardev); >>>> } >>>> } else { >>>> - vhost_user_blk_stop(vdev); >>>> + ret = vhost_user_blk_stop(vdev); >>>> + if (ret < 0) { >>>> + return ret; >>>> + } >>>> } >>>> - >>>> + return 0; >>>> } >>>> >>>> static uint64_t vhost_user_blk_get_features(VirtIODevice *vdev, >>>> @@ -597,7 +600,7 @@ static void vhost_user_blk_class_init(ObjectClass >>>> *klass, void *data) >>>> vdc->get_config = vhost_user_blk_update_config; >>>> vdc->set_config = vhost_user_blk_set_config; >>>> vdc->get_features = vhost_user_blk_get_features; >>>> - vdc->set_status = vhost_user_blk_set_status; >>>> + vdc->set_status_ext = vhost_user_blk_set_status; >>>> vdc->reset = vhost_user_blk_reset; >>>> vdc->get_vhost = vhost_user_blk_get_vhost; >>>> } >>>> diff --git a/hw/scsi/vhost-scsi-common.c b/hw/scsi/vhost-scsi-common.c >>>> index 4c8637045d..43525ba46d 100644 >>>> --- a/hw/scsi/vhost-scsi-common.c >>>> +++ b/hw/scsi/vhost-scsi-common.c >>>> @@ -101,24 +101,25 @@ err_host_notifiers: >>>> return ret; >>>> } >>>> >>>> -void vhost_scsi_common_stop(VHostSCSICommon *vsc) >>>> +int vhost_scsi_common_stop(VHostSCSICommon *vsc) >>>> { >>>> VirtIODevice *vdev = VIRTIO_DEVICE(vsc); >>>> BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(vdev))); >>>> VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus); >>>> int ret = 0; >>>> >>>> - vhost_dev_stop(&vsc->dev, vdev, true); >>>> + ret = vhost_dev_stop(&vsc->dev, vdev, true); >>>> >>>> if (k->set_guest_notifiers) { >>>> - ret = k->set_guest_notifiers(qbus->parent, vsc->dev.nvqs, false); >>>> - if (ret < 0) { >>>> - error_report("vhost guest notifier cleanup failed: %d", >>>> ret); >>>> + int r = k->set_guest_notifiers(qbus->parent, vsc->dev.nvqs, >>>> false); >>>> + if (r < 0) { >>>> + error_report("vhost guest notifier cleanup failed: %d", ret); >>> >>> The variable `ret` in the error_report() seems wrong. >> >> Ohh, thanks, I will fix it later. >> >>> >>>> + return r; >>>> } >>>> } >>>> - assert(ret >= 0); >>>> >>>> vhost_dev_disable_notifiers(&vsc->dev, vdev); >>>> + return ret; >>>> } >>>> >>>> uint64_t vhost_scsi_common_get_features(VirtIODevice *vdev, uint64_t >>>> features, >>>> diff --git a/hw/scsi/vhost-user-scsi.c b/hw/scsi/vhost-user-scsi.c >>>> index adb41b9816..8e7efc38f2 100644 >>>> --- a/hw/scsi/vhost-user-scsi.c >>>> +++ b/hw/scsi/vhost-user-scsi.c >>>> @@ -52,19 +52,19 @@ static int vhost_user_scsi_start(VHostUserSCSI *s, >>>> Error **errp) >>>> return ret; >>>> } >>>> >>>> -static void vhost_user_scsi_stop(VHostUserSCSI *s) >>>> +static int vhost_user_scsi_stop(VHostUserSCSI *s) >>>> { >>>> VHostSCSICommon *vsc = VHOST_SCSI_COMMON(s); >>>> >>>> if (!s->started_vu) { >>>> - return; >>>> + return 0; >>>> } >>>> s->started_vu = false; >>>> >>>> - vhost_scsi_common_stop(vsc); >>>> + return vhost_scsi_common_stop(vsc); >>>> } >>>> >>>> -static void vhost_user_scsi_set_status(VirtIODevice *vdev, uint8_t status) >>>> +static int vhost_user_scsi_set_status(VirtIODevice *vdev, uint8_t status) >>>> { >>>> VHostUserSCSI *s = (VHostUserSCSI *)vdev; >>>> DeviceState *dev = DEVICE(vdev); >>>> @@ -75,11 +75,11 @@ static void vhost_user_scsi_set_status(VirtIODevice >>>> *vdev, uint8_t status) >>>> int ret; >>>> >>>> if (!s->connected) { >>>> - return; >>>> + return -1; >>>> } >>>> >>>> if (vhost_dev_is_started(&vsc->dev) == should_start) { >>>> - return; >>>> + return 0; >>>> } >>>> >>>> if (should_start) { >>>> @@ -91,8 +91,12 @@ static void vhost_user_scsi_set_status(VirtIODevice >>>> *vdev, uint8_t status) >>>> qemu_chr_fe_disconnect(&vs->conf.chardev); >>>> } >>>> } else { >>>> - vhost_user_scsi_stop(s); >>>> + ret = vhost_user_scsi_stop(s); >>>> + if (ret) { >>>> + return ret; >>>> + } >>>> } >>>> + return 0; >>>> } >>>> >>>> static void vhost_user_scsi_handle_output(VirtIODevice *vdev, VirtQueue >>>> *vq) >>>> @@ -399,7 +403,7 @@ static void vhost_user_scsi_class_init(ObjectClass >>>> *klass, void *data) >>>> vdc->unrealize = vhost_user_scsi_unrealize; >>>> vdc->get_features = vhost_scsi_common_get_features; >>>> vdc->set_config = vhost_scsi_common_set_config; >>>> - vdc->set_status = vhost_user_scsi_set_status; >>>> + vdc->set_status_ext = vhost_user_scsi_set_status; >>>> fwc->get_dev_path = vhost_scsi_common_get_fw_dev_path; >>>> vdc->reset = vhost_user_scsi_reset; >>>> vdc->get_vhost = vhost_user_scsi_get_vhost; >>>> diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c >>>> index 5e8d4cab53..fff7cdb35d 100644 >>>> --- a/hw/virtio/virtio.c >>>> +++ b/hw/virtio/virtio.c >>>> @@ -2221,12 +2221,12 @@ int virtio_set_status(VirtIODevice *vdev, uint8_t >>>> val) >>>> { >>>> VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev); >>>> trace_virtio_set_status(vdev, val); >>>> + int ret = 0; >>>> >>>> if (virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) { >>>> if (!(vdev->status & VIRTIO_CONFIG_S_FEATURES_OK) && >>>> val & VIRTIO_CONFIG_S_FEATURES_OK) { >>>> - int ret = virtio_validate_features(vdev); >>>> - >>>> + ret = virtio_validate_features(vdev); >>>> if (ret) { >>>> return ret; >>>> } >>>> @@ -2238,12 +2238,18 @@ int virtio_set_status(VirtIODevice *vdev, uint8_t >>>> val) >>>> virtio_set_started(vdev, val & VIRTIO_CONFIG_S_DRIVER_OK); >>>> } >>>> >>>> - if (k->set_status) { >>>> + if (k->set_status_ext) { >>>> + ret = k->set_status_ext(vdev, val); >>>> + if (ret) { >>>> + qemu_log("set %s status to %d failed, old status: %d\n", >>>> + vdev->name, val, vdev->status); >>>> + } >>>> + } else if (k->set_status) { >>>> k->set_status(vdev, val); >>>> } >>>> vdev->status = val; >>>> >>>> - return 0; >>>> + return ret; >>>> } >>>> >>>> static enum virtio_device_endian virtio_default_endian(void) >>>> @@ -3436,7 +3442,11 @@ static int virtio_vmstate_change(void *opaque, bool >>>> running, RunState state) >>>> } >>>> >>>> if (!backend_run) { >>>> - virtio_set_status(vdev, vdev->status); >>>> + // the return value was used for stopping VM during migration >>> >>> Can you elaborate a bit this comment? >> >> This comment is to explain that the return value is used to indicate that >> the live migration of the stop vhost-user device failed cuz the lost >> connection with backend. >> >>> >>>> + int ret = virtio_set_status(vdev, vdev->status); >>>> + if (ret) { >>>> + return ret; >>>> + } >>>> } >>>> return 0; >>>> } >>>> diff --git a/include/hw/virtio/vhost-scsi-common.h >>>> b/include/hw/virtio/vhost-scsi-common.h >>>> index c5d2c09455..d54d9c916f 100644 >>>> --- a/include/hw/virtio/vhost-scsi-common.h >>>> +++ b/include/hw/virtio/vhost-scsi-common.h >>>> @@ -40,7 +40,7 @@ struct VHostSCSICommon { >>>> }; >>>> >>>> int vhost_scsi_common_start(VHostSCSICommon *vsc, Error **errp); >>>> -void vhost_scsi_common_stop(VHostSCSICommon *vsc); >>>> +int vhost_scsi_common_stop(VHostSCSICommon *vsc); >>>> char *vhost_scsi_common_get_fw_dev_path(FWPathProvider *p, BusState *bus, >>>> DeviceState *dev); >>>> void vhost_scsi_common_set_config(VirtIODevice *vdev, const uint8_t >>>> *config); >>>> diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h >>>> index 6386910280..c99d56f519 100644 >>>> --- a/include/hw/virtio/virtio.h >>>> +++ b/include/hw/virtio/virtio.h >>>> @@ -187,6 +187,7 @@ struct VirtioDeviceClass { >>>> void (*set_config)(VirtIODevice *vdev, const uint8_t *config); >>>> void (*reset)(VirtIODevice *vdev); >>>> void (*set_status)(VirtIODevice *vdev, uint8_t val); >>>> + int (*set_status_ext)(VirtIODevice *vdev, uint8_t val); >>> >>> Why we need a new callback instead having `set_status` returning int ? >> >> Because there are other devices such as virtio-net, virtio-ballon, etc., >> we only focus on vhost-user-blk/scsi when live migration. > > Why only them? > > What I mean, is why in devices where it's not important, don't we just return > 0? > It seems more complicated to maintain and confusing for new devices to have 2 > callbacks for the same thing. > > Stefano
The series of these patches only want to fix that the inflight IO can't be completed due to the disconnection between and the vhost-user backend for vhost-user-blk / scsi devices during live migration. For other virito devices the issue does not exist, and `vm_state_notify` cannot distinguish specific devices, it's better not to return error. I try to list the virtio sub-devices as follows: hw/virtio/virtio-iommu.c: vdc->set_status = virtio_iommu_set_status; hw/virtio/virtio-balloon.c: vdc->set_status = virtio_balloon_set_status; hw/virtio/virtio-rng.c: vdc->set_status = virtio_rng_set_status; hw/virtio/virtio-crypto.c: vdc->set_status = virtio_crypto_set_status; hw/virtio/vhost-vsock.c: vdc->set_status = vhost_vsock_set_status; hw/virtio/vhost-user-vsock.c: vdc->set_status = vuv_set_status; hw/virtio/vhost-user-scmi.c: vdc->set_status = vu_scmi_set_status; hw/virtio/vhost-user-fs.c: vdc->set_status = vuf_set_status; hw/virtio/vhost-user-base.c: vdc->set_status = vub_set_status; hw/virtio/vdpa-dev.c: vdc->set_status = vhost_vdpa_device_set_status; tests/qtest/libqos/virtio-pci.c: .set_status = qvirtio_pci_set_status, tests/qtest/libqos/virtio-pci-modern.c: .set_status = set_status, tests/qtest/libqos/virtio-mmio.c: .set_status = qvirtio_mmio_set_status, hw/scsi/vhost-user-scsi.c: vdc->set_status = vhost_user_scsi_set_status; hw/scsi/vhost-scsi.c: vdc->set_status = vhost_scsi_set_status; hw/net/virtio-net.c: vdc->set_status = virtio_net_set_status; hw/char/virtio-serial-bus.c: vdc->set_status = set_status; hw/block/vhost-user-blk.c: vdc->set_status = vhost_user_blk_set_status; hw/block/virtio-blk.c: vdc->set_status = virtio_blk_set_status; If the new function pointer type is not added, the number of functions affected will be very huge. Although it may seem a bit complicated to use two callbacks, it's much safer. Thanks, Haoqian