Thanks for reviewing, I will fix this grammar issue in the next version.
> 
> 
> 2025年3月10日 00:30,Raphael Norwitz <raph...@enfabrica.net> 写道:
> 
> I'll let others chime in on higher level device model changes you're
> making. For the vhost-user-blk and vhost-user-scsi bits:
> 
> On Sun, Mar 9, 2025 at 11:41 AM Haoqian He <haoqian...@smartx.com> wrote:
>> 
>> Live migration should be terminated if backend crash.
> 
> nit: "...if the backend crashes before the migration completes."
> 
>> 
>> Since the vhost device will be stopped when VM is stopped before
>> the end of the live migration, current implementation if vhost
>> backend died, vhost device's set_status() will not return failure,
>> live migration won't perceive the disconnection between qemu and
>> vhost backend, inflight io would be submitted in migration target
>> host, leading to IO error.
>> 
>> To fix this issue:
>> 1. Add set_status_ext() which has return value for
>> VirtioDeviceClass and vhost-user-blk/scsi use the _ext version.
>> 
>> 2. In set_status_ext(), return failure if the flag `connected`
>> is false or vhost_dev_stop return failure, which means qemu lost
>> connection with backend.
> 
> I have some concerns with [2]. See my later responses.
> 
>> 
>> Hence migration_completion() will process failure, terminate
>> migration and restore VM.
>> 
>> Signed-off-by: Haoqian He <haoqian...@smartx.com>
>> ---
>> hw/block/vhost-user-blk.c             | 29 +++++++++++++++------------
>> hw/scsi/vhost-scsi-common.c           | 11 +++++-----
>> hw/scsi/vhost-user-scsi.c             | 20 ++++++++++--------
>> hw/virtio/virtio.c                    | 20 +++++++++++++-----
>> include/hw/virtio/vhost-scsi-common.h |  2 +-
>> include/hw/virtio/virtio.h            |  1 +
>> 6 files changed, 51 insertions(+), 32 deletions(-)
>> 
>> diff --git a/hw/block/vhost-user-blk.c b/hw/block/vhost-user-blk.c
>> index ae42327cf8..4865786c54 100644
>> --- a/hw/block/vhost-user-blk.c
>> +++ b/hw/block/vhost-user-blk.c
>> @@ -204,7 +204,7 @@ err_host_notifiers:
>>     return ret;
>> }
>> 
>> -static void vhost_user_blk_stop(VirtIODevice *vdev)
>> +static int vhost_user_blk_stop(VirtIODevice *vdev)
>> {
>>     VHostUserBlk *s = VHOST_USER_BLK(vdev);
>>     BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(vdev)));
>> @@ -212,26 +212,26 @@ static void vhost_user_blk_stop(VirtIODevice *vdev)
>>     int ret;
>> 
>>     if (!s->started_vu) {
>> -        return;
>> +        return 0;
>>     }
>>     s->started_vu = false;
>> 
>>     if (!k->set_guest_notifiers) {
>> -        return;
>> +        return 0;
>>     }
>> 
>> -    vhost_dev_stop(&s->dev, vdev, true);
>> +    ret = vhost_dev_stop(&s->dev, vdev, true);
>> 
>> -    ret = k->set_guest_notifiers(qbus->parent, s->dev.nvqs, false);
>> -    if (ret < 0) {
>> +    if (k->set_guest_notifiers(qbus->parent, s->dev.nvqs, false) < 0) {
>>         error_report("vhost guest notifier cleanup failed: %d", ret);
>> -        return;
>> +        return -1;
>>     }
>> 
>>     vhost_dev_disable_notifiers(&s->dev, vdev);
>> +    return ret;
>> }
>> 
>> -static void vhost_user_blk_set_status(VirtIODevice *vdev, uint8_t status)
>> +static int vhost_user_blk_set_status(VirtIODevice *vdev, uint8_t status)
>> {
>>     VHostUserBlk *s = VHOST_USER_BLK(vdev);
>>     bool should_start = virtio_device_should_start(vdev, status);
>> @@ -239,11 +239,11 @@ static void vhost_user_blk_set_status(VirtIODevice 
>> *vdev, uint8_t status)
>>     int ret;
>> 
> 

The series of patches will not impact power-on if the backend is temporarily
down. The first patch ([PATCH 1/3] virtio: add VM state change cb with return
value) ignores the return value of VMChangeStateHandlerExt, which is
vhost_user_blk_set_status:

@@ -393,7 +398,12 @@ void vm_state_notify(bool running, RunState state)
        }

        QTAILQ_FOREACH_SAFE(e, &vm_change_state_head, entries, next) {
-            e->cb(e->opaque, running, state);
+            if (e->cb) {
+                e->cb(e->opaque, running, state);
+            } else if (e->cb_ext) {
+                // no need to process the result when starting VM
+                e->cb_ext(e->opaque, running, state);
+            }
        }
    } else {
        QTAILQ_FOREACH_REVERSE_SAFE(e, &vm_change_state_head, entries, next) {
@@ -403,9 +413,14 @@ void vm_state_notify(bool running, RunState state)
        }

        QTAILQ_FOREACH_REVERSE_SAFE(e, &vm_change_state_head, entries, next) {
-            e->cb(e->opaque, running, state);
+            if (e->cb) {
+                e->cb(e->opaque, running, state);
+            } else if (e->cb_ext) {
+                ret |= e->cb_ext(e->opaque, running, state);
+            }
        }
    }
+    return ret;
}
> 
> 
> Do we want to fail out in all cases where vhost_user_blk_set_status()
> and the device is not connected? Could this impact power-on if the
> backend is temporarily down?
> 
>>     if (!s->connected) {
>> -        return;
>> +        return -1;
>>     }
>> 
>>     if (vhost_dev_is_started(&s->dev) == should_start) {
>> -        return;
>> +        return 0;
>>     }
>> 
>>     if (should_start) {
>> @@ -253,9 +253,12 @@ static void vhost_user_blk_set_status(VirtIODevice 
>> *vdev, uint8_t status)
>>             qemu_chr_fe_disconnect(&s->chardev);
>>         }
>>     } else {
>> -        vhost_user_blk_stop(vdev);
>> +        ret = vhost_user_blk_stop(vdev);
>> +        if (ret < 0) {
>> +            return ret;
>> +        }
>>     }
>> -
>> +    return 0;
>> }
>> 
>> static uint64_t vhost_user_blk_get_features(VirtIODevice *vdev,
>> @@ -597,7 +600,7 @@ static void vhost_user_blk_class_init(ObjectClass 
>> *klass, void *data)
>>     vdc->get_config = vhost_user_blk_update_config;
>>     vdc->set_config = vhost_user_blk_set_config;
>>     vdc->get_features = vhost_user_blk_get_features;
>> -    vdc->set_status = vhost_user_blk_set_status;
>> +    vdc->set_status_ext = vhost_user_blk_set_status;
>>     vdc->reset = vhost_user_blk_reset;
>>     vdc->get_vhost = vhost_user_blk_get_vhost;
>> }
>> diff --git a/hw/scsi/vhost-scsi-common.c b/hw/scsi/vhost-scsi-common.c
>> index 4c8637045d..bfeed0cb1b 100644
>> --- a/hw/scsi/vhost-scsi-common.c
>> +++ b/hw/scsi/vhost-scsi-common.c
>> @@ -101,24 +101,25 @@ err_host_notifiers:
>>     return ret;
>> }
>> 
>> -void vhost_scsi_common_stop(VHostSCSICommon *vsc)
>> +int vhost_scsi_common_stop(VHostSCSICommon *vsc)
>> {
>>     VirtIODevice *vdev = VIRTIO_DEVICE(vsc);
>>     BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(vdev)));
>>     VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
>>     int ret = 0;
>> 
>> -    vhost_dev_stop(&vsc->dev, vdev, true);
>> +    ret = vhost_dev_stop(&vsc->dev, vdev, true);
>> 
>>     if (k->set_guest_notifiers) {
>> -        ret = k->set_guest_notifiers(qbus->parent, vsc->dev.nvqs, false);
>> -        if (ret < 0) {
>> +        int r = k->set_guest_notifiers(qbus->parent, vsc->dev.nvqs, false);
>> +        if (r < 0) {
>>                 error_report("vhost guest notifier cleanup failed: %d", ret);
>>         }
> 

Yes, we can remove the assert here and return the return value, which will
also be modified in the next version.
> Now that we return back a value the assert here should probably be dropped?
> 
>> +        assert(r >= 0);
>>     }
>> -    assert(ret >= 0);
>> 
>>     vhost_dev_disable_notifiers(&vsc->dev, vdev);
>> +    return ret;
>> }
>> 
>> uint64_t vhost_scsi_common_get_features(VirtIODevice *vdev, uint64_t 
>> features,
>> diff --git a/hw/scsi/vhost-user-scsi.c b/hw/scsi/vhost-user-scsi.c
>> index adb41b9816..8e7efc38f2 100644
>> --- a/hw/scsi/vhost-user-scsi.c
>> +++ b/hw/scsi/vhost-user-scsi.c
>> @@ -52,19 +52,19 @@ static int vhost_user_scsi_start(VHostUserSCSI *s, Error 
>> **errp)
>>     return ret;
>> }
>> 
>> -static void vhost_user_scsi_stop(VHostUserSCSI *s)
>> +static int vhost_user_scsi_stop(VHostUserSCSI *s)
>> {
>>     VHostSCSICommon *vsc = VHOST_SCSI_COMMON(s);
>> 
>>     if (!s->started_vu) {
>> -        return;
>> +        return 0;
>>     }
>>     s->started_vu = false;
>> 
>> -    vhost_scsi_common_stop(vsc);
>> +    return vhost_scsi_common_stop(vsc);
>> }
>> 
>> -static void vhost_user_scsi_set_status(VirtIODevice *vdev, uint8_t status)
>> +static int vhost_user_scsi_set_status(VirtIODevice *vdev, uint8_t status)
>> {
>>     VHostUserSCSI *s = (VHostUserSCSI *)vdev;
>>     DeviceState *dev = DEVICE(vdev);
>> @@ -75,11 +75,11 @@ static void vhost_user_scsi_set_status(VirtIODevice 
>> *vdev, uint8_t status)
>>     int ret;
>> 
>>     if (!s->connected) {
>> -        return;
>> +        return -1;
>>     }
>> 
>>     if (vhost_dev_is_started(&vsc->dev) == should_start) {
>> -        return;
>> +        return 0;
>>     }
>> 
>>     if (should_start) {
>> @@ -91,8 +91,12 @@ static void vhost_user_scsi_set_status(VirtIODevice 
>> *vdev, uint8_t status)
>>             qemu_chr_fe_disconnect(&vs->conf.chardev);
>>         }
>>     } else {
>> -        vhost_user_scsi_stop(s);
>> +        ret = vhost_user_scsi_stop(s);
>> +        if (ret) {
>> +            return ret;
>> +        }
>>     }
>> +    return 0;
>> }
>> 
>> static void vhost_user_scsi_handle_output(VirtIODevice *vdev, VirtQueue *vq)
>> @@ -399,7 +403,7 @@ static void vhost_user_scsi_class_init(ObjectClass 
>> *klass, void *data)
>>     vdc->unrealize = vhost_user_scsi_unrealize;
>>     vdc->get_features = vhost_scsi_common_get_features;
>>     vdc->set_config = vhost_scsi_common_set_config;
>> -    vdc->set_status = vhost_user_scsi_set_status;
>> +    vdc->set_status_ext = vhost_user_scsi_set_status;
>>     fwc->get_dev_path = vhost_scsi_common_get_fw_dev_path;
>>     vdc->reset = vhost_user_scsi_reset;
>>     vdc->get_vhost = vhost_user_scsi_get_vhost;
>> diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c
>> index 5e8d4cab53..fff7cdb35d 100644
>> --- a/hw/virtio/virtio.c
>> +++ b/hw/virtio/virtio.c
>> @@ -2221,12 +2221,12 @@ int virtio_set_status(VirtIODevice *vdev, uint8_t 
>> val)
>> {
>>     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
>>     trace_virtio_set_status(vdev, val);
>> +    int ret = 0;
>> 
>>     if (virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
>>         if (!(vdev->status & VIRTIO_CONFIG_S_FEATURES_OK) &&
>>             val & VIRTIO_CONFIG_S_FEATURES_OK) {
>> -            int ret = virtio_validate_features(vdev);
>> -
>> +            ret = virtio_validate_features(vdev);
>>             if (ret) {
>>                 return ret;
>>             }
>> @@ -2238,12 +2238,18 @@ int virtio_set_status(VirtIODevice *vdev, uint8_t 
>> val)
>>         virtio_set_started(vdev, val & VIRTIO_CONFIG_S_DRIVER_OK);
>>     }
>> 
>> -    if (k->set_status) {
>> +    if (k->set_status_ext) {
>> +        ret = k->set_status_ext(vdev, val);
>> +        if (ret) {
>> +            qemu_log("set %s status to %d failed, old status: %d\n",
>> +                     vdev->name, val, vdev->status);
>> +        }
>> +    } else if (k->set_status) {
>>         k->set_status(vdev, val);
>>     }
>>     vdev->status = val;
>> 
>> -    return 0;
>> +    return ret;
>> }
>> 
>> static enum virtio_device_endian virtio_default_endian(void)
>> @@ -3436,7 +3442,11 @@ static int virtio_vmstate_change(void *opaque, bool 
>> running, RunState state)
>>     }
>> 
>>     if (!backend_run) {
>> -        virtio_set_status(vdev, vdev->status);
>> +        // the return value was used for stopping VM during migration
>> +        int ret = virtio_set_status(vdev, vdev->status);
>> +        if (ret) {
>> +            return ret;
>> +        }
>>     }
>>     return 0;
>> }
>> diff --git a/include/hw/virtio/vhost-scsi-common.h 
>> b/include/hw/virtio/vhost-scsi-common.h
>> index c5d2c09455..d54d9c916f 100644
>> --- a/include/hw/virtio/vhost-scsi-common.h
>> +++ b/include/hw/virtio/vhost-scsi-common.h
>> @@ -40,7 +40,7 @@ struct VHostSCSICommon {
>> };
>> 
>> int vhost_scsi_common_start(VHostSCSICommon *vsc, Error **errp);
>> -void vhost_scsi_common_stop(VHostSCSICommon *vsc);
>> +int vhost_scsi_common_stop(VHostSCSICommon *vsc);
>> char *vhost_scsi_common_get_fw_dev_path(FWPathProvider *p, BusState *bus,
>>                                         DeviceState *dev);
>> void vhost_scsi_common_set_config(VirtIODevice *vdev, const uint8_t *config);
>> diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h
>> index 6386910280..c99d56f519 100644
>> --- a/include/hw/virtio/virtio.h
>> +++ b/include/hw/virtio/virtio.h
>> @@ -187,6 +187,7 @@ struct VirtioDeviceClass {
>>     void (*set_config)(VirtIODevice *vdev, const uint8_t *config);
>>     void (*reset)(VirtIODevice *vdev);
>>     void (*set_status)(VirtIODevice *vdev, uint8_t val);
>> +    int (*set_status_ext)(VirtIODevice *vdev, uint8_t val);
>>     /* Device must validate queue_index.  */
>>     void (*queue_reset)(VirtIODevice *vdev, uint32_t queue_index);
>>     /* Device must validate queue_index.  */
>> --
>> 2.48.1


Thanks

--
Haoqian


Reply via email to