I'll let others chime in on higher level device model changes you're
making. For the vhost-user-blk and vhost-user-scsi bits:

On Sun, Mar 9, 2025 at 11:41 AM Haoqian He <haoqian...@smartx.com> wrote:
>
> Live migration should be terminated if backend crash.

nit: "...if the backend crashes before the migration completes."

>
> Since the vhost device will be stopped when VM is stopped before
> the end of the live migration, current implementation if vhost
> backend died, vhost device's set_status() will not return failure,
> live migration won't perceive the disconnection between qemu and
> vhost backend, inflight io would be submitted in migration target
> host, leading to IO error.
>
> To fix this issue:
> 1. Add set_status_ext() which has return value for
> VirtioDeviceClass and vhost-user-blk/scsi use the _ext version.
>
> 2. In set_status_ext(), return failure if the flag `connected`
> is false or vhost_dev_stop return failure, which means qemu lost
> connection with backend.

I have some concerns with [2]. See my later responses.

>
> Hence migration_completion() will process failure, terminate
> migration and restore VM.
>
> Signed-off-by: Haoqian He <haoqian...@smartx.com>
> ---
>  hw/block/vhost-user-blk.c             | 29 +++++++++++++++------------
>  hw/scsi/vhost-scsi-common.c           | 11 +++++-----
>  hw/scsi/vhost-user-scsi.c             | 20 ++++++++++--------
>  hw/virtio/virtio.c                    | 20 +++++++++++++-----
>  include/hw/virtio/vhost-scsi-common.h |  2 +-
>  include/hw/virtio/virtio.h            |  1 +
>  6 files changed, 51 insertions(+), 32 deletions(-)
>
> diff --git a/hw/block/vhost-user-blk.c b/hw/block/vhost-user-blk.c
> index ae42327cf8..4865786c54 100644
> --- a/hw/block/vhost-user-blk.c
> +++ b/hw/block/vhost-user-blk.c
> @@ -204,7 +204,7 @@ err_host_notifiers:
>      return ret;
>  }
>
> -static void vhost_user_blk_stop(VirtIODevice *vdev)
> +static int vhost_user_blk_stop(VirtIODevice *vdev)
>  {
>      VHostUserBlk *s = VHOST_USER_BLK(vdev);
>      BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(vdev)));
> @@ -212,26 +212,26 @@ static void vhost_user_blk_stop(VirtIODevice *vdev)
>      int ret;
>
>      if (!s->started_vu) {
> -        return;
> +        return 0;
>      }
>      s->started_vu = false;
>
>      if (!k->set_guest_notifiers) {
> -        return;
> +        return 0;
>      }
>
> -    vhost_dev_stop(&s->dev, vdev, true);
> +    ret = vhost_dev_stop(&s->dev, vdev, true);
>
> -    ret = k->set_guest_notifiers(qbus->parent, s->dev.nvqs, false);
> -    if (ret < 0) {
> +    if (k->set_guest_notifiers(qbus->parent, s->dev.nvqs, false) < 0) {
>          error_report("vhost guest notifier cleanup failed: %d", ret);
> -        return;
> +        return -1;
>      }
>
>      vhost_dev_disable_notifiers(&s->dev, vdev);
> +    return ret;
>  }
>
> -static void vhost_user_blk_set_status(VirtIODevice *vdev, uint8_t status)
> +static int vhost_user_blk_set_status(VirtIODevice *vdev, uint8_t status)
>  {
>      VHostUserBlk *s = VHOST_USER_BLK(vdev);
>      bool should_start = virtio_device_should_start(vdev, status);
> @@ -239,11 +239,11 @@ static void vhost_user_blk_set_status(VirtIODevice 
> *vdev, uint8_t status)
>      int ret;
>

Do we want to fail out in all cases where vhost_user_blk_set_status()
and the device is not connected? Could this impact power-on if the
backend is temporarily down?

>      if (!s->connected) {
> -        return;
> +        return -1;
>      }
>
>      if (vhost_dev_is_started(&s->dev) == should_start) {
> -        return;
> +        return 0;
>      }
>
>      if (should_start) {
> @@ -253,9 +253,12 @@ static void vhost_user_blk_set_status(VirtIODevice 
> *vdev, uint8_t status)
>              qemu_chr_fe_disconnect(&s->chardev);
>          }
>      } else {
> -        vhost_user_blk_stop(vdev);
> +        ret = vhost_user_blk_stop(vdev);
> +        if (ret < 0) {
> +            return ret;
> +        }
>      }
> -
> +    return 0;
>  }
>
>  static uint64_t vhost_user_blk_get_features(VirtIODevice *vdev,
> @@ -597,7 +600,7 @@ static void vhost_user_blk_class_init(ObjectClass *klass, 
> void *data)
>      vdc->get_config = vhost_user_blk_update_config;
>      vdc->set_config = vhost_user_blk_set_config;
>      vdc->get_features = vhost_user_blk_get_features;
> -    vdc->set_status = vhost_user_blk_set_status;
> +    vdc->set_status_ext = vhost_user_blk_set_status;
>      vdc->reset = vhost_user_blk_reset;
>      vdc->get_vhost = vhost_user_blk_get_vhost;
>  }
> diff --git a/hw/scsi/vhost-scsi-common.c b/hw/scsi/vhost-scsi-common.c
> index 4c8637045d..bfeed0cb1b 100644
> --- a/hw/scsi/vhost-scsi-common.c
> +++ b/hw/scsi/vhost-scsi-common.c
> @@ -101,24 +101,25 @@ err_host_notifiers:
>      return ret;
>  }
>
> -void vhost_scsi_common_stop(VHostSCSICommon *vsc)
> +int vhost_scsi_common_stop(VHostSCSICommon *vsc)
>  {
>      VirtIODevice *vdev = VIRTIO_DEVICE(vsc);
>      BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(vdev)));
>      VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
>      int ret = 0;
>
> -    vhost_dev_stop(&vsc->dev, vdev, true);
> +    ret = vhost_dev_stop(&vsc->dev, vdev, true);
>
>      if (k->set_guest_notifiers) {
> -        ret = k->set_guest_notifiers(qbus->parent, vsc->dev.nvqs, false);
> -        if (ret < 0) {
> +        int r = k->set_guest_notifiers(qbus->parent, vsc->dev.nvqs, false);
> +        if (r < 0) {
>                  error_report("vhost guest notifier cleanup failed: %d", ret);
>          }

Now that we return back a value the assert here should probably be dropped?

> +        assert(r >= 0);
>      }
> -    assert(ret >= 0);
>
>      vhost_dev_disable_notifiers(&vsc->dev, vdev);
> +    return ret;
>  }
>
>  uint64_t vhost_scsi_common_get_features(VirtIODevice *vdev, uint64_t 
> features,
> diff --git a/hw/scsi/vhost-user-scsi.c b/hw/scsi/vhost-user-scsi.c
> index adb41b9816..8e7efc38f2 100644
> --- a/hw/scsi/vhost-user-scsi.c
> +++ b/hw/scsi/vhost-user-scsi.c
> @@ -52,19 +52,19 @@ static int vhost_user_scsi_start(VHostUserSCSI *s, Error 
> **errp)
>      return ret;
>  }
>
> -static void vhost_user_scsi_stop(VHostUserSCSI *s)
> +static int vhost_user_scsi_stop(VHostUserSCSI *s)
>  {
>      VHostSCSICommon *vsc = VHOST_SCSI_COMMON(s);
>
>      if (!s->started_vu) {
> -        return;
> +        return 0;
>      }
>      s->started_vu = false;
>
> -    vhost_scsi_common_stop(vsc);
> +    return vhost_scsi_common_stop(vsc);
>  }
>
> -static void vhost_user_scsi_set_status(VirtIODevice *vdev, uint8_t status)
> +static int vhost_user_scsi_set_status(VirtIODevice *vdev, uint8_t status)
>  {
>      VHostUserSCSI *s = (VHostUserSCSI *)vdev;
>      DeviceState *dev = DEVICE(vdev);
> @@ -75,11 +75,11 @@ static void vhost_user_scsi_set_status(VirtIODevice 
> *vdev, uint8_t status)
>      int ret;
>
>      if (!s->connected) {
> -        return;
> +        return -1;
>      }
>
>      if (vhost_dev_is_started(&vsc->dev) == should_start) {
> -        return;
> +        return 0;
>      }
>
>      if (should_start) {
> @@ -91,8 +91,12 @@ static void vhost_user_scsi_set_status(VirtIODevice *vdev, 
> uint8_t status)
>              qemu_chr_fe_disconnect(&vs->conf.chardev);
>          }
>      } else {
> -        vhost_user_scsi_stop(s);
> +        ret = vhost_user_scsi_stop(s);
> +        if (ret) {
> +            return ret;
> +        }
>      }
> +    return 0;
>  }
>
>  static void vhost_user_scsi_handle_output(VirtIODevice *vdev, VirtQueue *vq)
> @@ -399,7 +403,7 @@ static void vhost_user_scsi_class_init(ObjectClass 
> *klass, void *data)
>      vdc->unrealize = vhost_user_scsi_unrealize;
>      vdc->get_features = vhost_scsi_common_get_features;
>      vdc->set_config = vhost_scsi_common_set_config;
> -    vdc->set_status = vhost_user_scsi_set_status;
> +    vdc->set_status_ext = vhost_user_scsi_set_status;
>      fwc->get_dev_path = vhost_scsi_common_get_fw_dev_path;
>      vdc->reset = vhost_user_scsi_reset;
>      vdc->get_vhost = vhost_user_scsi_get_vhost;
> diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c
> index 5e8d4cab53..fff7cdb35d 100644
> --- a/hw/virtio/virtio.c
> +++ b/hw/virtio/virtio.c
> @@ -2221,12 +2221,12 @@ int virtio_set_status(VirtIODevice *vdev, uint8_t val)
>  {
>      VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
>      trace_virtio_set_status(vdev, val);
> +    int ret = 0;
>
>      if (virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
>          if (!(vdev->status & VIRTIO_CONFIG_S_FEATURES_OK) &&
>              val & VIRTIO_CONFIG_S_FEATURES_OK) {
> -            int ret = virtio_validate_features(vdev);
> -
> +            ret = virtio_validate_features(vdev);
>              if (ret) {
>                  return ret;
>              }
> @@ -2238,12 +2238,18 @@ int virtio_set_status(VirtIODevice *vdev, uint8_t val)
>          virtio_set_started(vdev, val & VIRTIO_CONFIG_S_DRIVER_OK);
>      }
>
> -    if (k->set_status) {
> +    if (k->set_status_ext) {
> +        ret = k->set_status_ext(vdev, val);
> +        if (ret) {
> +            qemu_log("set %s status to %d failed, old status: %d\n",
> +                     vdev->name, val, vdev->status);
> +        }
> +    } else if (k->set_status) {
>          k->set_status(vdev, val);
>      }
>      vdev->status = val;
>
> -    return 0;
> +    return ret;
>  }
>
>  static enum virtio_device_endian virtio_default_endian(void)
> @@ -3436,7 +3442,11 @@ static int virtio_vmstate_change(void *opaque, bool 
> running, RunState state)
>      }
>
>      if (!backend_run) {
> -        virtio_set_status(vdev, vdev->status);
> +        // the return value was used for stopping VM during migration
> +        int ret = virtio_set_status(vdev, vdev->status);
> +        if (ret) {
> +            return ret;
> +        }
>      }
>      return 0;
>  }
> diff --git a/include/hw/virtio/vhost-scsi-common.h 
> b/include/hw/virtio/vhost-scsi-common.h
> index c5d2c09455..d54d9c916f 100644
> --- a/include/hw/virtio/vhost-scsi-common.h
> +++ b/include/hw/virtio/vhost-scsi-common.h
> @@ -40,7 +40,7 @@ struct VHostSCSICommon {
>  };
>
>  int vhost_scsi_common_start(VHostSCSICommon *vsc, Error **errp);
> -void vhost_scsi_common_stop(VHostSCSICommon *vsc);
> +int vhost_scsi_common_stop(VHostSCSICommon *vsc);
>  char *vhost_scsi_common_get_fw_dev_path(FWPathProvider *p, BusState *bus,
>                                          DeviceState *dev);
>  void vhost_scsi_common_set_config(VirtIODevice *vdev, const uint8_t *config);
> diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h
> index 6386910280..c99d56f519 100644
> --- a/include/hw/virtio/virtio.h
> +++ b/include/hw/virtio/virtio.h
> @@ -187,6 +187,7 @@ struct VirtioDeviceClass {
>      void (*set_config)(VirtIODevice *vdev, const uint8_t *config);
>      void (*reset)(VirtIODevice *vdev);
>      void (*set_status)(VirtIODevice *vdev, uint8_t val);
> +    int (*set_status_ext)(VirtIODevice *vdev, uint8_t val);
>      /* Device must validate queue_index.  */
>      void (*queue_reset)(VirtIODevice *vdev, uint32_t queue_index);
>      /* Device must validate queue_index.  */
> --
> 2.48.1
>

Reply via email to