> 2025年3月19日 23:11,Stefano Garzarella <sgarz...@redhat.com> 写道: > > On Fri, Mar 14, 2025 at 06:15:33AM -0400, Haoqian He wrote: >> The backend maybe crash when vhost_dev_stop and GET_VRING_BASE >> would fail, we can return failure to indicate the connection >> with the backend is broken. >> >> Signed-off-by: Haoqian He <haoqian...@smartx.com> >> --- >> hw/virtio/vhost.c | 27 +++++++++++++++------------ >> include/hw/virtio/vhost.h | 8 +++++--- >> 2 files changed, 20 insertions(+), 15 deletions(-) >> >> diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c >> index 6aa72fd434..c82bbbe4cc 100644 >> --- a/hw/virtio/vhost.c >> +++ b/hw/virtio/vhost.c >> @@ -1368,23 +1368,23 @@ fail_alloc_desc: >> return r; >> } >> >> -void vhost_virtqueue_stop(struct vhost_dev *dev, >> - struct VirtIODevice *vdev, >> - struct vhost_virtqueue *vq, >> - unsigned idx) >> +int vhost_virtqueue_stop(struct vhost_dev *dev, >> + struct VirtIODevice *vdev, >> + struct vhost_virtqueue *vq, >> + unsigned idx) >> { >> int vhost_vq_index = dev->vhost_ops->vhost_get_vq_index(dev, idx); >> struct vhost_vring_state state = { >> .index = vhost_vq_index, >> }; >> - int r; >> + int r = 0; >> >> if (virtio_queue_get_desc_addr(vdev, idx) == 0) { >> /* Don't stop the virtqueue which might have not been started */ >> - return; >> + return 0; >> } >> >> - r = dev->vhost_ops->vhost_get_vring_base(dev, &state); >> + r |= dev->vhost_ops->vhost_get_vring_base(dev, &state); > > We can avoid this and also initialize r to 0.
Here we need to do `vhost_virtqueue_stop` for each vq. > >> if (r < 0) { >> VHOST_OPS_DEBUG(r, "vhost VQ %u ring restore failed: %d", idx, r); >> /* Connection to the backend is broken, so let's sync internal >> @@ -1412,6 +1412,7 @@ void vhost_virtqueue_stop(struct vhost_dev *dev, >> 0, virtio_queue_get_avail_size(vdev, idx)); >> vhost_memory_unmap(dev, vq->desc, virtio_queue_get_desc_size(vdev, idx), >> 0, virtio_queue_get_desc_size(vdev, idx)); >> + return r; >> } >> >> static int vhost_virtqueue_set_busyloop_timeout(struct vhost_dev *dev, >> @@ -2136,9 +2137,10 @@ fail_features: >> } >> >> /* Host notifiers must be enabled at this point. */ >> -void vhost_dev_stop(struct vhost_dev *hdev, VirtIODevice *vdev, bool vrings) >> +int vhost_dev_stop(struct vhost_dev *hdev, VirtIODevice *vdev, bool vrings) >> { >> int i; >> + int rc = 0; >> >> /* should only be called after backend is connected */ >> assert(hdev->vhost_ops); >> @@ -2157,10 +2159,10 @@ void vhost_dev_stop(struct vhost_dev *hdev, >> VirtIODevice *vdev, bool vrings) >> vhost_dev_set_vring_enable(hdev, false); >> } >> for (i = 0; i < hdev->nvqs; ++i) { >> - vhost_virtqueue_stop(hdev, >> - vdev, >> - hdev->vqs + i, >> - hdev->vq_index + i); >> + rc |= vhost_virtqueue_stop(hdev, >> + vdev, >> + hdev->vqs + i, >> + hdev->vq_index + i); > > Also other function can fails, should we consider also them? > (e.g. , vhost_dev_set_vring_enable, etc.) > > If not, why? Since we only want to know the return value of callback when the stopping device live migration, there is no need to catch the failure of `vhost_dev_start`. We can also catch the failure of `vhost_dev_set_vring_enable`, because `vhost_dev_set_vring_enable` will also fail if qemu lost connection with the the backend, but I need to test it. > >> } >> if (hdev->vhost_ops->vhost_reset_status) { >> hdev->vhost_ops->vhost_reset_status(hdev); >> @@ -2177,6 +2179,7 @@ void vhost_dev_stop(struct vhost_dev *hdev, >> VirtIODevice *vdev, bool vrings) >> hdev->started = false; >> vdev->vhost_started = false; >> hdev->vdev = NULL; >> + return rc; >> } >> >> int vhost_net_set_backend(struct vhost_dev *hdev, >> diff --git a/include/hw/virtio/vhost.h b/include/hw/virtio/vhost.h >> index a9469d50bc..fd96ec9c39 100644 >> --- a/include/hw/virtio/vhost.h >> +++ b/include/hw/virtio/vhost.h >> @@ -232,8 +232,10 @@ int vhost_dev_start(struct vhost_dev *hdev, >> VirtIODevice *vdev, bool vrings); >> * Stop the vhost device. After the device is stopped the notifiers >> * can be disabled (@vhost_dev_disable_notifiers) and the device can >> * be torn down (@vhost_dev_cleanup). >> + * >> + * Return: 0 on success, != 0 on error when stopping dev. >> */ >> -void vhost_dev_stop(struct vhost_dev *hdev, VirtIODevice *vdev, bool >> vrings); >> +int vhost_dev_stop(struct vhost_dev *hdev, VirtIODevice *vdev, bool vrings); >> >> /** >> * DOC: vhost device configuration handling >> @@ -333,8 +335,8 @@ int vhost_device_iotlb_miss(struct vhost_dev *dev, >> uint64_t iova, int write); >> >> int vhost_virtqueue_start(struct vhost_dev *dev, struct VirtIODevice *vdev, >> struct vhost_virtqueue *vq, unsigned idx); >> -void vhost_virtqueue_stop(struct vhost_dev *dev, struct VirtIODevice *vdev, >> - struct vhost_virtqueue *vq, unsigned idx); >> +int vhost_virtqueue_stop(struct vhost_dev *dev, struct VirtIODevice *vdev, >> + struct vhost_virtqueue *vq, unsigned idx); >> >> void vhost_dev_reset_inflight(struct vhost_inflight *inflight); >> void vhost_dev_free_inflight(struct vhost_inflight *inflight); >> -- >> 2.48.1