On Fri, Apr 1, 2022 at 9:31 AM Michael Qiu <qiud...@archeros.com> wrote: > > Currently, when VM poweroff, it will trigger vdpa > device(such as mlx bluefield2 VF) reset many times(with 1 datapath > queue pair and one control queue, triggered 3 times), this > leads to below issue: > > vhost VQ 2 ring restore failed: -22: Invalid argument (22) > > This because in vhost_net_stop(), it will stop all vhost device bind to > this virtio device, and in vhost_dev_stop(), qemu tries to stop the device > , then stop the queue: vhost_virtqueue_stop(). > > In vhost_dev_stop(), it resets the device, which clear some flags > in low level driver, and in next loop(stop other vhost backends), > qemu try to stop the queue corresponding to the vhost backend, > the driver finds that the VQ is invalied, this is the root cause. > > To solve the issue, vdpa should set vring unready, and > remove reset ops in device stop: vhost_dev_start(hdev, false). > > and implement a new function vhost_dev_reset, only reset backend > device after all vhost(per-queue) stoped.
Typo. > > Signed-off-by: Michael Qiu<qiud...@archeros.com> > Acked-by: Jason Wang <jasow...@redhat.com> Rethink this patch, consider there're devices that don't support set_vq_ready(). I wonder if we need 1) uAPI to tell the user space whether or not it supports set_vq_ready() 2) userspace will call SET_VRING_ENABLE() when the device supports otherwise it will use RESET. And for safety, I suggest tagging this as 7.1. > --- > v4 --> v3 > Nothing changed, becasue of issue with mimecast, > when the From: tag is different from the sender, > the some mail client will take the patch as an > attachment, RESEND v3 does not work, So resend > the patch as v4 > > v3 --> v2: > Call vhost_dev_reset() at the end of vhost_net_stop(). > > Since the vDPA device need re-add the status bit > VIRTIO_CONFIG_S_ACKNOWLEDGE and VIRTIO_CONFIG_S_DRIVER, > simply, add them inside vhost_vdpa_reset_device, and > the only way calling vhost_vdpa_reset_device is in > vhost_net_stop(), so it keeps the same behavior as before. > > v2 --> v1: > Implement a new function vhost_dev_reset, > reset the backend kernel device at last. > --- > hw/net/vhost_net.c | 24 +++++++++++++++++++++--- > hw/virtio/vhost-vdpa.c | 15 +++++++++------ > hw/virtio/vhost.c | 15 ++++++++++++++- > include/hw/virtio/vhost.h | 1 + > 4 files changed, 45 insertions(+), 10 deletions(-) > > diff --git a/hw/net/vhost_net.c b/hw/net/vhost_net.c > index 30379d2..422c9bf 100644 > --- a/hw/net/vhost_net.c > +++ b/hw/net/vhost_net.c > @@ -325,7 +325,7 @@ int vhost_net_start(VirtIODevice *dev, NetClientState > *ncs, > int total_notifiers = data_queue_pairs * 2 + cvq; > VirtIONet *n = VIRTIO_NET(dev); > int nvhosts = data_queue_pairs + cvq; > - struct vhost_net *net; > + struct vhost_net *net = NULL; > int r, e, i, index_end = data_queue_pairs * 2; > NetClientState *peer; > > @@ -391,8 +391,17 @@ int vhost_net_start(VirtIODevice *dev, NetClientState > *ncs, > err_start: > while (--i >= 0) { > peer = qemu_get_peer(ncs , i); > - vhost_net_stop_one(get_vhost_net(peer), dev); > + > + net = get_vhost_net(peer); > + > + vhost_net_stop_one(net, dev); > } > + > + /* We only reset backend vdpa device */ > + if (net && net->dev.vhost_ops->backend_type == VHOST_BACKEND_TYPE_VDPA) { > + vhost_dev_reset(&net->dev); > + } > + > e = k->set_guest_notifiers(qbus->parent, total_notifiers, false); > if (e < 0) { > fprintf(stderr, "vhost guest notifier cleanup failed: %d\n", e); > @@ -410,6 +419,7 @@ void vhost_net_stop(VirtIODevice *dev, NetClientState > *ncs, > VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(vbus); > VirtIONet *n = VIRTIO_NET(dev); > NetClientState *peer; > + struct vhost_net *net = NULL; > int total_notifiers = data_queue_pairs * 2 + cvq; > int nvhosts = data_queue_pairs + cvq; > int i, r; > @@ -420,7 +430,15 @@ void vhost_net_stop(VirtIODevice *dev, NetClientState > *ncs, > } else { > peer = qemu_get_peer(ncs, n->max_queue_pairs); > } > - vhost_net_stop_one(get_vhost_net(peer), dev); > + > + net = get_vhost_net(peer); > + > + vhost_net_stop_one(net, dev); > + } > + > + /* We only reset backend vdpa device */ > + if (net && net->dev.vhost_ops->backend_type == VHOST_BACKEND_TYPE_VDPA) { > + vhost_dev_reset(&net->dev); > } So we've already reset the device in vhost_vdpa_dev_start(), any reason we need to do it again here? > > r = k->set_guest_notifiers(qbus->parent, total_notifiers, false); > diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c > index c5ed7a3..3ef0199 100644 > --- a/hw/virtio/vhost-vdpa.c > +++ b/hw/virtio/vhost-vdpa.c > @@ -708,6 +708,11 @@ static int vhost_vdpa_reset_device(struct vhost_dev *dev) > > ret = vhost_vdpa_call(dev, VHOST_VDPA_SET_STATUS, &status); > trace_vhost_vdpa_reset_device(dev, status); > + > + /* Add back this status, so that the device could work next time*/ > + vhost_vdpa_add_status(dev, VIRTIO_CONFIG_S_ACKNOWLEDGE | > + VIRTIO_CONFIG_S_DRIVER); This seems to contradict the semantic of reset. > + > return ret; > } > > @@ -719,14 +724,14 @@ static int vhost_vdpa_get_vq_index(struct vhost_dev > *dev, int idx) > return idx; > } > > -static int vhost_vdpa_set_vring_ready(struct vhost_dev *dev) > +static int vhost_vdpa_set_vring_ready(struct vhost_dev *dev, unsigned int > ready) > { > int i; > trace_vhost_vdpa_set_vring_ready(dev); > for (i = 0; i < dev->nvqs; ++i) { > struct vhost_vring_state state = { > .index = dev->vq_index + i, > - .num = 1, > + .num = ready, > }; > vhost_vdpa_call(dev, VHOST_VDPA_SET_VRING_ENABLE, &state); > } > @@ -1088,8 +1093,9 @@ static int vhost_vdpa_dev_start(struct vhost_dev *dev, > bool started) > if (unlikely(!ok)) { > return -1; > } > - vhost_vdpa_set_vring_ready(dev); > + vhost_vdpa_set_vring_ready(dev, 1); > } else { > + vhost_vdpa_set_vring_ready(dev, 0); > ok = vhost_vdpa_svqs_stop(dev); > if (unlikely(!ok)) { > return -1; > @@ -1105,9 +1111,6 @@ static int vhost_vdpa_dev_start(struct vhost_dev *dev, > bool started) > memory_listener_register(&v->listener, &address_space_memory); > return vhost_vdpa_add_status(dev, VIRTIO_CONFIG_S_DRIVER_OK); > } else { > - vhost_vdpa_reset_device(dev); > - vhost_vdpa_add_status(dev, VIRTIO_CONFIG_S_ACKNOWLEDGE | > - VIRTIO_CONFIG_S_DRIVER); > memory_listener_unregister(&v->listener); > > return 0; > diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c > index b643f42..7e0cdb6 100644 > --- a/hw/virtio/vhost.c > +++ b/hw/virtio/vhost.c > @@ -1820,7 +1820,6 @@ fail_features: > void vhost_dev_stop(struct vhost_dev *hdev, VirtIODevice *vdev) > { > int i; > - Unnecessary changes. > /* should only be called after backend is connected */ > assert(hdev->vhost_ops); > > @@ -1854,3 +1853,17 @@ int vhost_net_set_backend(struct vhost_dev *hdev, > > return -ENOSYS; > } > + > +int vhost_dev_reset(struct vhost_dev *hdev) > +{ Let's use a separate patch for this. Thanks > + int ret = 0; > + > + /* should only be called after backend is connected */ > + assert(hdev->vhost_ops); > + > + if (hdev->vhost_ops->vhost_reset_device) { > + ret = hdev->vhost_ops->vhost_reset_device(hdev); > + } > + > + return ret; > +} > diff --git a/include/hw/virtio/vhost.h b/include/hw/virtio/vhost.h > index 58a73e7..b8b7c20 100644 > --- a/include/hw/virtio/vhost.h > +++ b/include/hw/virtio/vhost.h > @@ -114,6 +114,7 @@ int vhost_dev_init(struct vhost_dev *hdev, void *opaque, > void vhost_dev_cleanup(struct vhost_dev *hdev); > int vhost_dev_start(struct vhost_dev *hdev, VirtIODevice *vdev); > void vhost_dev_stop(struct vhost_dev *hdev, VirtIODevice *vdev); > +int vhost_dev_reset(struct vhost_dev *hdev); > int vhost_dev_enable_notifiers(struct vhost_dev *hdev, VirtIODevice *vdev); > void vhost_dev_disable_notifiers(struct vhost_dev *hdev, VirtIODevice *vdev); > > -- > 1.8.3.1 > > >