Currently, when a VM powers off, it triggers the vDPA device (such as a
mlx bluefield2 VF) reset many times (with one datapath queue pair and one
control queue, it is triggered three times), which leads to the issue below:
    vhost VQ 2 ring restore failed: -22: Invalid argument (22)

This is because vhost_net_stop() stops all vhost devices bound to this
virtio device, and for each of them vhost_dev_stop() stops the device and
then the queues via vhost_virtqueue_stop(). vhost_dev_stop() also resets
the device, which clears some flags in the low-level driver; on the next
loop iteration (stopping another vhost backend), QEMU tries to stop the
queues corresponding to that backend, and the driver finds the VQ invalid.
This is the root cause.

To solve the issue, vDPA should set the vrings unready and remove the
reset from device stop (vhost_dev_start(hdev, false)), and reset the
backend device via vhost_dev_reset() only after all vhost (per-queue)
devices have stopped.

Signed-off-by: Michael Qiu <qiud...@archeros.com>
Acked-by: Jason Wang <jasow...@redhat.com>
---
v5 --> v4:
    move vhost_dev_reset() call after set_guest_notifiers
    remove implementation of vhost_dev_reset()
    remove backend check for VHOST_BACKEND_TYPE_VDPA

v4 --> v3:
    Nothing changed. Because of an issue with Mimecast, when the From:
    tag differs from the sender, some mail clients treat the patch as
    an attachment. RESEND of v3 did not work, so resend the patch as v4.

v3 --> v2:
    Call vhost_dev_reset() at the end of vhost_net_stop().

    Since the vDPA device needs the status bits VIRTIO_CONFIG_S_ACKNOWLEDGE
    and VIRTIO_CONFIG_S_DRIVER re-added, simply add them inside
    vhost_vdpa_reset_device(); the only path that calls
    vhost_vdpa_reset_device() is vhost_net_stop(), so the behavior stays
    the same as before.

v2 --> v1:
    Implement a new function vhost_dev_reset() that resets the backend
    kernel device last.
---
 hw/net/vhost_net.c     | 22 +++++++++++++++++++---
 hw/virtio/vhost-vdpa.c | 15 +++++++++------
 2 files changed, 28 insertions(+), 9 deletions(-)

diff --git a/hw/net/vhost_net.c b/hw/net/vhost_net.c
index 30379d2..30c76ca 100644
--- a/hw/net/vhost_net.c
+++ b/hw/net/vhost_net.c
@@ -325,7 +325,7 @@ int vhost_net_start(VirtIODevice *dev, NetClientState *ncs,
     int total_notifiers = data_queue_pairs * 2 + cvq;
     VirtIONet *n = VIRTIO_NET(dev);
     int nvhosts = data_queue_pairs + cvq;
-    struct vhost_net *net;
+    struct vhost_net *net = NULL;
     int r, e, i, index_end = data_queue_pairs * 2;
     NetClientState *peer;
 
@@ -391,13 +391,21 @@ int vhost_net_start(VirtIODevice *dev, NetClientState *ncs,
 err_start:
     while (--i >= 0) {
         peer = qemu_get_peer(ncs , i);
-        vhost_net_stop_one(get_vhost_net(peer), dev);
+
+        net = get_vhost_net(peer);
+
+        vhost_net_stop_one(net, dev);
     }
+
     e = k->set_guest_notifiers(qbus->parent, total_notifiers, false);
     if (e < 0) {
         fprintf(stderr, "vhost guest notifier cleanup failed: %d\n", e);
         fflush(stderr);
     }
+
+    if (net) {
+        vhost_dev_reset(&net->dev);
+    }
 err:
     return r;
 }
@@ -410,6 +418,7 @@ void vhost_net_stop(VirtIODevice *dev, NetClientState *ncs,
     VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(vbus);
     VirtIONet *n = VIRTIO_NET(dev);
     NetClientState *peer;
+    struct vhost_net *net = NULL;
     int total_notifiers = data_queue_pairs * 2 + cvq;
     int nvhosts = data_queue_pairs + cvq;
     int i, r;
@@ -420,7 +429,10 @@ void vhost_net_stop(VirtIODevice *dev, NetClientState *ncs,
         } else {
             peer = qemu_get_peer(ncs, n->max_queue_pairs);
         }
-        vhost_net_stop_one(get_vhost_net(peer), dev);
+
+        net = get_vhost_net(peer);
+
+        vhost_net_stop_one(net, dev);
     }
 
     r = k->set_guest_notifiers(qbus->parent, total_notifiers, false);
@@ -429,6 +441,10 @@ void vhost_net_stop(VirtIODevice *dev, NetClientState *ncs,
         fflush(stderr);
     }
     assert(r >= 0);
+
+    if (net) {
+        vhost_dev_reset(&net->dev);
+    }
 }
 
 void vhost_net_cleanup(struct vhost_net *net)
diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c
index c5ed7a3..3ef0199 100644
--- a/hw/virtio/vhost-vdpa.c
+++ b/hw/virtio/vhost-vdpa.c
@@ -708,6 +708,11 @@ static int vhost_vdpa_reset_device(struct vhost_dev *dev)
 
     ret = vhost_vdpa_call(dev, VHOST_VDPA_SET_STATUS, &status);
     trace_vhost_vdpa_reset_device(dev, status);
+
+    /* Add back this status, so that the device can work next time */
+    vhost_vdpa_add_status(dev, VIRTIO_CONFIG_S_ACKNOWLEDGE |
+                               VIRTIO_CONFIG_S_DRIVER);
+
     return ret;
 }
 
@@ -719,14 +724,14 @@ static int vhost_vdpa_get_vq_index(struct vhost_dev *dev, int idx)
     return idx;
 }
 
-static int vhost_vdpa_set_vring_ready(struct vhost_dev *dev)
+static int vhost_vdpa_set_vring_ready(struct vhost_dev *dev, unsigned int ready)
 {
     int i;
     trace_vhost_vdpa_set_vring_ready(dev);
     for (i = 0; i < dev->nvqs; ++i) {
         struct vhost_vring_state state = {
             .index = dev->vq_index + i,
-            .num = 1,
+            .num = ready,
         };
         vhost_vdpa_call(dev, VHOST_VDPA_SET_VRING_ENABLE, &state);
     }
@@ -1088,8 +1093,9 @@ static int vhost_vdpa_dev_start(struct vhost_dev *dev, bool started)
         if (unlikely(!ok)) {
             return -1;
         }
-        vhost_vdpa_set_vring_ready(dev);
+        vhost_vdpa_set_vring_ready(dev, 1);
     } else {
+        vhost_vdpa_set_vring_ready(dev, 0);
         ok = vhost_vdpa_svqs_stop(dev);
         if (unlikely(!ok)) {
             return -1;
@@ -1105,9 +1111,6 @@ static int vhost_vdpa_dev_start(struct vhost_dev *dev, bool started)
         memory_listener_register(&v->listener, &address_space_memory);
         return vhost_vdpa_add_status(dev, VIRTIO_CONFIG_S_DRIVER_OK);
     } else {
-        vhost_vdpa_reset_device(dev);
-        vhost_vdpa_add_status(dev, VIRTIO_CONFIG_S_ACKNOWLEDGE |
-                              VIRTIO_CONFIG_S_DRIVER);
         memory_listener_unregister(&v->listener);
         return 0;
     }
-- 
1.8.3.1
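
For illustration only, here is a minimal standalone model of the ordering
problem the commit message describes. This is a sketch: the function names
mirror hw/net/vhost_net.c, but the types and bodies are simplified,
hypothetical stand-ins, not the real QEMU API.

    /*
     * Model of the stop-path ordering bug: names mirror QEMU's
     * vhost code, but everything here is a simplified stand-in.
     */
    #include <stdbool.h>
    #include <stdio.h>

    #define NVHOSTS 2               /* 1 datapath queue pair + 1 control VQ */

    static bool dev_valid;          /* cleared by a backend reset */

    static void vhost_virtqueue_stop(int vq)
    {
        if (!dev_valid) {
            /* the "-22: Invalid argument" failure from the commit message */
            printf("vhost VQ %d ring restore failed: -22\n", vq);
            return;
        }
        printf("VQ %d stopped cleanly\n", vq);
    }

    static void vhost_dev_reset(void)
    {
        printf("backend reset\n");
        dev_valid = false;          /* reset invalidates the remaining VQs */
    }

    static void stop_all(bool reset_per_vhost)
    {
        dev_valid = true;
        for (int i = 0; i < NVHOSTS; i++) {
            vhost_virtqueue_stop(i);
            if (reset_per_vhost) {
                vhost_dev_reset();  /* old behavior: reset inside the loop */
            }
        }
        if (!reset_per_vhost) {
            vhost_dev_reset();      /* new behavior: reset once, at the end */
        }
    }

    int main(void)
    {
        printf("-- before the patch --\n");
        stop_all(true);             /* VQ 1 sees an already-reset device */

        printf("-- after the patch --\n");
        stop_all(false);            /* every VQ stops before the one reset */
        return 0;
    }

In the old flow the first reset invalidates the device state that the next
vhost backend's queue stop still depends on; in the new flow every
per-queue vhost is stopped first and the backend is reset exactly once.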