In a nested virtualization environment, running dpdk-vdpa inside QEMU-L1
for software live migration results in a deadlock between the dpdk-vdpa
and QEMU-L2 processes.
The deadlock arises as follows:
'rte_vdpa_relay_vring_used' ->
'__vhost_iova_to_vva' ->
'vhost_user_iotlb_rd_unlock(vq)' ->
'vhost_user_iotlb_miss' -> sends the vhost message
'VHOST_USER_SLAVE_IOTLB_MSG' to QEMU-L2's vdpa socket, then calls
'vhost_user_iotlb_rd_lock(vq)' to take the read lock `iotlb_lock`.
But 'rte_vdpa_relay_vring_used' never held that lock in the first place,
so there is no place where this read lock is released.
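
To make the miss path concrete, here is a condensed sketch of what
'__vhost_iova_to_vva' does (simplified from lib/vhost/vhost.c;
pending-miss tracking and error handling are omitted, and the exact
signatures may differ between releases):

    uint64_t
    __vhost_iova_to_vva(struct virtio_net *dev, struct vhost_virtqueue *vq,
                        uint64_t iova, uint64_t *size, uint8_t perm)
    {
            uint64_t vva, tmp_size = *size;

            /* Fast path: translation already present in the IOTLB cache. */
            vva = vhost_user_iotlb_cache_find(dev, iova, &tmp_size, perm);
            if (tmp_size == *size)
                    return vva;

            /*
             * Miss path: the caller is expected to hold iotlb_lock for
             * reading; drop it so the VHOST_USER_IOTLB_MSG handler can
             * update the cache, then take it again before returning.
             */
            vhost_user_iotlb_rd_unlock(vq);
            vhost_user_iotlb_miss(dev, iova, perm); /* VHOST_USER_SLAVE_IOTLB_MSG */
            vhost_user_iotlb_rd_lock(vq);           /* still held on return */

            return 0;
    }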

QEMU-L2 receives the 'VHOST_USER_SLAVE_IOTLB_MSG' and calls
'vhost_user_send_device_iotlb_msg' to send 'VHOST_USER_IOTLB_MSG'
messages back to dpdk-vdpa.
dpdk-vdpa then handles them via 'vhost_user_iotlb_msg' ->
'vhost_user_iotlb_cache_insert', which tries to take the write lock
`iotlb_lock`. Since the read lock `iotlb_lock` was never released,
the handler blocks here and the deadlock is complete.
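
Schematically, the two threads end up like this (illustration only,
not literal code):

    /* Thread A: relay datapath, rte_vdpa_relay_vring_used() */
    vhost_iova_to_vva(dev, vq, ...);
            /* IOTLB miss: sends VHOST_USER_SLAVE_IOTLB_MSG and returns
             * with iotlb_lock read-held; nobody ever calls rd_unlock. */

    /* Thread B: vhost-user handler for VHOST_USER_IOTLB_MSG */
    vhost_user_iotlb_msg(...);
            vhost_user_iotlb_cache_insert(...);
                    /* takes iotlb_lock for writing -> blocks forever,
                     * because thread A still holds it for reading. */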

This patch adds the missing lock and unlock calls around the IOTLB
accesses in the relay path to fix the deadlock.
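
With the fix applied, each translation in the relay loop is bracketed
by the read lock, keeping it balanced with the unlock/relock done
inside '__vhost_iova_to_vva', e.g. for the indirect descriptor ring:

    vhost_user_iotlb_rd_lock(vq);
    desc_ring = (struct vring_desc *)(uintptr_t)
            vhost_iova_to_vva(dev, vq, vq->desc[desc_id].addr,
                              &dlen, VHOST_ACCESS_RO);
    vhost_user_iotlb_rd_unlock(vq);
    if (unlikely(!desc_ring))
            return -1;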

Fixes: b13ad2decc83 ("vhost: provide helpers for virtio ring relay")
Cc: sta...@dpdk.org

Signed-off-by: Hao Chen <ch...@yusur.tech>
---
Changes v1 ... v2:
- protect the vhost_alloc_copy_ind_table() call too.

 lib/vhost/vdpa.c | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/lib/vhost/vdpa.c b/lib/vhost/vdpa.c
index 9776fc07a9..a1dd5a753b 100644
--- a/lib/vhost/vdpa.c
+++ b/lib/vhost/vdpa.c
@@ -19,6 +19,7 @@
 #include "rte_vdpa.h"
 #include "vdpa_driver.h"
 #include "vhost.h"
+#include "iotlb.h"
 
 /** Double linked list of vDPA devices. */
 TAILQ_HEAD(vdpa_device_list, rte_vdpa_device);
@@ -147,7 +148,6 @@ rte_vdpa_unregister_device(struct rte_vdpa_device *dev)
 
 int
 rte_vdpa_relay_vring_used(int vid, uint16_t qid, void *vring_m)
-       __rte_no_thread_safety_analysis /* FIXME: requires iotlb_lock? */
 {
        struct virtio_net *dev = get_device(vid);
        uint16_t idx, idx_m, desc_id;
@@ -193,17 +193,21 @@ rte_vdpa_relay_vring_used(int vid, uint16_t qid, void *vring_m)
                        if (unlikely(nr_descs > vq->size))
                                return -1;
 
+                       vhost_user_iotlb_rd_lock(vq);
                        desc_ring = (struct vring_desc *)(uintptr_t)
                                vhost_iova_to_vva(dev, vq,
                                                vq->desc[desc_id].addr, &dlen,
                                                VHOST_ACCESS_RO);
+                       vhost_user_iotlb_rd_unlock(vq);
                        if (unlikely(!desc_ring))
                                return -1;
 
                        if (unlikely(dlen < vq->desc[desc_id].len)) {
+                               vhost_user_iotlb_rd_lock(vq);
                                idesc = vhost_alloc_copy_ind_table(dev, vq,
                                                vq->desc[desc_id].addr,
                                                vq->desc[desc_id].len);
+                               vhost_user_iotlb_rd_unlock(vq);
                                if (unlikely(!idesc))
                                        return -1;
 
@@ -220,9 +224,12 @@ rte_vdpa_relay_vring_used(int vid, uint16_t qid, void *vring_m)
                        if (unlikely(nr_descs-- == 0))
                                goto fail;
                        desc = desc_ring[desc_id];
-                       if (desc.flags & VRING_DESC_F_WRITE)
+                       if (desc.flags & VRING_DESC_F_WRITE) {
+                               vhost_user_iotlb_rd_lock(vq);
                                vhost_log_write_iova(dev, vq, desc.addr,
                                                     desc.len);
+                               vhost_user_iotlb_rd_unlock(vq);
+                       }
                        desc_id = desc.next;
                } while (desc.flags & VRING_DESC_F_NEXT);
 
-- 
2.27.0
