在 2024/1/18 22:46, David Marchand 写道:
Hello,

On Thu, Jan 18, 2024 at 11:34 AM Hao Chen <ch...@yusur.tech> wrote:

In a nested virtualization environment, running dpdk vdpa in QEMU-L1 for
software live migration will result in a deadlock between dpdke-vdpa and
QEMU-L2 processes.
rte_vdpa_relay_vring_used->
__vhost_iova_to_vva->
vhost_user_iotlb_rd_unlock(vq)->
vhost_user_iotlb_miss-> send vhost message VHOST_USER_SLAVE_IOTLB_MSG to
QEMU's vdpa socket,
then call vhost_user_iotlb_rd_lock(vq) to hold the read lock `iotlb_lock`.
But there is no place to release this read lock.

QEMU L2 get the VHOST_USER_SLAVE_IOTLB_MSG,
then call vhost_user_send_device_iotlb_msg to send VHOST_USER_IOTLB_MSG
messages to dpdk-vdpa.
Dpdk vdpa will call vhost_user_iotlb_msg->
vhost_user_iotlb_cache_insert, here, will obtain the write lock
`iotlb_lock`, but the read lock `iotlb_lock` has not been released and
will block here.

This patch add lock and unlock function to fix the deadlock.

Please identify the commit that first had this issue and add a Fixes: tag.
Ok.


Signed-off-by: Hao Chen <ch...@yusur.tech>
---
  lib/vhost/vdpa.c | 8 +++++++-
  1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/lib/vhost/vdpa.c b/lib/vhost/vdpa.c
index 9776fc07a9..9132414209 100644
--- a/lib/vhost/vdpa.c
+++ b/lib/vhost/vdpa.c
@@ -19,6 +19,7 @@
  #include "rte_vdpa.h"
  #include "vdpa_driver.h"
  #include "vhost.h"
+#include "iotlb.h"

  /** Double linked list of vDPA devices. */
  TAILQ_HEAD(vdpa_device_list, rte_vdpa_device);
@@ -193,10 +194,12 @@ rte_vdpa_relay_vring_used(int vid, uint16_t qid, void 
*vring_m)
                         if (unlikely(nr_descs > vq->size))
                                 return -1;

+                       vhost_user_iotlb_rd_lock(vq);
                         desc_ring = (struct vring_desc *)(uintptr_t)
                                 vhost_iova_to_vva(dev, vq,
                                                 vq->desc[desc_id].addr, &dlen,
                                                 VHOST_ACCESS_RO);
+                       vhost_user_iotlb_rd_unlock(vq);
                         if (unlikely(!desc_ring))
                                 return -1;

@@ -220,9 +223,12 @@ rte_vdpa_relay_vring_used(int vid, uint16_t qid, void 
*vring_m)
                         if (unlikely(nr_descs-- == 0))
                                 goto fail;
                         desc = desc_ring[desc_id];
-                       if (desc.flags & VRING_DESC_F_WRITE)
+                       if (desc.flags & VRING_DESC_F_WRITE) {
+                               vhost_user_iotlb_rd_lock(vq);
                                 vhost_log_write_iova(dev, vq, desc.addr,
                                                      desc.len);
+                               vhost_user_iotlb_rd_unlock(vq);
+                       }
                         desc_id = desc.next;
                 } while (desc.flags & VRING_DESC_F_NEXT);


Interesting, I suspected a bug in this area as clang was complaining.

Please try to remove the __rte_no_thread_safety_analysis annotation
and compile with clang.

https://git.dpdk.org/dpdk/tree/lib/vhost/vdpa.c#n150

You will get:

ccache clang -Ilib/librte_vhost.a.p -Ilib -I../lib -Ilib/vhost
-I../lib/vhost -I. -I.. -Iconfig -I../config -Ilib/eal/include
-I../lib/eal/include -Ilib/eal/linux/include
-I../lib/eal/linux/include -Ilib/eal/x86/include
-I../lib/eal/x86/include -Ilib/eal/common -I../lib/eal/common
-Ilib/eal -I../lib/eal -Ilib/kvargs -I../lib/kvargs -Ilib/log
-I../lib/log -Ilib/metrics -I../lib/metrics -Ilib/telemetry
-I../lib/telemetry -Ilib/ethdev -I../lib/ethdev -Ilib/net -I../lib/net
-Ilib/mbuf -I../lib/mbuf -Ilib/mempool -I../lib/mempool -Ilib/ring
-I../lib/ring -Ilib/meter -I../lib/meter -Ilib/cryptodev
-I../lib/cryptodev -Ilib/rcu -I../lib/rcu -Ilib/hash -I../lib/hash
-Ilib/pci -I../lib/pci -Ilib/dmadev -I../lib/dmadev
-fcolor-diagnostics -fsanitize=address -fno-omit-frame-pointer
-D_FILE_OFFSET_BITS=64 -Wall -Winvalid-pch -Wextra -Werror -std=c11
-O0 -g -include rte_config.h -Wcast-qual -Wdeprecated -Wformat
-Wformat-nonliteral -Wformat-security -Wmissing-declarations
-Wmissing-prototypes -Wnested-externs -Wold-style-definition
-Wpointer-arith -Wsign-compare -Wstrict-prototypes -Wundef
-Wwrite-strings -Wno-address-of-packed-member
-Wno-missing-field-initializers -D_GNU_SOURCE -fPIC -march=native
-mrtm -DALLOW_EXPERIMENTAL_API -DALLOW_INTERNAL_API
-DVHOST_CLANG_UNROLL_PRAGMA -fno-strict-aliasing -DVHOST_HAS_VDUSE
-DRTE_LOG_DEFAULT_LOGTYPE=lib.vhost -DRTE_ANNOTATE_LOCKS
-Wthread-safety -MD -MQ lib/librte_vhost.a.p/vhost_vdpa.c.o -MF
lib/librte_vhost.a.p/vhost_vdpa.c.o.d -o
lib/librte_vhost.a.p/vhost_vdpa.c.o -c ../lib/vhost/vdpa.c
../lib/vhost/vdpa.c:196:5: error: calling function 'vhost_iova_to_vva'
requires holding mutex 'vq->iotlb_lock'
[-Werror,-Wthread-safety-analysis]
                                 vhost_iova_to_vva(dev, vq,
                                 ^
../lib/vhost/vdpa.c:203:13: error: calling function
'vhost_alloc_copy_ind_table' requires holding mutex 'vq->iotlb_lock'
[-Werror,-Wthread-safety-analysis]
                                 idesc = vhost_alloc_copy_ind_table(dev, vq,
                                         ^
../lib/vhost/vdpa.c:223:5: error: calling function
'vhost_log_write_iova' requires holding mutex 'vq->iotlb_lock'
[-Werror,-Wthread-safety-analysis]
                                 vhost_log_write_iova(dev, vq, desc.addr,
                                 ^
3 errors generated.


We may need to protect the vhost_alloc_copy_ind_table() call too.
What do you think?
Yes, I missed this part. Thank you.


Reply via email to