The issue arose due to the change in the DPDK read-write lock
implementation. That change added a new flag, RTE_RWLOCK_WAIT, designed
to prevent new read locks while a write lock is in the queue. However,
this change has led to a scenario where a recursive read lock, where a
lock is acquired twice by the same execution thread, can initiate a
sequence of events resulting in a deadlock:

Process 1 takes the first read lock.
Process 2 attempts to take a write lock, triggering RTE_RWLOCK_WAIT due
to the presence of a read lock. This makes process 2 enter a wait loop
until the read lock is released.
Process 1 tries to take a second read lock. However, since a write lock
is waiting (due to RTE_RWLOCK_WAIT), it also enters a wait loop until
the write lock is acquired and then released.

Both processes end up in a blocked state, unable to proceed, resulting
in a deadlock scenario.

Following these changes, the RW-lock no longer supports
recursion, implying that a single thread shouldn't obtain a read lock if
it already possesses one. The problem arises during initialization: the
rte_eal_init() function acquires the memory_hotplug_lock, and later on,
the sequence of calls rte_eal_memory_init() -> eal_memalloc_init() ->
rte_memseg_list_walk() acquires it again without releasing it. This
scenario introduces the risk of a potential deadlock when concurrent
write locks are applied to the same memory_hotplug_lock. To address this
we resolved the issue by replacing rte_memseg_list_walk() with
rte_memseg_list_walk_thread_unsafe().

Implementing a lock annotation for rte_memseg_list_walk() to
proactively identify bugs similar to this one during compile time.

Bugzilla ID: 1277
Fixes: 832cecc03d77 ("rwlock: prevent readers from starving writers")
Cc: sta...@dpdk.org

Signed-off-by: Artemy Kovalyov <artem...@nvidia.com>
---
v2:
changed walk to thread-unsafe version in eal_dynmem_hugepage_init() 32-bit flow
v3:
added lock annotation for the flow
---
 lib/eal/common/eal_common_dynmem.c     | 5 ++++-
 lib/eal/common/eal_memalloc.h          | 3 ++-
 lib/eal/common/eal_private.h           | 3 ++-
 lib/eal/include/generic/rte_rwlock.h   | 4 ++++
 lib/eal/include/rte_lock_annotations.h | 5 +++++
 lib/eal/include/rte_memory.h           | 4 +++-
 lib/eal/linux/eal_memalloc.c           | 7 +++++--
 7 files changed, 25 insertions(+), 6 deletions(-)

diff --git a/lib/eal/common/eal_common_dynmem.c 
b/lib/eal/common/eal_common_dynmem.c
index bdbbe233a0..95da55d9b0 100644
--- a/lib/eal/common/eal_common_dynmem.c
+++ b/lib/eal/common/eal_common_dynmem.c
@@ -251,7 +251,10 @@ eal_dynmem_hugepage_init(void)
                 */
                memset(&dummy, 0, sizeof(dummy));
                dummy.hugepage_sz = hpi->hugepage_sz;
-               if (rte_memseg_list_walk(hugepage_count_walk, &dummy) < 0)
+               /*  memory_hotplug_lock is held during initialization, so it's
+                *  safe to call thread-unsafe version.
+                */
+               if (rte_memseg_list_walk_thread_unsafe(hugepage_count_walk, 
&dummy) < 0)
                        return -1;
 
                for (i = 0; i < RTE_DIM(dummy.num_pages); i++) {
diff --git a/lib/eal/common/eal_memalloc.h b/lib/eal/common/eal_memalloc.h
index ebc3a6f6c1..286ffb7633 100644
--- a/lib/eal/common/eal_memalloc.h
+++ b/lib/eal/common/eal_memalloc.h
@@ -91,7 +91,8 @@ int
 eal_memalloc_get_seg_fd_offset(int list_idx, int seg_idx, size_t *offset);
 
 int
-eal_memalloc_init(void);
+eal_memalloc_init(void)
+       __rte_shared_locks_required(rte_mcfg_mem_get_lock());
 
 int
 eal_memalloc_cleanup(void);
diff --git a/lib/eal/common/eal_private.h b/lib/eal/common/eal_private.h
index 5eadba4902..ebd496b537 100644
--- a/lib/eal/common/eal_private.h
+++ b/lib/eal/common/eal_private.h
@@ -115,7 +115,8 @@ int rte_eal_memseg_init(void);
  * @return
  *   0 on success, negative on error
  */
-int rte_eal_memory_init(void);
+int rte_eal_memory_init(void)
+       __rte_shared_locks_required(rte_mcfg_mem_get_lock());
 
 /**
  * Configure timers
diff --git a/lib/eal/include/generic/rte_rwlock.h 
b/lib/eal/include/generic/rte_rwlock.h
index 9e083bbc61..c98fc7d083 100644
--- a/lib/eal/include/generic/rte_rwlock.h
+++ b/lib/eal/include/generic/rte_rwlock.h
@@ -80,6 +80,10 @@ rte_rwlock_init(rte_rwlock_t *rwl)
 /**
  * Take a read lock. Loop until the lock is held.
  *
+ * @note The RW lock isn't recursive, so calling this function on the same
+ * lock twice without releasing it could potentially result in a deadlock
+ * scenario when a write lock is involved.
+ *
  * @param rwl
  *   A pointer to a rwlock structure.
  */
diff --git a/lib/eal/include/rte_lock_annotations.h 
b/lib/eal/include/rte_lock_annotations.h
index 9fc50082d6..2456a69352 100644
--- a/lib/eal/include/rte_lock_annotations.h
+++ b/lib/eal/include/rte_lock_annotations.h
@@ -40,6 +40,9 @@ extern "C" {
 #define __rte_unlock_function(...) \
        __attribute__((unlock_function(__VA_ARGS__)))
 
+#define __rte_locks_excluded(...) \
+       __attribute__((locks_excluded(__VA_ARGS__)))
+
 #define __rte_no_thread_safety_analysis \
        __attribute__((no_thread_safety_analysis))
 
@@ -62,6 +65,8 @@ extern "C" {
 
 #define __rte_unlock_function(...)
 
+#define __rte_locks_excluded(...)
+
 #define __rte_no_thread_safety_analysis
 
 #endif /* RTE_ANNOTATE_LOCKS */
diff --git a/lib/eal/include/rte_memory.h b/lib/eal/include/rte_memory.h
index 3a1c607228..842362d527 100644
--- a/lib/eal/include/rte_memory.h
+++ b/lib/eal/include/rte_memory.h
@@ -22,6 +22,7 @@ extern "C" {
 #include <rte_bitops.h>
 #include <rte_common.h>
 #include <rte_config.h>
+#include <rte_eal_memconfig.h>
 #include <rte_fbarray.h>
 
 #define RTE_PGSIZE_4K   (1ULL << 12)
@@ -250,7 +251,8 @@ rte_memseg_contig_walk(rte_memseg_contig_walk_t func, void 
*arg);
  *   -1 if user function reported error
  */
 int
-rte_memseg_list_walk(rte_memseg_list_walk_t func, void *arg);
+rte_memseg_list_walk(rte_memseg_list_walk_t func, void *arg)
+       __rte_locks_excluded(rte_mcfg_mem_get_lock());
 
 /**
  * Walk list of all memsegs without performing any locking.
diff --git a/lib/eal/linux/eal_memalloc.c b/lib/eal/linux/eal_memalloc.c
index f8b1588cae..9853ec78a2 100644
--- a/lib/eal/linux/eal_memalloc.c
+++ b/lib/eal/linux/eal_memalloc.c
@@ -1740,7 +1740,10 @@ eal_memalloc_init(void)
                eal_get_internal_configuration();
 
        if (rte_eal_process_type() == RTE_PROC_SECONDARY)
-               if (rte_memseg_list_walk(secondary_msl_create_walk, NULL) < 0)
+               /*  memory_hotplug_lock is held during initialization, so it's
+                *  safe to call thread-unsafe version.
+                */
+               if 
(rte_memseg_list_walk_thread_unsafe(secondary_msl_create_walk, NULL) < 0)
                        return -1;
        if (rte_eal_process_type() == RTE_PROC_PRIMARY &&
                        internal_conf->in_memory) {
@@ -1778,7 +1781,7 @@ eal_memalloc_init(void)
        }
 
        /* initialize all of the fd lists */
-       if (rte_memseg_list_walk(fd_list_create_walk, NULL))
+       if (rte_memseg_list_walk_thread_unsafe(fd_list_create_walk, NULL))
                return -1;
        return 0;
 }
-- 
2.25.1

Reply via email to