The issue arose due to the change in the DPDK read-write lock implementation. That change added a new flag, RTE_RWLOCK_WAIT, designed to prevent new read locks while a write lock is in the queue. However, this change has led to a scenario where a recursive read lock, where a lock is acquired twice by the same execution thread, can initiate a sequence of events resulting in a deadlock:
Process 1 takes the first read lock. Process 2 attempts to take a write lock, triggering RTE_RWLOCK_WAIT due to the presence of a read lock. This makes process 2 enter a wait loop until the read lock is released. Process 1 tries to take a second read lock. However, since a write lock is waiting (due to RTE_RWLOCK_WAIT), it also enters a wait loop until the write lock is acquired and then released. Both processes end up in a blocked state, unable to proceed, resulting in a deadlock scenario. Following these changes, the RW-lock no longer supports recursion, implying that a single thread shouldn't obtain a read lock if it already possesses one. The problem arises during initialization: the rte_eal_init() function acquires the memory_hotplug_lock, and later on, the sequence of calls rte_eal_memory_init() -> eal_memalloc_init() -> rte_memseg_list_walk() acquires it again without releasing it. This scenario introduces the risk of a potential deadlock when concurrent write locks are applied to the same memory_hotplug_lock. To address this we resolved the issue by replacing rte_memseg_list_walk() with rte_memseg_list_walk_thread_unsafe(). Implementing a lock annotation for rte_memseg_list_walk() to proactively identify bugs similar to this one during compile time. Bugzilla ID: 1277 Fixes: 832cecc03d77 ("rwlock: prevent readers from starving writers") Cc: sta...@dpdk.org Signed-off-by: Artemy Kovalyov <artem...@nvidia.com> --- v2: changed walk to thread-unsafe version in eal_dynmem_hugepage_init() 32-bit flow v3: added lock annotation for the flow --- lib/eal/common/eal_common_dynmem.c | 5 ++++- lib/eal/common/eal_memalloc.h | 3 ++- lib/eal/common/eal_private.h | 3 ++- lib/eal/include/generic/rte_rwlock.h | 4 ++++ lib/eal/include/rte_lock_annotations.h | 5 +++++ lib/eal/include/rte_memory.h | 4 +++- lib/eal/linux/eal_memalloc.c | 7 +++++-- 7 files changed, 25 insertions(+), 6 deletions(-) diff --git a/lib/eal/common/eal_common_dynmem.c b/lib/eal/common/eal_common_dynmem.c index bdbbe233a0..95da55d9b0 100644 --- a/lib/eal/common/eal_common_dynmem.c +++ b/lib/eal/common/eal_common_dynmem.c @@ -251,7 +251,10 @@ eal_dynmem_hugepage_init(void) */ memset(&dummy, 0, sizeof(dummy)); dummy.hugepage_sz = hpi->hugepage_sz; - if (rte_memseg_list_walk(hugepage_count_walk, &dummy) < 0) + /* memory_hotplug_lock is held during initialization, so it's + * safe to call thread-unsafe version. + */ + if (rte_memseg_list_walk_thread_unsafe(hugepage_count_walk, &dummy) < 0) return -1; for (i = 0; i < RTE_DIM(dummy.num_pages); i++) { diff --git a/lib/eal/common/eal_memalloc.h b/lib/eal/common/eal_memalloc.h index ebc3a6f6c1..286ffb7633 100644 --- a/lib/eal/common/eal_memalloc.h +++ b/lib/eal/common/eal_memalloc.h @@ -91,7 +91,8 @@ int eal_memalloc_get_seg_fd_offset(int list_idx, int seg_idx, size_t *offset); int -eal_memalloc_init(void); +eal_memalloc_init(void) + __rte_shared_locks_required(rte_mcfg_mem_get_lock()); int eal_memalloc_cleanup(void); diff --git a/lib/eal/common/eal_private.h b/lib/eal/common/eal_private.h index 5eadba4902..ebd496b537 100644 --- a/lib/eal/common/eal_private.h +++ b/lib/eal/common/eal_private.h @@ -115,7 +115,8 @@ int rte_eal_memseg_init(void); * @return * 0 on success, negative on error */ -int rte_eal_memory_init(void); +int rte_eal_memory_init(void) + __rte_shared_locks_required(rte_mcfg_mem_get_lock()); /** * Configure timers diff --git a/lib/eal/include/generic/rte_rwlock.h b/lib/eal/include/generic/rte_rwlock.h index 9e083bbc61..c98fc7d083 100644 --- a/lib/eal/include/generic/rte_rwlock.h +++ b/lib/eal/include/generic/rte_rwlock.h @@ -80,6 +80,10 @@ rte_rwlock_init(rte_rwlock_t *rwl) /** * Take a read lock. Loop until the lock is held. * + * @note The RW lock isn't recursive, so calling this function on the same + * lock twice without releasing it could potentially result in a deadlock + * scenario when a write lock is involved. + * * @param rwl * A pointer to a rwlock structure. */ diff --git a/lib/eal/include/rte_lock_annotations.h b/lib/eal/include/rte_lock_annotations.h index 9fc50082d6..2456a69352 100644 --- a/lib/eal/include/rte_lock_annotations.h +++ b/lib/eal/include/rte_lock_annotations.h @@ -40,6 +40,9 @@ extern "C" { #define __rte_unlock_function(...) \ __attribute__((unlock_function(__VA_ARGS__))) +#define __rte_locks_excluded(...) \ + __attribute__((locks_excluded(__VA_ARGS__))) + #define __rte_no_thread_safety_analysis \ __attribute__((no_thread_safety_analysis)) @@ -62,6 +65,8 @@ extern "C" { #define __rte_unlock_function(...) +#define __rte_locks_excluded(...) + #define __rte_no_thread_safety_analysis #endif /* RTE_ANNOTATE_LOCKS */ diff --git a/lib/eal/include/rte_memory.h b/lib/eal/include/rte_memory.h index 3a1c607228..842362d527 100644 --- a/lib/eal/include/rte_memory.h +++ b/lib/eal/include/rte_memory.h @@ -22,6 +22,7 @@ extern "C" { #include <rte_bitops.h> #include <rte_common.h> #include <rte_config.h> +#include <rte_eal_memconfig.h> #include <rte_fbarray.h> #define RTE_PGSIZE_4K (1ULL << 12) @@ -250,7 +251,8 @@ rte_memseg_contig_walk(rte_memseg_contig_walk_t func, void *arg); * -1 if user function reported error */ int -rte_memseg_list_walk(rte_memseg_list_walk_t func, void *arg); +rte_memseg_list_walk(rte_memseg_list_walk_t func, void *arg) + __rte_locks_excluded(rte_mcfg_mem_get_lock()); /** * Walk list of all memsegs without performing any locking. diff --git a/lib/eal/linux/eal_memalloc.c b/lib/eal/linux/eal_memalloc.c index f8b1588cae..9853ec78a2 100644 --- a/lib/eal/linux/eal_memalloc.c +++ b/lib/eal/linux/eal_memalloc.c @@ -1740,7 +1740,10 @@ eal_memalloc_init(void) eal_get_internal_configuration(); if (rte_eal_process_type() == RTE_PROC_SECONDARY) - if (rte_memseg_list_walk(secondary_msl_create_walk, NULL) < 0) + /* memory_hotplug_lock is held during initialization, so it's + * safe to call thread-unsafe version. + */ + if (rte_memseg_list_walk_thread_unsafe(secondary_msl_create_walk, NULL) < 0) return -1; if (rte_eal_process_type() == RTE_PROC_PRIMARY && internal_conf->in_memory) { @@ -1778,7 +1781,7 @@ eal_memalloc_init(void) } /* initialize all of the fd lists */ - if (rte_memseg_list_walk(fd_list_create_walk, NULL)) + if (rte_memseg_list_walk_thread_unsafe(fd_list_create_walk, NULL)) return -1; return 0; } -- 2.25.1