On Fri, 4 Apr 2025 21:49:43 +0900 Takashi Yano wrote: > Hi Corinna, > > On Wed, 26 Mar 2025 10:33:48 +0100 > Corinna Vinschen wrote: > > On Mar 26 18:14, Takashi Yano wrote: > > > Hi Corinna, > > > > > > On Mon, 24 Mar 2025 16:35:08 +0100 > > > Corinna Vinschen wrote: > > > > On Mar 24 22:05, Takashi Yano wrote: > > > > > Hi Corinna, > > > > > > > > > > On Mon, 24 Mar 2025 11:57:06 +0100 > > > > > Corinna Vinschen wrote: > > > > > > I wonder if we shouldn't drop the keys list structure entirely, and > > > > > > convert "keys" to a simple sequence number + destructor array, as in > > > > > > GLibc. This allows lockless key operations and drop the entire > > > > > > list and > > > > > > mutex overhead. The code would become dirt-easy, see > > > > > > https://sourceware.org/cgit/glibc/tree/nptl/pthread_key_create.c > > > > > > https://sourceware.org/cgit/glibc/tree/nptl/pthread_key_delete.c > > > > > > > > > > > > What do you think? > > > > > > > > > > It looks very simple and reasonable to me. > > > > > > > > > > > However, for 3.6.1, the below patch should be ok. > > > > > > > > > > What about reimplementing pthread_key_create/pthread_key_delete > > > > > based on glibc for master branch, and appling this patch to > > > > > cygwin-3_6-branch? > > > > > > > > > > Shall I try to reimplement them? > > > > > > > > That would be great! > > > > > > What about the patch attached? > > > Is this as you intended? > > > > Yes! > > > > > private: > > > - static List<pthread_key> keys; > > > + int key_idx; > > > + static class keys_list { > > > + ULONG seq; > > > > GLibc uses uintptr_t for the sequence number to avoid overflow. > > So we could use ULONG64 and InterlockedCompareExchange64 here, too. > > > > Looks good to me, thanks! > > New version of the patch attached. This realizes quasi-lock-free > access to the pthread_keys array. Please review.
Revised. -- Takashi Yano <takashi.y...@nifty.ne.jp>
From ed2c5eeb1c840bc6beaabb4ed3f1df4232740b2b Mon Sep 17 00:00:00 2001 From: Takashi Yano <takashi.y...@nifty.ne.jp> Date: Fri, 4 Apr 2025 21:22:27 +0900 Subject: [PATCH] Cygwin: thread: Use simple array instead of List<pthread_key> Previously, List<pthread_key>, which used fast_mutex, was used for accessing all the valid pthread_key. This caused a deadlock when another pthread_key_create() is called in the destructor registered by the previous pthread_key_create(). This is because the run_all_destructors() calls the desructor via keys.for_each() where both for_each() and pthread_key_create() (that calls List_insert()) attempt to acquire the lock. With this patch, use simple array of pthread_key instead and realize quasi-lock-free access to that array refering to the glibc code. Addresses: https://cygwin.com/pipermail/cygwin/2025-March/257705.html Fixes: 1a821390d11d ("fix race condition in List_insert") Reported-by: Yuyi Wang <strawberry_...@hotmail.com> Reviewed-by: Corinna Vinschen <cori...@vinschen.de> Signed-off-by: Takashi Yano <takashi.y...@nifty.ne.jp> --- winsup/cygwin/local_includes/thread.h | 31 +++++++++++++++++++++------ winsup/cygwin/thread.cc | 31 +++++++++++++++++++++++---- 2 files changed, 51 insertions(+), 11 deletions(-) diff --git a/winsup/cygwin/local_includes/thread.h b/winsup/cygwin/local_includes/thread.h index b3496281e..455be1c91 100644 --- a/winsup/cygwin/local_includes/thread.h +++ b/winsup/cygwin/local_includes/thread.h @@ -221,13 +221,12 @@ public: ~pthread_key (); static void fixup_before_fork () { - keys.for_each (&pthread_key::_fixup_before_fork); + for_each (&pthread_key::_fixup_before_fork); } static void fixup_after_fork () { - keys.fixup_after_fork (); - keys.for_each (&pthread_key::_fixup_after_fork); + for_each (&pthread_key::_fixup_after_fork); } static void run_all_destructors () @@ -246,21 +245,39 @@ public: for (int i = 0; i < PTHREAD_DESTRUCTOR_ITERATIONS; ++i) { iterate_dtors_once_more = false; - keys.for_each (&pthread_key::run_destructor); + for_each (&pthread_key::run_destructor); if (!iterate_dtors_once_more) break; } } - /* List support calls */ - class pthread_key *next; private: - static List<pthread_key> keys; + int key_idx; + static class keys_list { + LONG64 seq; + LONG64 busy_cnt; + pthread_key *key; + static bool used (LONG64 seq1) { return (seq1 & 3) != 0; } + static bool ready (LONG64 seq1) { return (seq1 & 3) == 2; } + public: + keys_list () : seq (0), busy_cnt (INT64_MIN), key (NULL) {} + friend class pthread_key; + } keys[PTHREAD_KEYS_MAX]; void _fixup_before_fork (); void _fixup_after_fork (); void (*destructor) (void *); void run_destructor (); void *fork_buf; + static void for_each (void (pthread_key::*callback) ()) { + for (size_t cnt = 0; cnt < PTHREAD_KEYS_MAX; cnt++) + { + if (!pthread_key::keys_list::ready (keys[cnt].seq)) + continue; + if (InterlockedIncrement64 (&keys[cnt].busy_cnt) > 0) + (keys[cnt].key->*callback) (); + InterlockedDecrement64 (&keys[cnt].busy_cnt); + } + } }; class pthread_attr: public verifyable_object diff --git a/winsup/cygwin/thread.cc b/winsup/cygwin/thread.cc index 9ee96504b..17600be75 100644 --- a/winsup/cygwin/thread.cc +++ b/winsup/cygwin/thread.cc @@ -32,6 +32,7 @@ details. */ #include "ntdll.h" #include "cygwait.h" #include "exception.h" +#include <assert.h> /* For Linux compatibility, the length of a thread name is 16 characters. */ #define THRNAMELEN 16 @@ -1666,17 +1667,31 @@ pthread_rwlock::_fixup_after_fork () /* pthread_key */ /* static members */ /* This stores pthread_key information across fork() boundaries */ -List<pthread_key> pthread_key::keys; +pthread_key::keys_list pthread_key::keys[PTHREAD_KEYS_MAX]; /* non-static members */ -pthread_key::pthread_key (void (*aDestructor) (void *)):verifyable_object (PTHREAD_KEY_MAGIC), destructor (aDestructor) +pthread_key::pthread_key (void (*aDestructor) (void *)) : + verifyable_object (PTHREAD_KEY_MAGIC), destructor (aDestructor) { tls_index = TlsAlloc (); if (tls_index == TLS_OUT_OF_INDEXES) magic = 0; else - keys.insert (this); + for (size_t cnt = 0; cnt < PTHREAD_KEYS_MAX; cnt++) + { + LONG64 seq = keys[cnt].seq; + if (!pthread_key::keys_list::used (seq) + && InterlockedCompareExchange64 (&keys[cnt].seq, + seq + 1, seq) == seq) + { + keys[cnt].key = this; + keys[cnt].busy_cnt = 0; + key_idx = cnt; + InterlockedIncrement64 (&keys[key_idx].seq); + break; + } + } } pthread_key::~pthread_key () @@ -1685,7 +1700,15 @@ pthread_key::~pthread_key () */ if (magic != 0) { - keys.remove (this); + LONG64 seq = keys[key_idx].seq; + assert (pthread_key::keys_list::ready (seq) + && InterlockedCompareExchange64 (&keys[key_idx].seq, + seq + 1, seq) == seq); + while (InterlockedCompareExchange64 (&keys[key_idx].busy_cnt, + INT64_MIN, 0) > 0) + yield (); + keys[key_idx].key = NULL; + InterlockedIncrement64 (&keys[key_idx].seq); TlsFree (tls_index); } } -- 2.45.1