Hi Corinna,

On Wed, 26 Mar 2025 10:33:48 +0100
Corinna Vinschen wrote:
> On Mar 26 18:14, Takashi Yano wrote:
> > Hi Corinna,
> > 
> > On Mon, 24 Mar 2025 16:35:08 +0100
> > Corinna Vinschen wrote:
> > > On Mar 24 22:05, Takashi Yano wrote:
> > > > Hi Corinna,
> > > > 
> > > > On Mon, 24 Mar 2025 11:57:06 +0100
> > > > Corinna Vinschen wrote:
> > > > > I wonder if we shouldn't drop the keys list structure entirely, and
> > > > > convert "keys" to a simple sequence number + destructor array, as in
> > > > > GLibc.  This allows lockless key operations and drop the entire list 
> > > > > and
> > > > > mutex overhead.  The code would become dirt-easy, see
> > > > > https://sourceware.org/cgit/glibc/tree/nptl/pthread_key_create.c
> > > > > https://sourceware.org/cgit/glibc/tree/nptl/pthread_key_delete.c
> > > > > 
> > > > > What do you think?
> > > > 
> > > > It looks very simple and reasonable to me.
> > > > 
> > > > > However, for 3.6.1, the below patch should be ok.
> > > > 
> > > > What about reimplementing pthread_key_create/pthread_key_delete
> > > > based on glibc for master branch, and appling this patch to
> > > > cygwin-3_6-branch?
> > > > 
> > > > Shall I try to reimplement them?
> > > 
> > > That would be great!
> > 
> > What about the patch attached?
> > Is this as you intended?
> 
> Yes!
> 
> >  private:
> > -  static List<pthread_key> keys;
> > +  int key_idx;
> > +  static class keys_list {
> > +    ULONG seq;
> 
> GLibc uses uintptr_t for the sequence number to avoid overflow.
> So we could use ULONG64 and InterlockedCompareExchange64 here, too.
> 
> Looks good to me, thanks!

New version of the patch attached. This realizes quasi-lock-free
access to the pthread_keys array. Please review.

-- 
Takashi Yano <takashi.y...@nifty.ne.jp>
From a5ee99759faeb33f8d920d263f70e3a532e84b3e Mon Sep 17 00:00:00 2001
From: Takashi Yano <takashi.y...@nifty.ne.jp>
Date: Fri, 4 Apr 2025 21:22:27 +0900
Subject: [PATCH] Cygwin: thread: Use simple array instead of List<pthread_key>

Previously, List<pthread_key>, which used fast_mutex, was used for
accessing all the valid pthread_key. This caused a deadlock when
another pthread_key_create() is called in the destructor registered
by the previous pthread_key_create(). This is because the
run_all_destructors() calls the desructor via keys.for_each() where
both for_each() and pthread_key_create() (that calls List_insert())
attempt to acquire the lock.
With this patch, use simple array of pthread_key instead and realize
quasi-lock-free access to that array refering to the glibc code.

Addresses: https://cygwin.com/pipermail/cygwin/2025-March/257705.html
Fixes: 1a821390d11d ("fix race condition in List_insert")
Reported-by: Yuyi Wang <strawberry_...@hotmail.com>
Reviewed-by: Corinna Vinschen <cori...@vinschen.de>
Signed-off-by: Takashi Yano <takashi.y...@nifty.ne.jp>
---
 winsup/cygwin/local_includes/thread.h | 42 ++++++++++++++++++++++-----
 winsup/cygwin/thread.cc               | 31 +++++++++++++++++---
 2 files changed, 62 insertions(+), 11 deletions(-)

diff --git a/winsup/cygwin/local_includes/thread.h 
b/winsup/cygwin/local_includes/thread.h
index b3496281e..3053de0a2 100644
--- a/winsup/cygwin/local_includes/thread.h
+++ b/winsup/cygwin/local_includes/thread.h
@@ -221,13 +221,26 @@ public:
   ~pthread_key ();
   static void fixup_before_fork ()
   {
-    keys.for_each (&pthread_key::_fixup_before_fork);
+    for (size_t cnt = 0; cnt < PTHREAD_KEYS_MAX; cnt++)
+      {
+       if (!pthread_key::keys_list::ready (keys[cnt].seq))
+         continue;
+       if (InterlockedIncrement64 (&keys[cnt].busy_cnt) > 0)
+         keys[cnt].key->_fixup_before_fork ();
+       InterlockedDecrement64 (&keys[cnt].busy_cnt);
+      }
   }
 
   static void fixup_after_fork ()
   {
-    keys.fixup_after_fork ();
-    keys.for_each (&pthread_key::_fixup_after_fork);
+    for (size_t cnt = 0; cnt < PTHREAD_KEYS_MAX; cnt++)
+      {
+       if (!pthread_key::keys_list::ready (keys[cnt].seq))
+         continue;
+       if (InterlockedIncrement64 (&keys[cnt].busy_cnt) > 0)
+         keys[cnt].key->_fixup_after_fork ();
+       InterlockedDecrement64 (&keys[cnt].busy_cnt);
+      }
   }
 
   static void run_all_destructors ()
@@ -246,16 +259,31 @@ public:
     for (int i = 0; i < PTHREAD_DESTRUCTOR_ITERATIONS; ++i)
       {
        iterate_dtors_once_more = false;
-       keys.for_each (&pthread_key::run_destructor);
+       for (size_t cnt = 0; cnt < PTHREAD_KEYS_MAX; cnt++)
+         {
+           if (!pthread_key::keys_list::ready (keys[cnt].seq))
+             continue;
+           if (InterlockedIncrement64 (&keys[cnt].busy_cnt) > 0)
+             keys[cnt].key->run_destructor ();
+           InterlockedDecrement64 (&keys[cnt].busy_cnt);
+         }
        if (!iterate_dtors_once_more)
          break;
       }
   }
 
-  /* List support calls */
-  class pthread_key *next;
 private:
-  static List<pthread_key> keys;
+  int key_idx;
+  static class keys_list {
+    LONG64 seq;
+    LONG64 busy_cnt;
+    pthread_key *key;
+    static bool used (LONG64 seq1) { return (seq1 & 3) != 0; }
+    static bool ready (LONG64 seq1) { return (seq1 & 3) == 2; }
+  public:
+    keys_list () : seq (0), busy_cnt (0), key (NULL) {}
+    friend class pthread_key;
+  } keys[PTHREAD_KEYS_MAX];
   void _fixup_before_fork ();
   void _fixup_after_fork ();
   void (*destructor) (void *);
diff --git a/winsup/cygwin/thread.cc b/winsup/cygwin/thread.cc
index 9ee96504b..17600be75 100644
--- a/winsup/cygwin/thread.cc
+++ b/winsup/cygwin/thread.cc
@@ -32,6 +32,7 @@ details. */
 #include "ntdll.h"
 #include "cygwait.h"
 #include "exception.h"
+#include <assert.h>
 
 /* For Linux compatibility, the length of a thread name is 16 characters. */
 #define THRNAMELEN 16
@@ -1666,17 +1667,31 @@ pthread_rwlock::_fixup_after_fork ()
 /* pthread_key */
 /* static members */
 /* This stores pthread_key information across fork() boundaries */
-List<pthread_key> pthread_key::keys;
+pthread_key::keys_list pthread_key::keys[PTHREAD_KEYS_MAX];
 
 /* non-static members */
 
-pthread_key::pthread_key (void (*aDestructor) (void *)):verifyable_object 
(PTHREAD_KEY_MAGIC), destructor (aDestructor)
+pthread_key::pthread_key (void (*aDestructor) (void *)) :
+  verifyable_object (PTHREAD_KEY_MAGIC), destructor (aDestructor)
 {
   tls_index = TlsAlloc ();
   if (tls_index == TLS_OUT_OF_INDEXES)
     magic = 0;
   else
-    keys.insert (this);
+    for (size_t cnt = 0; cnt < PTHREAD_KEYS_MAX; cnt++)
+      {
+       LONG64 seq = keys[cnt].seq;
+       if (!pthread_key::keys_list::used (seq)
+           && InterlockedCompareExchange64 (&keys[cnt].seq,
+                                            seq + 1, seq) == seq)
+         {
+           keys[cnt].key = this;
+           keys[cnt].busy_cnt = 0;
+           key_idx = cnt;
+           InterlockedIncrement64 (&keys[key_idx].seq);
+           break;
+         }
+      }
 }
 
 pthread_key::~pthread_key ()
@@ -1685,7 +1700,15 @@ pthread_key::~pthread_key ()
    */
   if (magic != 0)
     {
-      keys.remove (this);
+      LONG64 seq = keys[key_idx].seq;
+      assert (pthread_key::keys_list::ready (seq)
+             && InterlockedCompareExchange64 (&keys[key_idx].seq,
+                                              seq + 1, seq) == seq);
+      while (InterlockedCompareExchange64 (&keys[key_idx].busy_cnt,
+                                          INT64_MIN, 0) > 0)
+       yield ();
+      keys[key_idx].key = NULL;
+      InterlockedIncrement64 (&keys[key_idx].seq);
       TlsFree (tls_index);
     }
 }
-- 
2.45.1

Reply via email to