Re: [PATCH] Cygwin: thread: Allow fast_mutex to be acquired multiple times.

Takashi Yano Wed, 26 Mar 2025 02:14:20 -0700

Hi Corinna,

On Mon, 24 Mar 2025 16:35:08 +0100
Corinna Vinschen wrote:
> On Mar 24 22:05, Takashi Yano wrote:
> > Hi Corinna,
> > 
> > On Mon, 24 Mar 2025 11:57:06 +0100
> > Corinna Vinschen wrote:
> > > On Mar 24 14:53, Takashi Yano wrote:
> > > > Previously, the fast_mutex defined in thread.h could not be aquired
> > > > multiple times, i.e., the thread causes deadlock if it attempted to
> > > > acquire a lock already acquired by the thread. For example, a deadlock
> > > > occurs if another pthread_key_create() is called in the destructor
> > > > specified in the previous pthread_key_create(). This is because the
> > > > run_all_destructors() calls the desructor via keys.for_each() where
> > > > both for_each() and pthread_key_create() (that calls List_insert())
> > > > attempt to acquire the lock. With this patch, the fast_mutex can be
> > > > acquired multiple times by the same thread similar to the behaviour
> > > > of a Windows mutex. In this implementation, the mutex is released
> > > > only when the number of unlock() calls matches the number of lock()
> > > > calls.
> > > 
> > > Doesn't that mean fast_mutex is now the same thing as muto?  The
> > > muto type was recursive from the beginning.  It's kind of weird
> > > to maintain two lock types which are equivalent.
> > 
> > I have just looked at muto implementation. Yeah, it looks very
> > similar to fast_mutex with this patch. However, the performance
> > is different. fast_mutex with this patch is two times faster
> > than muto when just repeatedly locking/unlocking. If two threads
> > compete for the same mutex, the performance is almost the same.
> 
> Ok, nice to know.  With fast_mutex being mostly faster and being
> recursive with your patch, maybe we could replace all mutos with
> this fast_mutex?
> 
> > > I wonder if we shouldn't drop the keys list structure entirely, and
> > > convert "keys" to a simple sequence number + destructor array, as in
> > > GLibc.  This allows lockless key operations and drop the entire list and
> > > mutex overhead.  The code would become dirt-easy, see
> > > https://sourceware.org/cgit/glibc/tree/nptl/pthread_key_create.c
> > > https://sourceware.org/cgit/glibc/tree/nptl/pthread_key_delete.c
> > > 
> > > What do you think?
> > 
> > It looks very simple and reasonable to me.
> > 
> > > However, for 3.6.1, the below patch should be ok.
> > 
> > What about reimplementing pthread_key_create/pthread_key_delete
> > based on glibc for master branch, and appling this patch to
> > cygwin-3_6-branch?
> > 
> > Shall I try to reimplement them?
> 
> That would be great!


What about the patch attached?
Is this as you intended?

-- 
Takashi Yano <takashi.y...@nifty.ne.jp>

diff --git a/winsup/cygwin/local_includes/thread.h 
b/winsup/cygwin/local_includes/thread.h
index b3496281e..d182d91ce 100644
--- a/winsup/cygwin/local_includes/thread.h
+++ b/winsup/cygwin/local_includes/thread.h
@@ -221,13 +221,28 @@ public:
   ~pthread_key ();
   static void fixup_before_fork ()
   {
-    keys.for_each (&pthread_key::_fixup_before_fork);
+    for (size_t cnt = 0; cnt < PTHREAD_KEYS_MAX; cnt++)
+      {
+       if (!keys[cnt].unused ())
+         {
+           while (keys[cnt].busy ())
+             Sleep (0L);
+           keys[cnt].key->_fixup_before_fork ();
+         }
+      }
   }
 
   static void fixup_after_fork ()
   {
-    keys.fixup_after_fork ();
-    keys.for_each (&pthread_key::_fixup_after_fork);
+    for (size_t cnt = 0; cnt < PTHREAD_KEYS_MAX; cnt++)
+      {
+       if (!keys[cnt].unused ())
+         {
+           while (keys[cnt].busy ())
+             Sleep (0L);
+           keys[cnt].key->_fixup_after_fork ();
+         }
+      }
   }
 
   static void run_all_destructors ()
@@ -246,16 +261,32 @@ public:
     for (int i = 0; i < PTHREAD_DESTRUCTOR_ITERATIONS; ++i)
       {
        iterate_dtors_once_more = false;
-       keys.for_each (&pthread_key::run_destructor);
+       for (size_t cnt = 0; cnt < PTHREAD_KEYS_MAX; cnt++)
+         {
+           if (!keys[cnt].unused ())
+             {
+               while (keys[cnt].busy ())
+                 Sleep (0L);
+               keys[cnt].key->run_destructor ();
+             }
+         }
        if (!iterate_dtors_once_more)
          break;
       }
   }
 
-  /* List support calls */
-  class pthread_key *next;
 private:
-  static List<pthread_key> keys;
+  int key_idx;
+  static class keys_list {
+    ULONG seq;
+    pthread_key *key;
+    bool unused () { return (seq & 3) == 0; }
+    bool usable () { return seq < seq + 4; }
+    bool busy () { return (seq & 3) == 1; }
+  public:
+    keys_list () : seq (0), key (NULL) {}
+    friend class pthread_key;
+  } keys[PTHREAD_KEYS_MAX];
   void _fixup_before_fork ();
   void _fixup_after_fork ();
   void (*destructor) (void *);
diff --git a/winsup/cygwin/thread.cc b/winsup/cygwin/thread.cc
index 9ee96504b..a6a9962b4 100644
--- a/winsup/cygwin/thread.cc
+++ b/winsup/cygwin/thread.cc
@@ -1666,17 +1666,30 @@ pthread_rwlock::_fixup_after_fork ()
 /* pthread_key */
 /* static members */
 /* This stores pthread_key information across fork() boundaries */
-List<pthread_key> pthread_key::keys;
+pthread_key::keys_list pthread_key::keys[PTHREAD_KEYS_MAX];
 
 /* non-static members */
 
-pthread_key::pthread_key (void (*aDestructor) (void *)):verifyable_object 
(PTHREAD_KEY_MAGIC), destructor (aDestructor)
+pthread_key::pthread_key (void (*aDestructor) (void *)) :
+  verifyable_object (PTHREAD_KEY_MAGIC), destructor (aDestructor)
 {
   tls_index = TlsAlloc ();
   if (tls_index == TLS_OUT_OF_INDEXES)
     magic = 0;
   else
-    keys.insert (this);
+    for (size_t cnt = 0; cnt < PTHREAD_KEYS_MAX; cnt++)
+      {
+       ULONG seq = keys[cnt].seq;
+       if (keys[cnt].unused () && keys[cnt].usable ()
+           && InterlockedCompareExchange (&keys[cnt].seq,
+                                          seq + 1, seq) == seq)
+         {
+           keys[cnt].key = this;
+           key_idx = cnt;
+           InterlockedCompareExchange (&keys[cnt].seq, seq + 3, seq + 1);
+           break;
+         }
+      }
 }
 
 pthread_key::~pthread_key ()
@@ -1685,7 +1698,14 @@ pthread_key::~pthread_key ()
    */
   if (magic != 0)
     {
-      keys.remove (this);
+      ULONG seq = keys[key_idx].seq;
+      if (!keys[key_idx].unused ())
+       {
+         while (keys[key_idx].busy ())
+           yield ();
+         keys[key_idx].key = NULL;
+         InterlockedCompareExchange (&keys[key_idx].seq, seq + 1, seq);
+       }
       TlsFree (tls_index);
     }
 }

Re: [PATCH] Cygwin: thread: Allow fast_mutex to be acquired multiple times.

Reply via email to