Hi Corinna, On Mon, 24 Mar 2025 16:35:08 +0100 Corinna Vinschen wrote: > On Mar 24 22:05, Takashi Yano wrote: > > Hi Corinna, > > > > On Mon, 24 Mar 2025 11:57:06 +0100 > > Corinna Vinschen wrote: > > > On Mar 24 14:53, Takashi Yano wrote: > > > > Previously, the fast_mutex defined in thread.h could not be aquired > > > > multiple times, i.e., the thread causes deadlock if it attempted to > > > > acquire a lock already acquired by the thread. For example, a deadlock > > > > occurs if another pthread_key_create() is called in the destructor > > > > specified in the previous pthread_key_create(). This is because the > > > > run_all_destructors() calls the desructor via keys.for_each() where > > > > both for_each() and pthread_key_create() (that calls List_insert()) > > > > attempt to acquire the lock. With this patch, the fast_mutex can be > > > > acquired multiple times by the same thread similar to the behaviour > > > > of a Windows mutex. In this implementation, the mutex is released > > > > only when the number of unlock() calls matches the number of lock() > > > > calls. > > > > > > Doesn't that mean fast_mutex is now the same thing as muto? The > > > muto type was recursive from the beginning. It's kind of weird > > > to maintain two lock types which are equivalent. > > > > I have just looked at muto implementation. Yeah, it looks very > > similar to fast_mutex with this patch. However, the performance > > is different. fast_mutex with this patch is two times faster > > than muto when just repeatedly locking/unlocking. If two threads > > compete for the same mutex, the performance is almost the same. > > Ok, nice to know. With fast_mutex being mostly faster and being > recursive with your patch, maybe we could replace all mutos with > this fast_mutex? > > > > I wonder if we shouldn't drop the keys list structure entirely, and > > > convert "keys" to a simple sequence number + destructor array, as in > > > GLibc. This allows lockless key operations and drop the entire list and > > > mutex overhead. The code would become dirt-easy, see > > > https://sourceware.org/cgit/glibc/tree/nptl/pthread_key_create.c > > > https://sourceware.org/cgit/glibc/tree/nptl/pthread_key_delete.c > > > > > > What do you think? > > > > It looks very simple and reasonable to me. > > > > > However, for 3.6.1, the below patch should be ok. > > > > What about reimplementing pthread_key_create/pthread_key_delete > > based on glibc for master branch, and appling this patch to > > cygwin-3_6-branch? > > > > Shall I try to reimplement them? > > That would be great!
What about the patch attached? Is this as you intended? -- Takashi Yano <takashi.y...@nifty.ne.jp>
diff --git a/winsup/cygwin/local_includes/thread.h b/winsup/cygwin/local_includes/thread.h index b3496281e..d182d91ce 100644 --- a/winsup/cygwin/local_includes/thread.h +++ b/winsup/cygwin/local_includes/thread.h @@ -221,13 +221,28 @@ public: ~pthread_key (); static void fixup_before_fork () { - keys.for_each (&pthread_key::_fixup_before_fork); + for (size_t cnt = 0; cnt < PTHREAD_KEYS_MAX; cnt++) + { + if (!keys[cnt].unused ()) + { + while (keys[cnt].busy ()) + Sleep (0L); + keys[cnt].key->_fixup_before_fork (); + } + } } static void fixup_after_fork () { - keys.fixup_after_fork (); - keys.for_each (&pthread_key::_fixup_after_fork); + for (size_t cnt = 0; cnt < PTHREAD_KEYS_MAX; cnt++) + { + if (!keys[cnt].unused ()) + { + while (keys[cnt].busy ()) + Sleep (0L); + keys[cnt].key->_fixup_after_fork (); + } + } } static void run_all_destructors () @@ -246,16 +261,32 @@ public: for (int i = 0; i < PTHREAD_DESTRUCTOR_ITERATIONS; ++i) { iterate_dtors_once_more = false; - keys.for_each (&pthread_key::run_destructor); + for (size_t cnt = 0; cnt < PTHREAD_KEYS_MAX; cnt++) + { + if (!keys[cnt].unused ()) + { + while (keys[cnt].busy ()) + Sleep (0L); + keys[cnt].key->run_destructor (); + } + } if (!iterate_dtors_once_more) break; } } - /* List support calls */ - class pthread_key *next; private: - static List<pthread_key> keys; + int key_idx; + static class keys_list { + ULONG seq; + pthread_key *key; + bool unused () { return (seq & 3) == 0; } + bool usable () { return seq < seq + 4; } + bool busy () { return (seq & 3) == 1; } + public: + keys_list () : seq (0), key (NULL) {} + friend class pthread_key; + } keys[PTHREAD_KEYS_MAX]; void _fixup_before_fork (); void _fixup_after_fork (); void (*destructor) (void *); diff --git a/winsup/cygwin/thread.cc b/winsup/cygwin/thread.cc index 9ee96504b..a6a9962b4 100644 --- a/winsup/cygwin/thread.cc +++ b/winsup/cygwin/thread.cc @@ -1666,17 +1666,30 @@ pthread_rwlock::_fixup_after_fork () /* pthread_key */ /* static members */ /* This stores pthread_key information across fork() boundaries */ -List<pthread_key> pthread_key::keys; +pthread_key::keys_list pthread_key::keys[PTHREAD_KEYS_MAX]; /* non-static members */ -pthread_key::pthread_key (void (*aDestructor) (void *)):verifyable_object (PTHREAD_KEY_MAGIC), destructor (aDestructor) +pthread_key::pthread_key (void (*aDestructor) (void *)) : + verifyable_object (PTHREAD_KEY_MAGIC), destructor (aDestructor) { tls_index = TlsAlloc (); if (tls_index == TLS_OUT_OF_INDEXES) magic = 0; else - keys.insert (this); + for (size_t cnt = 0; cnt < PTHREAD_KEYS_MAX; cnt++) + { + ULONG seq = keys[cnt].seq; + if (keys[cnt].unused () && keys[cnt].usable () + && InterlockedCompareExchange (&keys[cnt].seq, + seq + 1, seq) == seq) + { + keys[cnt].key = this; + key_idx = cnt; + InterlockedCompareExchange (&keys[cnt].seq, seq + 3, seq + 1); + break; + } + } } pthread_key::~pthread_key () @@ -1685,7 +1698,14 @@ pthread_key::~pthread_key () */ if (magic != 0) { - keys.remove (this); + ULONG seq = keys[key_idx].seq; + if (!keys[key_idx].unused ()) + { + while (keys[key_idx].busy ()) + yield (); + keys[key_idx].key = NULL; + InterlockedCompareExchange (&keys[key_idx].seq, seq + 1, seq); + } TlsFree (tls_index); } }