Here is more detailed information to demo the infinite loop due to corruption of the list, using a cygwin1.dll compiled with "-g" in my Cygwin installation. After the job hangs, the stacks are:

(gdb) info thread
  Id   Target Id            Frame
* 11   Thread 15220.0x1ef0  0x000000007711afb1 in ntdll!DbgBreakPoint () from /cygdrive/c/Windows/SYSTEM32/ntdll.dll
  10   Thread 15220.0x1274  0x000000007711c2ea in ntdll!ZwWaitForMultipleObjects () from /cygdrive/c/Windows/SYSTEM32/ntdll.dll
  9    Thread 15220.0x28c4  0x000000007711c2ea in ntdll!ZwWaitForMultipleObjects () from /cygdrive/c/Windows/SYSTEM32/ntdll.dll
  8    Thread 15220.0x3f1c  0x000000018021b9e6 in void List_remove<pthread_mutex>(fast_mutex&, pthread_mutex*&, pthread_mutex*) () from /usr/bin/cygwin1.dll
  7    Thread 15220.0x20a8  0x000000007711c2ea in ntdll!ZwWaitForMultipleObjects () from /cygdrive/c/Windows/SYSTEM32/ntdll.dll
  6    Thread 15220.0x1980  0x000000007711c2ea in ntdll!ZwWaitForMultipleObjects () from /cygdrive/c/Windows/SYSTEM32/ntdll.dll
  5    Thread 15220.0x389c  0x000000007711c2ea in ntdll!ZwWaitForMultipleObjects () from /cygdrive/c/Windows/SYSTEM32/ntdll.dll
  4    Thread 15220.0x3c1c  0x000000007711c2ea in ntdll!ZwWaitForMultipleObjects () from /cygdrive/c/Windows/SYSTEM32/ntdll.dll
  3    Thread 15220.0x3290  0x000000007711c2ea in ntdll!ZwWaitForMultipleObjects () from /cygdrive/c/Windows/SYSTEM32/ntdll.dll
  2    Thread 15220.0x3db0  0x000000007711bd9a in ntdll!ZwReadFile () from /cygdrive/c/Windows/SYSTEM32/ntdll.dll
  1    Thread 15220.0xf6c   0x000000007711c2ea in ntdll!ZwWaitForMultipleObjects () from /cygdrive/c/Windows/SYSTEM32/ntdll.dll

(gdb) thread 8
[Switching to thread 8 (Thread 15220.0x3f1c)]
#0  0x000000018021b9e6 in void List_remove<pthread_mutex>(fast_mutex&, pthread_mutex*&, pthread_mutex*) () from /usr/bin/cygwin1.dll

(gdb) x/10i $pc
=> 0x18021b9e6 <_Z11List_removeI13pthread_mutexEvR10fast_mutexRPT_S4_+150>:  mov    -0x8(%rbp),%rax
   0x18021b9ea <_Z11List_removeI13pthread_mutexEvR10fast_mutexRPT_S4_+154>:  mov    0x10(%rax),%rax
   0x18021b9ee <_Z11List_removeI13pthread_mutexEvR10fast_mutexRPT_S4_+158>:  mov    %rax,-0x8(%rbp)
   0x18021b9f2 <_Z11List_removeI13pthread_mutexEvR10fast_mutexRPT_S4_+162>:  jmp    0x18021b9cb <_Z11List_removeI13pthread_mutexEvR10fast_mutexRPT_S4_+123>
   0x18021b9f4 <_Z11List_removeI13pthread_mutexEvR10fast_mutexRPT_S4_+164>:  mov    -0x8(%rbp),%rax
   0x18021b9f8 <_Z11List_removeI13pthread_mutexEvR10fast_mutexRPT_S4_+168>:  mov    0x10(%rax),%rax
   0x18021b9fc <_Z11List_removeI13pthread_mutexEvR10fast_mutexRPT_S4_+172>:  cmp    0x20(%rbp),%rax
   0x18021ba00 <_Z11List_removeI13pthread_mutexEvR10fast_mutexRPT_S4_+176>:  jne    0x18021ba16 <_Z11List_removeI13pthread_mutexEvR10fast_mutexRPT_S4_+198>
   0x18021ba02 <_Z11List_removeI13pthread_mutexEvR10fast_mutexRPT_S4_+178>:  mov    -0x8(%rbp),%rax
   0x18021ba06 <_Z11List_removeI13pthread_mutexEvR10fast_mutexRPT_S4_+182>:  mov    0x10(%rax),%rax

(gdb) x/10i $pc-150+123
   0x18021b9cb <_Z11List_removeI13pthread_mutexEvR10fast_mutexRPT_S4_+123>:  mov    -0x8(%rbp),%rax
   0x18021b9cf <_Z11List_removeI13pthread_mutexEvR10fast_mutexRPT_S4_+127>:  mov    0x10(%rax),%rax
   0x18021b9d3 <_Z11List_removeI13pthread_mutexEvR10fast_mutexRPT_S4_+131>:  test   %rax,%rax
   0x18021b9d6 <_Z11List_removeI13pthread_mutexEvR10fast_mutexRPT_S4_+134>:  je     0x18021b9f4 <_Z11List_removeI13pthread_mutexEvR10fast_mutexRPT_S4_+164>
   0x18021b9d8 <_Z11List_removeI13pthread_mutexEvR10fast_mutexRPT_S4_+136>:  mov    -0x8(%rbp),%rax
   0x18021b9dc <_Z11List_removeI13pthread_mutexEvR10fast_mutexRPT_S4_+140>:  mov    0x10(%rax),%rax
   0x18021b9e0 <_Z11List_removeI13pthread_mutexEvR10fast_mutexRPT_S4_+144>:  cmp    0x20(%rbp),%rax
   0x18021b9e4 <_Z11List_removeI13pthread_mutexEvR10fast_mutexRPT_S4_+148>:  je     0x18021b9f4 <_Z11List_removeI13pthread_mutexEvR10fast_mutexRPT_S4_+164>
=> 0x18021b9e6 <_Z11List_removeI13pthread_mutexEvR10fast_mutexRPT_S4_+150>:  mov    -0x8(%rbp),%rax
   0x18021b9ea <_Z11List_removeI13pthread_mutexEvR10fast_mutexRPT_S4_+154>:  mov    0x10(%rax),%rax
(gdb)
   0x18021b9ee <_Z11List_removeI13pthread_mutexEvR10fast_mutexRPT_S4_+158>:  mov    %rax,-0x8(%rbp)
   0x18021b9f2 <_Z11List_removeI13pthread_mutexEvR10fast_mutexRPT_S4_+162>:  jmp    0x18021b9cb <_Z11List_removeI13pthread_mutexEvR10fast_mutexRPT_S4_+123>
   0x18021b9f4 <_Z11List_removeI13pthread_mutexEvR10fast_mutexRPT_S4_+164>:  mov    -0x8(%rbp),%rax
   0x18021b9f8 <_Z11List_removeI13pthread_mutexEvR10fast_mutexRPT_S4_+168>:  mov    0x10(%rax),%rax
   0x18021b9fc <_Z11List_removeI13pthread_mutexEvR10fast_mutexRPT_S4_+172>:  cmp    0x20(%rbp),%rax
   0x18021ba00 <_Z11List_removeI13pthread_mutexEvR10fast_mutexRPT_S4_+176>:  jne    0x18021ba16 <_Z11List_removeI13pthread_mutexEvR10fast_mutexRPT_S4_+198>
   0x18021ba02 <_Z11List_removeI13pthread_mutexEvR10fast_mutexRPT_S4_+178>:  mov    -0x8(%rbp),%rax
   0x18021ba06 <_Z11List_removeI13pthread_mutexEvR10fast_mutexRPT_S4_+182>:  mov    0x10(%rax),%rax
   0x18021ba0a <_Z11List_removeI13pthread_mutexEvR10fast_mutexRPT_S4_+186>:  mov    0x10(%rax),%rdx
   0x18021ba0e <_Z11List_removeI13pthread_mutexEvR10fast_mutexRPT_S4_+190>:  mov    -0x8(%rbp),%rax
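As far as I can tell, this loop in the disassembly is the traversal in List_remove (source below): cur lives in the stack slot -0x8(%rbp), node is at 0x20(%rbp), and the back edge at +162 jumps to the cur->next test at +123. A minimal standalone sketch of just that traversal (my reduction, with a hypothetical helper name, not the cygwin1.dll source) shows why a node whose next pointer refers back to itself can never be passed:

/* Sketch only: the List_remove traversal reduced to its essentials. */
struct list_node
{
  list_node *next;
};

static list_node *
find_predecessor (list_node *head, list_node *node)
{
  list_node *cur = head;                /* kept at -0x8(%rbp) above */
  while (cur->next && node != cur->next)
    cur = cur->next;                    /* if cur->next == cur, cur never
                                           advances: the loop spins forever */
  return cur;
}

Once the list is corrupted so that cur->next == cur (as the memory dump below shows), neither loop condition can become false, so thread 8 spins inside List_remove while holding the fast_mutex, which would explain the other threads sitting in ZwWaitForMultipleObjects.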
(gdb) x/gx $rbp-8                       # cur
0xff3fca98:     0x0000000601e147c0
(gdb) x/gx 0x0000000601e147c0+0x10      # cur->next
0x601e147d0:    0x0000000601e147c0      # identical to cur: infinite loop

Refer to the source code in thread.h:

template <class list_node> inline void
List_remove (fast_mutex &mx, list_node *&head, list_node *node)
{
  if (!node)
    return;
  mx.lock ();
  if (head)
    {
      if (InterlockedCompareExchangePointer ((PVOID volatile *) &head,
                                             node->next, node) != node)
        {
          list_node *cur = head;

          while (cur->next && node != cur->next)
            cur = cur->next;
          if (node == cur->next)
            cur->next = cur->next->next;
        }
    }
  mx.unlock ();
}

Re-pasting the sample code test-thread.cpp that demonstrates the infinite loop (I have fixed the whitespace-deletion issue in my Yahoo mail):

#include <stdio.h>
#include <stdlib.h>
#include <thread>
#include <vector>
#include <string>
#include <ctime>
#include <climits>
#include <limits>
#include <cassert>
#include <mutex>
#include <condition_variable>
#include <deque>
#include <pthread.h>
#include <cstdint>

using namespace std;

// Simple closable producer/consumer queue with an optional capacity bound.
template<class T>
class Queue {
    typedef std::deque<T*> Container;
public:
    Queue(int _capacity = std::numeric_limits<int>::max())
        : capacity(_capacity), closed(false) {}

    bool enqueue(T* d) {
        std::unique_lock<std::mutex> lock(mutex);
        if (closed)
            return false;
        if (d == 0) {                   // a null element closes the queue
            closed = true;
            empty_cv.notify_all();
            return true;
        }
        while (data.size() >= capacity)
            full_cv.wait(lock);
        data.push_back(d);
        empty_cv.notify_one();
        return true;
    }

    T* dequeue() {
        std::unique_lock<std::mutex> lock(mutex);
        while (data.empty() && !closed)
            empty_cv.wait(lock);
        if (data.size()) {
            T* d = data.front();
            data.pop_front();
            full_cv.notify_one();
            return d;
        } else
            return 0;
    }

    size_t size() const { return data.size(); }  // unsynchronized, informational only

private:
    std::mutex mutex;
    std::condition_variable full_cv, empty_cv;
    uint32_t capacity;
    bool closed;
    Container data;
};

struct Node {
    int data;
};

struct Job {
    Node* node;
    Queue<Node>* recycle;
};

static Queue<Job> jobs;

static void* handle_job(void* arg) {
    long ithr = (long)arg;
    unsigned long seed = time(0) + ithr*1000000;
    int NS = 1000;
    while (Job* j = jobs.dequeue()) {
        struct timespec ts;
        ts.tv_sec = 0;
        seed = seed * 1103515245 + 12345;   // simple LCG for a pseudo-random delay
        ts.tv_nsec = seed % NS;
        nanosleep(&ts, 0);
        j->recycle->enqueue(j->node);
        delete j;
    }
    return 0;
}

struct Task {
    Queue<Node> recycle;
    int size;                   // number of sub-jobs
    Task(int N) : size(N) {
        for (int i = 0; i < N; ++i) {
            Node* t = new Node();
            t->data = i;
            Job* job = new Job;
            job->node = t;
            job->recycle = &recycle;
            jobs.enqueue(job);
        }
    }
    ~Task() {
        int i = 0;
        while (Node* t = recycle.dequeue()) {
            delete t;
            if (++i == size)
                break;
        }
    }
};

static string timestamp() {
    time_t t;
    struct tm tmp;
    char buf[80];
    t = time(NULL);
    localtime_r(&t, &tmp);
    strftime(buf, sizeof(buf), "%d-%m-%Y %I:%M:%S", &tmp);
    return buf;
}

static void* create_task(void* arg) {
    long ithr = (long)arg;
    int TASK_NUM = 1000000;
    int one_percent = TASK_NUM/100;
    int TASK_SUB_JOB_NUM = 1;
    int NS = 1000;
    unsigned long seed = time(0) + ithr*10000;
    int i = 0;
    for (; i < TASK_NUM; ++i) {
        struct timespec ts;
        ts.tv_sec = 0;
        seed = seed * 1103515245 + 12345;
        ts.tv_nsec = seed % NS;
        nanosleep(&ts, 0);
        if (i % one_percent == 0) {
            fprintf(stderr, "%s: create_task[%ld]: %d%% done\n",
                    timestamp().c_str(), ithr, i/one_percent);
        }
        Task task(TASK_SUB_JOB_NUM);   // constructs and destroys a Queue per task
    }
    fprintf(stderr, "%s: create_task[%ld]: %d%% done\n",
            timestamp().c_str(), ithr, i/one_percent);
    return 0;
}

int main() {
    int NTHR_HANDLE_JOB = 4, NTHR_CREATE_TASK = 4;
    std::vector<pthread_t> threads(NTHR_HANDLE_JOB + NTHR_CREATE_TASK);
    int k = 0;
    for (long i = 0; i < NTHR_HANDLE_JOB; ++i) {
        pthread_create(&threads[k++], NULL, handle_job, (void*)i);
    }
    for (long i = 0; i < NTHR_CREATE_TASK; ++i) {
        pthread_create(&threads[k++], NULL, create_task, (void*)i);
    }
    // wait for the create_task threads
    for (size_t i = NTHR_HANDLE_JOB; i < threads.size(); ++i) {
        pthread_join(threads[i], NULL);
    }
    jobs.enqueue(0);    // close the job queue
    // wait for the handle_job threads
    for (size_t i = 0; i < NTHR_HANDLE_JOB; ++i)
        pthread_join(threads[i], NULL);
}
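For reference, I build and run it roughly like this (the exact compiler invocation is mine, not part of any official recipe):

  g++ -std=c++11 -o test-thread test-thread.cpp -lpthread
  ./test-thread

Each Task owns a Queue<Node>, so every Task created and destroyed by create_task constructs and destroys a std::mutex and two std::condition_variables, which on Cygwin are backed by pthread objects (note pthread_cond_destroy in the stack trace quoted below). Together with the four handle_job threads this drives concurrent pthread object init/destroy, i.e. concurrent List_insert and List_remove on the shared lists inside cygwin1.dll, from eight threads at once.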
--------------------------------------------
On Fri, 12/1/17, Corinna Vinschen <corinna-cyg...@cygwin.com> wrote:

Subject: Re: mixed usage of lock protection and lock-free List template class in thread.h
To: cygwin@cygwin.com
Date: Friday, December 1, 2017, 9:15 AM

On Dec  1 16:45, Xiaofeng Liu via cygwin wrote:
> Lock protection and lock-free should never be mixed!
> https://cygwin.com/git/gitweb.cgi?p=newlib-cygwin.git;a=blob;f=winsup/cygwin/thread.h;hb=1f42dc2bcf58d3b8629eb13d53de3f69fc314b47#l110
>
> 110 template <class list_node> inline void
> 111 List_insert (list_node *&head, list_node *node)
> 112 {
> 113   if (!node)
> 114     return;
> 115   do
> 116     node->next = head;
> 117   while (InterlockedCompareExchangePointer ((PVOID volatile *) &head,
> 118                                             node, node->next) != node->next);
> 119 }
> 120
> 121 template <class list_node> inline void
> 122 List_remove (fast_mutex &mx, list_node *&head, list_node *node)
> 123 {
> 124   if (!node)
> 125     return;
> 126   mx.lock ();
> 127   if (head)
> 128     {
> 129       if (InterlockedCompareExchangePointer ((PVOID volatile *) &head,
> 130                                              node->next, node) != node)
> 131         {
> 132           list_node *cur = head;
> 133
> 134           while (cur->next && node != cur->next)
> 135             cur = cur->next;
> 136           if (node == cur->next)
> 137             cur->next = cur->next->next;
> 138         }
> 139     }
> 140   mx.unlock ();
> 141 }
>
> The symptom I met is a job hang with the following stack:
> #0  0x000000007711c2ea in ntdll!ZwWaitForMultipleObjects () from /cygdrive/c/Windows/SYSTEM32/ntdll.dll
> #1  0x000007fefd111430 in KERNELBASE!GetCurrentProcess () from /cygdrive/c/Windows/system32/KERNELBASE.dll
> #2  0x0000000076fc06c0 in WaitForMultipleObjects () from /cygdrive/c/Windows/system32/kernel32.dll
> #3  0x00000001800458ac in cygwait(void*, _LARGE_INTEGER*, unsigned int) () from /usr/bin/cygwin1.dll
> #4  0x000000018013d029 in pthread_cond::~pthread_cond() () from /usr/bin/cygwin1.dll
> #5  0x000000018013d0dd in pthread_cond::~pthread_cond() () from /usr/bin/cygwin1.dll
> #6  0x0000000180141196 in pthread_cond_destroy () from /usr/bin/cygwin1.dll
> #7  0x0000000180116d5b in _sigfe () from /usr/bin/cygwin1.dll
> #8  0x0000000100908e38 in std::_Sp_counted_ptr_inplace<std::__future_base::_Task_state<std::function<void ()>, std::allocator<int>, void ()>, std::allocator<int>, (__gnu_cxx::_Lock_policy)2>::_M_dispose() ()
>
> The problem with the current implementation for concurrent insert and delete is explained in the Wikipedia article on non-blocking linked lists.
>
> My question is: how should this be solved? By adding lock protection to List_insert (giving up lock-freedom), or by implementing a completely lock-free list based on Harris's solution, which uses two CASes?

First of all, please, please, please fix your MUA!  Just like your mails to cygwin-patches a couple of weeks ago, your mails are pretty much unreadable due to broken line wrapping.

Back to business: this code has been working since 2003.  So, is this just a theoretical problem, or a practical one?  If the latter, what is broken exactly?

However, since you're asking, a lockless implementation where appropriate is always welcome.
Thanks,
Corinna

--
Corinna Vinschen                  Please, send mails regarding Cygwin to
Cygwin Maintainer                 cygwin AT cygwin DOT com
Red Hat
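Coming back to the question above: the smallest fix would be the first option, i.e. serializing List_insert with the same fast_mutex that List_remove already takes, at the cost of taking the lock on every pthread object creation. A rough sketch of what I have in mind (my proposal only, not a committed patch; callers would have to pass the mutex in):

template <class list_node> inline void
List_insert (fast_mutex &mx, list_node *&head, list_node *node)
{
  if (!node)
    return;
  mx.lock ();
  node->next = head;    /* plain writes suffice: inserts and removes
                           are now both serialized by mx */
  head = node;
  mx.unlock ();
}

Once both writers hold mx, the InterlockedCompareExchangePointer in List_remove could likewise be reduced to plain pointer updates. The fully lock-free alternative (Harris's algorithm, with a mark bit and two CASes) would keep insertion lock-free, but it is a much larger change, since removal must logically delete a node (mark its next pointer) before physically unlinking it.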