Here's the patch as applied to trunk in rev. 191042. I'll apply it to 4.7 this weekend as long as nobody yelps.
-benjamin
2012-09-06 Thiago Macieira <thiago.macie...@intel.com> PR libstdc++/54172 * libsupc++/guard.cc (__cxa_guard_acquire): Exit the loop earlier if we detect that another thread has had success. Don't compare_exchange from a finished state back to a waiting state. Comment. diff --git a/libstdc++-v3/libsupc++/guard.cc b/libstdc++-v3/libsupc++/guard.cc index adc9608..60165cd 100644 --- a/libstdc++-v3/libsupc++/guard.cc +++ b/libstdc++-v3/libsupc++/guard.cc @@ -244,13 +244,13 @@ namespace __cxxabiv1 if (__gthread_active_p ()) { int *gi = (int *) (void *) g; - int expected(0); const int guard_bit = _GLIBCXX_GUARD_BIT; const int pending_bit = _GLIBCXX_GUARD_PENDING_BIT; const int waiting_bit = _GLIBCXX_GUARD_WAITING_BIT; while (1) { + int expected(0); if (__atomic_compare_exchange_n(gi, &expected, pending_bit, false, __ATOMIC_ACQ_REL, __ATOMIC_RELAXED)) @@ -264,13 +264,26 @@ namespace __cxxabiv1 // Already initialized. return 0; } + if (expected == pending_bit) { + // Use acquire here. int newv = expected | waiting_bit; if (!__atomic_compare_exchange_n(gi, &expected, newv, false, __ATOMIC_ACQ_REL, - __ATOMIC_RELAXED)) - continue; + __ATOMIC_ACQUIRE)) + { + if (expected == guard_bit) + { + // Make a thread that failed to set the + // waiting bit exit the function earlier, + // if it detects that another thread has + // successfully finished initialising. + return 0; + } + if (expected == 0) + continue; + } expected = newv; }