Hello, The refcounted basic_string implementation contains several data races on _M_refcount: 1. _M_is_leaked loads _M_refcount concurrently with mutations of _M_refcount. This load needs to be a memory_order_relaxed load, as the _M_is_leaked predicate does not change under our feet. 2. _M_is_shared loads _M_refcount concurrently with mutations of _M_refcount. This load needs to be a memory_order_acquire load, as another thread can drop _M_refcount to zero concurrently, which makes the string non-shared and so the current thread can mutate the string. We need reads of the string in another thread (while it was shared) to happen-before the writes to the string in this thread (now that it is non-shared).
This patch adds a __gnu_cxx::__atomic_load_dispatch function to do the loads of _M_refcount. The function does an acquire load. Acquire is not needed for _M_is_leaked, but for simplicity we still do an acquire (hopefully the refcounted impl will go away in the future). This patch also uses the new function to do loads of _M_refcount in the string implementation. I did not update doc/xml/manual/concurrency_extensions.xml to document __gnu_cxx::__atomic_load_dispatch, because I am not sure we want to extend that API now that we have language-level atomics. If you still want me to update it, please say how to regenerate doc/html/manual/ext_concurrency.html. The race was detected with ThreadSanitizer on the following program: #define _GLIBCXX_USE_CXX11_ABI 0 #include <string> #include <thread> #include <iostream> #include <chrono> int main() { std::string s = "foo"; std::thread t([=](){ std::string x = s; std::cout << &x << std::endl; }); std::this_thread::sleep_for(std::chrono::seconds(1)); std::cout << &s[0] << std::endl; t.join(); } $ g++ string.cc -std=c++11 -pthread -O1 -g -fsanitize=thread $ ./a.out WARNING: ThreadSanitizer: data race (pid=98135) Read of size 4 at 0x7d080000efd0 by main thread: #0 std::string::_Rep::_M_is_leaked() const include/c++/6.0.0/bits/basic_string.h:2605 (a.out+0x000000401d7c) #1 std::string::_M_leak() include/c++/6.0.0/bits/basic_string.h:2730 (a.out+0x000000401d7c) #2 std::string::operator[](unsigned long) include/c++/6.0.0/bits/basic_string.h:3274 (a.out+0x000000401d7c) #3 main /tmp/string.cc:14 (a.out+0x000000401d7c) Previous atomic write of size 4 at 0x7d080000efd0 by thread T1: #0 __tsan_atomic32_fetch_add ../../../../libsanitizer/tsan/tsan_interface_atomic.cc:611 (libtsan.so.0+0x00000005811f) #1 __exchange_and_add include/c++/6.0.0/ext/atomicity.h:59 (a.out+0x000000401a19) #2 __exchange_and_add_dispatch include/c++/6.0.0/ext/atomicity.h:92 (a.out+0x000000401a19) #3 std::string::_Rep::_M_dispose(std::allocator<char> const&) 
include/c++/6.0.0/bits/basic_string.h:2659 (a.out+0x000000401a19) #4 std::basic_string<char, std::char_traits<char>, std::allocator<char> >::~basic_string() include/c++/6.0.0/bits/basic_string.h:2961 (a.out+0x000000401a19) #5 ~<lambda> /tmp/string.cc:9 (a.out+0x000000401a19) #6 ~_Head_base include/c++/6.0.0/tuple:102 (a.out+0x000000401a19) #7 ~_Tuple_impl include/c++/6.0.0/tuple:338 (a.out+0x000000401a19) #8 ~tuple include/c++/6.0.0/tuple:521 (a.out+0x000000401a19) #9 ~_Bind_simple include/c++/6.0.0/functional:1503 (a.out+0x000000401a19) #10 ~_Impl include/c++/6.0.0/thread:107 (a.out+0x000000401a19) #11 destroy<std::thread::_Impl<std::_Bind_simple<main()::<lambda()>()> > > include/c++/6.0.0/ext/new_allocator.h:124 (a.out+0x0000004015c7) #12 _S_destroy<std::allocator<std::thread::_Impl<std::_Bind_simple<main()::<lambda()>()> > >, std::thread::_Impl<std::_Bind_simple<main()::<lambda()>()> > > include/c++/6.0.0/bits/alloc_traits.h:236 (a.out+0x0000004015c7) #13 destroy<std::thread::_Impl<std::_Bind_simple<main()::<lambda()>()> > > include/c++/6.0.0/bits/alloc_traits.h:336 (a.out+0x0000004015c7) #14 _M_dispose include/c++/6.0.0/bits/shared_ptr_base.h:529 (a.out+0x0000004015c7) #15 std::_Sp_counted_base<(__gnu_cxx::_Lock_policy)2>::_M_release() /usr/local/google/home/dvyukov/src/gcc/build/x86_64-pc-linux-gnu/libstdc++-v3/include/bits/shared_ptr_base.h:150 (libstdc++.so.6+0x0000000b6da4) #16 std::__shared_count<(__gnu_cxx::_Lock_policy)2>::~__shared_count() /usr/local/google/home/dvyukov/src/gcc/build/x86_64-pc-linux-gnu/libstdc++-v3/include/bits/shared_ptr_base.h:662 (libstdc++.so.6+0x0000000b6da4) #17 std::__shared_ptr<std::thread::_Impl_base, (__gnu_cxx::_Lock_policy)2>::~__shared_ptr() /usr/local/google/home/dvyukov/src/gcc/build/x86_64-pc-linux-gnu/libstdc++-v3/include/bits/shared_ptr_base.h:928 (libstdc++.so.6+0x0000000b6da4) #18 std::shared_ptr<std::thread::_Impl_base>::~shared_ptr() 
/usr/local/google/home/dvyukov/src/gcc/build/x86_64-pc-linux-gnu/libstdc++-v3/include/bits/shared_ptr.h:93 (libstdc++.so.6+0x0000000b6da4) #19 execute_native_thread_routine libstdc++-v3/src/c++11/thread.cc:79 (libstdc++.so.6+0x0000000b6da4) Location is heap block of size 28 at 0x7d080000efc0 allocated by main thread: #0 operator new(unsigned long) ../../../../libsanitizer/tsan/tsan_interceptors.cc:571 (libtsan.so.0+0x0000000255a3) #1 __gnu_cxx::new_allocator<char>::allocate(unsigned long, void const*) /usr/local/google/home/dvyukov/src/gcc/build/x86_64-pc-linux-gnu/libstdc++-v3/include/ext/new_allocator.h:104 (libstdc++.so.6+0x0000000cbaa8) #2 std::string::_Rep::_S_create(unsigned long, unsigned long, std::allocator<char> const&) /usr/local/google/home/dvyukov/src/gcc/build/x86_64-pc-linux-gnu/libstdc++-v3/include/bits/basic_string.tcc:1051 (libstdc++.so.6+0x0000000cbaa8) #3 __libc_start_main <null> (libc.so.6+0x000000021ec4) Thread T1 (tid=98137, finished) created by main thread at: #0 pthread_create ../../../../libsanitizer/tsan/tsan_interceptors.cc:895 (libtsan.so.0+0x000000026d04) #1 __gthread_create /usr/local/google/home/dvyukov/src/gcc/build/x86_64-pc-linux-gnu/libstdc++-v3/include/x86_64-pc-linux-gnu/bits/gthr-default.h:662 (libstdc++.so.6+0x0000000b6e52) #2 std::thread::_M_start_thread(std::shared_ptr<std::thread::_Impl_base>, void (*)()) libstdc++-v3/src/c++11/thread.cc:149 (libstdc++.so.6+0x0000000b6e52) #3 __libc_start_main <null> (libc.so.6+0x000000021ec4) OK for trunk?
Index: include/bits/basic_string.h =================================================================== --- include/bits/basic_string.h (revision 227363) +++ include/bits/basic_string.h (working copy) @@ -2601,11 +2601,11 @@ bool _M_is_leaked() const _GLIBCXX_NOEXCEPT - { return this->_M_refcount < 0; } + { return __gnu_cxx::__atomic_load_dispatch(&this->_M_refcount) < 0; } bool _M_is_shared() const _GLIBCXX_NOEXCEPT - { return this->_M_refcount > 0; } + { return __gnu_cxx::__atomic_load_dispatch(&this->_M_refcount) > 0; } void _M_set_leaked() _GLIBCXX_NOEXCEPT Index: include/ext/atomicity.h =================================================================== --- include/ext/atomicity.h (revision 227363) +++ include/ext/atomicity.h (working copy) @@ -35,6 +35,16 @@ #include <bits/gthr.h> #include <bits/atomic_word.h> +// Even if the CPU doesn't need a memory barrier, we need to ensure +// that the compiler doesn't reorder memory accesses across the +// barriers. +#ifndef _GLIBCXX_READ_MEM_BARRIER +#define _GLIBCXX_READ_MEM_BARRIER __atomic_thread_fence (__ATOMIC_ACQUIRE) +#endif +#ifndef _GLIBCXX_WRITE_MEM_BARRIER +#define _GLIBCXX_WRITE_MEM_BARRIER __atomic_thread_fence (__ATOMIC_RELEASE) +#endif + namespace __gnu_cxx _GLIBCXX_VISIBILITY(default) { _GLIBCXX_BEGIN_NAMESPACE_VERSION @@ -50,7 +60,7 @@ static inline void __atomic_add(volatile _Atomic_word* __mem, int __val) - { __atomic_fetch_add(__mem, __val, __ATOMIC_ACQ_REL); } + { __atomic_fetch_add(__mem, __val, __ATOMIC_RELEASE); } #else _Atomic_word __attribute__ ((__unused__)) @@ -101,17 +111,27 @@ #endif } + static inline _Atomic_word + __attribute__ ((__unused__)) + __atomic_load_dispatch(const _Atomic_word* __mem) + { +#ifdef __GTHREADS + if (__gthread_active_p()) + { +#ifdef _GLIBCXX_ATOMIC_BUILTINS + return __atomic_load_n(__mem, __ATOMIC_ACQUIRE); +#else + // The best we can get with an old compiler. 
+ _Atomic_word v = *(volatile _Atomic_word*)__mem; + _GLIBCXX_READ_MEM_BARRIER; + return v; +#endif + } +#endif + return *__mem; + } + _GLIBCXX_END_NAMESPACE_VERSION } // namespace -// Even if the CPU doesn't need a memory barrier, we need to ensure -// that the compiler doesn't reorder memory accesses across the -// barriers. -#ifndef _GLIBCXX_READ_MEM_BARRIER -#define _GLIBCXX_READ_MEM_BARRIER __atomic_thread_fence (__ATOMIC_ACQUIRE) -#endif -#ifndef _GLIBCXX_WRITE_MEM_BARRIER -#define _GLIBCXX_WRITE_MEM_BARRIER __atomic_thread_fence (__ATOMIC_RELEASE) -#endif - #endif