This adds a new implementation of std::counting_semaphore<Max> for the case where Max == 1, i.e. the std::binary_semaphore typedef. When the maximum counter value is 1 we don't need to load the current counter value before doing a compare-exchange to acquire the semaphore. We can just optimistically assume it's currently 1, and if that's true then the compare_exchange will succeed. This simplifies _M_try_acquire so that we don't need the separate _M_do_try_acquire function used by the general __semaphore_base implementation for _Max > 1 cases.
We can also use the simpler forms of atomic waiting that just take a value instead of a value accessor and predicate, because we know that the _M_counter is always a __platform_wait_t. This change adds a bare_wait flag to __atomic_wait_address_v because we don't need to track waiters for semaphores; we only need to notify when a semaphore with a count of zero is released. I'm not sure if this makes the code any faster in real scenarios, but the generated code for std::binary_semaphore is slightly smaller now. libstdc++-v3/ChangeLog: * include/bits/semaphore_base.h (__binary_semaphore_impl): New class with optimized implementation for std::binary_semaphore. (__semaphore_impl) <_Max == 1>: Modify alias template to use __binary_semaphore_impl. * include/bits/atomic_wait.h (__atomic_wait_address_v): Add parameter for bare waits. --- Tested x86_64-linux. libstdc++-v3/include/bits/atomic_wait.h | 5 +- libstdc++-v3/include/bits/semaphore_base.h | 73 +++++++++++++++++++++- 2 files changed, 73 insertions(+), 5 deletions(-) diff --git a/libstdc++-v3/include/bits/atomic_wait.h b/libstdc++-v3/include/bits/atomic_wait.h index 815726c16ccb..9ae11191d9ab 100644 --- a/libstdc++-v3/include/bits/atomic_wait.h +++ b/libstdc++-v3/include/bits/atomic_wait.h @@ -249,12 +249,13 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION // C++26 will return __val } + // Wait on __addr while *__addr == __old is true.
inline void __atomic_wait_address_v(const __detail::__platform_wait_t* __addr, __detail::__platform_wait_t __old, - int __order) + int __order, bool __bare_wait = false) { - __detail::__wait_args __args{ __addr, __old, __order }; + __detail::__wait_args __args{ __addr, __old, __order, __bare_wait }; // C++26 will not ignore the return value here __detail::__wait_impl(__addr, __args); } diff --git a/libstdc++-v3/include/bits/semaphore_base.h b/libstdc++-v3/include/bits/semaphore_base.h index 3f7a33ccd51a..5446e57b0ab1 100644 --- a/libstdc++-v3/include/bits/semaphore_base.h +++ b/libstdc++-v3/include/bits/semaphore_base.h @@ -170,15 +170,82 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION return __old; } - private: + protected: alignas(_Platform_wait ? __detail::__platform_wait_alignment : __alignof__(__count_type)) __count_type _M_counter; }; - template<ptrdiff_t _Max> + // Optimized implementation for std::binary_semaphore with max == 1. + struct __binary_semaphore_impl : private __semaphore_base<true> + { + using _Base = __semaphore_base<true>; + using _Base::__count_type; + + static constexpr ptrdiff_t _S_max = 1; + + constexpr explicit + __binary_semaphore_impl(__count_type __count) noexcept + : _Base(__count > 0) + { } + + __binary_semaphore_impl(const __binary_semaphore_impl&) = delete; + __binary_semaphore_impl& operator=(const __binary_semaphore_impl&) = delete; + + using _Base::_M_get_current; + + _GLIBCXX_ALWAYS_INLINE bool + _M_try_acquire() noexcept + { + __count_type __val = 1; + return _Base::_M_do_try_acquire(__val); + } + + void + _M_acquire() noexcept + { + while (!_M_try_acquire()) + std::__atomic_wait_address_v(&_M_counter, 0, __ATOMIC_ACQUIRE, true); + } + + template<typename _Clock, typename _Duration> + bool + _M_try_acquire_until(const chrono::time_point<_Clock, _Duration>& __atime) noexcept + { + while (!_M_try_acquire()) + { + if (!std::__atomic_wait_address_until_v(&_M_counter, 0, + __ATOMIC_ACQUIRE, + __atime, true)) + return false; // timed out + } + 
return true; + } + + template<typename _Rep, typename _Period> + bool + _M_try_acquire_for(const chrono::duration<_Rep, _Period>& __rtime) noexcept + { + while (!_M_try_acquire()) + { + if (!std::__atomic_wait_address_for_v(&_M_counter, 0, + __ATOMIC_ACQUIRE, + __rtime, true)) + return false; // timed out + } + return true; + } + + _GLIBCXX_ALWAYS_INLINE ptrdiff_t + _M_release(ptrdiff_t __update) noexcept + { return _Base::_M_release(__update > 0); } + }; + + template<ptrdiff_t _Max, + bool _PlatformWait = (_Max <= __semaphore_base<true>::_S_max)> using __semaphore_impl - = __semaphore_base<(_Max <= __semaphore_base<true>::_S_max)>; + = __conditional_t<_Max == 1, __binary_semaphore_impl, + __semaphore_base<_PlatformWait>>; _GLIBCXX_END_NAMESPACE_VERSION } // namespace std -- 2.49.0