This adds a new implementation of std::counting_semaphore<Max> for the
case where Max == 1, i.e. the std::binary_semaphore typedef. When the
maximum counter value is 1 we don't need to load the current counter
value before doing a compare-exchange to acquire the semaphore. We can
just optimistically assume it's currently 1, and if that's true then the
compare_exchange will succeed. This simplifies _M_try_acquire so that we
don't need the separate _M_do_try_acquire function used by the general
__semaphore_base implementation for _Max > 1 cases.

We can also use the simpler forms of atomic waiting that just take a
value instead of a value accessor and predicate, because we know that
the _M_counter is always a __platform_wait_t. This change adds a
bare_wait flag to __atomic_wait_address_v because we don't need to track
waiters for semaphores, we only need to notify when a semaphore with a
count of zero is released.

I'm not sure if this makes the code any faster in real scenarios, but
the generated code for std::binary_semaphore is slightly smaller now.

libstdc++-v3/ChangeLog:

        * include/bits/semaphore_base.h (__binary_semaphore_impl): New
        class with optimized implementation for std::binary_semaphore.
        (__semaphore_impl) <_Max == 1>: Modify alias template to use
        __binary_semaphore_impl.
        * include/bits/atomic_wait.h (__atomic_wait_address_v): Add
        parameter for bare waits.
---

Tested x86_64-linux.

 libstdc++-v3/include/bits/atomic_wait.h    |  5 +-
 libstdc++-v3/include/bits/semaphore_base.h | 73 +++++++++++++++++++++-
 2 files changed, 73 insertions(+), 5 deletions(-)

diff --git a/libstdc++-v3/include/bits/atomic_wait.h b/libstdc++-v3/include/bits/atomic_wait.h
index 815726c16ccb..9ae11191d9ab 100644
--- a/libstdc++-v3/include/bits/atomic_wait.h
+++ b/libstdc++-v3/include/bits/atomic_wait.h
@@ -249,12 +249,13 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
       // C++26 will return __val
     }
 
+  // Wait on __addr while *__addr == __old is true.
   inline void
   __atomic_wait_address_v(const __detail::__platform_wait_t* __addr,
                          __detail::__platform_wait_t __old,
-                         int __order)
+                         int __order, bool __bare_wait = false)
   {
-    __detail::__wait_args __args{ __addr, __old, __order };
+    __detail::__wait_args __args{ __addr, __old, __order, __bare_wait };
     // C++26 will not ignore the return value here
     __detail::__wait_impl(__addr, __args);
   }
diff --git a/libstdc++-v3/include/bits/semaphore_base.h b/libstdc++-v3/include/bits/semaphore_base.h
index 3f7a33ccd51a..5446e57b0ab1 100644
--- a/libstdc++-v3/include/bits/semaphore_base.h
+++ b/libstdc++-v3/include/bits/semaphore_base.h
@@ -170,15 +170,82 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
       return __old;
     }
 
-  private:
+  protected:
     alignas(_Platform_wait ? __detail::__platform_wait_alignment
                           : __alignof__(__count_type))
     __count_type _M_counter;
   };
 
-  template<ptrdiff_t _Max>
+  // Optimized implementation for std::binary_semaphore with max == 1.
+  struct __binary_semaphore_impl : private __semaphore_base<true>
+  {
+    using _Base = __semaphore_base<true>;
+    using _Base::__count_type;
+
+    static constexpr ptrdiff_t _S_max = 1;
+
+    constexpr explicit
+    __binary_semaphore_impl(__count_type __count) noexcept
+    : _Base(__count > 0)
+    { }
+
+    __binary_semaphore_impl(const __binary_semaphore_impl&) = delete;
+    __binary_semaphore_impl& operator=(const __binary_semaphore_impl&) = delete;
+
+    using _Base::_M_get_current;
+
+    _GLIBCXX_ALWAYS_INLINE bool
+    _M_try_acquire() noexcept
+    {
+      __count_type __val = 1;
+      return _Base::_M_do_try_acquire(__val);
+    }
+
+    void
+    _M_acquire() noexcept
+    {
+      while (!_M_try_acquire())
+       std::__atomic_wait_address_v(&_M_counter, 0, __ATOMIC_ACQUIRE, true);
+    }
+
+    template<typename _Clock, typename _Duration>
+      bool
+      _M_try_acquire_until(const chrono::time_point<_Clock, _Duration>& __atime) noexcept
+      {
+       while (!_M_try_acquire())
+         {
+           if (!std::__atomic_wait_address_until_v(&_M_counter, 0,
+                                                   __ATOMIC_ACQUIRE,
+                                                   __atime, true))
+             return false; // timed out
+         }
+       return true;
+      }
+
+    template<typename _Rep, typename _Period>
+      bool
+      _M_try_acquire_for(const chrono::duration<_Rep, _Period>& __rtime) noexcept
+      {
+       while (!_M_try_acquire())
+         {
+           if (!std::__atomic_wait_address_for_v(&_M_counter, 0,
+                                                 __ATOMIC_ACQUIRE,
+                                                 __rtime, true))
+             return false; // timed out
+         }
+       return true;
+      }
+
+    _GLIBCXX_ALWAYS_INLINE ptrdiff_t
+    _M_release(ptrdiff_t __update) noexcept
+    { return _Base::_M_release(__update > 0); }
+  };
+
+  template<ptrdiff_t _Max,
+          bool _PlatformWait = (_Max <= __semaphore_base<true>::_S_max)>
     using __semaphore_impl
-      = __semaphore_base<(_Max <= __semaphore_base<true>::_S_max)>;
+      = __conditional_t<_Max == 1, __binary_semaphore_impl,
+                       __semaphore_base<_PlatformWait>>;
 
 _GLIBCXX_END_NAMESPACE_VERSION
 } // namespace std
-- 
2.49.0

Reply via email to