This moves all non-dependent state and logic for std::barrier<C> into a new non-template base class, to avoid template bloat.
This would permit moving the _M_arrive function into the library
instead of the header.

libstdc++-v3/ChangeLog:

        * include/std/barrier (__tree_barrier_base): New class.
        (__tree_barrier): Move non-dependent code into __tree_barrier_base
        and derive from it.
---
Tested x86_64-linux.

 libstdc++-v3/include/std/barrier | 176 +++++++++++++++++--------------
 1 file changed, 94 insertions(+), 82 deletions(-)

diff --git a/libstdc++-v3/include/std/barrier b/libstdc++-v3/include/std/barrier
index 9c1de411f9ce..56270c99e056 100644
--- a/libstdc++-v3/include/std/barrier
+++ b/libstdc++-v3/include/std/barrier
@@ -81,77 +81,102 @@ It looks different from literature pseudocode for two main reasons:
 
   enum class __barrier_phase_t : unsigned char { };
 
-  template<typename _CompletionF>
-    class __tree_barrier
+  struct __tree_barrier_base
+  {
+    static constexpr ptrdiff_t
+    max() noexcept
+    { return __PTRDIFF_MAX__ - 1; }
+
+  protected:
+    using __atomic_phase_ref_t = std::__atomic_ref<__barrier_phase_t>;
+    using __atomic_phase_const_ref_t = std::__atomic_ref<const __barrier_phase_t>;
+    static constexpr auto __phase_alignment =
+      __atomic_phase_ref_t::required_alignment;
+
+    using __tickets_t = std::array<__barrier_phase_t, 64>;
+    struct alignas(64) /* naturally-align the heap state */ __state_t
     {
-      using __atomic_phase_ref_t = std::__atomic_ref<__barrier_phase_t>;
-      using __atomic_phase_const_ref_t = std::__atomic_ref<const __barrier_phase_t>;
-      static constexpr auto __phase_alignment =
-                      __atomic_phase_ref_t::required_alignment;
+      alignas(__phase_alignment) __tickets_t __tickets;
+    };
 
-      using __tickets_t = std::array<__barrier_phase_t, 64>;
-      struct alignas(64) /* naturally-align the heap state */ __state_t
-      {
-        alignas(__phase_alignment) __tickets_t __tickets;
-      };
+    ptrdiff_t _M_expected;
+    __atomic_base<__state_t*> _M_state{nullptr};
+    __atomic_base<ptrdiff_t> _M_expected_adjustment{0};
+    alignas(__phase_alignment) __barrier_phase_t _M_phase{};
 
-      ptrdiff_t _M_expected;
-      __atomic_base<__state_t*> _M_state{nullptr};
-      __atomic_base<ptrdiff_t> _M_expected_adjustment{0};
+    explicit constexpr
+    __tree_barrier_base(ptrdiff_t __expected)
+    : _M_expected(__expected)
+    {
+      __glibcxx_assert(__expected >= 0 && __expected <= max());
+
+      if (!std::is_constant_evaluated())
+        _M_state.store(_M_alloc_state().release(), memory_order_release);
+    }
+
+    unique_ptr<__state_t[]>
+    _M_alloc_state()
+    {
+      size_t const __count = (_M_expected + 1) >> 1;
+      return std::make_unique<__state_t[]>(__count);
+    }
+
+    bool
+    _M_arrive(__barrier_phase_t __old_phase, size_t __current)
+    {
+      const auto __old_phase_val = static_cast<unsigned char>(__old_phase);
+      const auto __half_step =
+        static_cast<__barrier_phase_t>(__old_phase_val + 1);
+      const auto __full_step =
+        static_cast<__barrier_phase_t>(__old_phase_val + 2);
+
+      size_t __current_expected = _M_expected;
+      __current %= ((_M_expected + 1) >> 1);
+
+      __state_t* const __state = _M_state.load(memory_order_relaxed);
+
+      for (int __round = 0; ; ++__round)
+        {
+          if (__current_expected <= 1)
+            return true;
+          size_t const __end_node = ((__current_expected + 1) >> 1),
+                       __last_node = __end_node - 1;
+          for ( ; ; ++__current)
+            {
+              if (__current == __end_node)
+                __current = 0;
+              auto __expect = __old_phase;
+              __atomic_phase_ref_t __phase(__state[__current]
+                                              .__tickets[__round]);
+              if (__current == __last_node && (__current_expected & 1))
+                {
+                  if (__phase.compare_exchange_strong(__expect, __full_step,
+                                                      memory_order_acq_rel))
+                    break;     // I'm 1 in 1, go to next __round
+                }
+              else if (__phase.compare_exchange_strong(__expect, __half_step,
+                                                       memory_order_acq_rel))
+                {
+                  return false; // I'm 1 in 2, done with arrival
+                }
+              else if (__expect == __half_step)
+                {
+                  if (__phase.compare_exchange_strong(__expect, __full_step,
+                                                      memory_order_acq_rel))
+                    break;    // I'm 2 in 2, go to next __round
+                }
+            }
+          __current_expected = __last_node + 1;
+          __current >>= 1;
+        }
+    }
+  };
+
+  template<typename _CompletionF>
+    class __tree_barrier : public __tree_barrier_base
+    {
       [[no_unique_address]] _CompletionF _M_completion;
 
-      alignas(__phase_alignment) __barrier_phase_t _M_phase{};
-
-      bool
-      _M_arrive(__barrier_phase_t __old_phase, size_t __current)
-      {
-        const auto __old_phase_val = static_cast<unsigned char>(__old_phase);
-        const auto __half_step =
-          static_cast<__barrier_phase_t>(__old_phase_val + 1);
-        const auto __full_step =
-          static_cast<__barrier_phase_t>(__old_phase_val + 2);
-
-        size_t __current_expected = _M_expected;
-        __current %= ((_M_expected + 1) >> 1);
-
-        __state_t* const __state = _M_state.load(memory_order_relaxed);
-
-        for (int __round = 0; ; ++__round)
-          {
-            if (__current_expected <= 1)
-              return true;
-            size_t const __end_node = ((__current_expected + 1) >> 1),
-                         __last_node = __end_node - 1;
-            for ( ; ; ++__current)
-              {
-                if (__current == __end_node)
-                  __current = 0;
-                auto __expect = __old_phase;
-                __atomic_phase_ref_t __phase(__state[__current]
-                                                .__tickets[__round]);
-                if (__current == __last_node && (__current_expected & 1))
-                  {
-                    if (__phase.compare_exchange_strong(__expect, __full_step,
-                                                        memory_order_acq_rel))
-                      break;     // I'm 1 in 1, go to next __round
-                  }
-                else if (__phase.compare_exchange_strong(__expect, __half_step,
-                                                         memory_order_acq_rel))
-                  {
-                    return false; // I'm 1 in 2, done with arrival
-                  }
-                else if (__expect == __half_step)
-                  {
-                    if (__phase.compare_exchange_strong(__expect, __full_step,
-                                                        memory_order_acq_rel))
-                      break;    // I'm 2 in 2, go to next __round
-                  }
-              }
-            __current_expected = __last_node + 1;
-            __current >>= 1;
-          }
-      }
-
       // _GLIBCXX_RESOLVE_LIB_DEFECTS
       // 3898. Possibly unintended preconditions for completion functions
       void _M_invoke_completion() noexcept { _M_completion(); }
@@ -159,22 +184,10 @@ It looks different from literature pseudocode for two main reasons:
     public:
       using arrival_token = __barrier_phase_t;
 
-      static constexpr ptrdiff_t
-      max() noexcept
-      { return __PTRDIFF_MAX__ - 1; }
-
       constexpr
       __tree_barrier(ptrdiff_t __expected, _CompletionF __completion)
-      : _M_expected(__expected), _M_completion(std::move(__completion))
-      {
-        __glibcxx_assert(__expected >= 0 && __expected <= max());
-
-        if (!std::is_constant_evaluated())
-          {
-            size_t const __count = (_M_expected + 1) >> 1;
-            _M_state.store(new __state_t[__count], memory_order_release);
-          }
-      }
+      : __tree_barrier_base(__expected), _M_completion(std::move(__completion))
+      { }
 
       [[nodiscard]] arrival_token
       arrive(ptrdiff_t __update)
@@ -191,8 +204,7 @@ It looks different from literature pseudocode for two main reasons:
         if (__cur == 0 && !_M_state.load(memory_order_relaxed))
           [[unlikely]]
           {
-            size_t const __count = (_M_expected + 1) >> 1;
-            auto __p = make_unique<__state_t[]>(__count);
+            auto __p = _M_alloc_state();
             __state_t* __val = nullptr;
             if (_M_state.compare_exchange_strong(__val, __p.get(),
                                                  memory_order_seq_cst,
-- 
2.47.1
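
For readers unfamiliar with the technique the patch applies, here is a
minimal sketch (hypothetical names, not the libstdc++ code) of hoisting
non-dependent state into a non-template base: every instantiation of the
template reuses the single definition of the base's members instead of
stamping out its own copy, and those members can later be defined out of
line in the shared library.

    // Illustrative only; _CounterBase and _Counter are invented names,
    // not part of <barrier>.
    #include <cstddef>
    #include <utility>

    struct _CounterBase                // non-template: code emitted once
    {
      std::ptrdiff_t _M_count;

      explicit constexpr _CounterBase(std::ptrdiff_t __n) : _M_count(__n) { }

      // Non-dependent logic; could be defined in a .cc file in the library.
      bool _M_step() { return --_M_count == 0; }
    };

    template<typename _Callback>
      struct _Counter : _CounterBase   // template keeps only the dependent part
      {
        [[no_unique_address]] _Callback _M_cb;

        constexpr _Counter(std::ptrdiff_t __n, _Callback __cb)
        : _CounterBase(__n), _M_cb(std::move(__cb)) { }

        void _M_arrive()               // thin wrapper is all that is re-instantiated
        { if (_M_step()) _M_cb(); }
      };

The patch applies the same split to __tree_barrier: the phase/ticket state
and _M_arrive move into __tree_barrier_base, while the class template keeps
only _M_completion and the members that depend on it.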
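For context, this is the public std::barrier interface that __tree_barrier
backs; the example below is plain C++20 usage, nothing specific to this
patch or to libstdc++ internals.

    #include <barrier>
    #include <cstdio>
    #include <thread>
    #include <vector>

    int main()
    {
      constexpr int n = 4;
      // Completion function runs once per phase, after all n threads arrive.
      std::barrier sync(n, []() noexcept { std::puts("phase complete"); });

      std::vector<std::jthread> workers;
      for (int i = 0; i < n; ++i)
        workers.emplace_back([&] {
          // ... per-thread work for phase 1 ...
          sync.arrive_and_wait();
          // ... per-thread work for phase 2 ...
          sync.arrive_and_wait();
        });
    }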