On Tue, 22 Jun 2021 at 10:07, Matthias Kretz wrote: > > On Monday, 21 June 2021 19:31:59 CEST Jonathan Wakely via Gcc-patches wrote: > > + // Lock the last element of the tuple, after all previous ones are > > locked. + template<int _Idx, typename... _Lockables> > > + inline __enable_if_t<_Idx + 1 == sizeof...(_Lockables), int> > > + __try_lock_impl(tuple<_Lockables&...>& __lockables) > > Couldn't you drop the need for enable_if and tuple if you define the function > like this? (Or - without constexpr if - two overloads with > __try_lock_impl(_L1& __l1) and __try_lock_impl(_L1& __l1, _L2& __l2, > _Lockables&... __lockables) > > template<typename _L1, typename... _Lockables> > inline int > __try_lock_impl(_L1& __l1, _Lockables&... __lockables) > { > if (auto __lock = __detail::__try_to_lock(__l1)) > { > if constexpr (sizeof...(_Lockables)) > { > int __idx = __detail::__try_lock_impl(__lockables...); > if (__idx >= 0) > return __idx + 1; > } > __lock.release(); > return -1; > } > else > return 0; > }
Yes, I did try something like that, but we can't use if-constexpr unconditionally. Doing it with two overloads still needed enable_if or tag dispatching, but that's because I retained the use of std::tuple, so was passing around the whole parameter pack. With your suggestion to also drop std::tuple the number of parameters decides which function we call. And we don't instantiate std::tuple. And we can also get rid of the __try_to_lock function, which was only used to deduce the lock type rather than use tuple_element to get it. That's much nicer. > > > [...] > > + template<typename _L0, typename... _L1> > > + void > > + __lock_impl(int& __i, int __depth, _L0& __l0, _L1&... __l1) > > + { > > How about optimizing a likely common case where all lockables have the same > type? In that case we don't require recursion and can manage stack usage much > simpler: The stack usage is bounded by the number of mutexes being locked, which is unlikely to get large, but we can do that. We can do it for try_lock too: template<typename _L1, typename _L2, typename... _L3> int try_lock(_L1& __l1, _L2& __l2, _L3&... __l3) { #if __cplusplus >= 201703L if constexpr (is_same_v<_L1, _L2> && (is_same_v<_L1, _L3> && ...)) { constexpr int _Np = 2 + sizeof...(_L3); unique_lock<_L1> __locks[_Np] = { {__l1, try_to_lock}, {__l2, try_to_lock}, {__l3, try_to_lock}... }; for (int __i = 0; __i < _Np; ++__i) if (!__locks[__i]) return __i; for (auto& __l : __locks) __l.release(); return -1; } else #endif return __detail::__try_lock_impl(__l1, __l2, __l3...); } > > if constexpr ((is_same_v<_L0, _L1> && ...)) > { > constexpr int _Np = 1 + sizeof...(_L1); > std::array<unique_lock<_L0>, _Np> __locks = { > {__l0, defer_lock}, {__l1, defer_lock}... > }; > int __first = 0; > do { > __locks[__first].lock(); > for (int __j = 1; __j < _Np; ++__j) > { > const int __idx = (__first + __j) % _Np; > if (!__locks[__idx].try_lock()) > { > for (int __k = __idx; __k != __first; > __k = __k == 1 ? _Np : __k - 1) > __locks[__k - 1].unlock(); This loop doesn't work if any try_lock fails when first==0, because the loop termination condition is never reached. I find this a bit easier to understand than the loop above, and correct (I think): for (int __k = __j; __k != 0; --__k) __locks[(__first + __k - 1) % _Np].unlock(); I'll finish testing the attached patch. I should probably add more tests, so that each test is run for a set of lockables of the same type, and also for lockables of different types.
commit 7d7cf35ed3c4b9d8c7fc4a52b4c7b1788c85c46d Author: Jonathan Wakely <jwak...@redhat.com> Date: Tue Jun 22 13:35:19 2021 libstdc++: Simplify std::try_lock and std::lock further The std::try_lock and std::lock algorithms can use iteration instead of recursion when all lockables have the same type and can be held by an array of unique_lock<L> objects. By making this change to __detail::__try_lock_impl it also benefits __detail::__lock_impl, which uses it. For std::lock we can just put the iterative version directly in std::lock, to avoid making any call to __detail::__lock_impl. Signed-off-by: Matthias Kretz <m.kr...@gsi.de> Signed-off-by: Jonathan Wakely <jwak...@redhat.com> Co-authored-by: Matthias Kretz <m.kr...@gsi.de> libstdc++-v3/ChangeLog: * include/std/mutex (lock): Replace recursion with iteration when lockables all have the same type. (__detail::__try_lock_impl): Likewise. Pass lockables as parameters, instead of a tuple. Always lock the first one, and recurse for the rest. (__detail::__lock_impl): Adjust call to __try_lock_impl. (__detail::__try_to_lock): Remove. diff --git a/libstdc++-v3/include/std/mutex b/libstdc++-v3/include/std/mutex index 5f2d8f9ee7b..d1ba0c95397 100644 --- a/libstdc++-v3/include/std/mutex +++ b/libstdc++-v3/include/std/mutex @@ -514,39 +514,53 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION /// @cond undocumented namespace __detail { + // Lock the last lockable, after all previous ones are locked. template<typename _Lockable> - inline unique_lock<_Lockable> - __try_to_lock(_Lockable& __l) - { return unique_lock<_Lockable>{__l, try_to_lock}; } - - // Lock the last element of the tuple, after all previous ones are locked. - template<int _Idx, typename... _Lockables> - inline __enable_if_t<_Idx + 1 == sizeof...(_Lockables), int> - __try_lock_impl(tuple<_Lockables&...>& __lockables) + inline int + __try_lock_impl(_Lockable& __lockable) { - if (auto __lock = __detail::__try_to_lock(std::get<_Idx>(__lockables))) + if (unique_lock<_Lockable> __lock{__lockable, try_to_lock}) { __lock.release(); return -1; } else - return _Idx; + return 0; } - // Lock tuple elements starting from _Idx. - template<int _Idx, typename... _Lockables> - inline __enable_if_t<_Idx + 1 != sizeof...(_Lockables), int> - __try_lock_impl(tuple<_Lockables&...>& __lockables) + // Lock each lockable in turn. + template<typename _L0, typename... _Lockables> + inline int + __try_lock_impl(_L0& __l0, _Lockables&... __lockables) { - if (auto __lock = __detail::__try_to_lock(std::get<_Idx>(__lockables))) +#if __cplusplus >= 201703L + if constexpr ((is_same_v<_L0, _Lockables> && ...)) { - int __idx = __detail::__try_lock_impl<_Idx + 1>(__lockables); - if (__idx == -1) - __lock.release(); - return __idx; + constexpr int _Np = 1 + sizeof...(_Lockables); + unique_lock<_L0> __locks[_Np] = { + {__l0, try_to_lock}, {__lockables, try_to_lock}... + }; + for (int __i = 0; __i < _Np; ++__i) + if (!__locks[__i]) + return __i; + for (auto& __l : __locks) + __l.release(); + return -1; } else - return _Idx; +#endif + if (unique_lock<_L0> __lock{__l0, try_to_lock}) + { + int __idx = __detail::__try_lock_impl(__lockables...); + if (__idx == -1) + { + __lock.release(); + return -1; + } + return __idx + 1; + } + else + return 0; } } // namespace __detail @@ -562,12 +576,11 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION * * Sequentially calls try_lock() on each argument. */ - template<typename _Lock1, typename _Lock2, typename... _Lock3> + template<typename _L1, typename _L2, typename... _L3> int - try_lock(_Lock1& __l1, _Lock2& __l2, _Lock3&... __l3) + try_lock(_L1& __l1, _L2& __l2, _L3&... __l3) { - auto __lockables = std::tie(__l1, __l2, __l3...); - return __detail::__try_lock_impl<0>(__lockables); + return __detail::__try_lock_impl(__l1, __l2, __l3...); } /// @cond undocumented @@ -589,8 +602,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION int __failed = 1; // index that couldn't be locked { unique_lock<_L0> __first(__l0); - auto __rest = std::tie(__l1...); - __failed += __detail::__try_lock_impl<0>(__rest); + __failed += __detail::__try_lock_impl(__l1...); if (!__failed) { __i = -1; // finished @@ -620,15 +632,45 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION * @post All arguments are locked. * * All arguments are locked via a sequence of calls to lock(), try_lock() - * and unlock(). If the call exits via an exception any locks that were - * obtained will be released. + * and unlock(). If this function exits via an exception any locks that + * were obtained will be released. */ template<typename _L1, typename _L2, typename... _L3> void lock(_L1& __l1, _L2& __l2, _L3&... __l3) { - int __i = 0; - __detail::__lock_impl(__i, 0, __l1, __l2, __l3...); +#if __cplusplus >= 201703L + if constexpr (is_same_v<_L1, _L2> && (is_same_v<_L1, _L3> && ...)) + { + constexpr int _Np = 2 + sizeof...(_L3); + unique_lock<_L1> __locks[] = { + {__l1, defer_lock}, {__l2, defer_lock}, {__l3, defer_lock}... + }; + int __first = 0; + do { + __locks[__first].lock(); + for (int __j = 1; __j < _Np; ++__j) + { + const int __idx = (__first + __j) % _Np; + if (!__locks[__idx].try_lock()) + { + for (int __k = __j; __k != 0; --__k) + __locks[(__first + __k - 1) % _Np].unlock(); + __first = __idx; + break; + } + } + } while (!__locks[__first]); + + for (auto& __l : __locks) + __l.release(); + } + else +#endif + { + int __i = 0; + __detail::__lock_impl(__i, 0, __l1, __l2, __l3...); + } } #if __cplusplus >= 201703L