On Fri, Jun 13, 2025 at 3:54 PM Patrick Palka <ppa...@redhat.com> wrote:
> On Fri, 13 Jun 2025, Tomasz Kaminski wrote: > > > > > > > On Thu, Jun 12, 2025 at 9:05 PM Patrick Palka <ppa...@redhat.com> wrote: > > On Thu, 12 Jun 2025, Patrick Palka wrote: > > > > > On Thu, 12 Jun 2025, Jonathan Wakely wrote: > > > > > > > > > > > > > > > On Thu, 12 Jun 2025, 16:56 Patrick Palka, <ppa...@redhat.com> > wrote: > > > > Tested on x86_64-pc-linux-gnu, does this look OK for > trunk? > > > > > > > > I'm sure if introducing a new overload is preferable to > > > > introducing a constexpr if branch in the existing > overload. > > > > > > > > > > > > I think a constexpr if branch in the existing functions is > cheaper. We need to check the traits either way, but we can avoid doing > overload resolution. > > > > > > Ack. I opted to just define specialized functors for these > helpers > > > instead of using lambdas. That way we can easily optimize the > case > > > where only one of the functions is stateless. > > > > > > Changes in v2: > > > - Use a functor utilizing [[no_unique_address]] instead of a > lambda, > > > to also concisely handle the case where only one of the > functino > > > object is stateless. In turn check is_copy_xible instead of > > > is_default_xible. > > > > > > -- >8 -- > > > > > > When creating a composite comparator/predicate that invokes a > given > > > projection function, we don't need to capture a stateless > function > > > object by reference, instead we can capture it by value and use > > > [[no_unique_address]] to elide its storage. This makes using > > > __make_comp_proj zero-cost in the common case where the > comparator or > > > projection (or both) are stateless. > Update description to mention function/member pointers, where we are also at zero cost compared to passing them by value. Outside of this note looks good to me. > > > > > > libstdc++-v3/ChangeLog: > > > > > > * include/bits/ranges_algo.h (__detail::_Comp_proj): New. > > > (__detail::__make_comp_proj): Use it instead. > > > (__detail::_Pred_proj): New. > > > (__detail::__make_pred_proj): Use it instead. > > > --- > > > libstdc++-v3/include/bits/ranges_algo.h | 70 > +++++++++++++++++++------ > > > 1 file changed, 54 insertions(+), 16 deletions(-) > > > > > > diff --git a/libstdc++-v3/include/bits/ranges_algo.h > b/libstdc++-v3/include/bits/ranges_algo.h > > > index 9f94c6b1d57e..ce48ca047b69 100644 > > > --- a/libstdc++-v3/include/bits/ranges_algo.h > > > +++ b/libstdc++-v3/include/bits/ranges_algo.h > > > @@ -49,27 +49,65 @@ namespace ranges > > > namespace __detail > > > { > > > template<typename _Comp, typename _Proj> > > > - constexpr auto > > > + struct _Comp_proj > > > + { > > > + [[no_unique_address]] > > > + __conditional_t<is_empty_v<_Comp> && > is_copy_constructible_v<_Comp>, > > > + _Comp, _Comp&> _M_comp; > > > > Comparators, predicates and projections in ranges algorithms are > already > > constrained to by copyable, so checking is_copy_constructible_v > here > > is redundant. > > > > -- >8 -- > > > > Subject: [PATCH] libstdc++: Optimize __make_comp_proj and > __make_pred_proj > > > > Changes in v3: > > - Remove redundant is_copy_constructible check when deciding > > whether to capture by reference or by value. > > > > Changes in v2: > > - Use a functor utilizing [[no_unique_address]] instead of a > lambda, > > to also concisely handle the case where only one of the > functino > > object is stateless. In turn check is_copy_xible instead of > > is_default_xible. > > > > -- >8 -- > > > > When creating a composite comparator/predicate that invokes a given > > projection function, we don't need to capture a stateless function > > object by reference, instead we can capture it by value and use > > [[no_unique_address]] to elide its storage. This makes using > > __make_comp_proj zero-cost in the common case where the comparator > > and projection are both stateless. > > > > libstdc++-v3/ChangeLog: > > > > * include/bits/ranges_algo.h (__detail::_Comp_proj): New. > > (__detail::__make_comp_proj): Use it instead. > > (__detail::_Pred_proj): New. > > (__detail::__make_pred_proj): Use it instead. > > --- > > libstdc++-v3/include/bits/ranges_algo.h | 64 > ++++++++++++++++++------- > > 1 file changed, 48 insertions(+), 16 deletions(-) > > > > diff --git a/libstdc++-v3/include/bits/ranges_algo.h > b/libstdc++-v3/include/bits/ranges_algo.h > > index cc182d110b30..a7df4c9a13ca 100644 > > --- a/libstdc++-v3/include/bits/ranges_algo.h > > +++ b/libstdc++-v3/include/bits/ranges_algo.h > > @@ -49,27 +49,59 @@ namespace ranges > > namespace __detail > > { > > template<typename _Comp, typename _Proj> > > - constexpr auto > > + struct _Comp_proj > > + { > > + [[no_unique_address]] > > + __conditional_t<is_empty_v<_Comp>, _Comp, _Comp&> > _M_comp; > > + [[no_unique_address]] > > + __conditional_t<is_empty_v<_Proj>, _Proj, _Proj&> > _M_proj; > > > > I would also store function pointers and member pointers (for proj) by > value, > > instead of by reference. So instead of is_empty_v I would check: > > is_empty_v<_Func> || is_pointer_v<_Func> || is_member_pointer_v<_Func> > > Or just: > > __or_v<is_scalar<_Func>, is_empty<_Func>>. > > Sounds good, like so? > > -- >8 -- > > Subject: [PATCH] libstdc++: Optimize __make_comp/pred_proj for empty/scalar > types > > Changes in v4: > - Also capture scalar (non-empty) functions by value. > > Changes in v3: > - Remove redundant is_copy_constructible check when deciding > whether to capture by reference or by value. > > Changes in v2: > - Use a functor utilizing [[no_unique_address]] instead of a lambda, > to also concisely handle the case where only one of the functino > object is stateless. In turn check is_copy_xible instead of > is_default_xible. > > -- >8 -- > > When creating a composite comparator/predicate that invokes a given > projection function, we don't need to capture a scalar (such as a > function pointer or member pointer) or empty object by reference, > instead capture it by value and use [[no_unique_address]] to elide its > storage in the empty case. This makes using __make_comp_proj zero-cost > in the common case where the comparator and projection are both > empty/scalars. > > libstdc++-v3/ChangeLog: > > * include/bits/ranges_algo.h (__detail::__by_ref_or_value_fn): New. > (__detail::_Comp_proj): New. > (__detail::__make_comp_proj): Use it instead. > (__detail::_Pred_proj): New. > (__detail::__make_pred_proj): Use it instead. > --- > libstdc++-v3/include/bits/ranges_algo.h | 64 ++++++++++++++++++------- > 1 file changed, 48 insertions(+), 16 deletions(-) > > diff --git a/libstdc++-v3/include/bits/ranges_algo.h > b/libstdc++-v3/include/bits/ranges_algo.h > index cc182d110b30..7447fbd21f7e 100644 > --- a/libstdc++-v3/include/bits/ranges_algo.h > +++ b/libstdc++-v3/include/bits/ranges_algo.h > @@ -48,28 +48,60 @@ namespace ranges > { > namespace __detail > { > + template<typename _Fp> > + using __by_ref_or_value_fn > + = __conditional_t<is_scalar_v<_Fp> || is_empty_v<_Fp>, _Fp, _Fp&>; > + > template<typename _Comp, typename _Proj> > - constexpr auto > + struct _Comp_proj > + { > + [[no_unique_address]] __by_ref_or_value_fn<_Comp> _M_comp; > + [[no_unique_address]] __by_ref_or_value_fn<_Proj> _M_proj; > + > + constexpr > + _Comp_proj(_Comp& __comp, _Proj& __proj) > + : _M_comp(__comp), _M_proj(__proj) > + { } > + > + template<typename _Tp, typename _Up> > + constexpr bool > + operator()(_Tp&& __x, _Up&& __y) > + { > + return std::__invoke(_M_comp, > + std::__invoke(_M_proj, > std::forward<_Tp>(__x)), > + std::__invoke(_M_proj, > std::forward<_Up>(__y))); > + } > + }; > + > + template<typename _Comp, typename _Proj> > + constexpr _Comp_proj<_Comp, _Proj> > __make_comp_proj(_Comp& __comp, _Proj& __proj) > + { return {__comp, __proj}; } > + > + template<typename _Pred, typename _Proj> > + struct _Pred_proj > { > - return [&] (auto&& __lhs, auto&& __rhs) -> bool { > - using _TL = decltype(__lhs); > - using _TR = decltype(__rhs); > - return std::__invoke(__comp, > - std::__invoke(__proj, > std::forward<_TL>(__lhs)), > - std::__invoke(__proj, > std::forward<_TR>(__rhs))); > - }; > - } > + [[no_unique_address]] __by_ref_or_value_fn<_Pred> _M_pred; > + [[no_unique_address]] __by_ref_or_value_fn<_Proj> _M_proj; > + > + constexpr > + _Pred_proj(_Pred& __pred, _Proj& __proj) > + : _M_pred(__pred), _M_proj(__proj) > + { } > + > + template<typename _Tp> > + constexpr bool > + operator()(_Tp&& __x) > + { > + return std::__invoke(_M_pred, > + std::__invoke(_M_proj, > std::forward<_Tp>(__x))); > + } > + }; > > template<typename _Pred, typename _Proj> > - constexpr auto > + constexpr _Pred_proj<_Pred, _Proj> > __make_pred_proj(_Pred& __pred, _Proj& __proj) > - { > - return [&] <typename _Tp> (_Tp&& __arg) -> bool { > - return std::__invoke(__pred, > - std::__invoke(__proj, > std::forward<_Tp>(__arg))); > - }; > - } > + { return {__pred, __proj}; } > } // namespace __detail > > struct __all_of_fn > -- > 2.50.0.rc2 >