On Fri, 18 Oct 2024, Jonathan Wakely wrote: > On 16/10/24 21:39 -0400, Patrick Palka wrote: > > On Tue, 15 Oct 2024, Jonathan Wakely wrote: > > > +#if __cplusplus < 201103L > > > + > > > + // True if we can unwrap _Iter to get a pointer by using > > > std::__niter_base. > > > + template<typename _Iter> > > > + struct __unwrappable_niter > > > + { > > > + template<typename> struct __is_ptr { enum { __value = 0 }; }; > > > + template<typename _Tp> struct __is_ptr<_Tp*> { enum { __value = 1 > > > }; }; > > > + > > > + typedef __decltype(std::__niter_base(*(_Iter*)0)) _Base; > > > + > > > + enum { __value = __is_ptr<_Base>::__value }; > > > + }; > > > > It might be slightly cheaper to define this without the nested class > > template as: > > > > template<typename _Iter, typename _Base = > > __decltype(std::__niter_base(*(_Iter*)0))> > > struct __unwrappable_niter > > { enum { __value = false }; }; > > > > template<typename _Iter, typename _Tp> > > struct __unwrappable_niter<_Iter, _Tp*> > > { enum { __value = true }; };
One minor nit, we might as well use 'value' since it's a reserved name even in C++98? > > Nice. I think after spending a while failing to make any C++98 > metaprogramming work for __memcpyable in cpp_type_traits.h I was not > in the mood for fighting C++98 any more :-) But this works well. > > > > + > > > + // Use template specialization for C++98 when 'if constexpr' can't be > > > used. > > > + template<bool _CanMemcpy> > > > struct __uninitialized_copy > > > { > > > template<typename _InputIterator, typename _ForwardIterator> > > > @@ -186,53 +172,150 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION > > > template<> > > > struct __uninitialized_copy<true> > > > { > > > + // Overload for generic iterators. > > > template<typename _InputIterator, typename _ForwardIterator> > > > static _ForwardIterator > > > __uninit_copy(_InputIterator __first, _InputIterator __last, > > > _ForwardIterator __result) > > > - { return std::copy(__first, __last, __result); } > > > - }; > > > + { > > > + if (__unwrappable_niter<_InputIterator>::__value > > > + && __unwrappable_niter<_ForwardIterator>::__value) > > > + { > > > + __uninit_copy(std::__niter_base(__first), > > > + std::__niter_base(__last), > > > + std::__niter_base(__result)); > > > + std::advance(__result, std::distance(__first, __last)); > > > + return __result; > > > + } > > > + else > > > + return std::__do_uninit_copy(__first, __last, __result); > > > + } > > > > > > + // Overload for pointers. > > > + template<typename _Tp, typename _Up> > > > + static _Up* > > > + __uninit_copy(_Tp* __first, _Tp* __last, _Up* __result) > > > + { > > > + // Ensure that we don't successfully memcpy in cases that should be > > > + // ill-formed because is_constructible<_Up, _Tp&> is false. > > > + typedef __typeof__(static_cast<_Up>(*__first)) __check > > > + __attribute__((__unused__)); > > > + > > > + if (const ptrdiff_t __n = __last - __first) > > > > Do we have to worry about the __n == 1 case here like in the C++11 code > > path? > > Actually I think we don't need to worry about it in either case. > > C++20 had a note in [specialized.algorithms.general]/3 that said: > > [Note 1: When invoked on ranges of potentially-overlapping subobjects > ([intro.object]), the algorithms specified in [specialized.algorithms] > result in undefined behavior. — end note] > > The reason is that the uninitialized algos create new objects at the > specified storage locations, and creating new objects reuses storage, > which ends the lifetime of any other objects in that storage. That > includes any objects that were in tail padding within that storage. > > See Casey's Feb 2023 comment at > https://github.com/cplusplus/draft/issues/6143 > > That note was removed for C++23 (which is unfortunate IMHO), but the > algos still reuse storage by creating new objects, and so still end > the lifetime of potentially-overlapping subobjects within that > storage. > > For std::copy there are no new objects created, and the effects are > specified in terms of assignment, which does not reuse storage. A > compiler-generated trivial copy assignment operator is careful to not > overwrite tail padding, so we can't use memmove if it would produce > different effects. > > tl;dr I think I can remove the __n == 1 handling from the C++11 paths. Nice, makes sense > > > > + { > > > + __builtin_memcpy(__result, __first, __n * sizeof(_Tp)); > > > + __result += __n; > > > + } > > > + return __result; > > > + } > > > + }; > > > +#endif > > > /// @endcond > > > > > > +#pragma GCC diagnostic push > > > +#pragma GCC diagnostic ignored "-Wc++17-extensions" > > > /** > > > * @brief Copies the range [first,last) into result. > > > * @param __first An input iterator. > > > * @param __last An input iterator. > > > - * @param __result An output iterator. > > > - * @return __result + (__first - __last) > > > + * @param __result A forward iterator. > > > + * @return __result + (__last - __first) > > > * > > > - * Like copy(), but does not require an initialized output range. > > > + * Like std::copy, but does not require an initialized output range. > > > */ > > > template<typename _InputIterator, typename _ForwardIterator> > > > inline _ForwardIterator > > > uninitialized_copy(_InputIterator __first, _InputIterator __last, > > > _ForwardIterator __result) > > > { > > > + // We can use memcpy to copy the ranges under these conditions: > > > + // > > > + // _ForwardIterator and _InputIterator are both contiguous > > > iterators, > > > + // so that we can turn them into pointers to pass to memcpy. > > > + // Before C++20 we can't detect all contiguous iterators, so we > > > only > > > + // handle built-in pointers and __normal_iterator<T*, C> types. > > > + // > > > + // The value types of both iterators are trivially-copyable types, > > > + // so that memcpy is not undefined and can begin the lifetime of > > > + // new objects in the output range. > > > + // > > > + // Finally, memcpy from the source type, S, to the destination > > > type, D, > > > + // must give the same value as initialization of D from S would > > > give. > > > + // We require is_trivially_constructible<D, S> to be true, but that > > > is > > > + // not sufficient. Some cases of trivial initialization are not > > > just a > > > + // bitwise copy, even when sizeof(D) == sizeof(S), > > > + // e.g. bit_cast<unsigned>(1.0f) != 1u because the corresponding > > > bits > > > + // of the value representations do not have the same meaning. > > > + // We cannot tell when this condition is true in general, > > > + // so we rely on the __memcpyable trait. > > > + > > > +#if __cplusplus >= 201103L > > > + using _Dest = decltype(std::__niter_base(__result)); > > > + using _Src = decltype(std::__niter_base(__first)); > > > + using _ValT = typename > > > iterator_traits<_ForwardIterator>::value_type; > > > + > > > + if constexpr (!__is_trivially_constructible(_ValT, > > > decltype(*__first))) > > > + return std::__do_uninit_copy(__first, __last, __result); > > > + else if constexpr (__memcpyable<_Dest, _Src>::__value) > > > + { > > > + ptrdiff_t __n = __last - __first; > > > + if (__builtin_expect(__n > 1, true)) > > > > Could we use [[__likely__]] instead? > > We could indeed. > > > > + { > > > + using _ValT = typename remove_pointer<_Src>::type; > > > + __builtin_memcpy(std::__niter_base(__result), > > > + std::__niter_base(__first), > > > + __n * sizeof(_ValT)); > > > + __result += __n; > > > + } > > > + else if (__n == 1) // memcpy could overwrite tail padding > > > + std::_Construct(__result++, *__first); > > > + return __result; > > > + } > > > +#if __cpp_lib_concepts > > > + else if constexpr (contiguous_iterator<_ForwardIterator> > > > + && contiguous_iterator<_InputIterator>) > > > + { > > > + using _DestPtr = decltype(std::to_address(__result)); > > > + using _SrcPtr = decltype(std::to_address(__first)); > > > + if constexpr (__memcpyable<_DestPtr, _SrcPtr>::__value) > > > + { > > > + if (auto __n = __last - __first; __n > 1) [[likely]] > > > + { > > > + void* __dest = std::to_address(__result); > > > + const void* __src = std::to_address(__first); > > > + size_t __nbytes = __n * sizeof(remove_pointer_t<_DestPtr>); > > > + __builtin_memmove(__dest, __src, __nbytes); > > > > Why do we need to use memmove instead of memcpy here? > > Looks like I just forgot to change it here when I realised that we can > use memcpy instead of memmove. Oops. > > > > + __result += __n; > > > + } > > > + else if (__n == 1) // memcpy could overwrite tail padding > > > + std::construct_at(std::to_address(__result++), *__first); > > > + return __result; > > > + } > > > + else > > > + return std::__do_uninit_copy(__first, __last, __result); > > > + } > > > +#endif > > > + else > > > + return std::__do_uninit_copy(__first, __last, __result); > > > +#else // C++98 > > > typedef typename iterator_traits<_InputIterator>::value_type > > > _ValueType1; > > > typedef typename iterator_traits<_ForwardIterator>::value_type > > > _ValueType2; > > > > > > - // _ValueType1 must be trivially-copyable to use memmove, so don't > > > - // bother optimizing to std::copy if it isn't. > > > - // XXX Unnecessary because std::copy would check it anyway? > > > - const bool __can_memmove = __is_trivial(_ValueType1); > > > + const bool __can_memcpy > > > + = __memcpyable<_ValueType1*, _ValueType2*>::__value > > > + && __is_trivially_constructible(_ValueType2, > > > __decltype(*__first)); > > > > > > -#if __cplusplus < 201103L > > > - typedef typename iterator_traits<_InputIterator>::reference _From; > > > -#else > > > - using _From = decltype(*__first); > > > + return __uninitialized_copy<__can_memcpy>:: > > > + __uninit_copy(__first, __last, __result); > > > #endif > > > - const bool __assignable > > > - = _GLIBCXX_USE_ASSIGN_FOR_INIT(_ValueType2, _From); > > > - > > > - return std::__uninitialized_copy<__can_memmove && __assignable>:: > > > - __uninit_copy(__first, __last, __result); > > > } > > > +#pragma GCC diagnostic pop > > > > > > /// @cond undocumented > > > > > > + // This is the default implementation of std::uninitialized_fill. > > > template<typename _ForwardIterator, typename _Tp> > > > _GLIBCXX20_CONSTEXPR void > > > __do_uninit_fill(_ForwardIterator __first, _ForwardIterator __last, > > > @@ -244,12 +327,14 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION > > > __guard.release(); > > > } > > > > > > - template<bool _TrivialValueType> > > > +#if __cplusplus < 201103L > > > + // Use template specialization for C++98 when 'if constexpr' can't be > > > used. > > > + template<bool _CanMemset> > > > struct __uninitialized_fill > > > { > > > template<typename _ForwardIterator, typename _Tp> > > > - static void > > > - __uninit_fill(_ForwardIterator __first, _ForwardIterator __last, > > > + static void > > > + __uninit_fill(_ForwardIterator __first, _ForwardIterator __last, > > > const _Tp& __x) > > > { std::__do_uninit_fill(__first, __last, __x); } > > > }; > > > @@ -257,56 +342,129 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION > > > template<> > > > struct __uninitialized_fill<true> > > > { > > > + // Overload for generic iterators. > > > template<typename _ForwardIterator, typename _Tp> > > > - static void > > > - __uninit_fill(_ForwardIterator __first, _ForwardIterator __last, > > > + static void > > > + __uninit_fill(_ForwardIterator __first, _ForwardIterator __last, > > > const _Tp& __x) > > > - { std::fill(__first, __last, __x); } > > > - }; > > > + { > > > + if (__unwrappable_niter<_ForwardIterator>::__value) > > > + __uninit_fill(std::__niter_base(__first), > > > + std::__niter_base(__last), > > > + __x); > > > + else > > > + std::__do_uninit_copy(__first, __last, __x); > > > + } > > > > > > + // Overload for pointers. > > > + template<typename _Up, typename _Tp> > > > + static void > > > + __uninit_fill(_Up* __first, _Up* __last, const _Tp& __x) > > > + { > > > + // Ensure that we don't successfully memset in cases that should be > > > + // ill-formed because is_constructible<_Up, const _Tp&> is false. > > > + typedef __typeof__(static_cast<_Up>(__x)) __check > > > + __attribute__((__unused__)); > > > + > > > + if (__first != __last) > > > + __builtin_memset(__first, (int)__x, __last - __first); > > > > Maybe (unsigned char)__x for consistency with the C++11 code path? > > Yes, I have no idea why I did (int) there. > > > > + } > > > + }; > > > +#endif > > > /// @endcond > > > > > > /** > > > * @brief Copies the value x into the range [first,last). > > > - * @param __first An input iterator. > > > - * @param __last An input iterator. > > > + * @param __first A forward iterator. > > > + * @param __last A forward iterator. > > > * @param __x The source value. > > > * @return Nothing. > > > * > > > - * Like fill(), but does not require an initialized output range. > > > + * Like std::fill, but does not require an initialized output range. > > > */ > > > template<typename _ForwardIterator, typename _Tp> > > > inline void > > > uninitialized_fill(_ForwardIterator __first, _ForwardIterator __last, > > > const _Tp& __x) > > > { > > > + // We would like to use memset to optimize this loop when possible. > > > + // As for std::uninitialized_copy, the optimization requires > > > + // contiguous iterators and trivially copyable value types, > > > + // with the additional requirement that sizeof(_Tp) == 1 because > > > + // memset only writes single bytes. > > > + > > > + // FIXME: We could additionally enable this for 1-byte enums. > > > + // Maybe any 1-byte Val if is_trivially_constructible<Val, const > > > T&>? > > > + > > > typedef typename iterator_traits<_ForwardIterator>::value_type > > > _ValueType; > > > > > > - // Trivial types do not need a constructor to begin their lifetime, > > > - // so try to use std::fill to benefit from its memset optimization. > > > - const bool __can_fill > > > - = _GLIBCXX_USE_ASSIGN_FOR_INIT(_ValueType, const _Tp&); > > > +#if __cplusplus >= 201103L > > > +#pragma GCC diagnostic push > > > +#pragma GCC diagnostic ignored "-Wc++17-extensions" > > > + if constexpr (__is_byte<_ValueType>::__value) > > > + if constexpr (is_same<_ValueType, _Tp>::value > > > + || is_integral<_Tp>::value) > > > + { > > > + using _BasePtr = decltype(std::__niter_base(__first)); > > > + if constexpr (is_pointer<_BasePtr>::value) > > > + { > > > + void* __dest = std::__niter_base(__first); > > > + ptrdiff_t __n = __last - __first; > > > + if (__builtin_expect(__n > 0, true)) > > > + __builtin_memset(__dest, (unsigned char)__x, __n); > > > + return; > > > + } > > > +#if __cpp_lib_concepts > > > + else if constexpr (contiguous_iterator<_ForwardIterator>) > > > + { > > > + auto __dest = std::__to_address(__first); > > > + auto __n = __last - __first; > > > + if (__builtin_expect(__n > 0, true)) > > > + __builtin_memset(__dest, (unsigned char)__x, __n); > > > + return; > > > + } > > > +#endif > > > + } > > > + std::__do_uninit_fill(__first, __last, __x); > > > +#pragma GCC diagnostic pop > > > +#else // C++98 > > > + const bool __can_memset = __is_byte<_ValueType>::__value > > > + && __is_integer<_Tp>::__value; > > > > > > - std::__uninitialized_fill<__can_fill>:: > > > - __uninit_fill(__first, __last, __x); > > > + __uninitialized_fill<__can_memset>::__uninit_fill(__first, __last, > > > __x); > > > +#endif > > > } > > > > > > /// @cond undocumented > > > > > > + // This is the default implementation of std::uninitialized_fill_n. > > > template<typename _ForwardIterator, typename _Size, typename _Tp> > > > _GLIBCXX20_CONSTEXPR > > > _ForwardIterator > > > __do_uninit_fill_n(_ForwardIterator __first, _Size __n, const _Tp& > > > __x) > > > { > > > _UninitDestroyGuard<_ForwardIterator> __guard(__first); > > > - for (; __n > 0; --__n, (void) ++__first) > > > +#if __cplusplus >= 201103L > > > +#pragma GCC diagnostic push > > > +#pragma GCC diagnostic ignored "-Wc++17-extensions" > > > + if constexpr (is_integral<_Size>::value) > > > + // Loop will never terminate if __n is negative. > > > + __glibcxx_assert(__n >= 0); > > > + else if constexpr (is_floating_point<_Size>::value) > > > + // Loop will never terminate if __n is not an integer. > > > + __glibcxx_assert(__n >= 0 && static_cast<size_t>(__n) == __n); > > > +#pragma GCC diagnostic pop > > > +#endif > > > + for (; __n--; ++__first) > > > std::_Construct(std::__addressof(*__first), __x); > > > __guard.release(); > > > return __first; > > > } > > > > > > - template<bool _TrivialValueType> > > > +#if __cplusplus < 201103L > > > + // Use template specialization for C++98 when 'if constexpr' can't be > > > used. > > > + template<bool _CanMemset> > > > struct __uninitialized_fill_n > > > { > > > template<typename _ForwardIterator, typename _Size, typename _Tp> > > > @@ -319,47 +477,92 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION > > > template<> > > > struct __uninitialized_fill_n<true> > > > { > > > + // Overload for generic iterators. > > > template<typename _ForwardIterator, typename _Size, typename _Tp> > > > static _ForwardIterator > > > __uninit_fill_n(_ForwardIterator __first, _Size __n, > > > const _Tp& __x) > > > - { return std::fill_n(__first, __n, __x); } > > > + { > > > + if (__unwrappable_niter<_ForwardIterator>::__value) > > > + { > > > + _ForwardIterator __last = __first; > > > + std::advance(__last, __n); > > > + __uninitialized_fill<true>::__uninit_fill(__first, __last, __x); > > > + return __last; > > > + } > > > + else > > > + return std::__do_uninit_fill_n(__first, __n, __x); > > > + } > > > }; > > > - > > > +#endif > > > /// @endcond > > > > > > +#pragma GCC diagnostic push > > > +#pragma GCC diagnostic ignored "-Wc++17-extensions" > > > // _GLIBCXX_RESOLVE_LIB_DEFECTS > > > // DR 1339. uninitialized_fill_n should return the end of its range > > > /** > > > * @brief Copies the value x into the range [first,first+n). > > > - * @param __first An input iterator. > > > + * @param __first A forward iterator. > > > * @param __n The number of copies to make. > > > * @param __x The source value. > > > - * @return Nothing. > > > + * @return __first + __n. > > > * > > > - * Like fill_n(), but does not require an initialized output range. > > > + * Like std::fill_n, but does not require an initialized output range. > > > */ > > > template<typename _ForwardIterator, typename _Size, typename _Tp> > > > inline _ForwardIterator > > > uninitialized_fill_n(_ForwardIterator __first, _Size __n, const _Tp& > > > __x) > > > { > > > + // See uninitialized_fill conditions. We also require _Size to be > > > + // an integer. The standard only requires _Size to be decrementable > > > + // and contextually convertible to bool, so don't assume first+n > > > works. > > > + > > > + // FIXME: We could additionally enable this for 1-byte enums. > > > + > > > typedef typename iterator_traits<_ForwardIterator>::value_type > > > _ValueType; > > > > > > - // Trivial types do not need a constructor to begin their lifetime, > > > - // so try to use std::fill_n to benefit from its optimizations. > > > - const bool __can_fill > > > - = _GLIBCXX_USE_ASSIGN_FOR_INIT(_ValueType, const _Tp&) > > > - // For arbitrary class types and floating point types we can't > > > assume > > > - // that __n > 0 and std::__size_to_integer(__n) > 0 are equivalent, > > > - // so only use std::fill_n when _Size is already an integral type. > > > - && __is_integer<_Size>::__value; > > > +#if __cplusplus >= 201103L > > > + if constexpr (__is_byte<_ValueType>::__value) > > > + if constexpr (is_integral<_Tp>::value) > > > + if constexpr (is_integral<_Size>::value) > > > + { > > > + using _BasePtr = decltype(std::__niter_base(__first)); > > > + if constexpr (is_pointer<_BasePtr>::value) > > > + { > > > + void* __dest = std::__niter_base(__first); > > > + if (__builtin_expect(__n > 0, true)) > > > + { > > > + __builtin_memset(__dest, (unsigned char)__x, __n); > > > + __first += __n; > > > + } > > > + return __first; > > > + } > > > +#if __cpp_lib_concepts > > > + else if constexpr (contiguous_iterator<_ForwardIterator>) > > > + { > > > + auto __dest = std::__to_address(__first); > > > + if (__builtin_expect(__n > 0, true)) > > > + { > > > + __builtin_memset(__dest, (unsigned char)__x, __n); > > > + __first += __n; > > > + } > > > + return __first; > > > + } > > > +#endif > > > + } > > > + return std::__do_uninit_fill_n(__first, __n, __x); > > > +#else // C++98 > > > + const bool __can_memset = __is_byte<_ValueType>::__value > > > + && __is_integer<_Tp>::__value > > > + && __is_integer<_Size>::__value; > > > > > > - return __uninitialized_fill_n<__can_fill>:: > > > + return __uninitialized_fill_n<__can_memset>:: > > > __uninit_fill_n(__first, __n, __x); > > > +#endif > > > } > > > - > > > -#undef _GLIBCXX_USE_ASSIGN_FOR_INIT > > > +#pragma GCC diagnostic pop > > > > > > /// @cond undocumented > > > > > > @@ -619,7 +822,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION > > > = std::__addressof(*__first); > > > std::_Construct(__val); > > > if (++__first != __last) > > > - std::fill(__first, __last, *__val); > > > + std::uninitialized_fill(__first, __last, *__val); > > > } > > > }; > > > > > > @@ -653,7 +856,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION > > > = std::__addressof(*__first); > > > std::_Construct(__val); > > > ++__first; > > > - __first = std::fill_n(__first, __n - 1, *__val); > > > + __first = std::uninitialized_fill_n(__first, __n - 1, *__val); > > > > These last two changes seem to be missing in the ChangeLog. > > Oops, yes. I'll take them out of this patch entirely. I have a > separate series of patches for those algos anyway. They should use > uninitialized_fill{,_n} instead of fill{,_n} for correctness, but the > conditions for deciding whether to use it need fixing anyway (we don't > care if it's assignable if we're not using std::fill). > > I'll make the changes you suggested (except for a different fix for > the n==1 case, as described above) and push. Thanks for the detailed > review. > >