Currently the __relocate_a function implementing in-place relocation for std::vector only optimizes to memcpy for std::allocator, which is safe because we know the precise effects of std::allocator::construct and std::allocator::destroy. We could extend the optimization to other allocators which do not have construct and destroy members, because those will use the default implementations in std::allocator_traits, and we know the effects of those members.
As well as enabling memcpy for more allocators, this also changes __relocate_a to use 'if constexpr' instead of dispatching to a pair of overloaded function templates, and to enable the memcpy optimization for arbitrary contiguous iterators, not only for pointers. libstdc++-v3/ChangeLog: PR libstdc++/87604 * include/bits/stl_uninitialized.h (__relocate_a_1): Remove. (__relocate_a): Extend memcpy optimization to contiguous iterators and to allocators that don't have custom construct and destroy members. --- As Marc pointed out in bugzilla, we are too conservative about using trivial relocation, only enabling it for std::allocator. This extends it to non-standard allocators that don't customize construct and destroy. N.B. this doesn't benefit pmr::polymorphic_allocator because that has a custom construct member, so that it can do uses-allocator construction. We could potentially make it work, because we know that if the type doesn't support uses-allocator construction then polymorphic_allocator will construct objects the same way as std::allocator would. We would need to check whether std::uses_allocator is true and the type has an allocator-extended move constructor (in either the leading or trailing allocator form). Making that work is left for another day. Tested x86_64-linux. 
libstdc++-v3/include/bits/stl_uninitialized.h | 125 ++++++++++-------- 1 file changed, 69 insertions(+), 56 deletions(-) diff --git a/libstdc++-v3/include/bits/stl_uninitialized.h b/libstdc++-v3/include/bits/stl_uninitialized.h index b7e65eb3ca0..be18289463c 100644 --- a/libstdc++-v3/include/bits/stl_uninitialized.h +++ b/libstdc++-v3/include/bits/stl_uninitialized.h @@ -1251,70 +1251,83 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION : __bool_constant<__is_trivial(_Tp)> { }; - template <typename _InputIterator, typename _ForwardIterator, - typename _Allocator> - _GLIBCXX20_CONSTEXPR - inline _ForwardIterator - __relocate_a_1(_InputIterator __first, _InputIterator __last, - _ForwardIterator __result, _Allocator& __alloc) - noexcept(noexcept(std::__relocate_object_a(std::addressof(*__result), - std::addressof(*__first), - __alloc))) - { - typedef typename iterator_traits<_InputIterator>::value_type - _ValueType; - typedef typename iterator_traits<_ForwardIterator>::value_type - _ValueType2; - static_assert(std::is_same<_ValueType, _ValueType2>::value, - "relocation is only possible for values of the same type"); - _ForwardIterator __cur = __result; - for (; __first != __last; ++__first, (void)++__cur) - std::__relocate_object_a(std::__addressof(*__cur), - std::__addressof(*__first), __alloc); - return __cur; - } - -#if _GLIBCXX_HOSTED - template <typename _Tp, typename _Up> - _GLIBCXX20_CONSTEXPR - inline __enable_if_t<std::__is_bitwise_relocatable<_Tp>::value, _Tp*> - __relocate_a_1(_Tp* __first, _Tp* __last, - _Tp* __result, - [[__maybe_unused__]] allocator<_Up>& __alloc) noexcept - { - ptrdiff_t __count = __last - __first; - if (__count > 0) - { -#ifdef __cpp_lib_is_constant_evaluated - if (std::is_constant_evaluated()) - { - // Can't use memcpy. Wrap the pointer so that __relocate_a_1 - // resolves to the non-trivial overload above. 
- __gnu_cxx::__normal_iterator<_Tp*, void> __out(__result); - __out = std::__relocate_a_1(__first, __last, __out, __alloc); - return __out.base(); - } -#endif - __builtin_memcpy(__result, __first, __count * sizeof(_Tp)); - } - return __result + __count; - } -#endif - +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wc++17-extensions" // if constexpr template <typename _InputIterator, typename _ForwardIterator, typename _Allocator> _GLIBCXX20_CONSTEXPR inline _ForwardIterator __relocate_a(_InputIterator __first, _InputIterator __last, _ForwardIterator __result, _Allocator& __alloc) - noexcept(noexcept(__relocate_a_1(std::__niter_base(__first), - std::__niter_base(__last), - std::__niter_base(__result), __alloc))) + noexcept(noexcept(__relocate_object_a(std::__to_address(__first), + std::__to_address(__result), + __alloc))) { - return std::__relocate_a_1(std::__niter_base(__first), - std::__niter_base(__last), - std::__niter_base(__result), __alloc); + using _Dest = decltype(std::__niter_base(__result)); + using _Src = decltype(std::__niter_base(__first)); + using _ValT = typename iterator_traits<_ForwardIterator>::value_type; + using _ValT2 = typename iterator_traits<_InputIterator>::value_type; + static_assert(is_same<_ValT, _ValT2>::value, + "relocation is only possible for values of the same type"); + + if constexpr (__is_bitwise_relocatable<_ValT>::value) + { + struct _ATraits : __allocator_traits_base + { + using __allocator_traits_base::__has_construct; + using __allocator_traits_base::__has_destroy; + }; +#if _GLIBCXX_HOSTED + constexpr bool __is_std_allocator + = is_same<_Allocator, allocator<_ValT>>::value; +#else + constexpr bool __is_std_allocator = false; +#endif + constexpr bool __has_custom_cons_dest + = _ATraits::template __has_construct<_Allocator, _ValT> + && _ATraits::template __has_destroy<_Allocator, _ValT>; + + if constexpr (!__is_std_allocator && __has_custom_cons_dest) + ; // fall through to the loop below + else if 
(!std::__is_constant_evaluated()) + { + if constexpr (__and_<is_pointer<_Dest>, is_pointer<_Src>>::value) + { + ptrdiff_t __n = __last - __first; + if (__n > 0) [[__likely__]] + { + __builtin_memcpy(std::__niter_base(__result), + std::__niter_base(__first), + __n * sizeof(_ValT)); + __result += __n; + } + return __result; + } +#if __cpp_lib_concepts + else if constexpr (contiguous_iterator<_Dest> + && contiguous_iterator<_Src>) + { + if (auto __n = __last - __first; __n > 0) [[likely]] + { + void* __dest = std::to_address(__result); + const void* __src = std::to_address(__first); + size_t __nbytes = __n * sizeof(_ValT); + __builtin_memcpy(__dest, __src, __n * sizeof(_ValT)); + __result += __n; + } + return __result; + } +#endif + } + } + + _ForwardIterator __cur = __result; + for (; __first != __last; ++__first, (void)++__cur) + std::__relocate_object_a(std::__addressof(*__cur), + std::__addressof(*__first), __alloc); + return __cur; } +#pragma GCC diagnostic pop /// @endcond #endif // C++11 -- 2.47.1