Hi, this patch turns memmove into memcpy where we can and also avoids the extra guard checking whether the block is non-empty. This does not show up as a performance improvement in my push_back micro-benchmark because vector reallocation does not happen that often. In general, however, we optimize memcpy better than memmove (we can inline it in some cases). Saving the extra 3 instructions makes push_back more likely to be inlined, though (the estimate is now 23).
I also filed PR112653. I think for the default allocator we should be able to work out from PTA that the memmove can be memcpy. Honestly, I am not quite sure if I need to have the first __relocate_copy_a_1 template. It handles the case where we can't use memmove, but with my limited C++ skills I don't see how to get rid of it or make it a wrapper for __relocate_a_1, which is identical. Regtested on x86_64-linux. libstdc++-v3/ChangeLog: * include/bits/stl_uninitialized.h (__relocate_copy_a_1): New member functions. (__relocate_a_1): Do not check count to be non-zero before calling memmove. (__relocate_copy_a): New member function. * include/bits/stl_vector.h (_S_do_relocate_copy): New member function. * include/bits/vector.tcc (reserve, _M_realloc_append, _M_realloc_insert, _M_default_append): Use _S_relocate_copy. diff --git a/libstdc++-v3/include/bits/stl_uninitialized.h b/libstdc++-v3/include/bits/stl_uninitialized.h index 1282af3bc43..983fa315e1b 100644 --- a/libstdc++-v3/include/bits/stl_uninitialized.h +++ b/libstdc++-v3/include/bits/stl_uninitialized.h @@ -1104,6 +1104,28 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION std::__addressof(*__first), __alloc); return __cur; } + template <typename _InputIterator, typename _ForwardIterator, + typename _Allocator> + _GLIBCXX20_CONSTEXPR + inline _ForwardIterator + __relocate_copy_a_1(_InputIterator __first, _InputIterator __last, + _ForwardIterator __result, _Allocator& __alloc) + noexcept(noexcept(std::__relocate_object_a(std::addressof(*__result), + std::addressof(*__first), + __alloc))) + { + typedef typename iterator_traits<_InputIterator>::value_type + _ValueType; + typedef typename iterator_traits<_ForwardIterator>::value_type + _ValueType2; + static_assert(std::is_same<_ValueType, _ValueType2>::value, + "relocation is only possible for values of the same type"); + _ForwardIterator __cur = __result; + for (; __first != __last; ++__first, (void)++__cur) + std::__relocate_object_a(std::__addressof(*__cur), + std::__addressof(*__first), 
__alloc); + return __cur; + } #if _GLIBCXX_HOSTED template <typename _Tp, typename _Up> @@ -1114,20 +1136,46 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION [[__maybe_unused__]] allocator<_Up>& __alloc) noexcept { ptrdiff_t __count = __last - __first; - if (__count > 0) - { #ifdef __cpp_lib_is_constant_evaluated - if (std::is_constant_evaluated()) + if (std::is_constant_evaluated()) + { + // Can't use memmove. Wrap the pointer so that __relocate_a_1 + // resolves to the non-trivial overload above. + if (__count > 0) { - // Can't use memmove. Wrap the pointer so that __relocate_a_1 - // resolves to the non-trivial overload above. __gnu_cxx::__normal_iterator<_Tp*, void> __out(__result); __out = std::__relocate_a_1(__first, __last, __out, __alloc); return __out.base(); } + return __result; + } #endif - __builtin_memmove(__result, __first, __count * sizeof(_Tp)); + __builtin_memmove(__result, __first, __count * sizeof(_Tp)); + return __result + __count; + } + template <typename _Tp, typename _Up> + _GLIBCXX20_CONSTEXPR + inline __enable_if_t<std::__is_bitwise_relocatable<_Tp>::value, _Tp*> + __relocate_copy_a_1(_Tp* __first, _Tp* __last, + _Tp* __result, + [[__maybe_unused__]] allocator<_Up>& __alloc) noexcept + { + ptrdiff_t __count = __last - __first; +#ifdef __cpp_lib_is_constant_evaluated + if (std::is_constant_evaluated()) + { + // Can't use memcpy. Wrap the pointer so that __relocate_copy_a_1 + // resolves to the non-trivial overload above. 
+ if (__count > 0) + { + __gnu_cxx::__normal_iterator<_Tp*, void> __out(__result); + __out = std::__relocate_a_1(__first, __last, __out, __alloc); + return __out.base(); + } + return __result; } +#endif + __builtin_memcpy(__result, __first, __count * sizeof(_Tp)); return __result + __count; } #endif @@ -1146,6 +1194,20 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION std::__niter_base(__last), std::__niter_base(__result), __alloc); } + template <typename _InputIterator, typename _ForwardIterator, + typename _Allocator> + _GLIBCXX20_CONSTEXPR + inline _ForwardIterator + __relocate_copy_a(_InputIterator __first, _InputIterator __last, + _ForwardIterator __result, _Allocator& __alloc) + noexcept(noexcept(__relocate_copy_a_1(std::__niter_base(__first), + std::__niter_base(__last), + std::__niter_base(__result), __alloc))) + { + return std::__relocate_copy_a_1(std::__niter_base(__first), + std::__niter_base(__last), + std::__niter_base(__result), __alloc); + } /// @endcond #endif // C++11 diff --git a/libstdc++-v3/include/bits/stl_vector.h b/libstdc++-v3/include/bits/stl_vector.h index 973f4d7e2e9..4f9dba6c3fe 100644 --- a/libstdc++-v3/include/bits/stl_vector.h +++ b/libstdc++-v3/include/bits/stl_vector.h @@ -507,6 +507,31 @@ _GLIBCXX_BEGIN_NAMESPACE_CONTAINER #else using __do_it = __bool_constant<_S_use_relocate()>; return _S_do_relocate(__first, __last, __result, __alloc, __do_it{}); +#endif + } + static pointer + _S_do_relocate_copy(pointer __first, pointer __last, pointer __result, + _Tp_alloc_type& __alloc, true_type) noexcept + { + return std::__relocate_a(__first, __last, __result, __alloc); + } + + static pointer + _S_do_relocate_copy(pointer, pointer, pointer __result, + _Tp_alloc_type&, false_type) noexcept + { return __result; } + // same as _S_relocate but assumes that the destination block + // is disjoint (as in memcpy) + static _GLIBCXX20_CONSTEXPR pointer + _S_relocate_copy(pointer __first, pointer __last, pointer __result, + _Tp_alloc_type& __alloc) noexcept + { 
+#if __cpp_if_constexpr + // All callers have already checked _S_use_relocate() so just do it. + return std::__relocate_copy_a(__first, __last, __result, __alloc); +#else + using __do_it = __bool_constant<_S_use_relocate()>; + return _S_do_relocate_copy(__first, __last, __result, __alloc, __do_it{}); #endif } #endif // C++11 diff --git a/libstdc++-v3/include/bits/vector.tcc b/libstdc++-v3/include/bits/vector.tcc index 0ccef7911b3..2468ad85f49 100644 --- a/libstdc++-v3/include/bits/vector.tcc +++ b/libstdc++-v3/include/bits/vector.tcc @@ -77,8 +77,8 @@ _GLIBCXX_BEGIN_NAMESPACE_CONTAINER if _GLIBCXX17_CONSTEXPR (_S_use_relocate()) { __tmp = this->_M_allocate(__n); - _S_relocate(this->_M_impl._M_start, this->_M_impl._M_finish, - __tmp, _M_get_Tp_allocator()); + _S_relocate_copy(this->_M_impl._M_start, this->_M_impl._M_finish, + __tmp, _M_get_Tp_allocator()); } else #endif @@ -515,11 +515,11 @@ _GLIBCXX_BEGIN_NAMESPACE_CONTAINER if _GLIBCXX17_CONSTEXPR (_S_use_relocate()) { // Relocation cannot throw. - __new_finish = _S_relocate(__old_start, __position.base(), - __new_start, _M_get_Tp_allocator()); + __new_finish = _S_relocate_copy(__old_start, __position.base(), + __new_start, _M_get_Tp_allocator()); ++__new_finish; - __new_finish = _S_relocate(__position.base(), __old_finish, - __new_finish, _M_get_Tp_allocator()); + __new_finish = _S_relocate_copy(__position.base(), __old_finish, + __new_finish, _M_get_Tp_allocator()); } else #endif @@ -644,8 +644,8 @@ _GLIBCXX_BEGIN_NAMESPACE_CONTAINER if _GLIBCXX17_CONSTEXPR (_S_use_relocate()) { // Relocation cannot throw. 
- __new_finish = _S_relocate(__old_start, __old_finish, - __new_start, _M_get_Tp_allocator()); + __new_finish = _S_relocate_copy(__old_start, __old_finish, + __new_start, _M_get_Tp_allocator()); ++__new_finish; } else @@ -865,8 +865,8 @@ _GLIBCXX_BEGIN_NAMESPACE_CONTAINER if _GLIBCXX17_CONSTEXPR (_S_use_relocate()) { - _S_relocate(__old_start, __old_finish, - __new_start, _M_get_Tp_allocator()); + _S_relocate_copy(__old_start, __old_finish, + __new_start, _M_get_Tp_allocator()); } else {