Hello,
I talked about this last year
(https://gcc.gnu.org/ml/gcc-patches/2017-04/msg01063.html and thread),
this tweaks std::vector move assignment to help gcc generate better code
for it.
For this code
#include <vector>
#include <utility>
typedef std::vector<int> V;
void f(V&a,V&b){a=std::move(b);}
with -O2 -fdump-tree-optimized on powerpc64le-unknown-linux-gnu, we
currently have
_5 = MEM[(int * &)a_3(D)];
MEM[(int * &)a_3(D)] = 0B;
MEM[(int * &)a_3(D) + 8] = 0B;
MEM[(int * &)a_3(D) + 16] = 0B;
_6 = MEM[(int * &)b_2(D)];
MEM[(int * &)a_3(D)] = _6;
MEM[(int * &)b_2(D)] = 0B;
_7 = MEM[(int * &)a_3(D) + 8];
_8 = MEM[(int * &)b_2(D) + 8];
MEM[(int * &)a_3(D) + 8] = _8;
MEM[(int * &)b_2(D) + 8] = _7;
_9 = MEM[(int * &)a_3(D) + 16];
_10 = MEM[(int * &)b_2(D) + 16];
MEM[(int * &)a_3(D) + 16] = _10;
MEM[(int * &)b_2(D) + 16] = _9;
if (_5 != 0B)
with the patch, we go down to
_3 = MEM[(const struct _Vector_impl_data &)a_4(D)]._M_start;
_5 = MEM[(const struct _Vector_impl_data &)b_2(D)]._M_start;
MEM[(struct _Vector_impl_data *)a_4(D)]._M_start = _5;
_6 = MEM[(const struct _Vector_impl_data &)b_2(D)]._M_finish;
MEM[(struct _Vector_impl_data *)a_4(D)]._M_finish = _6;
_7 = MEM[(const struct _Vector_impl_data &)b_2(D)]._M_end_of_storage;
MEM[(struct _Vector_impl_data *)a_4(D)]._M_end_of_storage = _7;
MEM[(struct _Vector_impl_data *)b_2(D)]._M_start = 0B;
MEM[(struct _Vector_impl_data *)b_2(D)]._M_finish = 0B;
MEM[(struct _Vector_impl_data *)b_2(D)]._M_end_of_storage = 0B;
if (_3 != 0B)
removing 2 reads and 3 writes. At -O3 we also get more vectorization.
The main idea is to give the compiler more type information. Currently,
the only type information the compiler cares about is the type used for a
memory read/write. Using std::swap as before, the reads/writes are done on
int&. Doing it directly, they are done on _Vector_impl_data::_M_start, a
more precise information. Maybe some day the compiler will get stricter
and be able to deduce the same information, but not yet.
The second point is to rotate { new, old, tmp } in an order that's simpler
for the compiler. I was going to remove the swaps and use _M_copy_data
directly, but since doing the swaps in a different order works...
_M_copy_data is not really needed, we could add a defaulted assignment
operator, or remove the move constructor (and call a new _M_clear() from
the 2 users), etc. However, it seemed the least intrusive, the least
likely to have weird consequences.
I didn't add a testcase because I don't see any dg-final scan-tree-dump in
the libstdc++ testsuite. The closest would be g++.dg/pr83239.C,
g++.dg/vect/pr64410.cc, g++.dg/vect/pr33426-ivdep-4.cc that include
<vector>, but from previous experience (already with vector), adding
libstdc++ testcases to the C++ testsuite is not very popular.
Bootstrap+regtest on powerpc64le-unknown-linux-gnu.
2018-07-25 Marc Glisse <marc.gli...@inria.fr>
* include/bits/stl_vector.h (_Vector_impl_data::_M_copy_data): New.
(_Vector_impl_data::_M_swap_data): Use _M_copy_data.
(vector::_M_move_assign): Reorder the swaps.
--
Marc Glisse
Index: libstdc++-v3/include/bits/stl_vector.h
===================================================================
--- libstdc++-v3/include/bits/stl_vector.h (revision 262948)
+++ libstdc++-v3/include/bits/stl_vector.h (working copy)
@@ -96,25 +96,36 @@ _GLIBCXX_BEGIN_NAMESPACE_CONTAINER
{ }
#if __cplusplus >= 201103L
_Vector_impl_data(_Vector_impl_data&& __x) noexcept
: _M_start(__x._M_start), _M_finish(__x._M_finish),
_M_end_of_storage(__x._M_end_of_storage)
{ __x._M_start = __x._M_finish = __x._M_end_of_storage = pointer(); }
#endif
void
+ _M_copy_data(_Vector_impl_data const& __x) _GLIBCXX_NOEXCEPT
+ {
+ _M_start = __x._M_start;
+ _M_finish = __x._M_finish;
+ _M_end_of_storage = __x._M_end_of_storage;
+ }
+
+ void
_M_swap_data(_Vector_impl_data& __x) _GLIBCXX_NOEXCEPT
{
- std::swap(_M_start, __x._M_start);
- std::swap(_M_finish, __x._M_finish);
- std::swap(_M_end_of_storage, __x._M_end_of_storage);
+ // Do not use std::swap(_M_start, __x._M_start), etc as it looses
+ // information used by TBAA.
+ _Vector_impl_data __tmp;
+ __tmp._M_copy_data(*this);
+ _M_copy_data(__x);
+ __x._M_copy_data(__tmp);
}
};
struct _Vector_impl
: public _Tp_alloc_type, public _Vector_impl_data
{
_Vector_impl() _GLIBCXX_NOEXCEPT_IF( noexcept(_Tp_alloc_type()) )
: _Tp_alloc_type()
{ }
@@ -1724,22 +1735,22 @@ _GLIBCXX_BEGIN_NAMESPACE_CONTAINER
#if __cplusplus >= 201103L
private:
// Constant-time move assignment when source object's memory can be
// moved, either because the source's allocator will move too
// or because the allocators are equal.
void
_M_move_assign(vector&& __x, true_type) noexcept
{
vector __tmp(get_allocator());
- this->_M_impl._M_swap_data(__tmp._M_impl);
this->_M_impl._M_swap_data(__x._M_impl);
+ __tmp._M_impl._M_swap_data(__x._M_impl);
std::__alloc_on_move(_M_get_Tp_allocator(), __x._M_get_Tp_allocator());
}
// Do move assignment when it might not be possible to move source
// object's memory, resulting in a linear-time operation.
void
_M_move_assign(vector&& __x, false_type)
{
if (__x._M_get_Tp_allocator() == this->_M_get_Tp_allocator())
_M_move_assign(std::move(__x), true_type());