Hi,
std::vector has independent implementation for bool which has its won
size/capacity functions.  I updated them to add __builtin_unreachable to
announce that size is never more than max_size.  However while testing the code
I noticed that even construction of unused copy is not optimized out.  Main
problem is that the vector copying loops copies the tail of vector using loop
that copies bit by bit.  We eventually pattern match it to bit operations (that
surprises me) but we need to unroll it and run through loop optimization that
happens late.

This patch also updates copy_aglined to use bit operations. However for = 
operation
we can do better since it is not necessary to preserve original bits (those are
undefined anyway). So I added copy_aglined_trail (better name would be welcome)
that does not care about trailing bits of the copied block.

As a result the following functions are optimized to empty functions:
#include <vector>
bool
empty(std::vector<bool> src)
{
        std::vector <bool> data=src;
        return false;
}
bool
empty2(std::vector<bool> src)
{
        std::vector <bool> data;
        data.reserve(1);
        return data.size ();
}
bool
empty3()
{
        std::vector <bool> data;
        data.push_back (true);
        return data[0];
}

Finally I mirrored changes to push_back from normal vectors to bit vectors.
This involve separating append from insert and breaking out the resizing (and
cold) path to separate function. 

Here are two little benchmarks on push_back:

#include <vector>

__attribute__ ((noipa))
std::vector<bool>
test()
{
        std::vector <bool> t;
        t.push_back (1);
        t.push_back (2);
        return t;
}
int
main()
{
        for (int i = 0; i < 100000000; i++)
        {
                test();
        }
        return 0;
}

                                                 runtime(s) .text size of a.out
gcc -O2                                             1.04    1606
gcc -O2 + patch                                     0.98    1315
gcc -O3                                             0.98    1249
gcc -O3 + patch                                     0.96    1138
gcc -O3 + patch --param max-inline-insns-auto=5000  0.96    1138
clang -O2                                           1.56    1823
clang -O3                                           1.56    1839
clang -O2 + libc++                                  2.31    4272
clang -O3 + libc++                                  2.76    4262

push_back is still too large to inline fully at -O3.  If parameters are bumped 
up
for small vectors this makes it possible to propagate bit positions.
This variant of benchmark

#include <vector>

__attribute__ ((noipa))
std::vector<bool>
test()
{
        std::vector <bool> t;
        t.push_back (1);
        t.push_back (2);
        return t;
}
int
main()
{
        for (int i = 0; i < 100000000; i++)
        {
                test();
        }
        return 0;
}


                                                 runtime(s) .text size of a.out
gcc -O2                                             1.48    1574
gcc -O2 + patch                                     1.30    1177
gcc -O3                                             1.46    1515
gcc -O3 + patch                                     1.27    1069
gcc -O3 + patch --param max-inline-insns-auto=5000  1.10    388
clang -O2                                           1.48    1823
clang -O3                                           1.46    1711
clang -O2 + libc++                                  1.20    1001
clang -O3 + libc++                                  1.17    1001

Note that clang does not suport noipa attribute, so it has some advantage here.
Bootstrapped/regtested x86_64-linux, OK?

libstdc++-v3/ChangeLog:

        * include/bits/stl_bvector.h (vector<bool, _Alloc>::operator=): Use
        _M_copy_aligned_trail.
        (vector<bool, _Alloc>::copy_aglined_trail): New function.
        (vector<bool, _Alloc>::copy_aglined): Implement copying of tail 
manually.
        (vector<bool, _Alloc>::size,vector<bool, _Alloc>::capacity): Add check
        that size is at most max_size.
        (vector<bool, _Alloc>::size,vector<bool, _Alloc>::push_back): Use
        _M_append_aux.
        (vector<bool, _Alloc>::size,vector<bool, _Alloc>::_M_append_aux): 
Declare.
        (vector<bool, _Alloc>::size,vector<bool, 
_Alloc>::_M_realloc_append_aux): Declare.
        (vector<bool, _Alloc>::size,vector<bool, 
_Alloc>::_M_realloc_insert_aux): Declare.
        * include/bits/vector.tcc
        (vector<bool, _Alloc>::size,vector<bool, _Alloc>::_M_append_aux): 
Implement.
        (vector<bool, _Alloc>::size,vector<bool, 
_Alloc>::_M_realloc_append_aux): Implement.
        (vector<bool, _Alloc>::size,vector<bool, 
_Alloc>::_M_realloc_insert_aux): Break out from ...
        (vector<bool, _Alloc>::size,vector<bool, _Alloc>::_M_insert_aux): ... 
here.

gcc/testsuite/ChangeLog:

        * g++.dg/tree-ssa/bvector-1.C: New test.

diff --git a/gcc/testsuite/g++.dg/tree-ssa/bvector-1.C 
b/gcc/testsuite/g++.dg/tree-ssa/bvector-1.C
new file mode 100644
index 00000000000..4197ff8c4b8
--- /dev/null
+++ b/gcc/testsuite/g++.dg/tree-ssa/bvector-1.C
@@ -0,0 +1,28 @@
+// { dg-do compile }
+// { dg-options "-O2 -fdump-tree-optimized"  }
+// { dg-skip-if "requires hosted libstdc++ for vector" { ! hostedlib } }
+#include <vector>
+bool
+empty(std::vector<bool> src)
+{
+       std::vector <bool> data=src;
+       return false;
+}
+bool
+empty2(std::vector<bool> src)
+{
+       std::vector <bool> data;
+       data.reserve(1);
+       return data.size ();
+}
+bool
+empty3()
+{
+       std::vector <bool> data;
+       data.push_back (true);
+       return data[0];
+}
+// All references to src should be optimized out, so there should be no name 
for it
+// { dg-final { scan-tree-dump-not "src_"  optimized } }
+// { dg-final { scan-tree-dump-not "data_"  optimized } }
+// { dg-final { scan-tree-dump-not "new"  optimized } }
diff --git a/libstdc++-v3/include/bits/stl_bvector.h 
b/libstdc++-v3/include/bits/stl_bvector.h
index 341eee33b21..ce1c5121b38 100644
--- a/libstdc++-v3/include/bits/stl_bvector.h
+++ b/libstdc++-v3/include/bits/stl_bvector.h
@@ -946,8 +946,8 @@ _GLIBCXX_BEGIN_NAMESPACE_CONTAINER
            this->_M_deallocate();
            _M_initialize(__x.size());
          }
-       this->_M_impl._M_finish = _M_copy_aligned(__x.begin(), __x.end(),
-                                                 begin());
+       this->_M_impl._M_finish = _M_copy_aligned_trail(__x.begin(), __x.end(),
+                                                       begin());
        return *this;
       }
 
@@ -971,8 +971,8 @@ _GLIBCXX_BEGIN_NAMESPACE_CONTAINER
                this->_M_deallocate();
                _M_initialize(__x.size());
              }
-           this->_M_impl._M_finish = _M_copy_aligned(__x.begin(), __x.end(),
-                                                     begin());
+           this->_M_impl._M_finish = _M_copy_aligned_trail(__x.begin(), 
__x.end(),
+                                                             begin());
            __x.clear();
          }
        return *this;
@@ -1101,7 +1101,12 @@ _GLIBCXX_BEGIN_NAMESPACE_CONTAINER
       _GLIBCXX_NODISCARD _GLIBCXX20_CONSTEXPR
       size_type
       size() const _GLIBCXX_NOEXCEPT
-      { return size_type(end() - begin()); }
+      {
+       const size_type __sz = size_type (end() - begin());
+       if (__sz > max_size ())
+          __builtin_unreachable ();
+       return __sz;
+      }
 
       _GLIBCXX_NODISCARD _GLIBCXX20_CONSTEXPR
       size_type
@@ -1119,8 +1124,12 @@ _GLIBCXX_BEGIN_NAMESPACE_CONTAINER
       _GLIBCXX_NODISCARD _GLIBCXX20_CONSTEXPR
       size_type
       capacity() const _GLIBCXX_NOEXCEPT
-      { return size_type(const_iterator(this->_M_impl._M_end_addr(), 0)
-                        - begin()); }
+      {
+       const size_type __sz = 
size_type(const_iterator(this->_M_impl._M_end_addr(), 0) - begin());
+       if (__sz > max_size ())
+          __builtin_unreachable ();
+       return __sz;
+      }
 
       _GLIBCXX_NODISCARD _GLIBCXX20_CONSTEXPR
       bool
@@ -1221,7 +1230,7 @@ _GLIBCXX_BEGIN_NAMESPACE_CONTAINER
        if (this->_M_impl._M_finish._M_p != this->_M_impl._M_end_addr())
          *this->_M_impl._M_finish++ = __x;
        else
-         _M_insert_aux(end(), __x);
+         _M_append_aux(__x);
       }
 
       _GLIBCXX20_CONSTEXPR
@@ -1484,8 +1493,19 @@ _GLIBCXX_BEGIN_NAMESPACE_CONTAINER
                      iterator __result)
       {
        _Bit_type* __q = std::copy(__first._M_p, __last._M_p, __result._M_p);
-       return std::copy(const_iterator(__last._M_p, 0), __last,
-                        iterator(__q, 0));
+       if (!__last._M_offset)
+         return iterator(__q, 0);
+       _Bit_type __mask = ~0ul << __last._M_offset;
+       *__q = (*__q & __mask) | (*__last._M_p & ~__mask);
+       return iterator(__q, __last._M_offset);
+      }
+      _GLIBCXX20_CONSTEXPR
+      iterator
+      _M_copy_aligned_trail(const_iterator __first, const_iterator __last,
+                           iterator __result)
+      {
+       _Bit_type* __q = std::copy(__first._M_p, __last._M_p + 
(__last._M_offset != 0), __result._M_p);
+       return iterator(__q - (__last._M_offset != 0), __last._M_offset);
       }
 
       _GLIBCXX20_CONSTEXPR
@@ -1669,6 +1689,18 @@ _GLIBCXX_BEGIN_NAMESPACE_CONTAINER
       void
       _M_insert_aux(iterator __position, bool __x);
 
+      _GLIBCXX20_CONSTEXPR
+      void
+      _M_realloc_insert_aux(iterator __position, bool __x);
+
+      _GLIBCXX20_CONSTEXPR
+      void
+      _M_append_aux(bool __x);
+
+      _GLIBCXX20_CONSTEXPR
+      void
+      _M_realloc_append_aux(bool __x);
+
       _GLIBCXX20_CONSTEXPR
       size_type
       _M_check_len(size_type __n, const char* __s) const
diff --git a/libstdc++-v3/include/bits/vector.tcc 
b/libstdc++-v3/include/bits/vector.tcc
index 542a66173a3..f9cc78d6dcf 100644
--- a/libstdc++-v3/include/bits/vector.tcc
+++ b/libstdc++-v3/include/bits/vector.tcc
@@ -1182,6 +1182,24 @@ _GLIBCXX_BEGIN_NAMESPACE_CONTAINER
              }
          }
       }
+  template<typename _Alloc>
+    _GLIBCXX20_CONSTEXPR
+    void
+    vector<bool, _Alloc>::
+    _M_realloc_insert_aux(iterator __position, bool __x)
+    {
+      const size_type __len =
+       _M_check_len(size_type(1), "vector<bool>::_M_realloc_insert_aux");
+      _Bit_pointer __q = this->_M_allocate(__len);
+      iterator __start(std::__addressof(*__q), 0);
+      iterator __i = _M_copy_aligned(begin(), __position, __start);
+      *__i++ = __x;
+      iterator __finish = std::copy(__position, end(), __i);
+      this->_M_deallocate();
+      this->_M_impl._M_end_of_storage = __q + _S_nword(__len);
+      this->_M_impl._M_start = __start;
+      this->_M_impl._M_finish = __finish;
+    }
 
   template<typename _Alloc>
     _GLIBCXX20_CONSTEXPR
@@ -1197,19 +1215,40 @@ _GLIBCXX_BEGIN_NAMESPACE_CONTAINER
          ++this->_M_impl._M_finish;
        }
       else
+       _M_realloc_insert_aux (__position, __x);
+    }
+
+  template<typename _Alloc>
+    _GLIBCXX20_CONSTEXPR
+    void
+    vector<bool, _Alloc>::
+    _M_realloc_append_aux(bool __x)
+    {
+      const size_type __len =
+       _M_check_len(size_type(1), "vector<bool>::_M_realloc_append_aux");
+      _Bit_pointer __q = this->_M_allocate(__len);
+      iterator __start(std::__addressof(*__q), 0);
+      iterator __i = _M_copy_aligned_trail(begin(), end(), __start);
+      *__i++ = __x;
+      this->_M_deallocate();
+      this->_M_impl._M_end_of_storage = __q + _S_nword(__len);
+      this->_M_impl._M_start = __start;
+      this->_M_impl._M_finish = __i;
+    }
+
+  template<typename _Alloc>
+    _GLIBCXX20_CONSTEXPR
+    void
+    vector<bool, _Alloc>::
+    _M_append_aux(bool __x)
+    {
+      if (this->_M_impl._M_finish._M_p != this->_M_impl._M_end_addr())
        {
-         const size_type __len =
-           _M_check_len(size_type(1), "vector<bool>::_M_insert_aux");
-         _Bit_pointer __q = this->_M_allocate(__len);
-         iterator __start(std::__addressof(*__q), 0);
-         iterator __i = _M_copy_aligned(begin(), __position, __start);
-         *__i++ = __x;
-         iterator __finish = std::copy(__position, end(), __i);
-         this->_M_deallocate();
-         this->_M_impl._M_end_of_storage = __q + _S_nword(__len);
-         this->_M_impl._M_start = __start;
-         this->_M_impl._M_finish = __finish;
+         *end() = __x;
+         ++this->_M_impl._M_finish;
        }
+      else
+       _M_realloc_append_aux (__x);
     }
 
   template<typename _Alloc>

Reply via email to