LWG 3291 make std::ranges::iota_view's iterator have input_iterator_tag
as its iterator_category, even though it satisfies the C++20
std::forward_iterator concept. This means that the traditional
std::vector::vector(InputIterator, InputIterator) constructor treats
iota_view iterators as input iterators, because it only understands the
C++17 iterator requirements, not the C++20 iterator concepts. This
results in a loop that calls emplace_back for each individual element of
the iota_view, requiring the vector to reallocate repeatedly as the
values are inserted. This makes it unnecessarily slow to construct a
vector from an iota_view.

This change adds a new _M_range_initialize_n function for initializing a
vector from a range (which doesn't have to be common) and a size. This
new function can be used by vector(InputIterator, InputIterator) and
vector(from_range_t, R&&) when std::ranges::distance can be used to get
the size. It can also be used by the _M_range_initialize overload that
gets the size for a Cpp17ForwardIterator pair using std::distance, and
by the vector(initializer_list) constructor.

With this new function constructing a std::vector from iota_view does
a single allocation of the correct size and so doesn't need to
reallocate in a loop.

libstdc++-v3/ChangeLog:

        PR libstdc++/108487
        * include/bits/stl_vector.h (vector(initializer_list)): Call
        _M_range_initialize_n instead of _M_range_initialize.
        (vector(InputIterator, InputIterator)): Use _M_range_initialize_n
        for C++20 sized sentinels and forward iterators.
        (vector(from_range_t, R&&)): Use _M_range_initialize_n for sized
        ranges and forward ranges.
        (vector::_M_range_initialize(FwIt, FwIt, forward_iterator_tag)):
        Likewise.
        (vector::_M_range_initialize_n): New function.
        * testsuite/23_containers/vector/cons/108487.cc: New test.
---

Tests running for x86_64-linux.

This gives a 10x speed up for the PR108487 testcase using iota_view.

I don't see why doing this wouldn't be allowed by the standard, so it
seems worth doing.

 libstdc++-v3/include/bits/stl_vector.h        | 48 ++++++++++++-------
 .../23_containers/vector/cons/108487.cc       | 24 ++++++++++
 2 files changed, 56 insertions(+), 16 deletions(-)
 create mode 100644 libstdc++-v3/testsuite/23_containers/vector/cons/108487.cc

diff --git a/libstdc++-v3/include/bits/stl_vector.h 
b/libstdc++-v3/include/bits/stl_vector.h
index 21f6cd04f49..458adc987da 100644
--- a/libstdc++-v3/include/bits/stl_vector.h
+++ b/libstdc++-v3/include/bits/stl_vector.h
@@ -65,6 +65,9 @@
 #if __cplusplus >= 202002L
 # include <compare>
 #endif
+#if __glibcxx_concepts // C++ >= C++20
+# include <bits/ranges_base.h>          // ranges::distance
+#endif
 #if __glibcxx_ranges_to_container // C++ >= 23
 # include <bits/ranges_algobase.h>      // ranges::copy
 # include <bits/ranges_util.h>          // ranges::subrange
@@ -706,8 +709,7 @@ _GLIBCXX_BEGIN_NAMESPACE_CONTAINER
             const allocator_type& __a = allocator_type())
       : _Base(__a)
       {
-       _M_range_initialize(__l.begin(), __l.end(),
-                           random_access_iterator_tag());
+       _M_range_initialize_n(__l.begin(), __l.end(), __l.size());
       }
 #endif
 
@@ -735,6 +737,17 @@ _GLIBCXX_BEGIN_NAMESPACE_CONTAINER
               const allocator_type& __a = allocator_type())
        : _Base(__a)
        {
+#if __glibcxx_concepts // C++ >= C++20
+         if constexpr (sized_sentinel_for<_InputIterator, _InputIterator>
+                         || forward_iterator<_InputIterator>)
+           {
+             const auto __n
+               = static_cast<size_type>(ranges::distance(__first, __last));
+             _M_range_initialize_n(__first, __last, __n);
+             return;
+           }
+         else
+#endif
          _M_range_initialize(__first, __last,
                              std::__iterator_category(__first));
        }
@@ -763,13 +776,9 @@ _GLIBCXX_BEGIN_NAMESPACE_CONTAINER
        {
          if constexpr (ranges::forward_range<_Rg> || ranges::sized_range<_Rg>)
            {
-             const auto __n = size_type(ranges::distance(__rg));
-             pointer __start =
-               this->_M_allocate(_S_check_init_len(__n,
-                                                   _M_get_Tp_allocator()));
-             this->_M_impl._M_finish = this->_M_impl._M_start = __start;
-             this->_M_impl._M_end_of_storage = __start + __n;
-             _Base::_M_append_range(__rg);
+             const auto __n = static_cast<size_type>(ranges::distance(__rg));
+             _M_range_initialize_n(ranges::begin(__rg), ranges::end(__rg),
+                                   __n);
            }
          else
            {
@@ -1962,15 +1971,22 @@ _GLIBCXX_BEGIN_NAMESPACE_CONTAINER
        _M_range_initialize(_ForwardIterator __first, _ForwardIterator __last,
                            std::forward_iterator_tag)
        {
-         const size_type __n = std::distance(__first, __last);
-         pointer __start =
+         _M_range_initialize_n(__first, __last,
+                               std::distance(__first, __last));
+       }
+
+      template<typename _Iterator, typename _Sentinel>
+       _GLIBCXX20_CONSTEXPR
+       void
+       _M_range_initialize_n(_Iterator __first, _Sentinel __last,
+                             size_type __n)
+       {
+         pointer __start = this->_M_impl._M_start =
            this->_M_allocate(_S_check_init_len(__n, _M_get_Tp_allocator()));
-         _Guard_alloc __guard(__start, __n, *this);
-         this->_M_impl._M_finish = std::__uninitialized_copy_a
-           (__first, __last, __start, _M_get_Tp_allocator());
-         this->_M_impl._M_start = __start;
-         (void) __guard._M_release();
          this->_M_impl._M_end_of_storage = __start + __n;
+         this->_M_impl._M_finish
+             = std::__uninitialized_copy_a(_GLIBCXX_MOVE(__first), __last,
+                                           __start, _M_get_Tp_allocator());
        }
 
       // Called by the first initialize_dispatch above and by the
diff --git a/libstdc++-v3/testsuite/23_containers/vector/cons/108487.cc 
b/libstdc++-v3/testsuite/23_containers/vector/cons/108487.cc
new file mode 100644
index 00000000000..13f2c478e79
--- /dev/null
+++ b/libstdc++-v3/testsuite/23_containers/vector/cons/108487.cc
@@ -0,0 +1,24 @@
+// { dg-do run { target c++20 } }
+// Bug libstdc++/108487
+// ~20-30x slowdown in populating std::vector from std::ranges::iota_view
+
+#include <ranges>
+#include <testsuite_hooks.h>
+#include <testsuite_allocator.h>
+
+void
+test_pr108487()
+{
+  using __gnu_test::tracker_allocator;
+  using __gnu_test::tracker_allocator_counter;
+  auto r = std::ranges::iota_view{0, 20};
+  tracker_allocator_counter::reset();
+  std::vector<int, tracker_allocator<int>> v{r.begin(), r.end()};
+  const std::size_t bytes = v.capacity() * sizeof(v.front());
+  VERIFY( tracker_allocator_counter::get_allocation_count() == bytes );
+}
+
+int main()
+{
+  test_pr108487();
+}
-- 
2.49.0

Reply via email to