On Tue, 25 Mar 2025 at 16:49, Tomasz Kaminski <tkami...@redhat.com> wrote:
>
>
>
> On Tue, Mar 25, 2025 at 5:30 PM Jonathan Wakely <jwak...@redhat.com> wrote:
>>
>> LWG 3291 make std::ranges::iota_view's iterator have input_iterator_tag
>> as its iterator_category, even though it satisfies the C++20
>> std::forward_iterator concept. This means that the traditional
>> std::vector::vector(InputIterator, InputIterator) constructor treats
>> iota_view iterators as input iterators, because it only understands the
>> C++17 iterator requirements, not the C++20 iterator concepts. This
>> results in a loop that calls emplace_back for each individual element of
>> the iota_view, requiring the vector to reallocate repeatedly as the
>> values are inserted. This makes it unnecessarily slow to construct a
>> vector from an iota_view.
>>
>> This change adds a new _M_range_initialize_n function for initializing a
>> vector from a range (which doesn't have to be common) and a size. This
>> new function can be used by vector(InputIterator, InputIterator) and
>> vector(from_range_t, R&&) when std::ranges::distance can be used to get
>> the size. It can also be used by the _M_range_initialize overload that
>> gets the size for a Cpp17ForwardIterator pair using std::distance, and
>> by the vector(initializer_list) constructor.
>>
>> With this new function constructing a std::vector from iota_view does
>> a single allocation of the correct size and so doesn't need to
>> reallocate in a loop.
>>
>> libstdc++-v3/ChangeLog:
>>
>>         PR libstdc++/108487
>>         * include/bits/stl_vector.h (vector(initializer_list)): Call
>>         _M_range_initialize_n instead of _M_range_initialize.
>>         (vector(InputIterator, InputIterator)): Use _M_range_initialize_n
>>         for C++20 sized sentinels and forward iterators.
>>         (vector(from_range_t, R&&)): Use _M_range_initialize_n for sized
>>         ranges and forward ranges.
>>         (vector::_M_range_initialize(FwIt, FwIt, forward_iterator_tag)):
>>         Likewise.
>>         (vector::_M_range_initialize_n): New function.
>>         * testsuite/23_containers/vector/cons/108487.cc: New test.
>> ---
>>
>> Tests running for x86_64-linux.
>>
>> This gives a 10x speed up for the PR108487 testcase using iota_view.
>>
>> I don't see why doing this wouldn't be allowed by the standard, so it
>> seems worth doing.
>>
>>  libstdc++-v3/include/bits/stl_vector.h        | 48 ++++++++++++-------
>>  .../23_containers/vector/cons/108487.cc       | 24 ++++++++++
>>  2 files changed, 56 insertions(+), 16 deletions(-)
>>  create mode 100644 
>> libstdc++-v3/testsuite/23_containers/vector/cons/108487.cc
>>
>> diff --git a/libstdc++-v3/include/bits/stl_vector.h 
>> b/libstdc++-v3/include/bits/stl_vector.h
>> index 21f6cd04f49..458adc987da 100644
>> --- a/libstdc++-v3/include/bits/stl_vector.h
>> +++ b/libstdc++-v3/include/bits/stl_vector.h
>> @@ -65,6 +65,9 @@
>>  #if __cplusplus >= 202002L
>>  # include <compare>
>>  #endif
>> +#if __glibcxx_concepts // C++ >= C++20
>> +# include <bits/ranges_base.h>          // ranges::distance
>> +#endif
>>  #if __glibcxx_ranges_to_container // C++ >= 23
>>  # include <bits/ranges_algobase.h>      // ranges::copy
>>  # include <bits/ranges_util.h>          // ranges::subrange
>> @@ -706,8 +709,7 @@ _GLIBCXX_BEGIN_NAMESPACE_CONTAINER
>>              const allocator_type& __a = allocator_type())
>>        : _Base(__a)
>>        {
>> -       _M_range_initialize(__l.begin(), __l.end(),
>> -                           random_access_iterator_tag());
>> +       _M_range_initialize_n(__l.begin(), __l.end(), __l.size());
>>        }
>>  #endif
>>
>> @@ -735,6 +737,17 @@ _GLIBCXX_BEGIN_NAMESPACE_CONTAINER
>>                const allocator_type& __a = allocator_type())
>>         : _Base(__a)
>>         {
>> +#if __glibcxx_concepts // C++ >= C++20
>> +         if constexpr (sized_sentinel_for<_InputIterator, _InputIterator>
>> +                         || forward_iterator<_InputIterator>)
>> +           {
>> +             const auto __n
>> +               = static_cast<size_type>(ranges::distance(__first, __last));
>> +             _M_range_initialize_n(__first, __last, __n);
>> +             return;
>> +           }
>> +         else
>> +#endif
>>           _M_range_initialize(__first, __last,
>>                               std::__iterator_category(__first));
>>         }
>> @@ -763,13 +776,9 @@ _GLIBCXX_BEGIN_NAMESPACE_CONTAINER
>>         {
>>           if constexpr (ranges::forward_range<_Rg> || 
>> ranges::sized_range<_Rg>)
>>             {
>> -             const auto __n = size_type(ranges::distance(__rg));
>> -             pointer __start =
>> -               this->_M_allocate(_S_check_init_len(__n,
>> -                                                   _M_get_Tp_allocator()));
>> -             this->_M_impl._M_finish = this->_M_impl._M_start = __start;
>> -             this->_M_impl._M_end_of_storage = __start + __n;
>> -             _Base::_M_append_range(__rg);
>> +             const auto __n = 
>> static_cast<size_type>(ranges::distance(__rg));
>> +             _M_range_initialize_n(ranges::begin(__rg), ranges::end(__rg),
>> +                                   __n);
>>             }
>>           else
>>             {
>> @@ -1962,15 +1971,22 @@ _GLIBCXX_BEGIN_NAMESPACE_CONTAINER
>>         _M_range_initialize(_ForwardIterator __first, _ForwardIterator 
>> __last,
>>                             std::forward_iterator_tag)
>>         {
>> -         const size_type __n = std::distance(__first, __last);
>> -         pointer __start =
>> +         _M_range_initialize_n(__first, __last,
>> +                               std::distance(__first, __last));
>> +       }
>> +
>> +      template<typename _Iterator, typename _Sentinel>
>> +       _GLIBCXX20_CONSTEXPR
>> +       void
>> +       _M_range_initialize_n(_Iterator __first, _Sentinel __last,
>> +                             size_type __n)
>> +       {
>> +         pointer __start = this->_M_impl._M_start =
>>             this->_M_allocate(_S_check_init_len(__n, _M_get_Tp_allocator()));
>> -         _Guard_alloc __guard(__start, __n, *this);
>> -         this->_M_impl._M_finish = std::__uninitialized_copy_a
>> -           (__first, __last, __start, _M_get_Tp_allocator());
>> -         this->_M_impl._M_start = __start;
>> -         (void) __guard._M_release();
>>           this->_M_impl._M_end_of_storage = __start + __n;
>> +         this->_M_impl._M_finish
>> +             = std::__uninitialized_copy_a(_GLIBCXX_MOVE(__first), __last,
>> +                                           __start, _M_get_Tp_allocator());
>
> Should we guard the allocation using   _Guard_alloc here, as it was done 
> previously for _M_range_initialize?

I forgot to mention that in the commit message. I don't think the
_Guard_alloc was ever needed.

The ~_Vector_base destructor does

      _GLIBCXX20_CONSTEXPR
      ~_Vector_base() _GLIBCXX_NOEXCEPT
      {
        _M_deallocate(_M_impl._M_start,
                      _M_impl._M_end_of_storage - _M_impl._M_start);
      }

So if we just assign the allocation to _M_impl._M_start immediately,
and set _M_impl._M_end_of_storage, then the base class guards it for
us. There is no need for a local _Guard_alloc.


>>
>>         }
>>
>>        // Called by the first initialize_dispatch above and by the
>> diff --git a/libstdc++-v3/testsuite/23_containers/vector/cons/108487.cc 
>> b/libstdc++-v3/testsuite/23_containers/vector/cons/108487.cc
>> new file mode 100644
>> index 00000000000..13f2c478e79
>> --- /dev/null
>> +++ b/libstdc++-v3/testsuite/23_containers/vector/cons/108487.cc
>> @@ -0,0 +1,24 @@
>> +// { dg-do run { target c++20 } }
>> +// Bug libstdc++/108487
>> +// ~20-30x slowdown in populating std::vector from std::ranges::iota_view
>> +
>> +#include <ranges>
>> +#include <testsuite_hooks.h>
>> +#include <testsuite_allocator.h>
>> +
>> +void
>> +test_pr108487()
>> +{
>> +  using __gnu_test::tracker_allocator;
>> +  using __gnu_test::tracker_allocator_counter;
>> +  auto r = std::ranges::iota_view{0, 20};
>> +  tracker_allocator_counter::reset();
>> +  std::vector<int, tracker_allocator<int>> v{r.begin(), r.end()};
>> +  const std::size_t bytes = v.capacity() * sizeof(v.front());
>> +  VERIFY( tracker_allocator_counter::get_allocation_count() == bytes );
>> +}
>> +
>> +int main()
>> +{
>> +  test_pr108487();
>> +}
>> --
>> 2.49.0
>>

Reply via email to