On Tue, Mar 25, 2025 at 6:20 PM Jonathan Wakely <jwak...@redhat.com> wrote:
> On Tue, 25 Mar 2025 at 16:49, Tomasz Kaminski <tkami...@redhat.com> wrote: > > > > > > > > On Tue, Mar 25, 2025 at 5:30 PM Jonathan Wakely <jwak...@redhat.com> > wrote: > >> > >> LWG 3291 make std::ranges::iota_view's iterator have input_iterator_tag > >> as its iterator_category, even though it satisfies the C++20 > >> std::forward_iterator concept. This means that the traditional > >> std::vector::vector(InputIterator, InputIterator) constructor treats > >> iota_view iterators as input iterators, because it only understands the > >> C++17 iterator requirements, not the C++20 iterator concepts. This > >> results in a loop that calls emplace_back for each individual element of > >> the iota_view, requiring the vector to reallocate repeatedly as the > >> values are inserted. This makes it unnecessarily slow to construct a > >> vector from an iota_view. > >> > >> This change adds a new _M_range_initialize_n function for initializing a > >> vector from a range (which doesn't have to be common) and a size. This > >> new function can be used by vector(InputIterator, InputIterator) and > >> vector(from_range_t, R&&) when std::ranges::distance can be used to get > >> the size. It can also be used by the _M_range_initialize overload that > >> gets the size for a Cpp17ForwardIterator pair using std::distance, and > >> by the vector(initializer_list) constructor. > >> > >> With this new function constructing a std::vector from iota_view does > >> a single allocation of the correct size and so doesn't need to > >> reallocate in a loop. > >> > >> libstdc++-v3/ChangeLog: > >> > >> PR libstdc++/108487 > >> * include/bits/stl_vector.h (vector(initializer_list)): Call > >> _M_range_initialize_n instead of _M_range_initialize. > >> (vector(InputIterator, InputIterator)): Use > _M_range_initialize_n > >> for C++20 sized sentinels and forward iterators. > >> (vector(from_range_t, R&&)): Use _M_range_initialize_n for sized > >> ranges and forward ranges. > >> (vector::_M_range_initialize(FwIt, FwIt, forward_iterator_tag)): > >> Likewise. > >> (vector::_M_range_initialize_n): New function. > >> * testsuite/23_containers/vector/cons/108487.cc: New test. > >> --- > >> > >> Tests running for x86_64-linux. > >> > >> This gives a 10x speed up for the PR108487 testcase using iota_view. > >> > >> I don't see why doing this wouldn't be allowed by the standard, so it > >> seems worth doing. > >> > >> libstdc++-v3/include/bits/stl_vector.h | 48 ++++++++++++------- > >> .../23_containers/vector/cons/108487.cc | 24 ++++++++++ > >> 2 files changed, 56 insertions(+), 16 deletions(-) > >> create mode 100644 > libstdc++-v3/testsuite/23_containers/vector/cons/108487.cc > >> > >> diff --git a/libstdc++-v3/include/bits/stl_vector.h > b/libstdc++-v3/include/bits/stl_vector.h > >> index 21f6cd04f49..458adc987da 100644 > >> --- a/libstdc++-v3/include/bits/stl_vector.h > >> +++ b/libstdc++-v3/include/bits/stl_vector.h > >> @@ -65,6 +65,9 @@ > >> #if __cplusplus >= 202002L > >> # include <compare> > >> #endif > >> +#if __glibcxx_concepts // C++ >= C++20 > >> +# include <bits/ranges_base.h> // ranges::distance > >> +#endif > >> #if __glibcxx_ranges_to_container // C++ >= 23 > >> # include <bits/ranges_algobase.h> // ranges::copy > >> # include <bits/ranges_util.h> // ranges::subrange > >> @@ -706,8 +709,7 @@ _GLIBCXX_BEGIN_NAMESPACE_CONTAINER > >> const allocator_type& __a = allocator_type()) > >> : _Base(__a) > >> { > >> - _M_range_initialize(__l.begin(), __l.end(), > >> - random_access_iterator_tag()); > >> + _M_range_initialize_n(__l.begin(), __l.end(), __l.size()); > >> } > >> #endif > >> > >> @@ -735,6 +737,17 @@ _GLIBCXX_BEGIN_NAMESPACE_CONTAINER > >> const allocator_type& __a = allocator_type()) > >> : _Base(__a) > >> { > >> +#if __glibcxx_concepts // C++ >= C++20 > >> + if constexpr (sized_sentinel_for<_InputIterator, > _InputIterator> > >> + || forward_iterator<_InputIterator>) > >> + { > >> + const auto __n > >> + = static_cast<size_type>(ranges::distance(__first, > __last)); > >> + _M_range_initialize_n(__first, __last, __n); > >> + return; > >> + } > >> + else > >> +#endif > >> _M_range_initialize(__first, __last, > >> std::__iterator_category(__first)); > >> } > >> @@ -763,13 +776,9 @@ _GLIBCXX_BEGIN_NAMESPACE_CONTAINER > >> { > >> if constexpr (ranges::forward_range<_Rg> || > ranges::sized_range<_Rg>) > >> { > >> - const auto __n = size_type(ranges::distance(__rg)); > >> - pointer __start = > >> - this->_M_allocate(_S_check_init_len(__n, > >> - > _M_get_Tp_allocator())); > >> - this->_M_impl._M_finish = this->_M_impl._M_start = > __start; > >> - this->_M_impl._M_end_of_storage = __start + __n; > >> - _Base::_M_append_range(__rg); > >> + const auto __n = > static_cast<size_type>(ranges::distance(__rg)); > >> + _M_range_initialize_n(ranges::begin(__rg), > ranges::end(__rg), > >> + __n); > >> } > >> else > >> { > >> @@ -1962,15 +1971,22 @@ _GLIBCXX_BEGIN_NAMESPACE_CONTAINER > >> _M_range_initialize(_ForwardIterator __first, _ForwardIterator > __last, > >> std::forward_iterator_tag) > >> { > >> - const size_type __n = std::distance(__first, __last); > >> - pointer __start = > >> + _M_range_initialize_n(__first, __last, > >> + std::distance(__first, __last)); > >> + } > >> + > >> + template<typename _Iterator, typename _Sentinel> > >> + _GLIBCXX20_CONSTEXPR > >> + void > >> + _M_range_initialize_n(_Iterator __first, _Sentinel __last, > >> + size_type __n) > >> + { > >> + pointer __start = this->_M_impl._M_start = > >> this->_M_allocate(_S_check_init_len(__n, > _M_get_Tp_allocator())); > >> - _Guard_alloc __guard(__start, __n, *this); > >> - this->_M_impl._M_finish = std::__uninitialized_copy_a > >> - (__first, __last, __start, _M_get_Tp_allocator()); > >> - this->_M_impl._M_start = __start; > >> - (void) __guard._M_release(); > >> this->_M_impl._M_end_of_storage = __start + __n; > >> + this->_M_impl._M_finish > >> + = std::__uninitialized_copy_a(_GLIBCXX_MOVE(__first), > __last, > >> + __start, > _M_get_Tp_allocator()); > > > > Should we guard the allocation using _Guard_alloc here, as it was done > previously for _M_range_initialize? > > I forgot to mention that in the commit message. I don't think the > _Guard_alloc was ever needed. > > The ~_Vector_base destructor does > > _GLIBCXX20_CONSTEXPR > ~_Vector_base() _GLIBCXX_NOEXCEPT > { > _M_deallocate(_M_impl._M_start, > _M_impl._M_end_of_storage - _M_impl._M_start); > } > > So if we just assign the allocation to _M_impl._M_start immediately, > and set _M_impl._M_end_of_storage, then the base class guards it for > us. There is no need for a local _Guard_alloc. > I have checked that _M_range_initialize is called only from constructors, or _M_initialize_dispatch, that is directly called in the constructor. So guards indeed seem to be redundant. Please add this to the comment message, and otherwise LGTM. > > > >> > >> } > >> > >> // Called by the first initialize_dispatch above and by the > >> diff --git a/libstdc++-v3/testsuite/23_containers/vector/cons/108487.cc > b/libstdc++-v3/testsuite/23_containers/vector/cons/108487.cc > >> new file mode 100644 > >> index 00000000000..13f2c478e79 > >> --- /dev/null > >> +++ b/libstdc++-v3/testsuite/23_containers/vector/cons/108487.cc > >> @@ -0,0 +1,24 @@ > >> +// { dg-do run { target c++20 } } > >> +// Bug libstdc++/108487 > >> +// ~20-30x slowdown in populating std::vector from > std::ranges::iota_view > >> + > >> +#include <ranges> > >> +#include <testsuite_hooks.h> > >> +#include <testsuite_allocator.h> > >> + > >> +void > >> +test_pr108487() > >> +{ > >> + using __gnu_test::tracker_allocator; > >> + using __gnu_test::tracker_allocator_counter; > >> + auto r = std::ranges::iota_view{0, 20}; > >> + tracker_allocator_counter::reset(); > >> + std::vector<int, tracker_allocator<int>> v{r.begin(), r.end()}; > >> + const std::size_t bytes = v.capacity() * sizeof(v.front()); > >> + VERIFY( tracker_allocator_counter::get_allocation_count() == bytes ); > >> +} > >> + > >> +int main() > >> +{ > >> + test_pr108487(); > >> +} > >> -- > >> 2.49.0 > >> > >