As we are on patching algos we still have this old one.
From the original patch I only kept the memory optimization part as
the new performance test was not showing good result for the other part
to change pivot value. I also kept the small change in
get_temporary_buffer even if I don't have strong feeling about it, it
just make sure that we'll try to allocate 1 element as a last chance
allocation.
Note that there is still place for an improvement. If we miss
memory on the heap we then use a recursive implementation which then
rely on stack memory. I would be surprise that a system which miss heap
memory would have no problem to allocate about the same on the stack so
we will surely end up in a stack overflow. I still have this on my todo
even if I already made several tries with no satisfying result in terms
of performance.
Tested under Linux x86_64.
Commit message:
libstdc++: Limit memory allocation in stable_sort/inplace_merge (PR
83938)
Reduce memory consumption in stable_sort/inplace_merge to what is used.
Co-authored-by: François Dumont <fdum...@gcc.gnu.org>
libstdc++-v3/ChangeLog:
2020-06-11 John Chang <john.ch...@samba.tv>
François Dumont <fdum...@gcc.gnu.org>
PR libstdc++/83938
* include/bits/stl_tempbuf.h (get_temporary_buffer): Change
__len
computation in the loop.
* include/bits/stl_algo.h:
(__inplace_merge): Take temporary buffer length from
smallest range.
(__stable_sort): Limit temporary buffer length.
* testsuite/25_algorithms/inplace_merge/1.cc (test03): Test
different
pivot positions.
* testsuite/performance/25_algorithms/stable_sort.cc: Test
stable_sort
under different heap memory conditions.
* testsuite/performance/25_algorithms/inplace_merge.cc: New.
Ok to commit ?
François
diff --git a/libstdc++-v3/include/bits/stl_algo.h b/libstdc++-v3/include/bits/stl_algo.h
index fd6edd0d5f4..44798ffaa7f 100644
--- a/libstdc++-v3/include/bits/stl_algo.h
+++ b/libstdc++-v3/include/bits/stl_algo.h
@@ -2531,7 +2531,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
const _DistanceType __len2 = std::distance(__middle, __last);
typedef _Temporary_buffer<_BidirectionalIterator, _ValueType> _TmpBuf;
- _TmpBuf __buf(__first, __len1 + __len2);
+ _TmpBuf __buf(__first, std::min(__len1, __len2));
if (__buf.begin() == 0)
std::__merge_without_buffer
@@ -2740,6 +2740,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
std::__merge_sort_with_buffer(__first, __middle, __buffer, __comp);
std::__merge_sort_with_buffer(__middle, __last, __buffer, __comp);
}
+
std::__merge_adaptive(__first, __middle, __last,
_Distance(__middle - __first),
_Distance(__last - __middle),
@@ -5006,8 +5007,11 @@ _GLIBCXX_BEGIN_NAMESPACE_ALGO
typedef typename iterator_traits<_RandomAccessIterator>::difference_type
_DistanceType;
+ if (__first == __last)
+ return;
+
typedef _Temporary_buffer<_RandomAccessIterator, _ValueType> _TmpBuf;
- _TmpBuf __buf(__first, std::distance(__first, __last));
+ _TmpBuf __buf(__first, (__last - __first + 1) / 2);
if (__buf.begin() == 0)
std::__inplace_stable_sort(__first, __last, __comp);
diff --git a/libstdc++-v3/include/bits/stl_tempbuf.h b/libstdc++-v3/include/bits/stl_tempbuf.h
index f6f17960472..d76ed7f7ea6 100644
--- a/libstdc++-v3/include/bits/stl_tempbuf.h
+++ b/libstdc++-v3/include/bits/stl_tempbuf.h
@@ -110,7 +110,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
std::nothrow));
if (__tmp != 0)
return std::pair<_Tp*, ptrdiff_t>(__tmp, __len);
- __len /= 2;
+ __len = __len == 1 ? 0 : ((__len + 1) / 2);
}
return std::pair<_Tp*, ptrdiff_t>(static_cast<_Tp*>(0), 0);
}
diff --git a/libstdc++-v3/testsuite/25_algorithms/inplace_merge/1.cc b/libstdc++-v3/testsuite/25_algorithms/inplace_merge/1.cc
index 5859f0363d5..7f78687d0aa 100644
--- a/libstdc++-v3/testsuite/25_algorithms/inplace_merge/1.cc
+++ b/libstdc++-v3/testsuite/25_algorithms/inplace_merge/1.cc
@@ -28,7 +28,7 @@ using std::inplace_merge;
typedef test_container<int, bidirectional_iterator_wrapper> container;
-void
+void
test1()
{
int array[] = { 1 };
@@ -39,7 +39,7 @@ test1()
inplace_merge(con2.begin(), con2.begin(), con2.end());
}
-void
+void
test2()
{
int array[] = { 0, 2, 4, 1, 3, 5 };
@@ -60,30 +60,34 @@ struct S
{ return a < _s.a; }
};
-void
+void
test3()
{
S s[8];
- s[0].a = 0;
- s[1].a = 1;
- s[2].a = 2;
- s[3].a = 3;
- s[4].a = 0;
- s[5].a = 1;
- s[6].a = 2;
- s[7].a = 3;
-
- s[0].b = 0;
- s[1].b = 1;
- s[2].b = 2;
- s[3].b = 3;
- s[4].b = 4;
- s[5].b = 5;
- s[6].b = 6;
- s[7].b = 7;
-
- inplace_merge(s, s + 4, s + 8);
- VERIFY( s[0].b == 0 && s[1].b == 4 && s[2].b == 1 && s[3].b == 5 );
+ for (int pivot_idx = 0; pivot_idx < 8; ++pivot_idx)
+ {
+ int bval = 0;
+ for (int i = 0; i != pivot_idx; ++i)
+ {
+ s[i].a = i;
+ s[i].b = bval++;
+ }
+
+ for (int i = pivot_idx; i != 8; ++i)
+ {
+ s[i].a = i - pivot_idx;
+ s[i].b = bval++;
+ }
+
+ inplace_merge(s, s + pivot_idx, s + 8);
+
+ for (int i = 1; i < 8; ++i)
+ {
+ VERIFY( !(s[i] < s[i - 1]) );
+ if (s[i - 1].a == s[i].a)
+ VERIFY( s[i - 1].b < s[i].b );
+ }
+ }
}
int
diff --git a/libstdc++-v3/testsuite/performance/25_algorithms/inplace_merge.cc b/libstdc++-v3/testsuite/performance/25_algorithms/inplace_merge.cc
new file mode 100644
index 00000000000..780c21912c7
--- /dev/null
+++ b/libstdc++-v3/testsuite/performance/25_algorithms/inplace_merge.cc
@@ -0,0 +1,290 @@
+// Copyright (C) 2020 Free Software Foundation, Inc.
+//
+// This file is part of the GNU ISO C++ Library. This library is free
+// software; you can redistribute it and/or modify it under the
+// terms of the GNU General Public License as published by the
+// Free Software Foundation; either version 3, or (at your option)
+// any later version.
+
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+
+// You should have received a copy of the GNU General Public License along
+// with this library; see the file COPYING3. If not see
+// <http://www.gnu.org/licenses/>.
+
+#include <vector>
+#include <algorithm>
+#include <cmath>
+
+#include <testsuite_new_operators.h>
+#include <testsuite_performance.h>
+
+const int max_size = 10000000;
+const int small_size = 200000;
+const int front_pivot_idx = 10000;
+int middle_pivot_idx = max_size / 2;
+int back_pivot_idx = max_size - front_pivot_idx;
+
+void bench(int mem_threshold, int pivot_index,
+ std::vector<int> revv,
+ std::vector<int> fwdv,
+ std::vector<int> wstv,
+ std::vector<int> rndv)
+{
+ using namespace __gnu_test;
+
+ time_counter time;
+ resource_counter resource;
+
+ set_new_limit(mem_threshold);
+
+ start_counters(time, resource);
+ std::inplace_merge(revv.begin(), revv.begin() + pivot_index, revv.end());
+ stop_counters(time, resource);
+
+ set_new_limit(~size_t(0));
+
+ report_performance(__FILE__, "reverse", time, resource);
+ clear_counters(time, resource);
+
+ set_new_limit(mem_threshold);
+
+ start_counters(time, resource);
+ std::inplace_merge(fwdv.begin(), fwdv.begin() + pivot_index, fwdv.end());
+ stop_counters(time, resource);
+
+ set_new_limit(~size_t(0));
+
+ report_performance(__FILE__, "forward", time, resource);
+ clear_counters(time, resource);
+
+ set_new_limit(mem_threshold);
+
+ start_counters(time, resource);
+ std::inplace_merge(wstv.begin(), wstv.begin() + pivot_index, wstv.end());
+ stop_counters(time, resource);
+
+ set_new_limit(~size_t(0));
+
+ report_performance(__FILE__, "worst", time, resource);
+ clear_counters(time, resource);
+
+ set_new_limit(mem_threshold);
+
+ start_counters(time, resource);
+ std::inplace_merge(rndv.begin(), rndv.begin() + pivot_index, rndv.end());
+ stop_counters(time, resource);
+
+ set_new_limit(~size_t(0));
+ report_performance(__FILE__, "random", time, resource);
+}
+
+void mem_bench(double mem_ratio,
+ const std::vector<int>& front_revv,
+ const std::vector<int>& middle_revv,
+ const std::vector<int>& back_revv,
+ const std::vector<int>& fwdv,
+ const std::vector<int>& front_wstv,
+ const std::vector<int>& middle_wstv,
+ const std::vector<int>& back_wstv,
+ const std::vector<int>& front_rndv,
+ const std::vector<int>& middle_rndv,
+ const std::vector<int>& back_rndv)
+{
+ using namespace __gnu_test;
+
+ time_counter time;
+ resource_counter resource;
+
+ int max_mem = (int)std::ceil(front_pivot_idx * mem_ratio) * sizeof(int);
+ start_counters(time, resource);
+ bench(max_mem, front_pivot_idx, front_revv, fwdv, front_wstv, front_rndv);
+ stop_counters(time, resource);
+ report_performance(__FILE__, "front pivot", time, resource);
+ clear_counters(time, resource);
+
+ max_mem = (int)std::ceil(middle_pivot_idx * mem_ratio) * sizeof(int);
+ start_counters(time, resource);
+ bench(max_mem, middle_pivot_idx, middle_revv, fwdv, middle_wstv, middle_rndv);
+ stop_counters(time, resource);
+ report_performance(__FILE__, "middle pivot", time, resource);
+ clear_counters(time, resource);
+
+ max_mem = (int)std::ceil(front_pivot_idx * mem_ratio) * sizeof(int);
+ start_counters(time, resource);
+ bench(max_mem, back_pivot_idx, back_revv, fwdv, back_wstv, back_rndv);
+ stop_counters(time, resource);
+ report_performance(__FILE__, "back pivot", time, resource);
+}
+
+void init_reverse(std::vector<int>& v, size_t pivot_index)
+{
+ int val = 0;
+ for (size_t i = pivot_index; i != v.size(); ++i)
+ v[i] = val++;
+ for (size_t i = 0; i != pivot_index; ++i)
+ v[i] = val++;
+}
+
+void init_forward(std::vector<int>& v)
+{
+ int val = 0;
+ for (size_t i = 0; i != v.size(); ++i)
+ v[i] = val++;
+}
+
+void init_worst(std::vector<int>& v, size_t pivot_index)
+{
+ int val = 0;
+ if (pivot_index + 1 > v.size() / 2)
+ {
+ for (size_t i = 0; i != pivot_index; val += 2, ++i)
+ v[i] = val;
+ val = 1;
+ }
+ else
+ {
+ for (size_t i = pivot_index; i != v.size(); val += 2, ++i)
+ v[i] = val;
+ val -= pivot_index * 2 + 1;
+ }
+
+ if (pivot_index + 1 > v.size() / 2)
+ for (size_t i = pivot_index; i != v.size(); val += 2, ++i)
+ v[i] = val;
+ else
+ for (size_t i = 0; i != pivot_index; val += 2, ++i)
+ v[i] = val;
+}
+
+void init_random(std::vector<int>& v)
+{
+ // a simple pseudo-random series which does not rely on rand() and friends
+ v[0] = 0;
+ for (size_t i = 1; i != v.size(); ++i)
+ v[i] = (v[i-1] + 110211473) * 745988807;
+}
+
+void reduce_size(std::vector<int>& front_v,
+ std::vector<int>& middle_v,
+ std::vector<int>& back_v)
+{
+ front_v.erase(front_v.begin() + front_pivot_idx,
+ front_v.end() - back_pivot_idx);
+ middle_v.erase(middle_v.begin() + small_size / 2,
+ middle_v.end() - small_size / 2);
+ back_v.erase(back_v.begin() + back_pivot_idx,
+ back_v.end() - front_pivot_idx);
+}
+
+int main()
+{
+ using namespace __gnu_test;
+
+ // No constraint to build vectors.
+ set_new_limit(~size_t(0));
+
+ std::vector<int> front_revv(max_size);
+ init_reverse(front_revv, front_pivot_idx);
+
+ std::vector<int> middle_revv(max_size);
+ init_reverse(middle_revv, middle_pivot_idx);
+
+ std::vector<int> back_revv(max_size);
+ init_reverse(back_revv, back_pivot_idx);
+
+ std::vector<int> fwdv(max_size);
+ init_forward(fwdv);
+
+ std::vector<int> front_wstv(max_size);
+ init_worst(front_wstv, front_pivot_idx);
+
+ std::vector<int> middle_wstv(max_size);
+ init_worst(middle_wstv, middle_pivot_idx);
+
+ std::vector<int> back_wstv(max_size);
+ init_worst(back_wstv, back_pivot_idx);
+
+ std::vector<int> front_rndv(max_size);
+ init_random(front_rndv);
+ std::vector<int> middle_rndv(front_rndv);
+ std::vector<int> back_rndv(front_rndv);
+
+ sort(front_rndv.begin(), front_rndv.begin() + front_pivot_idx);
+ sort(front_rndv.begin() + front_pivot_idx, front_rndv.end());
+
+ sort(middle_rndv.begin(), middle_rndv.begin() + middle_pivot_idx);
+ sort(middle_rndv.begin() + middle_pivot_idx, middle_rndv.end());
+
+ sort(back_rndv.begin(), back_rndv.begin() + back_pivot_idx);
+ sort(back_rndv.begin() + back_pivot_idx, back_rndv.end());
+
+ time_counter time;
+ resource_counter resource;
+
+ start_counters(time, resource);
+
+ // No limit.
+ mem_bench(1.0,
+ front_revv, middle_revv, back_revv,
+ fwdv,
+ front_wstv, middle_wstv, back_wstv,
+ front_rndv, middle_rndv, back_rndv);
+
+ stop_counters(time, resource);
+
+ report_performance(__FILE__, "bench 1 / 1 memory", time, resource);
+ clear_counters(time, resource);
+
+ start_counters(time, resource);
+
+ // Limit to the fourth.
+ mem_bench(1.0 / 4,
+ front_revv, middle_revv, back_revv,
+ fwdv,
+ front_wstv, middle_wstv, back_wstv,
+ front_rndv, middle_rndv, back_rndv);
+
+ stop_counters(time, resource);
+
+ report_performance(__FILE__, "bench 1 / 4 memory", time, resource);
+ clear_counters(time, resource);
+
+ start_counters(time, resource);
+
+ // Really limit allocation.
+ mem_bench(1.0 / 64,
+ front_revv, middle_revv, back_revv,
+ fwdv,
+ front_wstv, middle_wstv, back_wstv,
+ front_rndv, middle_rndv, back_rndv);
+
+ stop_counters(time, resource);
+
+ report_performance(__FILE__, "bench 1 /64 memory", time, resource);
+ clear_counters(time, resource);
+
+ middle_pivot_idx = small_size / 2;
+ back_pivot_idx = small_size - front_pivot_idx;
+ reduce_size(front_revv, middle_revv, back_revv);
+ fwdv.resize(small_size);
+ reduce_size(front_wstv, middle_wstv, back_wstv);
+ reduce_size(front_rndv, middle_rndv, back_rndv);
+
+ start_counters(time, resource);
+
+ // No memory.
+ mem_bench(0.0,
+ front_revv, middle_revv, back_revv,
+ fwdv,
+ front_wstv, middle_wstv, back_wstv,
+ front_rndv, middle_rndv, back_rndv);
+
+ stop_counters(time, resource);
+
+ report_performance(__FILE__, "bench 0 / 1 memory", time, resource);
+ return 0;
+}
diff --git a/libstdc++-v3/testsuite/performance/25_algorithms/stable_sort.cc b/libstdc++-v3/testsuite/performance/25_algorithms/stable_sort.cc
index 02b869dd195..fe526638aaf 100644
--- a/libstdc++-v3/testsuite/performance/25_algorithms/stable_sort.cc
+++ b/libstdc++-v3/testsuite/performance/25_algorithms/stable_sort.cc
@@ -17,49 +17,107 @@
#include <vector>
#include <algorithm>
+
+#include <testsuite_new_operators.h>
#include <testsuite_performance.h>
-int main()
+const int max_size = 10000000;
+const int small_size = 200000;
+
+void bench(size_t mem_threshold,
+ std::vector<int> revv,
+ std::vector<int> fwdv,
+ std::vector<int> rndv)
{
using namespace __gnu_test;
time_counter time;
resource_counter resource;
- const int max_size = 10000000;
-
- std::vector<int> v(max_size);
-
- for (int i = 0; i < max_size; ++i)
- v[i] = -i;
+ set_new_limit(mem_threshold);
start_counters(time, resource);
- std::stable_sort(v.begin(), v.end());
+ std::stable_sort(revv.begin(), revv.end());
stop_counters(time, resource);
+ set_new_limit(~size_t(0));
report_performance(__FILE__, "reverse", time, resource);
clear_counters(time, resource);
- for (int i = 0; i < max_size; ++i)
- v[i] = i;
+ set_new_limit(mem_threshold);
start_counters(time, resource);
- std::stable_sort(v.begin(), v.end());
+ std::stable_sort(fwdv.begin(), fwdv.end());
stop_counters(time, resource);
+ set_new_limit(~size_t(0));
report_performance(__FILE__, "forwards", time, resource);
clear_counters(time, resource);
- // a simple psuedo-random series which does not rely on rand() and friends
- v[0] = 0;
+ start_counters(time, resource);
+ std::stable_sort(rndv.begin(), rndv.end());
+ stop_counters(time, resource);
+
+ set_new_limit(~size_t(0));
+ report_performance(__FILE__, "random", time, resource);
+}
+
+int main()
+{
+ using namespace __gnu_test;
+
+ // No memory constraint.
+ set_new_limit(~size_t(0));
+
+ std::vector<int> revv(max_size);
+ for (int i = 0; i < max_size; ++i)
+ revv[i] = -i;
+
+ std::vector<int> fwdv(max_size);
+ for (int i = 0; i < max_size; ++i)
+ fwdv[i] = i;
+
+ // a simple pseudo-random series which does not rely on rand() and friends
+ std::vector<int> rndv(max_size);
+ rndv[0] = 0;
for (int i = 1; i < max_size; ++i)
- v[i] = (v[i-1] + 110211473) * 745988807;
+ rndv[i] = (rndv[i-1] + 110211473) * 745988807;
+
+ time_counter time;
+ resource_counter resource;
start_counters(time, resource);
- std::stable_sort(v.begin(), v.end());
+ bench(~size_t(0), revv, fwdv, rndv);
stop_counters(time, resource);
- report_performance(__FILE__, "random", time, resource);
+ report_performance(__FILE__, "bench 1 / 1 memory", time, resource);
+ clear_counters(time, resource);
+
+ start_counters(time, resource);
+ // Limit to fourth the expected size of the sorted array.
+ bench(max_size * sizeof(int) / 4, revv, fwdv, rndv);
+ stop_counters(time, resource);
+
+ report_performance(__FILE__, "bench 1 / 4 memory", time, resource);
+ clear_counters(time, resource);
+
+ start_counters(time, resource);
+ // Limit to 1/64 of range size.
+ bench(max_size * sizeof(int) / 64, revv, fwdv, rndv);
+ stop_counters(time, resource);
+
+ report_performance(__FILE__, "bench 1 /64 memory", time, resource);
+ clear_counters(time, resource);
+
+ revv.resize(small_size);
+ fwdv.resize(small_size);
+ rndv.resize(small_size);
+
+ start_counters(time, resource);
+ // Forbid any allocation.
+ bench(0, revv, fwdv, rndv);
+ stop_counters(time, resource);
+ report_performance(__FILE__, "bench 0 / 1 memory", time, resource);
return 0;
}