This reduces the size of <regex> a little. <regex> is one of the largest and slowest headers in the library.
By using <bits/stl_algobase.h> and <bits/stl_algo.h> instead of <algorithm> we don't need to parse all the parallel algorithms and std::ranges:: algorithms that are not needed by <regex>. Similarly, by using <bits/stl_tree.h> and <bits/stl_map.h> instead of <map> we don't need to parse the definition of std::multimap. The _State_info type is not movable or copyable, so it doesn't need to use std::unique_ptr<bool[]> to manage the array of bool that it uses as a bitset; we can just delete the array in the destructor. It would use a lot less space if we used a real (packed) bitset instead, but that would be an ABI break. We could do it for the versioned namespace, but this patch doesn't do so. For future reference, using vector<bool> would work, but would increase sizeof(_State_info) by two pointers, because vector<bool> is three times as large as unique_ptr<bool[]>. We can't use std::bitset because the length isn't a compile-time constant. We want a bitset whose length is not a compile-time constant but is fixed once it has been constructed. Signed-off-by: Jonathan Wakely <jwak...@redhat.com> libstdc++-v3/ChangeLog: * include/bits/regex_executor.h (_State_info): Replace unique_ptr<bool[]> with array of bool. * include/bits/regex_executor.tcc: Likewise. * include/bits/regex_scanner.tcc: Replace std::strchr with __builtin_strchr. * include/std/regex: Replace standard headers with smaller internal ones. * testsuite/28_regex/traits/char/lookup_classname.cc: Include <string.h> for strlen. * testsuite/28_regex/traits/char/lookup_collatename.cc: Likewise. Tested on powerpc64le-linux. Committed to trunk.
commit e9f64fff64d83f5fcaa9ff17f1688490f75bdcb7 Author: Jonathan Wakely <jwak...@redhat.com> Date: Mon Aug 2 18:34:19 2021 libstdc++: Reduce header dependencies in <regex> This reduces the size of <regex> a little. This is one of the largest and slowest headers in the library. By using <bits/stl_algobase.h> and <bits/stl_algo.h> instead of <algorithm> we don't need to parse all the parallel algorithms and std::ranges:: algorithms that are not needed by <regex>. Similarly, by using <bits/stl_tree.h> and <bits/stl_map.h> instead of <map> we don't need to parse the definition of std::multimap. The _State_info type is not movable or copyable, so doesn't need to use std::unique_ptr<bool[]> to manage a bitset, we can just delete it in the destructor. It would use a lot less space if we used a bitset instead, but that would be an ABI break. We could do it for the versioned namespace, but this patch doesn't do so. For future reference, using vector<bool> would work, but would increase sizeof(_State_info) by two pointers, because it's three times as large as unique_ptr<bool[]>. We can't use std::bitset because the length isn't constant. We want a bitset with a non-constant but fixed length. Signed-off-by: Jonathan Wakely <jwak...@redhat.com> libstdc++-v3/ChangeLog: * include/bits/regex_executor.h (_State_info): Replace unique_ptr<bool[]> with array of bool. * include/bits/regex_executor.tcc: Likewise. * include/bits/regex_scanner.tcc: Replace std::strchr with __builtin_strchr. * include/std/regex: Replace standard headers with smaller internal ones. * testsuite/28_regex/traits/char/lookup_classname.cc: Include <string.h> for strlen. * testsuite/28_regex/traits/char/lookup_collatename.cc: Likewise. 
diff --git a/libstdc++-v3/include/bits/regex_executor.h b/libstdc++-v3/include/bits/regex_executor.h index 4a641eeee6c..014b4e83064 100644 --- a/libstdc++-v3/include/bits/regex_executor.h +++ b/libstdc++-v3/include/bits/regex_executor.h @@ -195,6 +195,11 @@ namespace __detail : _M_visited_states(new bool[__n]()), _M_start(__start) { } + ~_State_info() { delete[] _M_visited_states; } + + _State_info(const _State_info&) = delete; + _State_info& operator=(const _State_info&) = delete; + bool _M_visited(_StateIdT __i) { if (_M_visited_states[__i]) @@ -212,7 +217,7 @@ namespace __detail // Saves states that need to be considered for the next character. vector<pair<_StateIdT, _ResultsVec>> _M_match_queue; // Indicates which states are already visited. - unique_ptr<bool[]> _M_visited_states; + bool* _M_visited_states; // To record current solution. _StateIdT _M_start; }; diff --git a/libstdc++-v3/include/bits/regex_executor.tcc b/libstdc++-v3/include/bits/regex_executor.tcc index 405d1c4d0d1..3cefeda48a3 100644 --- a/libstdc++-v3/include/bits/regex_executor.tcc +++ b/libstdc++-v3/include/bits/regex_executor.tcc @@ -122,7 +122,7 @@ namespace __detail _M_has_sol = false; if (_M_states._M_match_queue.empty()) break; - std::fill_n(_M_states._M_visited_states.get(), _M_nfa.size(), false); + std::fill_n(_M_states._M_visited_states, _M_nfa.size(), false); auto __old_queue = std::move(_M_states._M_match_queue); for (auto& __task : __old_queue) { diff --git a/libstdc++-v3/include/bits/regex_scanner.tcc b/libstdc++-v3/include/bits/regex_scanner.tcc index a3512083f0e..a9d6a613648 100644 --- a/libstdc++-v3/include/bits/regex_scanner.tcc +++ b/libstdc++-v3/include/bits/regex_scanner.tcc @@ -98,7 +98,7 @@ namespace __detail { auto __c = *_M_current++; - if (std::strchr(_M_spec_char, _M_ctype.narrow(__c, ' ')) == nullptr) + if (__builtin_strchr(_M_spec_char, _M_ctype.narrow(__c, ' ')) == nullptr) { _M_token = _S_token_ord_char; _M_value.assign(1, __c); @@ -394,7 +394,7 @@ namespace 
__detail "Unexpected end of regex when escaping."); auto __c = *_M_current; - auto __pos = std::strchr(_M_spec_char, _M_ctype.narrow(__c, '\0')); + auto __pos = __builtin_strchr(_M_spec_char, _M_ctype.narrow(__c, '\0')); if (__pos != nullptr && *__pos != '\0') { diff --git a/libstdc++-v3/include/std/regex b/libstdc++-v3/include/std/regex index e623a6ed498..04fb8b2d971 100644 --- a/libstdc++-v3/include/std/regex +++ b/libstdc++-v3/include/std/regex @@ -35,26 +35,26 @@ # include <bits/c++0x_warning.h> #else -#include <algorithm> #include <bitset> -#ifdef _GLIBCXX_DEBUG -# include <iosfwd> -#endif -#include <iterator> #include <locale> -#include <memory> #include <sstream> #include <stack> #include <stdexcept> #include <string> -#include <vector> -#include <map> -#include <cstring> #include <ext/aligned_buffer.h> #include <ext/numeric_traits.h> +#include <bits/shared_ptr.h> #include <bits/std_function.h> +#include <bits/stl_algobase.h> // std::copy, std::fill_n +#include <bits/stl_algo.h> // std::sort, std::unique +#include <bits/stl_iterator_base_types.h> // std::iterator_traits #include <bits/stl_pair.h> +#include <bits/stl_tree.h> +#include <bits/stl_map.h> +#include <bits/stl_vector.h> +#include <bits/stl_bvector.h> +#include <bits/vector.tcc> #include <bits/regex_constants.h> #include <bits/regex_error.h> #include <bits/regex_automaton.h> diff --git a/libstdc++-v3/testsuite/28_regex/traits/char/lookup_classname.cc b/libstdc++-v3/testsuite/28_regex/traits/char/lookup_classname.cc index ffaed97e2a7..6c2baf21b56 100644 --- a/libstdc++-v3/testsuite/28_regex/traits/char/lookup_classname.cc +++ b/libstdc++-v3/testsuite/28_regex/traits/char/lookup_classname.cc @@ -27,6 +27,7 @@ #include <regex> #include <forward_list> +#include <string.h> #include <testsuite_hooks.h> void diff --git a/libstdc++-v3/testsuite/28_regex/traits/char/lookup_collatename.cc b/libstdc++-v3/testsuite/28_regex/traits/char/lookup_collatename.cc index 3780c40729d..35447d49f7c 100644 --- 
a/libstdc++-v3/testsuite/28_regex/traits/char/lookup_collatename.cc +++ b/libstdc++-v3/testsuite/28_regex/traits/char/lookup_collatename.cc @@ -27,6 +27,7 @@ #include <regex> #include <forward_list> +#include <string.h> #include <testsuite_hooks.h> void