In next few weeks I'm gonna sending patches for refactoring <regex>. The goal is to make the gigantic _Executor class into several smaller ones and hopefully more readable. After that, there are several correctness/performance issues to be fixed. We may also need more documentation, but self-documenting code is even more important.
I've almost finished my drafting (with a lot of off-history back and forth, but stablized finally): https://github.com/innocentim/gcc/commits/executer, but I don't want to commit this branch, since it's more like a mind experiment (and trust me, you won't follow the commit history of this branch ;). This patch doesn't change any behavior. I've tried my best not to sacrifies performance for this _Context abstraction (as all C++ programmers do :), so far so good. I let _Executor privately inherits from _Context; but alternatively we can just make it a member. Bootstrapped and tested. Take your time. This isn't urgent. My plan is to finish them by the next big release. Thanks! -- Regards, Tim Shen
commit 007f04c627b0e0ea83b4e2e818354254f4ce649a Author: Tim Shen <tims...@google.com> Date: Thu Aug 6 00:37:21 2015 -0700 * include/bits/regex.tcc: Adjust caller side for interface change. * include/bits/regex_executor.h: Created __regex namespace. Created _Context for common data members for all executers. Do not pass around __match_mode, but make it a _Context member (_M_search_mode). Deleted unnecessary member _State_info::_M_start. * include/bits/regex_executor.tcc: Adjust for interface/member change. * include/std/regex: Reorder included headers. diff --git a/libstdc++-v3/include/bits/regex.tcc b/libstdc++-v3/include/bits/regex.tcc index fedc2b9..361ad32 100644 --- a/libstdc++-v3/include/bits/regex.tcc +++ b/libstdc++-v3/include/bits/regex.tcc @@ -61,25 +61,21 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION __it.matched = false; bool __ret; + constexpr auto __mode = __match_mode + ? __regex::_Search_mode::_Match : __regex::_Search_mode::_Search; if ((__re.flags() & regex_constants::__polynomial) || (__policy == _RegexExecutorPolicy::_S_alternate && !__re._M_automaton->_M_has_backref)) { - _Executor<_BiIter, _Alloc, _TraitsT, false> - __executor(__s, __e, __m, __re, __flags); - if (__match_mode) - __ret = __executor._M_match(); - else - __ret = __executor._M_search(); + __regex::_Executor<_BiIter, _Alloc, _TraitsT, false> __executor( + __s, __e, *__re._M_automaton, __flags, __mode, __res); + __ret = __executor.template _M_match<__mode>(); } else { - _Executor<_BiIter, _Alloc, _TraitsT, true> - __executor(__s, __e, __m, __re, __flags); - if (__match_mode) - __ret = __executor._M_match(); - else - __ret = __executor._M_search(); + __regex::_Executor<_BiIter, _Alloc, _TraitsT, true> __executor( + __s, __e, *__re._M_automaton, __flags, __mode, __res); + __ret = __executor.template _M_match<__mode>(); } if (__ret) { diff --git a/libstdc++-v3/include/bits/regex_executor.h b/libstdc++-v3/include/bits/regex_executor.h index f3f8876..5310bb8 100644 --- a/libstdc++-v3/include/bits/regex_executor.h +++ b/libstdc++-v3/include/bits/regex_executor.h @@ -32,15 +32,71 @@ namespace std _GLIBCXX_VISIBILITY(default) { +_GLIBCXX_BEGIN_NAMESPACE_VERSION +_GLIBCXX_BEGIN_NAMESPACE_CXX11 + + template<typename> + class sub_match; + +_GLIBCXX_END_NAMESPACE_CXX11 +_GLIBCXX_END_NAMESPACE_VERSION namespace __detail { +namespace __regex +{ _GLIBCXX_BEGIN_NAMESPACE_VERSION - /** * @addtogroup regex-detail * @{ */ + enum class _Search_mode : unsigned char { _Match, _Search }; + + template<typename _Bi_iter, typename _Traits> + struct _Context + { + using _Char_type = typename iterator_traits<_Bi_iter>::value_type; + + _Context(_Bi_iter __begin, _Bi_iter __end, const _NFA<_Traits>& __nfa, + regex_constants::match_flag_type __flags, + _Search_mode __search_mode) + : _M_begin(__begin), _M_end(__end), _M_match_flags(__flags), + _M_nfa(__nfa), _M_search_mode(__search_mode) { } + + bool + _M_is_word(_Char_type __ch) const + { + static const _Char_type __s[2] = { 'w' }; + return _M_nfa._M_traits.isctype + (__ch, _M_nfa._M_traits.lookup_classname(__s, __s+1)); + } + + bool + _M_at_begin() const + { + return _M_current == _M_begin + && !(_M_match_flags & (regex_constants::match_not_bol + | regex_constants::match_prev_avail)); + } + + bool + _M_at_end() const + { + return _M_current == _M_end + && !(_M_match_flags & regex_constants::match_not_eol); + } + + bool + _M_word_boundary() const; + + _Bi_iter _M_current; + _Bi_iter _M_begin; + const _Bi_iter _M_end; + const _NFA<_Traits>& _M_nfa; + regex_constants::match_flag_type _M_match_flags; + _Search_mode _M_search_mode; + }; + /** * @brief Takes a regex and an input string and does the matching. * @@ -49,118 +105,65 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION */ template<typename _BiIter, typename _Alloc, typename _TraitsT, bool __dfs_mode> - class _Executor + class _Executor : private _Context<_BiIter, _TraitsT> { - using __search_mode = integral_constant<bool, __dfs_mode>; + using __algorithm = integral_constant<bool, __dfs_mode>; using __dfs = true_type; using __bfs = false_type; - - enum class _Match_mode : unsigned char { _Exact, _Prefix }; + using _Context_type = _Context<_BiIter, _TraitsT>; public: - typedef typename iterator_traits<_BiIter>::value_type _CharT; - typedef basic_regex<_CharT, _TraitsT> _RegexT; typedef std::vector<sub_match<_BiIter>, _Alloc> _ResultsVec; - typedef regex_constants::match_flag_type _FlagT; - typedef typename _TraitsT::char_class_type _ClassT; - typedef _NFA<_TraitsT> _NFAT; public: - _Executor(_BiIter __begin, - _BiIter __end, - _ResultsVec& __results, - const _RegexT& __re, - _FlagT __flags) - : _M_begin(__begin), - _M_end(__end), - _M_re(__re), - _M_nfa(*__re._M_automaton), - _M_results(__results), - _M_rep_count(_M_nfa.size()), - _M_states(_M_nfa._M_start(), _M_nfa.size()), - _M_flags((__flags & regex_constants::match_prev_avail) - ? (__flags - & ~regex_constants::match_not_bol - & ~regex_constants::match_not_bow) - : __flags) + _Executor(_BiIter __begin, _BiIter __end, const _NFA<_TraitsT>& __nfa, + regex_constants::match_flag_type __flags, + _Search_mode __search_mode, _ResultsVec& __results) + : _Context_type(__begin, __end, __nfa, __flags, __search_mode), + _M_results(__results), _M_rep_count(this->_M_nfa.size()), + _M_states(this->_M_nfa.size()) { } - // Set matched when string exactly matches the pattern. - bool - _M_match() - { - _M_current = _M_begin; - return _M_main(_Match_mode::_Exact); - } - - // Set matched when some prefix of the string matches the pattern. - bool - _M_search_from_first() - { - _M_current = _M_begin; - return _M_main(_Match_mode::_Prefix); - } - - bool - _M_search(); + // __search_mode should be the same as this->_M_search_mode. It's + // slightly faster to let caller specify __search_mode again + // as a template argument. + // Still, in other places we prefer using the dynamic this->_M_search_mode, + // because it doesn't deserve being passed around as a template + // argument/function argument, since it's used in only a few places. + template<_Search_mode __search_mode> + bool + _M_match() + { return _M_match_impl<__search_mode>(this->_M_nfa._M_start()); } private: - void - _M_rep_once_more(_Match_mode __match_mode, _StateIdT); + template<_Search_mode __search_mode> + bool + _M_match_impl(_StateIdT __start); void - _M_dfs(_Match_mode __match_mode, _StateIdT __start); - - bool - _M_main(_Match_mode __match_mode) - { return _M_main_dispatch(__match_mode, __search_mode{}); } - - bool - _M_main_dispatch(_Match_mode __match_mode, __dfs); - - bool - _M_main_dispatch(_Match_mode __match_mode, __bfs); + _M_rep_once_more(_StateIdT); - bool - _M_is_word(_CharT __ch) const - { - static const _CharT __s[2] = { 'w' }; - return _M_re._M_automaton->_M_traits.isctype - (__ch, _M_re._M_automaton->_M_traits.lookup_classname(__s, __s+1)); - } - - bool - _M_at_begin() const - { - return _M_current == _M_begin - && !(_M_flags & (regex_constants::match_not_bol - | regex_constants::match_prev_avail)); - } + void + _M_dfs(_StateIdT __start); bool - _M_at_end() const - { - return _M_current == _M_end - && !(_M_flags & regex_constants::match_not_eol); - } + _M_main_dispatch(_StateIdT __start, __dfs); bool - _M_word_boundary() const; + _M_main_dispatch(_StateIdT __start, __bfs); bool _M_lookahead(_StateIdT __next); // Holds additional information used in BFS-mode. - template<typename _SearchMode, typename _ResultsVec> + template<typename _Algorithm, typename _ResultsVec> struct _State_info; template<typename _ResultsVec> struct _State_info<__bfs, _ResultsVec> { explicit - _State_info(_StateIdT __start, size_t __n) - : _M_visited_states(new bool[__n]()), _M_start(__start) - { } + _State_info(size_t __n) : _M_visited_states(new bool[__n]()) { } bool _M_visited(_StateIdT __i) { @@ -180,16 +183,13 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION vector<pair<_StateIdT, _ResultsVec>> _M_match_queue; // Indicates which states are already visited. unique_ptr<bool[]> _M_visited_states; - // To record current solution. - _StateIdT _M_start; }; template<typename _ResultsVec> struct _State_info<__dfs, _ResultsVec> { explicit - _State_info(_StateIdT __start, size_t) : _M_start(__start) - { } + _State_info(size_t) { } // Dummy implementations for DFS mode. bool _M_visited(_StateIdT) const { return false; } @@ -197,28 +197,21 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION _BiIter* _M_get_sol_pos() { return &_M_sol_pos; } - // To record current solution. - _StateIdT _M_start; _BiIter _M_sol_pos; }; public: _ResultsVec _M_cur_results; - _BiIter _M_current; - _BiIter _M_begin; - const _BiIter _M_end; - const _RegexT& _M_re; - const _NFAT& _M_nfa; _ResultsVec& _M_results; vector<pair<_BiIter, int>> _M_rep_count; - _State_info<__search_mode, _ResultsVec> _M_states; - _FlagT _M_flags; + _State_info<__algorithm, _ResultsVec> _M_states; // Do we have a solution so far? bool _M_has_sol; }; //@} regex-detail _GLIBCXX_END_NAMESPACE_VERSION +} // namespace __regex } // namespace __detail } // namespace std diff --git a/libstdc++-v3/include/bits/regex_executor.tcc b/libstdc++-v3/include/bits/regex_executor.tcc index 3fd17f6..db00bac 100644 --- a/libstdc++-v3/include/bits/regex_executor.tcc +++ b/libstdc++-v3/include/bits/regex_executor.tcc @@ -32,22 +32,58 @@ namespace std _GLIBCXX_VISIBILITY(default) { namespace __detail { +namespace __regex +{ _GLIBCXX_BEGIN_NAMESPACE_VERSION + // Return whether now is at some word boundary. + template<typename _Bi_iter, typename _Traits> + inline bool _Context<_Bi_iter, _Traits>:: + _M_word_boundary() const + { + bool __left_is_word = false; + if (_M_current != _M_begin + || (_M_match_flags & regex_constants::match_prev_avail)) + { + auto __prev = _M_current; + if (_M_is_word(*std::prev(__prev))) + __left_is_word = true; + } + bool __right_is_word = + _M_current != _M_end && _M_is_word(*_M_current); + + if (__left_is_word == __right_is_word) + return false; + if (__left_is_word && !(_M_match_flags & regex_constants::match_not_eow)) + return true; + if (__right_is_word && !(_M_match_flags & regex_constants::match_not_bow)) + return true; + return false; + } + template<typename _BiIter, typename _Alloc, typename _TraitsT, bool __dfs_mode> + template<_Search_mode __search_mode> bool _Executor<_BiIter, _Alloc, _TraitsT, __dfs_mode>:: - _M_search() + _M_match_impl(_StateIdT __start) { - if (_M_search_from_first()) + if (__search_mode == _Search_mode::_Match) + { + this->_M_current = this->_M_begin; + return _M_main_dispatch(__start, __algorithm{}); + } + + this->_M_current = this->_M_begin; + if (_M_main_dispatch(__start, __algorithm{})) return true; - if (_M_flags & regex_constants::match_continuous) + if (this->_M_match_flags & regex_constants::match_continuous) return false; - _M_flags |= regex_constants::match_prev_avail; - while (_M_begin != _M_end) + this->_M_match_flags |= regex_constants::match_prev_avail; + while (this->_M_begin != this->_M_end) { - ++_M_begin; - if (_M_search_from_first()) + ++this->_M_begin; + this->_M_current = this->_M_begin; + if (_M_main_dispatch(__start, __algorithm{})) return true; } return false; @@ -79,12 +115,12 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION template<typename _BiIter, typename _Alloc, typename _TraitsT, bool __dfs_mode> bool _Executor<_BiIter, _Alloc, _TraitsT, __dfs_mode>:: - _M_main_dispatch(_Match_mode __match_mode, __dfs) + _M_main_dispatch(_StateIdT __start, __dfs) { _M_has_sol = false; *_M_states._M_get_sol_pos() = _BiIter(); _M_cur_results = _M_results; - _M_dfs(__match_mode, _M_states._M_start); + _M_dfs(__start); return _M_has_sol; } @@ -113,29 +149,30 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION template<typename _BiIter, typename _Alloc, typename _TraitsT, bool __dfs_mode> bool _Executor<_BiIter, _Alloc, _TraitsT, __dfs_mode>:: - _M_main_dispatch(_Match_mode __match_mode, __bfs) + _M_main_dispatch(_StateIdT __start, __bfs) { - _M_states._M_queue(_M_states._M_start, _M_results); + _M_states._M_queue(__start, _M_results); bool __ret = false; while (1) { _M_has_sol = false; if (_M_states._M_match_queue.empty()) break; - std::fill_n(_M_states._M_visited_states.get(), _M_nfa.size(), false); + std::fill_n(_M_states._M_visited_states.get(), + this->_M_nfa.size(), false); auto __old_queue = std::move(_M_states._M_match_queue); for (auto& __task : __old_queue) { _M_cur_results = std::move(__task.second); - _M_dfs(__match_mode, __task.first); + _M_dfs(__task.first); } - if (__match_mode == _Match_mode::_Prefix) + if (this->_M_search_mode == _Search_mode::_Search) __ret |= _M_has_sol; - if (_M_current == _M_end) + if (this->_M_current == this->_M_end) break; - ++_M_current; + ++this->_M_current; } - if (__match_mode == _Match_mode::_Exact) + if (this->_M_search_mode == _Search_mode::_Match) __ret = _M_has_sol; _M_states._M_match_queue.clear(); return __ret; @@ -148,9 +185,11 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION _M_lookahead(_StateIdT __next) { _ResultsVec __what(_M_cur_results.size()); - _Executor __sub(_M_current, _M_end, __what, _M_re, _M_flags); - __sub._M_states._M_start = __next; - if (__sub._M_search_from_first()) + _Executor __sub( + this->_M_current, this->_M_end, this->_M_nfa, + this->_M_match_flags | regex_constants::match_continuous, + _Search_mode::_Search, __what); + if (__sub._M_match_impl<_Search_mode::_Search>(__next)) { for (size_t __i = 0; __i < __what.size(); __i++) if (__what[__i].matched) @@ -169,16 +208,16 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION template<typename _BiIter, typename _Alloc, typename _TraitsT, bool __dfs_mode> void _Executor<_BiIter, _Alloc, _TraitsT, __dfs_mode>:: - _M_rep_once_more(_Match_mode __match_mode, _StateIdT __i) + _M_rep_once_more(_StateIdT __i) { - const auto& __state = _M_nfa[__i]; + const auto& __state = (this->_M_nfa)[__i]; auto& __rep_count = _M_rep_count[__i]; - if (__rep_count.second == 0 || __rep_count.first != _M_current) + if (__rep_count.second == 0 || __rep_count.first != this->_M_current) { auto __back = __rep_count; - __rep_count.first = _M_current; + __rep_count.first = this->_M_current; __rep_count.second = 1; - _M_dfs(__match_mode, __state._M_alt); + _M_dfs(__state._M_alt); __rep_count = __back; } else @@ -186,7 +225,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION if (__rep_count.second < 2) { __rep_count.second++; - _M_dfs(__match_mode, __state._M_alt); + _M_dfs(__state._M_alt); __rep_count.second--; } } @@ -195,12 +234,12 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION template<typename _BiIter, typename _Alloc, typename _TraitsT, bool __dfs_mode> void _Executor<_BiIter, _Alloc, _TraitsT, __dfs_mode>:: - _M_dfs(_Match_mode __match_mode, _StateIdT __i) + _M_dfs(_StateIdT __i) { if (_M_states._M_visited(__i)) return; - const auto& __state = _M_nfa[__i]; + const auto& __state = (this->_M_nfa)[__i]; // Every change on _M_cur_results and _M_current will be rolled back after // finishing the recursion step. switch (__state._M_opcode()) @@ -214,19 +253,19 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION // Greedy. if (!__state._M_neg) { - _M_rep_once_more(__match_mode, __i); + _M_rep_once_more(__i); // If it's DFS executor and already accepted, we're done. if (!__dfs_mode || !_M_has_sol) - _M_dfs(__match_mode, __state._M_next); + _M_dfs(__state._M_next); } else // Non-greedy mode { if (__dfs_mode) { // vice-versa. - _M_dfs(__match_mode, __state._M_next); + _M_dfs(__state._M_next); if (!_M_has_sol) - _M_rep_once_more(__match_mode, __i); + _M_rep_once_more(__i); } else { @@ -235,12 +274,12 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION // be better by attempting its next node. if (!_M_has_sol) { - _M_dfs(__match_mode, __state._M_next); + _M_dfs(__state._M_next); // DON'T attempt anything if it's already accepted. An // accepted state *must* be better than a solution that // matches a non-greedy quantifier one more time. if (!_M_has_sol) - _M_rep_once_more(__match_mode, __i); + _M_rep_once_more(__i); } } } @@ -250,8 +289,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION { auto& __res = _M_cur_results[__state._M_subexpr]; auto __back = __res.first; - __res.first = _M_current; - _M_dfs(__match_mode, __state._M_next); + __res.first = this->_M_current; + _M_dfs(__state._M_next); __res.first = __back; } break; @@ -259,44 +298,44 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION { auto& __res = _M_cur_results[__state._M_subexpr]; auto __back = __res; - __res.second = _M_current; + __res.second = this->_M_current; __res.matched = true; - _M_dfs(__match_mode, __state._M_next); + _M_dfs(__state._M_next); __res = __back; } break; case _S_opcode_line_begin_assertion: - if (_M_at_begin()) - _M_dfs(__match_mode, __state._M_next); + if (this->_M_at_begin()) + _M_dfs(__state._M_next); break; case _S_opcode_line_end_assertion: - if (_M_at_end()) - _M_dfs(__match_mode, __state._M_next); + if (this->_M_at_end()) + _M_dfs(__state._M_next); break; case _S_opcode_word_boundary: - if (_M_word_boundary() == !__state._M_neg) - _M_dfs(__match_mode, __state._M_next); + if (this->_M_word_boundary() == !__state._M_neg) + _M_dfs(__state._M_next); break; // Here __state._M_alt offers a single start node for a sub-NFA. // We recursively invoke our algorithm to match the sub-NFA. case _S_opcode_subexpr_lookahead: if (_M_lookahead(__state._M_alt) == !__state._M_neg) - _M_dfs(__match_mode, __state._M_next); + _M_dfs(__state._M_next); break; case _S_opcode_match: - if (_M_current == _M_end) + if (this->_M_current == this->_M_end) break; if (__dfs_mode) { - if (__state._M_matches(*_M_current)) + if (__state._M_matches(*this->_M_current)) { - ++_M_current; - _M_dfs(__match_mode, __state._M_next); - --_M_current; + ++this->_M_current; + _M_dfs(__state._M_next); + --this->_M_current; } } else - if (__state._M_matches(*_M_current)) + if (__state._M_matches(*this->_M_current)) _M_states._M_queue(__state._M_next, _M_cur_results); break; // First fetch the matched result from _M_cur_results as __submatch; @@ -309,24 +348,24 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION auto& __submatch = _M_cur_results[__state._M_backref_index]; if (!__submatch.matched) break; - auto __last = _M_current; + auto __last = this->_M_current; for (auto __tmp = __submatch.first; - __last != _M_end && __tmp != __submatch.second; + __last != this->_M_end && __tmp != __submatch.second; ++__tmp) ++__last; - if (_M_re._M_automaton->_M_traits.transform(__submatch.first, - __submatch.second) - == _M_re._M_automaton->_M_traits.transform(_M_current, __last)) + if (this->_M_nfa._M_traits.transform(__submatch.first, + __submatch.second) + == this->_M_nfa._M_traits.transform(this->_M_current, __last)) { - if (__last != _M_current) + if (__last != this->_M_current) { - auto __backup = _M_current; - _M_current = __last; - _M_dfs(__match_mode, __state._M_next); - _M_current = __backup; + auto __backup = this->_M_current; + this->_M_current = __last; + _M_dfs(__state._M_next); + this->_M_current = __backup; } else - _M_dfs(__match_mode, __state._M_next); + _M_dfs(__state._M_next); } } break; @@ -334,33 +373,28 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION if (__dfs_mode) { _GLIBCXX_DEBUG_ASSERT(!_M_has_sol); - if (__match_mode == _Match_mode::_Exact) - _M_has_sol = _M_current == _M_end; + if (this->_M_search_mode == _Search_mode::_Match) + _M_has_sol = this->_M_current == this->_M_end; else _M_has_sol = true; - if (_M_current == _M_begin - && (_M_flags & regex_constants::match_not_null)) + if (this->_M_current == this->_M_begin + && (this->_M_match_flags & regex_constants::match_not_null)) _M_has_sol = false; if (_M_has_sol) { - if (_M_nfa._M_flags & regex_constants::ECMAScript) + if (this->_M_nfa._M_options() & regex_constants::ECMAScript) _M_results = _M_cur_results; else // POSIX { _GLIBCXX_DEBUG_ASSERT(_M_states._M_get_sol_pos()); - // Here's POSIX's logic: match the longest one. However - // we never know which one (lhs or rhs of "|") is longer - // unless we try both of them and compare the results. - // The member variable _M_sol_pos records the end - // position of the last successful match. It's better - // to be larger, because POSIX regex is always greedy. - // TODO: This could be slow. + // TODO: This isn't entirely correct. Implement leftmost + // longest for POSIX. if (*_M_states._M_get_sol_pos() == _BiIter() - || std::distance(_M_begin, + || std::distance(this->_M_begin, *_M_states._M_get_sol_pos()) - < std::distance(_M_begin, _M_current)) + < std::distance(this->_M_begin, this->_M_current)) { - *_M_states._M_get_sol_pos() = _M_current; + *_M_states._M_get_sol_pos() = this->_M_current; _M_results = _M_cur_results; } } @@ -368,10 +402,11 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION } else { - if (_M_current == _M_begin - && (_M_flags & regex_constants::match_not_null)) + if (this->_M_current == this->_M_begin + && (this->_M_match_flags & regex_constants::match_not_null)) break; - if (__match_mode == _Match_mode::_Prefix || _M_current == _M_end) + if (this->_M_search_mode == _Search_mode::_Search + || this->_M_current == this->_M_end) if (!_M_has_sol) { _M_has_sol = true; @@ -380,22 +415,22 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION } break; case _S_opcode_alternative: - if (_M_nfa._M_flags & regex_constants::ECMAScript) + if (this->_M_nfa._M_options() & regex_constants::ECMAScript) { // TODO: Fix BFS support. It is wrong. - _M_dfs(__match_mode, __state._M_alt); + _M_dfs(__state._M_alt); // Pick lhs if it matches. Only try rhs if it doesn't. if (!_M_has_sol) - _M_dfs(__match_mode, __state._M_next); + _M_dfs(__state._M_next); } else { // Try both and compare the result. // See "case _S_opcode_accept:" handling above. - _M_dfs(__match_mode, __state._M_alt); + _M_dfs(__state._M_alt); auto __has_sol = _M_has_sol; _M_has_sol = false; - _M_dfs(__match_mode, __state._M_next); + _M_dfs(__state._M_next); _M_has_sol |= __has_sol; } break; @@ -404,32 +439,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION } } - // Return whether now is at some word boundary. - template<typename _BiIter, typename _Alloc, typename _TraitsT, - bool __dfs_mode> - bool _Executor<_BiIter, _Alloc, _TraitsT, __dfs_mode>:: - _M_word_boundary() const - { - bool __left_is_word = false; - if (_M_current != _M_begin - || (_M_flags & regex_constants::match_prev_avail)) - { - auto __prev = _M_current; - if (_M_is_word(*std::prev(__prev))) - __left_is_word = true; - } - bool __right_is_word = - _M_current != _M_end && _M_is_word(*_M_current); - - if (__left_is_word == __right_is_word) - return false; - if (__left_is_word && !(_M_flags & regex_constants::match_not_eow)) - return true; - if (__right_is_word && !(_M_flags & regex_constants::match_not_bow)) - return true; - return false; - } - _GLIBCXX_END_NAMESPACE_VERSION +} // namespace __regex } // namespace __detail } // namespace diff --git a/libstdc++-v3/include/std/regex b/libstdc++-v3/include/std/regex index b6fe4c7..5c8bdb6 100644 --- a/libstdc++-v3/include/std/regex +++ b/libstdc++-v3/include/std/regex @@ -59,8 +59,8 @@ #include <bits/regex_automaton.h> #include <bits/regex_scanner.h> #include <bits/regex_compiler.h> -#include <bits/regex.h> #include <bits/regex_executor.h> +#include <bits/regex.h> #endif // C++11