On Sat, Jan 24, 2026 at 6:21 PM Patrick Palka <[email protected]> wrote:
> Changes in v5:
> - Replace char _ExecutorFrame::_M_char with
> uint8_t _ExecutorFrame::_M_bytes[7] to fully account for
> the implicit struct padding and make it char size/signedness
> agnostic.
>
> Changes in v4:
> - Do not remove the BFS executor. I convinced myself that it's
> worthwhile to keep it around for backwards compatibility and its
> better time complexity (e.g. for PR78276).
> - However, this means the mangling of _Executor is now the same as
> before (since the __dfs_mode template parameter is no longer gone).
> I believe we need to give this class a different mangling so
> programs that mix old/new std::regex impls continue to work.
>
> Changes in v3:
>
> - Add char _M_char member to _ExecutorFrame (without increasing the
> size of struct thanks to padding)
> - Use it to fix bug in restore_subexpr_end frame code whereby we
> weren't restoring _M_cur_results.matched properly
> - Combine restore_subexpr_begin/end frame codes into one
> restore_cur_results frame code
> - Combine restore_rep_count_val/posn frame codes into single
> restore_rep_count frame_code (this makes the patch 3/4 from v2
> obsolete)
> - Store the frame stack in the data member _ExecutorFrame::_M_frames
> alongside the rest of the executor state instead of using a local.
> Using a local variable is just more code with no real benefit.
>
> -- >8 --
>
> libstdc++/regex: Convert DFS executor to use iteration [PR86164]
>
> This replaces the recursion and implicit system stack usage of the DFS
> executor with iteration and an explicit heap-based stack. After this
> patch, we no longer stack overflow on the PR86164 testcases since system
> stack usage is no longer linear with respect to input size. This should
> otherwise be a non-functional change.
>
> PR libstdc++/86164
>
> libstdc++-v3/ChangeLog:
>
> * include/bits/regex_executor.h (__detail::_ExecutorFrame):
> Declare.
> (__detail::_Executor::_M_node): Declare.
> (__detail::_Executor::_M_frames): New data member.
> * include/bits/regex_executor.tcc (__detail::_ExecutorFrameOpcode):
> New.
> (__detail::_ExecutorFrame): New.
> (__detail::_Executor::_M_rep_once_more): Replace recursive
> _M_dfs calls with an _S_opcode_next frame push, and any tail work
> after such calls with an appropriate frame push.
> (__detail::_M_handle_repeat): Likewise.
> (__detail::_M_handle_subexpr_begin): Likewise.
> (__detail::_M_handle_subexpr_end): Likewise.
> (__detail::_M_handle_line_begin_assertion): Likewise.
> (__detail::_M_handle_line_end_assertion): Likewise.
> (__detail::_M_handle_word_boundary): Likewise.
> (__detail::_M_handle_subexpr_lookahead): Likewise.
> (__detail::_M_handle_match): Likewise.
> (__detail::_M_handle_backref): Likewise.
> (__detail::_M_handle_accept): Likewise.
> (__detail::_M_handle_alternative): Likewise.
> (__detail::_M_node): Factored out from _M_dfs.
> (__detail::_M_dfs): Push an initial frame to _M_frames that
> visits the starting node and pass this stack each subroutine.
> Pop the latest _ExecutorFrame from _M_frames and handle
> appropriately according to its _ExecutorFrameOpcode. Loop until
> _M_frames is empty.
> * include/std/regex: Include <cstdint>.
> ---
> libstdc++-v3/include/bits/regex_executor.h | 7 +
> libstdc++-v3/include/bits/regex_executor.tcc | 217 +++++++++++++++----
> libstdc++-v3/include/std/regex | 1 +
> 3 files changed, 180 insertions(+), 45 deletions(-)
>
> diff --git a/libstdc++-v3/include/bits/regex_executor.h
> b/libstdc++-v3/include/bits/regex_executor.h
> index f9857bfc4c1d..e9af007840bb 100644
> --- a/libstdc++-v3/include/bits/regex_executor.h
> +++ b/libstdc++-v3/include/bits/regex_executor.h
> @@ -41,6 +41,9 @@ namespace __detail
> * @{
> */
>
> + template<typename _BiIter, bool _Trivial =
> is_trivially_copyable<_BiIter>::value>
> + struct _ExecutorFrame;
> +
> /**
> * @brief Takes a regex and an input string and does the matching.
> *
> @@ -142,6 +145,9 @@ namespace __detail
> void
> _M_handle_alternative(_Match_mode, _StateIdT);
>
> + void
> + _M_node(_Match_mode, _StateIdT);
> +
> void
> _M_dfs(_Match_mode __match_mode, _StateIdT __start);
>
> @@ -290,6 +296,7 @@ namespace __detail
> };
>
> public:
> + _GLIBCXX_STD_C::vector<_ExecutorFrame<_BiIter>> _M_frames;
> _ResultsVec
> _M_cur_results;
> _BiIter _M_current;
> _BiIter _M_begin;
> diff --git a/libstdc++-v3/include/bits/regex_executor.tcc
> b/libstdc++-v3/include/bits/regex_executor.tcc
> index 421e585f39d9..bc7731953b0e 100644
> --- a/libstdc++-v3/include/bits/regex_executor.tcc
> +++ b/libstdc++-v3/include/bits/regex_executor.tcc
> @@ -55,6 +55,71 @@ namespace __detail
> return false;
> }
>
> + enum _ExecutorFrameOpcode : uint8_t
> + {
> + _S_fopcode_next,
> + _S_fopcode_fallback_next,
> + _S_fopcode_rep_once_more,
> + _S_fopcode_fallback_rep_once_more,
> + _S_fopcode_posix_alternative,
> + _S_fopcode_merge_sol,
> + _S_fopcode_restore_current,
> + _S_fopcode_restore_cur_results,
> + _S_fopcode_restore_rep_count,
> + _S_fopcode_decrement_rep_count,
> + };
> +
> + template<typename _BiIter, bool _Trivial /* =
> is_trivially_copyable<_BiIter>::value */>
> + struct _ExecutorFrame
> + {
> + _ExecutorFrame(_ExecutorFrameOpcode __op, _StateIdT __i)
> + : _M_op(__op), _M_state_id(__i)
> + { }
> +
> + _ExecutorFrame(_ExecutorFrameOpcode __op, _StateIdT __i, _BiIter
> __p)
> + : _M_op(__op), _M_state_id(__i), _M_pos(__p)
> + { }
> +
> + _ExecutorFrame(_ExecutorFrameOpcode __op, _StateIdT __i, long __v)
> + : _M_op(__op), _M_state_id(__i), _M_val(__v)
> + { }
> +
> + _ExecutorFrameOpcode _M_op;
> + uint8_t _M_bytes[7];
> + _StateIdT _M_state_id;
> + // _M_pos and _M_val are mutually exclusive, which the optimized
> + // partial specialization below depends on.
> + _BiIter _M_pos = _BiIter();
> + long _M_val = 0;
> + };
> +
> + // Space-optimized partial specialization for when the input iterator is
> + // trivially copyable.
> + template<typename _BiIter>
> + struct _ExecutorFrame<_BiIter, true>
> + {
> + _ExecutorFrame(_ExecutorFrameOpcode __op, _StateIdT __i)
> + : _M_op(__op), _M_state_id(__i)
> + { }
> +
> + _ExecutorFrame(_ExecutorFrameOpcode __op, _StateIdT __i, _BiIter
> __p)
> + : _M_op(__op), _M_state_id(__i), _M_pos(__p)
> + { }
> +
> + _ExecutorFrame(_ExecutorFrameOpcode __op, _StateIdT __i, long __v)
> + : _M_op(__op), _M_state_id(__i), _M_val(__v)
> + { }
> +
> + _ExecutorFrameOpcode _M_op;
> + uint8_t _M_bytes[7];
> + _StateIdT _M_state_id;
> + union
> + {
> + _BiIter _M_pos;
> + long _M_val;
> + };
> + };
> +
> // The _M_main function operates in different modes, DFS mode or BFS
> mode,
> // indicated by template parameter __dfs_mode, and dispatches to one of
> the
> // _M_main_dispatch overloads.
> @@ -181,19 +246,20 @@ namespace __detail
> auto& __rep_count = _M_rep_count[__i];
> if (__rep_count.second == 0 || __rep_count.first != _M_current)
> {
> - auto __back = __rep_count;
> + _M_frames.emplace_back(_S_fopcode_restore_rep_count,
> + __i, __rep_count.first);
> + _M_frames.back()._M_bytes[0] = __rep_count.second;
> __rep_count.first = _M_current;
> __rep_count.second = 1;
> - _M_dfs(__match_mode, __state._M_alt);
> - __rep_count = __back;
> + _M_frames.emplace_back(_S_fopcode_next, __state._M_alt);
> }
> else
> {
> if (__rep_count.second < 2)
> {
> __rep_count.second++;
> - _M_dfs(__match_mode, __state._M_alt);
> - __rep_count.second--;
> + _M_frames.emplace_back(_S_fopcode_decrement_rep_count, __i);
> + _M_frames.emplace_back(_S_fopcode_next, __state._M_alt);
> }
> }
> }
> @@ -208,23 +274,23 @@ namespace __detail
> _M_handle_repeat(_Match_mode __match_mode, _StateIdT __i)
> {
> const auto& __state = _M_nfa[__i];
> -
> // Greedy.
> if (!__state._M_neg)
> {
> - _M_rep_once_more(__match_mode, __i);
> - // If it's DFS executor and already accepted, we're done.
> - if (!__dfs_mode || !_M_has_sol)
> - _M_dfs(__match_mode, __state._M_next);
> + if constexpr (__dfs_mode)
> + // If it's DFS executor and already accepted, we're done.
> + _M_frames.emplace_back(_S_fopcode_fallback_next,
> __state._M_next);
> + else
> + _M_frames.emplace_back(_S_fopcode_next, __state._M_next);
> + _M_frames.emplace_back(_S_fopcode_rep_once_more, __i);
> }
> else // Non-greedy mode
> {
> if constexpr (__dfs_mode)
> {
> // vice-versa.
> - _M_dfs(__match_mode, __state._M_next);
> - if (!_M_has_sol)
> - _M_rep_once_more(__match_mode, __i);
> + _M_frames.emplace_back(_S_fopcode_fallback_rep_once_more,
> __i);
> + _M_frames.emplace_back(_S_fopcode_next, __state._M_next);
> }
> else
> {
> @@ -233,12 +299,11 @@ namespace __detail
> // be better by attempting its next node.
> if (!_M_has_sol)
> {
> - _M_dfs(__match_mode, __state._M_next);
> // DON'T attempt anything if it's already accepted. An
> // accepted state *must* be better than a solution that
> // matches a non-greedy quantifier one more time.
> - if (!_M_has_sol)
> - _M_rep_once_more(__match_mode, __i);
> +
> _M_frames.emplace_back(_S_fopcode_fallback_rep_once_more, __i);
> + _M_frames.emplace_back(_S_fopcode_next, __state._M_next);
> }
> }
> }
> @@ -250,12 +315,12 @@ namespace __detail
> _M_handle_subexpr_begin(_Match_mode __match_mode, _StateIdT __i)
> {
> const auto& __state = _M_nfa[__i];
> -
> auto& __res = _M_cur_results[__state._M_subexpr];
> - auto __back = __res.first;
> + _M_frames.emplace_back(_S_fopcode_restore_cur_results,
> + __state._M_subexpr, __res.first);
> + _M_frames.back()._M_bytes[0] = 0xff;
> __res.first = _M_current;
> - _M_dfs(__match_mode, __state._M_next);
> - __res.first = __back;
> + _M_frames.emplace_back(_S_fopcode_next, __state._M_next);
> }
>
> template<typename _BiIter, typename _Alloc, typename _TraitsT,
> @@ -264,13 +329,13 @@ namespace __detail
> _M_handle_subexpr_end(_Match_mode __match_mode, _StateIdT __i)
> {
> const auto& __state = _M_nfa[__i];
> -
> auto& __res = _M_cur_results[__state._M_subexpr];
> - auto __back = __res;
> + _M_frames.emplace_back(_S_fopcode_restore_cur_results,
> + __state._M_subexpr, __res.second);
> + _M_frames.back()._M_bytes[0] = __res.matched;
> __res.second = _M_current;
> __res.matched = true;
> - _M_dfs(__match_mode, __state._M_next);
> - __res = __back;
> + _M_frames.emplace_back(_S_fopcode_next, __state._M_next);
> }
>
> template<typename _BiIter, typename _Alloc, typename _TraitsT,
> @@ -280,7 +345,7 @@ namespace __detail
> {
> const auto& __state = _M_nfa[__i];
> if (_M_at_begin())
> - _M_dfs(__match_mode, __state._M_next);
> + _M_frames.emplace_back(_S_fopcode_next, __state._M_next);
> }
>
> template<typename _BiIter, typename _Alloc, typename _TraitsT,
> @@ -290,7 +355,7 @@ namespace __detail
> {
> const auto& __state = _M_nfa[__i];
> if (_M_at_end())
> - _M_dfs(__match_mode, __state._M_next);
> + _M_frames.emplace_back(_S_fopcode_next, __state._M_next);
> }
>
> template<typename _BiIter, typename _Alloc, typename _TraitsT,
> @@ -300,7 +365,7 @@ namespace __detail
> {
> const auto& __state = _M_nfa[__i];
> if (_M_word_boundary() == !__state._M_neg)
> - _M_dfs(__match_mode, __state._M_next);
> + _M_frames.emplace_back(_S_fopcode_next, __state._M_next);
> }
>
> // Here __state._M_alt offers a single start node for a sub-NFA.
> @@ -312,7 +377,7 @@ namespace __detail
> {
> const auto& __state = _M_nfa[__i];
> if (_M_lookahead(__state._M_alt) == !__state._M_neg)
> - _M_dfs(__match_mode, __state._M_next);
> + _M_frames.emplace_back(_S_fopcode_next, __state._M_next);
> }
>
> template<typename _BiIter, typename _Alloc, typename _TraitsT,
> @@ -321,16 +386,15 @@ namespace __detail
> _M_handle_match(_Match_mode __match_mode, _StateIdT __i)
> {
> const auto& __state = _M_nfa[__i];
> -
> if (_M_current == _M_end)
> return;
> if constexpr (__dfs_mode)
> {
> if (__state._M_matches(*_M_current))
> {
> + _M_frames.emplace_back(_S_fopcode_restore_current, 0,
> _M_current);
> ++_M_current;
> - _M_dfs(__match_mode, __state._M_next);
> - --_M_current;
> + _M_frames.emplace_back(_S_fopcode_next, __state._M_next);
> }
> }
> else
> @@ -413,13 +477,12 @@ namespace __detail
> {
> if (__last != _M_current)
> {
> - auto __backup = _M_current;
> + _M_frames.emplace_back(_S_fopcode_restore_current, 0,
> _M_current);
> _M_current = __last;
> - _M_dfs(__match_mode, __state._M_next);
> - _M_current = __backup;
> + _M_frames.emplace_back(_S_fopcode_next, __state._M_next);
> }
> else
> - _M_dfs(__match_mode, __state._M_next);
> + _M_frames.emplace_back(_S_fopcode_next, __state._M_next);
> }
> }
>
> @@ -483,31 +546,26 @@ namespace __detail
> _M_handle_alternative(_Match_mode __match_mode, _StateIdT __i)
> {
> const auto& __state = _M_nfa[__i];
> -
> if (_M_nfa._M_flags & regex_constants::ECMAScript)
> {
> // TODO: Fix BFS support. It is wrong.
> - _M_dfs(__match_mode, __state._M_alt);
> // Pick lhs if it matches. Only try rhs if it doesn't.
> - if (!_M_has_sol)
> - _M_dfs(__match_mode, __state._M_next);
> + _M_frames.emplace_back(_S_fopcode_fallback_next,
> __state._M_next);
> + _M_frames.emplace_back(_S_fopcode_next, __state._M_alt);
> }
> else
> {
> // Try both and compare the result.
> // See "case _S_opcode_accept:" handling above.
> - _M_dfs(__match_mode, __state._M_alt);
> - auto __has_sol = _M_has_sol;
> - _M_has_sol = false;
> - _M_dfs(__match_mode, __state._M_next);
> - _M_has_sol |= __has_sol;
> + _M_frames.emplace_back(_S_fopcode_posix_alternative,
> __state._M_next);
> + _M_frames.emplace_back(_S_fopcode_next, __state._M_alt);
> }
> }
>
> template<typename _BiIter, typename _Alloc, typename _TraitsT,
> bool __dfs_mode>
> void _Executor<_BiIter, _Alloc, _TraitsT, __dfs_mode>::
> - _M_dfs(_Match_mode __match_mode, _StateIdT __i)
> + _M_node(_Match_mode __match_mode, _StateIdT __i)
> {
> if (_M_states._M_visited(__i))
> return;
> @@ -545,6 +603,75 @@ namespace __detail
> }
> }
>
> + template<typename _BiIter, typename _Alloc, typename _TraitsT,
> + bool __dfs_mode>
> + void _Executor<_BiIter, _Alloc, _TraitsT, __dfs_mode>::
> + _M_dfs(_Match_mode __match_mode, _StateIdT __start)
> + {
> + _M_frames.emplace_back(_S_fopcode_next, __start);
> +
> + while (!_M_frames.empty())
> + {
> + _ExecutorFrame<_BiIter> __frame = std::move(_M_frames.back());
> + _M_frames.pop_back();
> +
> + switch (__frame._M_op)
> + {
> + case _S_fopcode_next:
> + _M_node(__match_mode, __frame._M_state_id);
> + break;
> +
> + case _S_fopcode_fallback_next:
> + if (!_M_has_sol)
> + _M_frames.emplace_back(_S_fopcode_next,
> __frame._M_state_id);
>
We are inserting frame here, potentially expanding the vector, while I think
this could be expressed equivalently as:
case _S_fopcode_fallback_next:
if (!_M_has_sol)
_M_node(__match_mode, __frame._M_state_id);
break;
Or to make sure that fallback_next is same as next.
case _S_fopcode_fallback_next:
if (_M_has_sol)
break;
// fallthrough
case _S_fopcode_next:
_M_node(__match_mode, __frame._M_state_id);
break;
WDYT?
> + break;
> +
> + case _S_fopcode_fallback_rep_once_more:
> + if (!_M_has_sol)
> + _M_frames.emplace_back(_S_fopcode_rep_once_more,
> + __frame._M_state_id);
>
> + break;
> +
> + case _S_fopcode_rep_once_more:
> + _M_rep_once_more(__match_mode, __frame._M_state_id);
> + break;
> +
> + case _S_fopcode_posix_alternative:
> + _M_frames.emplace_back(_S_fopcode_merge_sol, 0, _M_has_sol);
> + _M_frames.emplace_back(_S_fopcode_next, __frame._M_state_id);
> + _M_has_sol = false;
> + break;
> +
> + case _S_fopcode_merge_sol:
> + _M_has_sol |= __frame._M_val;
> + break;
> +
> + case _S_fopcode_restore_current:
> + _M_current = __frame._M_pos;
> + break;
> +
> + case _S_fopcode_restore_cur_results:
> + if (__frame._M_bytes[0] == 0xff)
> + _M_cur_results[__frame._M_state_id].first = __frame._M_pos;
> + else
> + {
> + _M_cur_results[__frame._M_state_id].second =
> __frame._M_pos;
> + _M_cur_results[__frame._M_state_id].matched =
> __frame._M_bytes[0];
> + }
> + break;
> +
> + case _S_fopcode_restore_rep_count:
> + _M_rep_count[__frame._M_state_id].first = __frame._M_pos;
> + _M_rep_count[__frame._M_state_id].second =
> __frame._M_bytes[0];
> + break;
> +
> + case _S_fopcode_decrement_rep_count:
> + _M_rep_count[__frame._M_state_id].second--;
> + break;
> + }
> + }
> + }
> +
> // Return whether now is at some word boundary.
> template<typename _BiIter, typename _Alloc, typename _TraitsT,
> bool __dfs_mode>
> diff --git a/libstdc++-v3/include/std/regex
> b/libstdc++-v3/include/std/regex
> index c8fecfb2ad7a..003499c1bab5 100644
> --- a/libstdc++-v3/include/std/regex
> +++ b/libstdc++-v3/include/std/regex
> @@ -40,6 +40,7 @@
> #else
>
> #include <bitset>
> +#include <cstdint>
> #include <locale>
> #include <stack>
> #include <stdexcept>
> --
> 2.53.0.rc1.65.gea24e2c554
>
>