On Wed, 28 Jan 2026, Tomasz Kaminski wrote:
>
>
> On Wed, Jan 28, 2026 at 4:54 PM Patrick Palka <[email protected]> wrote:
> On Wed, 28 Jan 2026, Tomasz Kaminski wrote:
>
> >
> >
> > On Wed, Jan 28, 2026 at 11:47 AM Tomasz Kaminski
> <[email protected]> wrote:
> >
> >
> > On Sat, Jan 24, 2026 at 6:21 PM Patrick Palka <[email protected]>
> wrote:
> > Changes in v5:
> > - Replace char _ExecutorFrame::_M_char with
> > uint8_t _ExecutorFrame::_M_bytes[7] to fully account for
> > the implicit struct padding and make it char size/signedness
> > agnostic.
> >
> > Changes in v4:
> > - Do not remove the BFS executor. I convinced myself that
> it's
> > worthwhile to keep it around for backwards compatibility
> and its
> > better time complexity (e.g. for PR78276).
> > - However, this means the mangling of _Executor is now the
> same as
> > before (since the __dfs_mode template parameter is no
> longer gone).
> > I believe we need to give this class a different mangling so
> > programs that mix old/new std::regex impls continue to work.
> >
> > Changes in v3:
> >
> > - Add char _M_char member to _ExecutorFrame (without
> increasing the
> > size of struct thanks to padding)
> > - Use it to fix bug in restore_subexpr_end frame code whereby
> we
> > weren't restoring _M_cur_results.matched properly
> > - Combine restore_subexpr_begin/end frame codes into one
> > restore_cur_results frame code
> > - Combine restore_rep_count_val/posn frame codes into single
> > restore_rep_count frame_code (this makes the patch 3/4 from
> v2
> > obsolete)
> > - Store the frame stack in the data member
> _ExecutorFrame::_M_frames
> > alongside the rest of the executor state instead of using a
> local.
> > Using a local variable is just more code with no real
> benefit.
> >
> > -- >8 --
> >
> > libstdc++/regex: Convert DFS executor to use iteration [PR86164]
> >
> > This replaces the recursion and implicit system stack usage of
> the DFS
> > executor with iteration and an explicit heap-based stack.
> After this
> > patch, we no longer stack overflow on the PR86164 testcases
> since system
> > stack usage is no longer linear with respect to input size.
> This should
> > otherwise be a non-functional change.
> >
> > Except the one suggestion below, this LGTM.
> >
> > PR libstdc++/86164
> >
> > libstdc++-v3/ChangeLog:
> >
> > * include/bits/regex_executor.h
> (__detail::_ExecutorFrame):
> > Declare.
> > (__detail::_Executor::_M_node): Declare.
> > (__detail::_Executor::_M_frames): New data member.
> > * include/bits/regex_executor.tcc
> (__detail::_ExecutorFrameOpcode):
> > New.
> > (__detail::_ExecutorFrame): New.
> > (__detail::_Executor::_M_rep_once_more): Replace
> recursive
> > _M_dfs calls with an _S_opcode_next frame push,
> and any tail work
> > after such calls with an appropriate frame push.
> > (__detail::_M_handle_repeat): Likewise.
> > (__detail::_M_handle_subexpr_begin): Likewise.
> > (__detail::_M_handle_subexpr_end): Likewise.
> > (__detail::_M_handle_line_begin_assertion):
> Likewise.
> > (__detail::_M_handle_line_end_assertion):
> Likewise.
> > (__detail::_M_handle_word_boundary): Likewise.
> > (__detail::_M_handle_subexpr_lookahead): Likewise.
> > (__detail::_M_handle_match): Likewise.
> > (__detail::_M_handle_backref): Likewise.
> > (__detail::_M_handle_accept): Likewise.
> > (__detail::_M_handle_alternative): Likewise.
> > (__detail::_M_node): Factored out from _M_dfs.
> > (__detail::_M_dfs): Push an initial frame to
> _M_frames that
> > visits the starting node and pass this stack each
> subroutine.
> > Pop the latest _ExecutorFrame from _M_frames and
> handle
> > appropriately according to its
> _ExecutorFrameOpcode. Loop until
> > _M_frames is empty.
> > * include/std/regex: Include <cstdint>.
> > ---
> > libstdc++-v3/include/bits/regex_executor.h | 7 +
> > libstdc++-v3/include/bits/regex_executor.tcc | 217
> +++++++++++++++----
> > libstdc++-v3/include/std/regex | 1 +
> > 3 files changed, 180 insertions(+), 45 deletions(-)
> >
> > diff --git a/libstdc++-v3/include/bits/regex_executor.h
> b/libstdc++-v3/include/bits/regex_executor.h
> > index f9857bfc4c1d..e9af007840bb 100644
> > --- a/libstdc++-v3/include/bits/regex_executor.h
> > +++ b/libstdc++-v3/include/bits/regex_executor.h
> > @@ -41,6 +41,9 @@ namespace __detail
> > * @{
> > */
> >
> > + template<typename _BiIter, bool _Trivial =
> is_trivially_copyable<_BiIter>::value>
> > + struct _ExecutorFrame;
> > +
> > /**
> > * @brief Takes a regex and an input string and does
> the matching.
> > *
> > @@ -142,6 +145,9 @@ namespace __detail
> > void
> > _M_handle_alternative(_Match_mode, _StateIdT);
> >
> > + void
> > + _M_node(_Match_mode, _StateIdT);
> > +
> > void
> > _M_dfs(_Match_mode __match_mode, _StateIdT
> __start);
> >
> > @@ -290,6 +296,7 @@ namespace __detail
> > };
> >
> > public:
> > + _GLIBCXX_STD_C::vector<_ExecutorFrame<_BiIter>>
> _M_frames;
> > _ResultsVec
> _M_cur_results;
> > _BiIter
> _M_current;
> > _BiIter
> _M_begin;
> > diff --git a/libstdc++-v3/include/bits/regex_executor.tcc
> b/libstdc++-v3/include/bits/regex_executor.tcc
> > index 421e585f39d9..bc7731953b0e 100644
> > --- a/libstdc++-v3/include/bits/regex_executor.tcc
> > +++ b/libstdc++-v3/include/bits/regex_executor.tcc
> > @@ -55,6 +55,71 @@ namespace __detail
> > return false;
> > }
> >
> > + enum _ExecutorFrameOpcode : uint8_t
> > + {
> > + _S_fopcode_next,
> > + _S_fopcode_fallback_next,
> > + _S_fopcode_rep_once_more,
> > + _S_fopcode_fallback_rep_once_more,
> > + _S_fopcode_posix_alternative,
> > + _S_fopcode_merge_sol,
> > + _S_fopcode_restore_current,
> > + _S_fopcode_restore_cur_results,
> > + _S_fopcode_restore_rep_count,
> > + _S_fopcode_decrement_rep_count,
> > + };
> > +
> > + template<typename _BiIter, bool _Trivial /* =
> is_trivially_copyable<_BiIter>::value */>
> > + struct _ExecutorFrame
> > + {
> > + _ExecutorFrame(_ExecutorFrameOpcode __op,
> _StateIdT __i)
> > + : _M_op(__op), _M_state_id(__i)
> > + { }
> > +
> > + _ExecutorFrame(_ExecutorFrameOpcode __op,
> _StateIdT __i, _BiIter __p)
> > + : _M_op(__op), _M_state_id(__i), _M_pos(__p)
> > + { }
> > +
> > + _ExecutorFrame(_ExecutorFrameOpcode __op,
> _StateIdT __i, long __v)
> > + : _M_op(__op), _M_state_id(__i), _M_val(__v)
> > + { }
> > +
> > + _ExecutorFrameOpcode _M_op;
> > + uint8_t _M_bytes[7];
> > + _StateIdT _M_state_id;
> > + // _M_pos and _M_val are mutually exclusive, which
> the optimized
> > + // partial specialization below depends on.
> > + _BiIter _M_pos = _BiIter();
> > + long _M_val = 0;
> > + };
> > +
> > + // Space-optimized partial specialization for when the
> input iterator is
> > + // trivially copyable.
> > + template<typename _BiIter>
> > + struct _ExecutorFrame<_BiIter, true>
> > + {
> > + _ExecutorFrame(_ExecutorFrameOpcode __op,
> _StateIdT __i)
> > + : _M_op(__op), _M_state_id(__i)
> > + { }
> > +
> > + _ExecutorFrame(_ExecutorFrameOpcode __op,
> _StateIdT __i, _BiIter __p)
> > + : _M_op(__op), _M_state_id(__i), _M_pos(__p)
> > + { }
> > +
> > + _ExecutorFrame(_ExecutorFrameOpcode __op,
> _StateIdT __i, long __v)
> > + : _M_op(__op), _M_state_id(__i), _M_val(__v)
> > + { }
> > +
> > + _ExecutorFrameOpcode _M_op;
> > + uint8_t _M_bytes[7];
> > + _StateIdT _M_state_id;
> > + union
> > + {
> > + _BiIter _M_pos;
> > + long _M_val;
> > + };
> > + };
> > +
> > // The _M_main function operates in different modes,
> DFS mode or BFS mode,
> > // indicated by template parameter __dfs_mode, and
> dispatches to one of the
> > // _M_main_dispatch overloads.
> > @@ -181,19 +246,20 @@ namespace __detail
> > auto& __rep_count = _M_rep_count[__i];
> > if (__rep_count.second == 0 || __rep_count.first
> != _M_current)
> > {
> > - auto __back = __rep_count;
> > +
> _M_frames.emplace_back(_S_fopcode_restore_rep_count,
> > + __i, __rep_count.first);
> > + _M_frames.back()._M_bytes[0] =
> __rep_count.second;
> > __rep_count.first = _M_current;
> > __rep_count.second = 1;
> > - _M_dfs(__match_mode, __state._M_alt);
> > - __rep_count = __back;
> > + _M_frames.emplace_back(_S_fopcode_next,
> __state._M_alt);
> > }
> > else
> > {
> > if (__rep_count.second < 2)
> > {
> > __rep_count.second++;
> > - _M_dfs(__match_mode, __state._M_alt);
> > - __rep_count.second--;
> > +
> _M_frames.emplace_back(_S_fopcode_decrement_rep_count, __i);
> > + _M_frames.emplace_back(_S_fopcode_next,
> __state._M_alt);
> > }
> > }
> > }
> > @@ -208,23 +274,23 @@ namespace __detail
> > _M_handle_repeat(_Match_mode __match_mode, _StateIdT
> __i)
> > {
> > const auto& __state = _M_nfa[__i];
> > -
> > // Greedy.
> > if (!__state._M_neg)
> > {
> > - _M_rep_once_more(__match_mode, __i);
> > - // If it's DFS executor and already accepted,
> we're done.
> > - if (!__dfs_mode || !_M_has_sol)
> > - _M_dfs(__match_mode, __state._M_next);
> > + if constexpr (__dfs_mode)
> > + // If it's DFS executor and already accepted,
> we're done.
> > +
> _M_frames.emplace_back(_S_fopcode_fallback_next, __state._M_next);
> > + else
> > + _M_frames.emplace_back(_S_fopcode_next,
> __state._M_next);
> > +
> _M_frames.emplace_back(_S_fopcode_rep_once_more, __i);
> > }
> > else // Non-greedy mode
> > {
> > if constexpr (__dfs_mode)
> > {
> > // vice-versa.
> > - _M_dfs(__match_mode, __state._M_next);
> > - if (!_M_has_sol)
> > - _M_rep_once_more(__match_mode, __i);
> > +
> _M_frames.emplace_back(_S_fopcode_fallback_rep_once_more, __i);
> > + _M_frames.emplace_back(_S_fopcode_next,
> __state._M_next);
> > }
> > else
> > {
> > @@ -233,12 +299,11 @@ namespace __detail
> > // be better by attempting its next node.
> > if (!_M_has_sol)
> > {
> > - _M_dfs(__match_mode, __state._M_next);
> > // DON'T attempt anything if it's
> already accepted. An
> > // accepted state *must* be better than
> a solution that
> > // matches a non-greedy quantifier one
> more time.
> > - if (!_M_has_sol)
> > - _M_rep_once_more(__match_mode, __i);
> > +
> _M_frames.emplace_back(_S_fopcode_fallback_rep_once_more, __i);
> > + _M_frames.emplace_back(_S_fopcode_next,
> __state._M_next);
> > }
> > }
> > }
> > @@ -250,12 +315,12 @@ namespace __detail
> > _M_handle_subexpr_begin(_Match_mode __match_mode,
> _StateIdT __i)
> > {
> > const auto& __state = _M_nfa[__i];
> > -
> > auto& __res = _M_cur_results[__state._M_subexpr];
> > - auto __back = __res.first;
> > +
> _M_frames.emplace_back(_S_fopcode_restore_cur_results,
> > + __state._M_subexpr,
> __res.first);
> > + _M_frames.back()._M_bytes[0] = 0xff;
> > __res.first = _M_current;
> > - _M_dfs(__match_mode, __state._M_next);
> > - __res.first = __back;
> > + _M_frames.emplace_back(_S_fopcode_next,
> __state._M_next);
> > }
> >
> > template<typename _BiIter, typename _Alloc, typename
> _TraitsT,
> > @@ -264,13 +329,13 @@ namespace __detail
> > _M_handle_subexpr_end(_Match_mode __match_mode,
> _StateIdT __i)
> > {
> > const auto& __state = _M_nfa[__i];
> > -
> > auto& __res = _M_cur_results[__state._M_subexpr];
> > - auto __back = __res;
> > +
> _M_frames.emplace_back(_S_fopcode_restore_cur_results,
> > + __state._M_subexpr,
> __res.second);
> > + _M_frames.back()._M_bytes[0] = __res.matched;
> > __res.second = _M_current;
> > __res.matched = true;
> > - _M_dfs(__match_mode, __state._M_next);
> > - __res = __back;
> > + _M_frames.emplace_back(_S_fopcode_next,
> __state._M_next);
> > }
> >
> > template<typename _BiIter, typename _Alloc, typename
> _TraitsT,
> > @@ -280,7 +345,7 @@ namespace __detail
> > {
> > const auto& __state = _M_nfa[__i];
> > if (_M_at_begin())
> > - _M_dfs(__match_mode, __state._M_next);
> > + _M_frames.emplace_back(_S_fopcode_next,
> __state._M_next);
> > }
> >
> > template<typename _BiIter, typename _Alloc, typename
> _TraitsT,
> > @@ -290,7 +355,7 @@ namespace __detail
> > {
> > const auto& __state = _M_nfa[__i];
> > if (_M_at_end())
> > - _M_dfs(__match_mode, __state._M_next);
> > + _M_frames.emplace_back(_S_fopcode_next,
> __state._M_next);
> > }
> >
> > template<typename _BiIter, typename _Alloc, typename
> _TraitsT,
> > @@ -300,7 +365,7 @@ namespace __detail
> > {
> > const auto& __state = _M_nfa[__i];
> > if (_M_word_boundary() == !__state._M_neg)
> > - _M_dfs(__match_mode, __state._M_next);
> > + _M_frames.emplace_back(_S_fopcode_next,
> __state._M_next);
> > }
> >
> > // Here __state._M_alt offers a single start node for
> a sub-NFA.
> > @@ -312,7 +377,7 @@ namespace __detail
> > {
> > const auto& __state = _M_nfa[__i];
> > if (_M_lookahead(__state._M_alt) ==
> !__state._M_neg)
> > - _M_dfs(__match_mode, __state._M_next);
> > + _M_frames.emplace_back(_S_fopcode_next,
> __state._M_next);
> > }
> >
> > template<typename _BiIter, typename _Alloc, typename
> _TraitsT,
> > @@ -321,16 +386,15 @@ namespace __detail
> > _M_handle_match(_Match_mode __match_mode, _StateIdT
> __i)
> > {
> > const auto& __state = _M_nfa[__i];
> > -
> > if (_M_current == _M_end)
> > return;
> > if constexpr (__dfs_mode)
> > {
> > if (__state._M_matches(*_M_current))
> > {
> > +
> _M_frames.emplace_back(_S_fopcode_restore_current, 0, _M_current);
> > ++_M_current;
> > - _M_dfs(__match_mode, __state._M_next);
> > - --_M_current;
> > + _M_frames.emplace_back(_S_fopcode_next,
> __state._M_next);
> > }
> > }
> > else
> > @@ -413,13 +477,12 @@ namespace __detail
> > {
> > if (__last != _M_current)
> > {
> > - auto __backup = _M_current;
> > +
> _M_frames.emplace_back(_S_fopcode_restore_current, 0, _M_current);
> > _M_current = __last;
> > - _M_dfs(__match_mode, __state._M_next);
> > - _M_current = __backup;
> > + _M_frames.emplace_back(_S_fopcode_next,
> __state._M_next);
> > }
> > else
> > - _M_dfs(__match_mode, __state._M_next);
> > + _M_frames.emplace_back(_S_fopcode_next,
> __state._M_next);
> > }
> > }
> >
> > @@ -483,31 +546,26 @@ namespace __detail
> > _M_handle_alternative(_Match_mode __match_mode,
> _StateIdT __i)
> > {
> > const auto& __state = _M_nfa[__i];
> > -
> > if (_M_nfa._M_flags & regex_constants::ECMAScript)
> > {
> > // TODO: Fix BFS support. It is wrong.
> > - _M_dfs(__match_mode, __state._M_alt);
> > // Pick lhs if it matches. Only try rhs if it
> doesn't.
> > - if (!_M_has_sol)
> > - _M_dfs(__match_mode, __state._M_next);
> > +
> _M_frames.emplace_back(_S_fopcode_fallback_next, __state._M_next);
> > + _M_frames.emplace_back(_S_fopcode_next,
> __state._M_alt);
> > }
> > else
> > {
> > // Try both and compare the result.
> > // See "case _S_opcode_accept:" handling above.
> > - _M_dfs(__match_mode, __state._M_alt);
> > - auto __has_sol = _M_has_sol;
> > - _M_has_sol = false;
> > - _M_dfs(__match_mode, __state._M_next);
> > - _M_has_sol |= __has_sol;
> > +
> _M_frames.emplace_back(_S_fopcode_posix_alternative, __state._M_next);
> > + _M_frames.emplace_back(_S_fopcode_next,
> __state._M_alt);
> > }
> > }
> >
> > template<typename _BiIter, typename _Alloc, typename
> _TraitsT,
> > bool __dfs_mode>
> > void _Executor<_BiIter, _Alloc, _TraitsT,
> __dfs_mode>::
> > - _M_dfs(_Match_mode __match_mode, _StateIdT __i)
> > + _M_node(_Match_mode __match_mode, _StateIdT __i)
> > {
> > if (_M_states._M_visited(__i))
> > return;
> > @@ -545,6 +603,75 @@ namespace __detail
> > }
> > }
> >
> > + template<typename _BiIter, typename _Alloc, typename
> _TraitsT,
> > + bool __dfs_mode>
> > + void _Executor<_BiIter, _Alloc, _TraitsT,
> __dfs_mode>::
> > + _M_dfs(_Match_mode __match_mode, _StateIdT __start)
> > + {
> > + _M_frames.emplace_back(_S_fopcode_next, __start);
> > +
> > + while (!_M_frames.empty())
> > + {
> > + _ExecutorFrame<_BiIter> __frame =
> std::move(_M_frames.back());
> > + _M_frames.pop_back();
> > +
> > + switch (__frame._M_op)
> > + {
> > + case _S_fopcode_next:
> > + _M_node(__match_mode, __frame._M_state_id);
> > + break;
> > +
> > + case _S_fopcode_fallback_next:
> > + if (!_M_has_sol)
> > + _M_frames.emplace_back(_S_fopcode_next,
> __frame._M_state_id);
> >
> > We are inserting frame here, potentially expanding the vector,
> while I think
> > this could be expressed equivalently as:
> > case _S_fopcode_fallback_next:
> > if (!_M_has_sol)
> > _M_node(__match_mode, __frame._M_state_id);
> > break;
>
> Yes I think in theory we could do this shortcut wherever there's an
> emplace_back call in the tail position (even in the subroutines e.g.
> _M_rep_once_more) since we know we're going to immediately pop that
> frame in the next iteration of the loop. It does hurt readability a
> little bit and makes the implementation less obviously non-recursive.
> Maybe it's worthwhile doing it selectively where it makes an observable
> performance difference (particularly with -O0).
>
> Another option is to wrap the _M_frames stack in a class that contains
> an additional member holding the top of the stack:
>
> struct {
> std::vector<_ExecutorFrame> _M_frames;
> _ExecutorFrame _M_top;
> // emplace_back, pop_back, back, empty
> };
>
> That way consecutive push+pops won't touch the vector at all, I
> think? I'll try these ideas out.
>
> As explained below, I will find the implementation of fallback opcodes
> easier, if they would literally fallback/through in implementation. Using
> a separate _M_top haven't crossed my mind at that time.
>
> > Or to make sure that fallback_next is same as next.
> > case _S_fopcode_fallback_next:
> > if (_M_has_sol)
> > break;
> > // fallthrough
> > case _S_fopcode_next:
> > _M_node(__match_mode, __frame._M_state_id);
> > break;
>
> IIUC this isn't true after the 2nd patch which makes
> _S_fopcode_fallback_next also restore _M_current.
>
> We can add a restore of _M_current before fallthrough. I personally will find
> it cleaner, iff _S_fopcode_fallback_X will just do some steps, and fall
> through for _S_fopcode_X.
> But this is very subjective, so I am fine either way.
Oops, you're right and I definitely prefer using a fallthrough too.
Will update the patch.
>
> > WDYT?
> > + break;
> > +
> > + case _S_fopcode_fallback_rep_once_more:
> > + if (!_M_has_sol)
> > + _M_frames.emplace_back(_S_fopcode_rep_once_more,
> > + __frame._M_state_id);
> >
> >
> > + break;
> > +
> > + case _S_fopcode_rep_once_more:
> > + _M_rep_once_more(__match_mode,
> __frame._M_state_id);
> > + break;
> > +
> > + case _S_fopcode_posix_alternative:
> > + _M_frames.emplace_back(_S_fopcode_merge_sol, 0,
> _M_has_sol);
> > + _M_frames.emplace_back(_S_fopcode_next,
> __frame._M_state_id);
> > + _M_has_sol = false;
> > + break;
> > +
> > + case _S_fopcode_merge_sol:
> > + _M_has_sol |= __frame._M_val;
> > + break;
> > +
> > + case _S_fopcode_restore_current:
> > + _M_current = __frame._M_pos;
> > + break;
> > +
> > + case _S_fopcode_restore_cur_results:
> > + if (__frame._M_bytes[0] == 0xff)
> > + _M_cur_results[__frame._M_state_id].first =
> __frame._M_pos;
> > + else
> > + {
> > + _M_cur_results[__frame._M_state_id].second =
> __frame._M_pos;
> > + _M_cur_results[__frame._M_state_id].matched =
> __frame._M_bytes[0];
> > + }
> > + break;
> > +
> > + case _S_fopcode_restore_rep_count:
> > + _M_rep_count[__frame._M_state_id].first =
> __frame._M_pos;
> > + _M_rep_count[__frame._M_state_id].second =
> __frame._M_bytes[0];
> > + break;
> > +
> > + case _S_fopcode_decrement_rep_count:
> > + _M_rep_count[__frame._M_state_id].second--;
> > + break;
> > + }
> > + }
> > + }
> > +
> > // Return whether now is at some word boundary.
> > template<typename _BiIter, typename _Alloc, typename
> _TraitsT,
> > bool __dfs_mode>
> > diff --git a/libstdc++-v3/include/std/regex
> b/libstdc++-v3/include/std/regex
> > index c8fecfb2ad7a..003499c1bab5 100644
> > --- a/libstdc++-v3/include/std/regex
> > +++ b/libstdc++-v3/include/std/regex
> > @@ -40,6 +40,7 @@
> > #else
> >
> > #include <bitset>
> > +#include <cstdint>
> > #include <locale>
> > #include <stack>
> > #include <stdexcept>
> > --
> > 2.53.0.rc1.65.gea24e2c554
> >
> >
> >
>
>
>