On Wed, 28 Jan 2026, Tomasz Kaminski wrote:

> 
> 
> On Wed, Jan 28, 2026 at 11:47 AM Tomasz Kaminski <[email protected]> wrote:
> 
> 
> On Sat, Jan 24, 2026 at 6:21 PM Patrick Palka <[email protected]> wrote:
>       Changes in v5:
>         - Replace char _ExecutorFrame::_M_char with
>           uint8_t _ExecutorFrame::_M_bytes[7] to fully account for
>           the implicit struct padding and make it char size/signedness
>           agnostic.
> 
>       Changes in v4:
>         - Do not remove the BFS executor.  I convinced myself that it's
>           worthwhile to keep it around for backwards compatibility and its
>           better time complexity (e.g. for PR78276).
>         - However, this means the mangling of _Executor is now the same as
>           before (since the __dfs_mode template parameter is no longer gone).
>           I believe we need to give this class a different mangling so
>           programs that mix old/new std::regex impls continue to work.
> 
>       Changes in v3:
> 
>         - Add char _M_char member to _ExecutorFrame (without increasing the
>           size of struct thanks to padding)
>         - Use it to fix bug in restore_subexpr_end frame code whereby we
>           weren't restoring _M_cur_results.matched properly
>         - Combine restore_subexpr_begin/end frame codes into one
>           restore_cur_results frame code
>         - Combine restore_rep_count_val/posn frame codes into single
>           restore_rep_count frame_code (this makes the patch 3/4 from v2
>           obsolete)
>         - Store the frame stack in the data member _ExecutorFrame::_M_frames
>           alongside the rest of the executor state instead of using a local.
>           Using a local variable is just more code with no real benefit.
> 
>       -- >8 --
> 
>       libstdc++/regex: Convert DFS executor to use iteration [PR86164]
> 
>       This replaces the recursion and implicit system stack usage of the DFS
>       executor with iteration and an explicit heap-based stack.  After this
>       patch, we no longer stack overflow on the PR86164 testcases since system
>       stack usage is no longer linear with respect to input size.  This should
>       otherwise be a non-functional change.
> 
> Except the one suggestion below, this LGTM. 
> 
>                     PR libstdc++/86164
> 
>             libstdc++-v3/ChangeLog:
> 
>                     * include/bits/regex_executor.h 
> (__detail::_ExecutorFrame):
>                     Declare.
>                     (__detail::_Executor::_M_node): Declare.
>                     (__detail::_Executor::_M_frames): New data member.
>                     * include/bits/regex_executor.tcc 
> (__detail::_ExecutorFrameOpcode):
>                     New.
>                     (__detail::_ExecutorFrame): New.
>                     (__detail::_Executor::_M_rep_once_more): Replace recursive
>                     _M_dfs calls with an _S_opcode_next frame push, and any 
> tail work
>                     after such calls with an appropriate frame push.
>                     (__detail::_M_handle_repeat): Likewise.
>                     (__detail::_M_handle_subexpr_begin): Likewise.
>                     (__detail::_M_handle_subexpr_end): Likewise.
>                     (__detail::_M_handle_line_begin_assertion): Likewise.
>                     (__detail::_M_handle_line_end_assertion): Likewise.
>                     (__detail::_M_handle_word_boundary): Likewise.
>                     (__detail::_M_handle_subexpr_lookahead): Likewise.
>                     (__detail::_M_handle_match): Likewise.
>                     (__detail::_M_handle_backref): Likewise.
>                     (__detail::_M_handle_accept): Likewise.
>                     (__detail::_M_handle_alternative): Likewise.
>                     (__detail::_M_node): Factored out from _M_dfs.
>                     (__detail::_M_dfs): Push an initial frame to _M_frames 
> that
>                     visits the starting node and pass this stack each 
> subroutine.
>                     Pop the latest _ExecutorFrame from _M_frames and handle
>                     appropriately according to its _ExecutorFrameOpcode.  
> Loop until
>                     _M_frames is empty.
>                     * include/std/regex: Include <cstdint>.
>             ---
>              libstdc++-v3/include/bits/regex_executor.h   |   7 +
>              libstdc++-v3/include/bits/regex_executor.tcc | 217 
> +++++++++++++++----
>              libstdc++-v3/include/std/regex               |   1 +
>              3 files changed, 180 insertions(+), 45 deletions(-)
> 
>             diff --git a/libstdc++-v3/include/bits/regex_executor.h 
> b/libstdc++-v3/include/bits/regex_executor.h
>             index f9857bfc4c1d..e9af007840bb 100644
>             --- a/libstdc++-v3/include/bits/regex_executor.h
>             +++ b/libstdc++-v3/include/bits/regex_executor.h
>             @@ -41,6 +41,9 @@ namespace __detail
>                 * @{
>                 */
> 
>             +  template<typename _BiIter, bool _Trivial = 
> is_trivially_copyable<_BiIter>::value>
>             +    struct _ExecutorFrame;
>             +
>                /**
>                 * @brief Takes a regex and an input string and does the 
> matching.
>                 *
>             @@ -142,6 +145,9 @@ namespace __detail
>                    void
>                    _M_handle_alternative(_Match_mode, _StateIdT);
> 
>             +      void
>             +      _M_node(_Match_mode, _StateIdT);
>             +
>                    void
>                    _M_dfs(_Match_mode __match_mode, _StateIdT __start);
> 
>             @@ -290,6 +296,7 @@ namespace __detail
>                     };
> 
>                  public:
>             +      _GLIBCXX_STD_C::vector<_ExecutorFrame<_BiIter>>       
> _M_frames;
>                    _ResultsVec                                           
> _M_cur_results;
>                    _BiIter                                               
> _M_current;
>                    _BiIter                                               
> _M_begin;
>             diff --git a/libstdc++-v3/include/bits/regex_executor.tcc 
> b/libstdc++-v3/include/bits/regex_executor.tcc
>             index 421e585f39d9..bc7731953b0e 100644
>             --- a/libstdc++-v3/include/bits/regex_executor.tcc
>             +++ b/libstdc++-v3/include/bits/regex_executor.tcc
>             @@ -55,6 +55,71 @@ namespace __detail
>                    return false;
>                  }
> 
>             +  enum _ExecutorFrameOpcode : uint8_t
>             +  {
>             +    _S_fopcode_next,
>             +    _S_fopcode_fallback_next,
>             +    _S_fopcode_rep_once_more,
>             +    _S_fopcode_fallback_rep_once_more,
>             +    _S_fopcode_posix_alternative,
>             +    _S_fopcode_merge_sol,
>             +    _S_fopcode_restore_current,
>             +    _S_fopcode_restore_cur_results,
>             +    _S_fopcode_restore_rep_count,
>             +    _S_fopcode_decrement_rep_count,
>             +  };
>             +
>             +  template<typename _BiIter, bool _Trivial /* = 
> is_trivially_copyable<_BiIter>::value */>
>             +    struct _ExecutorFrame
>             +    {
>             +      _ExecutorFrame(_ExecutorFrameOpcode __op, _StateIdT __i)
>             +      : _M_op(__op), _M_state_id(__i)
>             +      { }
>             +
>             +      _ExecutorFrame(_ExecutorFrameOpcode __op, _StateIdT __i, 
> _BiIter __p)
>             +      : _M_op(__op), _M_state_id(__i), _M_pos(__p)
>             +      { }
>             +
>             +      _ExecutorFrame(_ExecutorFrameOpcode __op, _StateIdT __i, 
> long __v)
>             +      : _M_op(__op), _M_state_id(__i), _M_val(__v)
>             +      { }
>             +
>             +      _ExecutorFrameOpcode _M_op;
>             +      uint8_t _M_bytes[7];
>             +      _StateIdT _M_state_id;
>             +      // _M_pos and _M_val are mutually exclusive, which the 
> optimized
>             +      // partial specialization below depends on.
>             +      _BiIter _M_pos = _BiIter();
>             +      long _M_val = 0;
>             +    };
>             +
>             +  // Space-optimized partial specialization for when the input 
> iterator is
>             +  // trivially copyable.
>             +  template<typename _BiIter>
>             +    struct _ExecutorFrame<_BiIter, true>
>             +    {
>             +      _ExecutorFrame(_ExecutorFrameOpcode __op, _StateIdT __i)
>             +      : _M_op(__op), _M_state_id(__i)
>             +      { }
>             +
>             +      _ExecutorFrame(_ExecutorFrameOpcode __op, _StateIdT __i, 
> _BiIter __p)
>             +      : _M_op(__op), _M_state_id(__i), _M_pos(__p)
>             +      { }
>             +
>             +      _ExecutorFrame(_ExecutorFrameOpcode __op, _StateIdT __i, 
> long __v)
>             +      : _M_op(__op), _M_state_id(__i), _M_val(__v)
>             +      { }
>             +
>             +      _ExecutorFrameOpcode _M_op;
>             +      uint8_t _M_bytes[7];
>             +      _StateIdT _M_state_id;
>             +      union
>             +      {
>             +       _BiIter _M_pos;
>             +       long _M_val;
>             +      };
>             +    };
>             +
>                // The _M_main function operates in different modes, DFS mode 
> or BFS mode,
>                // indicated by template parameter __dfs_mode, and dispatches 
> to one of the
>                // _M_main_dispatch overloads.
>             @@ -181,19 +246,20 @@ namespace __detail
>                    auto& __rep_count = _M_rep_count[__i];
>                    if (__rep_count.second == 0 || __rep_count.first != 
> _M_current)
>                     {
>             -         auto __back = __rep_count;
>             +         _M_frames.emplace_back(_S_fopcode_restore_rep_count,
>             +                                __i, __rep_count.first);
>             +         _M_frames.back()._M_bytes[0] = __rep_count.second;
>                       __rep_count.first = _M_current;
>                       __rep_count.second = 1;
>             -         _M_dfs(__match_mode, __state._M_alt);
>             -         __rep_count = __back;
>             +         _M_frames.emplace_back(_S_fopcode_next, __state._M_alt);
>                     }
>                    else
>                     {
>                       if (__rep_count.second < 2)
>                         {
>                           __rep_count.second++;
>             -             _M_dfs(__match_mode, __state._M_alt);
>             -             __rep_count.second--;
>             +             
> _M_frames.emplace_back(_S_fopcode_decrement_rep_count, __i);
>             +             _M_frames.emplace_back(_S_fopcode_next, 
> __state._M_alt);
>                         }
>                     }
>                  }
>             @@ -208,23 +274,23 @@ namespace __detail
>                  _M_handle_repeat(_Match_mode __match_mode, _StateIdT __i)
>                  {
>                    const auto& __state = _M_nfa[__i];
>             -
>                    // Greedy.
>                    if (!__state._M_neg)
>                     {
>             -         _M_rep_once_more(__match_mode, __i);
>             -         // If it's DFS executor and already accepted, we're 
> done.
>             -         if (!__dfs_mode || !_M_has_sol)
>             -           _M_dfs(__match_mode, __state._M_next);
>             +         if constexpr (__dfs_mode)
>             +           // If it's DFS executor and already accepted, we're 
> done.
>             +           _M_frames.emplace_back(_S_fopcode_fallback_next, 
> __state._M_next);
>             +         else
>             +           _M_frames.emplace_back(_S_fopcode_next, 
> __state._M_next);
>             +         _M_frames.emplace_back(_S_fopcode_rep_once_more, __i);
>                     }
>                    else // Non-greedy mode
>                     {
>                       if constexpr (__dfs_mode)
>                         {
>                           // vice-versa.
>             -             _M_dfs(__match_mode, __state._M_next);
>             -             if (!_M_has_sol)
>             -               _M_rep_once_more(__match_mode, __i);
>             +             
> _M_frames.emplace_back(_S_fopcode_fallback_rep_once_more, __i);
>             +             _M_frames.emplace_back(_S_fopcode_next, 
> __state._M_next);
>                         }
>                       else
>                         {
>             @@ -233,12 +299,11 @@ namespace __detail
>                           // be better by attempting its next node.
>                           if (!_M_has_sol)
>                             {
>             -                 _M_dfs(__match_mode, __state._M_next);
>                               // DON'T attempt anything if it's already 
> accepted. An
>                               // accepted state *must* be better than a 
> solution that
>                               // matches a non-greedy quantifier one more 
> time.
>             -                 if (!_M_has_sol)
>             -                   _M_rep_once_more(__match_mode, __i);
>             +                 
> _M_frames.emplace_back(_S_fopcode_fallback_rep_once_more, __i);
>             +                 _M_frames.emplace_back(_S_fopcode_next, 
> __state._M_next);
>                             }
>                         }
>                     }
>             @@ -250,12 +315,12 @@ namespace __detail
>                  _M_handle_subexpr_begin(_Match_mode __match_mode, _StateIdT 
> __i)
>                  {
>                    const auto& __state = _M_nfa[__i];
>             -
>                    auto& __res = _M_cur_results[__state._M_subexpr];
>             -      auto __back = __res.first;
>             +      _M_frames.emplace_back(_S_fopcode_restore_cur_results,
>             +                            __state._M_subexpr, __res.first);
>             +      _M_frames.back()._M_bytes[0] = 0xff;
>                    __res.first = _M_current;
>             -      _M_dfs(__match_mode, __state._M_next);
>             -      __res.first = __back;
>             +      _M_frames.emplace_back(_S_fopcode_next, __state._M_next);
>                  }
> 
>                template<typename _BiIter, typename _Alloc, typename _TraitsT,
>             @@ -264,13 +329,13 @@ namespace __detail
>                  _M_handle_subexpr_end(_Match_mode __match_mode, _StateIdT 
> __i)
>                  {
>                    const auto& __state = _M_nfa[__i];
>             -
>                    auto& __res = _M_cur_results[__state._M_subexpr];
>             -      auto __back = __res;
>             +      _M_frames.emplace_back(_S_fopcode_restore_cur_results,
>             +                            __state._M_subexpr, __res.second);
>             +      _M_frames.back()._M_bytes[0] = __res.matched;
>                    __res.second = _M_current;
>                    __res.matched = true;
>             -      _M_dfs(__match_mode, __state._M_next);
>             -      __res = __back;
>             +      _M_frames.emplace_back(_S_fopcode_next, __state._M_next);
>                  }
> 
>                template<typename _BiIter, typename _Alloc, typename _TraitsT,
>             @@ -280,7 +345,7 @@ namespace __detail
>                  {
>                    const auto& __state = _M_nfa[__i];
>                    if (_M_at_begin())
>             -       _M_dfs(__match_mode, __state._M_next);
>             +       _M_frames.emplace_back(_S_fopcode_next, __state._M_next);
>                  }
> 
>                template<typename _BiIter, typename _Alloc, typename _TraitsT,
>             @@ -290,7 +355,7 @@ namespace __detail
>                  {
>                    const auto& __state = _M_nfa[__i];
>                    if (_M_at_end())
>             -       _M_dfs(__match_mode, __state._M_next);
>             +       _M_frames.emplace_back(_S_fopcode_next, __state._M_next);
>                  }
> 
>                template<typename _BiIter, typename _Alloc, typename _TraitsT,
>             @@ -300,7 +365,7 @@ namespace __detail
>                  {
>                    const auto& __state = _M_nfa[__i];
>                    if (_M_word_boundary() == !__state._M_neg)
>             -       _M_dfs(__match_mode, __state._M_next);
>             +       _M_frames.emplace_back(_S_fopcode_next, __state._M_next);
>                  }
> 
>                // Here __state._M_alt offers a single start node for a 
> sub-NFA.
>             @@ -312,7 +377,7 @@ namespace __detail
>                  {
>                    const auto& __state = _M_nfa[__i];
>                    if (_M_lookahead(__state._M_alt) == !__state._M_neg)
>             -       _M_dfs(__match_mode, __state._M_next);
>             +       _M_frames.emplace_back(_S_fopcode_next, __state._M_next);
>                  }
> 
>                template<typename _BiIter, typename _Alloc, typename _TraitsT,
>             @@ -321,16 +386,15 @@ namespace __detail
>                  _M_handle_match(_Match_mode __match_mode, _StateIdT __i)
>                  {
>                    const auto& __state = _M_nfa[__i];
>             -
>                    if (_M_current == _M_end)
>                     return;
>                    if constexpr (__dfs_mode)
>                     {
>                       if (__state._M_matches(*_M_current))
>                         {
>             +             _M_frames.emplace_back(_S_fopcode_restore_current, 
> 0, _M_current);
>                           ++_M_current;
>             -             _M_dfs(__match_mode, __state._M_next);
>             -             --_M_current;
>             +             _M_frames.emplace_back(_S_fopcode_next, 
> __state._M_next);
>                         }
>                     }
>                    else
>             @@ -413,13 +477,12 @@ namespace __detail
>                     {
>                       if (__last != _M_current)
>                         {
>             -             auto __backup = _M_current;
>             +             _M_frames.emplace_back(_S_fopcode_restore_current, 
> 0, _M_current);
>                           _M_current = __last;
>             -             _M_dfs(__match_mode, __state._M_next);
>             -             _M_current = __backup;
>             +             _M_frames.emplace_back(_S_fopcode_next, 
> __state._M_next);
>                         }
>                       else
>             -           _M_dfs(__match_mode, __state._M_next);
>             +           _M_frames.emplace_back(_S_fopcode_next, 
> __state._M_next);
>                     }
>                  }
> 
>             @@ -483,31 +546,26 @@ namespace __detail
>                  _M_handle_alternative(_Match_mode __match_mode, _StateIdT 
> __i)
>                  {
>                    const auto& __state = _M_nfa[__i];
>             -
>                    if (_M_nfa._M_flags & regex_constants::ECMAScript)
>                     {
>                       // TODO: Fix BFS support. It is wrong.
>             -         _M_dfs(__match_mode, __state._M_alt);
>                       // Pick lhs if it matches. Only try rhs if it doesn't.
>             -         if (!_M_has_sol)
>             -           _M_dfs(__match_mode, __state._M_next);
>             +         _M_frames.emplace_back(_S_fopcode_fallback_next, 
> __state._M_next);
>             +         _M_frames.emplace_back(_S_fopcode_next, __state._M_alt);
>                     }
>                    else
>                     {
>                       // Try both and compare the result.
>                       // See "case _S_opcode_accept:" handling above.
>             -         _M_dfs(__match_mode, __state._M_alt);
>             -         auto __has_sol = _M_has_sol;
>             -         _M_has_sol = false;
>             -         _M_dfs(__match_mode, __state._M_next);
>             -         _M_has_sol |= __has_sol;
>             +         _M_frames.emplace_back(_S_fopcode_posix_alternative, 
> __state._M_next);
>             +         _M_frames.emplace_back(_S_fopcode_next, __state._M_alt);
>                     }
>                  }
> 
>                template<typename _BiIter, typename _Alloc, typename _TraitsT,
>                        bool __dfs_mode>
>                  void _Executor<_BiIter, _Alloc, _TraitsT, __dfs_mode>::
>             -    _M_dfs(_Match_mode __match_mode, _StateIdT __i)
>             +    _M_node(_Match_mode __match_mode, _StateIdT __i)
>                  {
>                    if (_M_states._M_visited(__i))
>                     return;
>             @@ -545,6 +603,75 @@ namespace __detail
>                     }
>                  }
> 
>             +  template<typename _BiIter, typename _Alloc, typename _TraitsT,
>             +          bool __dfs_mode>
>             +    void _Executor<_BiIter, _Alloc, _TraitsT, __dfs_mode>::
>             +    _M_dfs(_Match_mode __match_mode, _StateIdT __start)
>             +    {
>             +      _M_frames.emplace_back(_S_fopcode_next, __start);
>             +
>             +      while (!_M_frames.empty())
>             +       {
>             +         _ExecutorFrame<_BiIter> __frame = 
> std::move(_M_frames.back());
>             +         _M_frames.pop_back();
>             +
>             +         switch (__frame._M_op)
>             +           {
>             +           case _S_fopcode_next:
>             +             _M_node(__match_mode, __frame._M_state_id);
>             +             break;
>             +
>             +           case _S_fopcode_fallback_next:
>             +             if (!_M_has_sol)
>             +               _M_frames.emplace_back(_S_fopcode_next, 
> __frame._M_state_id);
> 
>       We are inserting frame here, potentially expanding the vector, while I 
> think
> this could be expressed equivalently as:
>    case _S_fopcode_fallback_next:
>       if (!_M_has_sol)
>         _M_node(__match_mode, __frame._M_state_id);
>     break;

Yes I think in theory we could do this shortcut wherever there's an
emplace_back call in the tail position (even in the subroutines e.g.
_M_rep_once_more) since we know we're going to immediately pop that
frame in the next iteration of the loop.  It does hurt readability a
little bit and makes the implementation less obviously non-recursive.
Maybe it's worthwhile doing it selectively where it makes an observable
performance difference (particularly with -O0).

Another option is to wrap the _M_frames stack in a class that contains
an additional member holding the top of the stack:

    struct {
      std::vector<_ExecutorFrame> _M_frames;
      _ExecutorFrame _M_top;
      // emplace_back, pop_back, back, empty
    };

That way consecutive push+pops won't touch the vector at all, I
think?  I'll try these ideas out.

> Or to make sure that fallback_next is same as next.
>    case _S_fopcode_fallback_next:
>        if (_M_has_sol)
>          break;
>        // fallthrough
>     case _S_fopcode_next:
>          _M_node(__match_mode, __frame._M_state_id);
>          break;

IIUC this isn't true after the 2nd patch which makes
_S_fopcode_fallback_next also restore _M_current.

>  WDYT?
>       +             break;
>       +
>       +           case _S_fopcode_fallback_rep_once_more:
>       +             if (!_M_has_sol)
>       +               _M_frames.emplace_back(_S_fopcode_rep_once_more,
>       +                                      __frame._M_state_id);
> 
>  
>       +             break;
>       +
>       +           case _S_fopcode_rep_once_more:
>       +             _M_rep_once_more(__match_mode, __frame._M_state_id);
>       +             break;
>       +
>       +           case _S_fopcode_posix_alternative:
>       +             _M_frames.emplace_back(_S_fopcode_merge_sol, 0, 
> _M_has_sol);
>       +             _M_frames.emplace_back(_S_fopcode_next, 
> __frame._M_state_id);
>       +             _M_has_sol = false;
>       +             break;
>       +
>       +           case _S_fopcode_merge_sol:
>       +             _M_has_sol |= __frame._M_val;
>       +             break;
>       +
>       +           case _S_fopcode_restore_current:
>       +             _M_current = __frame._M_pos;
>       +             break;
>       +
>       +           case _S_fopcode_restore_cur_results:
>       +             if (__frame._M_bytes[0] == 0xff)
>       +               _M_cur_results[__frame._M_state_id].first = 
> __frame._M_pos;
>       +             else
>       +               {
>       +                 _M_cur_results[__frame._M_state_id].second = 
> __frame._M_pos;
>       +                 _M_cur_results[__frame._M_state_id].matched = 
> __frame._M_bytes[0];
>       +               }
>       +             break;
>       +
>       +           case _S_fopcode_restore_rep_count:
>       +             _M_rep_count[__frame._M_state_id].first = __frame._M_pos;
>       +             _M_rep_count[__frame._M_state_id].second = 
> __frame._M_bytes[0];
>       +             break;
>       +
>       +           case _S_fopcode_decrement_rep_count:
>       +             _M_rep_count[__frame._M_state_id].second--;
>       +             break;
>       +           }
>       +       }
>       +    }
>       +
>          // Return whether now is at some word boundary.
>          template<typename _BiIter, typename _Alloc, typename _TraitsT,
>                  bool __dfs_mode>
>       diff --git a/libstdc++-v3/include/std/regex 
> b/libstdc++-v3/include/std/regex
>       index c8fecfb2ad7a..003499c1bab5 100644
>       --- a/libstdc++-v3/include/std/regex
>       +++ b/libstdc++-v3/include/std/regex
>       @@ -40,6 +40,7 @@
>        #else
> 
>        #include <bitset>
>       +#include <cstdint>
>        #include <locale>
>        #include <stack>
>        #include <stdexcept>
>       --
>       2.53.0.rc1.65.gea24e2c554
> 
> 
> 

Reply via email to