[clang-tools-extra] 743971f - Revert "[pseudo] Add error-recovery framework & brace-based recovery"

Sam McCall via cfe-commits Tue, 28 Jun 2022 12:11:23 -0700

Author: Sam McCall
Date: 2022-06-28T21:11:09+02:00
New Revision: 743971faaf84eaa0e6310bdc22c68f34c20330f1


URL: 
https://github.com/llvm/llvm-project/commit/743971faaf84eaa0e6310bdc22c68f34c20330f1
DIFF: 
https://github.com/llvm/llvm-project/commit/743971faaf84eaa0e6310bdc22c68f34c20330f1.diff

LOG: Revert "[pseudo] Add error-recovery framework & brace-based recovery"

This reverts commit a0f4c10ae227a62c2a63611e64eba83f0ff0f577.
This commit hadn't been reviewed yet, and was unintentionally included
on another branch.

Added: 
    

Modified: 
    clang-tools-extra/pseudo/include/clang-pseudo/GLR.h
    clang-tools-extra/pseudo/include/clang-pseudo/grammar/Grammar.h
    clang-tools-extra/pseudo/include/clang-pseudo/grammar/LRGraph.h
    clang-tools-extra/pseudo/include/clang-pseudo/grammar/LRTable.h
    clang-tools-extra/pseudo/lib/GLR.cpp
    clang-tools-extra/pseudo/lib/grammar/Grammar.cpp
    clang-tools-extra/pseudo/lib/grammar/GrammarBNF.cpp
    clang-tools-extra/pseudo/lib/grammar/LRGraph.cpp
    clang-tools-extra/pseudo/lib/grammar/LRTableBuild.cpp
    clang-tools-extra/pseudo/test/cxx/empty-member-spec.cpp
    clang-tools-extra/pseudo/unittests/GLRTest.cpp

Removed: 
    clang-tools-extra/pseudo/test/cxx/recovery-init-list.cpp


################################################################################
diff  --git a/clang-tools-extra/pseudo/include/clang-pseudo/GLR.h 
b/clang-tools-extra/pseudo/include/clang-pseudo/GLR.h
index edc4b2acc0a8..a3e8611de425 100644
--- a/clang-tools-extra/pseudo/include/clang-pseudo/GLR.h
+++ b/clang-tools-extra/pseudo/include/clang-pseudo/GLR.h
@@ -144,26 +144,6 @@ void glrShift(llvm::ArrayRef<const GSS::Node *> OldHeads,
 void glrReduce(std::vector<const GSS::Node *> &Heads, SymbolID Lookahead,
                const ParseParams &Params);
 
-// Heuristically recover from a state where no further parsing is possible.
-//
-// OldHeads is the parse state at TokenIndex.
-// This function consumes consumes zero or more tokens (advancing TokenIndex),
-// and places any recovery states created in NewHeads.
-//
-// On failure, NewHeads is empty and TokenIndex is unchanged.
-//
-// WARNING: glrRecover acts as a "fallback shift". If it consumes no tokens,
-// there is a risk of the parser falling into an infinite loop, creating an
-// endless sequence of recovery nodes.
-// Generally it is safe for recovery to match 0 tokens against sequence symbols
-// like `statement-seq`, as the grammar won't permit another statement-seq
-// immediately afterwards. However recovery strategies for `statement` should
-// consume at least one token, as statements may be adjacent in the input.
-void glrRecover(llvm::ArrayRef<const GSS::Node *> OldHeads,
-                unsigned &TokenIndex, const TokenStream &Tokens,
-                const ParseParams &Params,
-                std::vector<const GSS::Node *> &NewHeads);
-
 } // namespace pseudo
 } // namespace clang
 

diff  --git a/clang-tools-extra/pseudo/include/clang-pseudo/grammar/Grammar.h 
b/clang-tools-extra/pseudo/include/clang-pseudo/grammar/Grammar.h
index 7240f5adbb03..382da41397f0 100644
--- a/clang-tools-extra/pseudo/include/clang-pseudo/grammar/Grammar.h
+++ b/clang-tools-extra/pseudo/include/clang-pseudo/grammar/Grammar.h
@@ -81,12 +81,9 @@ inline tok::TokenKind symbolToToken(SymbolID SID) {
   assert(SID < NumTerminals);
   return static_cast<tok::TokenKind>(SID);
 }
-inline constexpr SymbolID tokenSymbol(tok::TokenKind TK) {
+inline SymbolID tokenSymbol(tok::TokenKind TK) {
   return TokenFlag | static_cast<SymbolID>(TK);
 }
-// Error recovery strategies.
-// FIXME: these should be provided as extensions instead.
-enum class RecoveryStrategy : uint8_t { None, Braces };
 
 // An extension is a piece of native code specific to a grammar that modifies
 // the behavior of annotated rules. One ExtensionID is assigned for each unique
@@ -110,7 +107,7 @@ struct Rule {
   // length to 9 (this is the longest sequence in cxx grammar).
   static constexpr unsigned SizeBits = 4;
   static constexpr unsigned MaxElements = 9;
-  static_assert(MaxElements < (1 << SizeBits), "Exceeds the maximum limit");
+  static_assert(MaxElements <= (1 << SizeBits), "Exceeds the maximum limit");
   static_assert(SizeBits + SymbolBits <= 16,
                 "Must be able to store symbol ID + size efficiently");
 
@@ -126,13 +123,6 @@ struct Rule {
   // being set for this rule.
   ExtensionID Guard = 0;
 
-  // Specifies the index within Sequence eligible for error recovery.
-  // Given stmt := { stmt-seq_opt }, if we fail to parse the stmt-seq then we
-  // should recover by finding the matching brace, and forcing stmt-seq to 
match
-  // everything between braces.
-  uint8_t RecoveryIndex = -1;
-  RecoveryStrategy Recovery = RecoveryStrategy::None;
-
   llvm::ArrayRef<SymbolID> seq() const {
     return llvm::ArrayRef<SymbolID>(Sequence, Size);
   }

diff  --git a/clang-tools-extra/pseudo/include/clang-pseudo/grammar/LRGraph.h 
b/clang-tools-extra/pseudo/include/clang-pseudo/grammar/LRGraph.h
index f5c2cc7a1623..1b5365327431 100644
--- a/clang-tools-extra/pseudo/include/clang-pseudo/grammar/LRGraph.h
+++ b/clang-tools-extra/pseudo/include/clang-pseudo/grammar/LRGraph.h
@@ -137,20 +137,8 @@ class LRGraph {
     SymbolID Label;
   };
 
-  // A possible error recovery: choose to match some tokens against a symbol.
-  //
-  // e.g. a state that contains
-  //   stmt := { . stmt-seq [recover=braces] }
-  // has a Recovery { Src = S, Strategy=braces, Result=stmt-seq }.
-  struct Recovery {
-    StateID Src; // The state we are in when encountering the error.
-    RecoveryStrategy Strategy; // Heuristic choosing the tokens to match.
-    SymbolID Result;           // The symbol that is produced.
-  };
-
   llvm::ArrayRef<State> states() const { return States; }
   llvm::ArrayRef<Edge> edges() const { return Edges; }
-  llvm::ArrayRef<Recovery> recoveries() const { return Recoveries; }
   llvm::ArrayRef<std::pair<SymbolID, StateID>> startStates() const {
     return StartStates;
   }
@@ -159,15 +147,12 @@ class LRGraph {
 
 private:
   LRGraph(std::vector<State> States, std::vector<Edge> Edges,
-          std::vector<Recovery> Recoveries,
           std::vector<std::pair<SymbolID, StateID>> StartStates)
       : States(std::move(States)), Edges(std::move(Edges)),
-        Recoveries(std::move(Recoveries)), StartStates(std::move(StartStates)) 
{
-  }
+        StartStates(std::move(StartStates)) {}
 
   std::vector<State> States;
   std::vector<Edge> Edges;
-  std::vector<Recovery> Recoveries;
   std::vector<std::pair<SymbolID, StateID>> StartStates;
 };
 

diff  --git a/clang-tools-extra/pseudo/include/clang-pseudo/grammar/LRTable.h 
b/clang-tools-extra/pseudo/include/clang-pseudo/grammar/LRTable.h
index c8e48dbaf309..70ce52924f11 100644
--- a/clang-tools-extra/pseudo/include/clang-pseudo/grammar/LRTable.h
+++ b/clang-tools-extra/pseudo/include/clang-pseudo/grammar/LRTable.h
@@ -121,14 +121,6 @@ class LRTable {
     uint16_t Value : ValueBits;
   };
 
-  struct Recovery {
-    RecoveryStrategy Strategy;
-    SymbolID Result;
-  };
-
-  // Returns all available actions for the given state on a terminal.
-  // Expected to be called by LR parsers.
-  llvm::ArrayRef<Action> getActions(StateID State, SymbolID Terminal) const;
   // Returns the state after we reduce a nonterminal.
   // Expected to be called by LR parsers.
   // REQUIRES: Nonterminal is valid here.
@@ -159,12 +151,6 @@ class LRTable {
                            symbolToToken(Terminal));
   }
 
-  // Looks up available recovery actions if we stopped parsing in this state.
-  llvm::ArrayRef<Recovery> getRecovery(StateID State) const {
-    return llvm::makeArrayRef(Recoveries.data() + RecoveryOffset[State],
-                              Recoveries.data() + RecoveryOffset[State + 1]);
-  }
-
   // Returns the state from which the LR parser should start to parse the input
   // tokens as the given StartSymbol.
   //
@@ -202,15 +188,9 @@ class LRTable {
     StateID State;
     RuleID Rule;
   };
-  struct RecoveryEntry {
-    StateID State;
-    RecoveryStrategy Strategy;
-    SymbolID Result;
-  };
-  // Build a specified table for testing purposes.
-  static LRTable buildForTests(const Grammar &, llvm::ArrayRef<Entry>,
-                               llvm::ArrayRef<ReduceEntry>,
-                               llvm::ArrayRef<RecoveryEntry> = {});
+  // Build a specifid table for testing purposes.
+  static LRTable buildForTests(const Grammar &G, llvm::ArrayRef<Entry>,
+                               llvm::ArrayRef<ReduceEntry>);
 
 private:
   // Looks up actions stored in the generic table.
@@ -242,11 +222,6 @@ class LRTable {
   // This is flattened by encoding the (SymbolID Nonterminal, tok::Kind Token)
   // as an index: Nonterminal * NUM_TOKENS + Token.
   llvm::BitVector FollowSets;
-
-  // Recovery stores all recovery actions from all states.
-  // A given state has [RecoveryOffset[S], RecoveryOffset[S+1]).
-  std::vector<uint32_t> RecoveryOffset;
-  std::vector<Recovery> Recoveries;
 };
 llvm::raw_ostream &operator<<(llvm::raw_ostream &, const LRTable::Action &);
 

diff  --git a/clang-tools-extra/pseudo/lib/GLR.cpp 
b/clang-tools-extra/pseudo/lib/GLR.cpp
index ccb3cd770f7f..0b9cf46245a9 100644
--- a/clang-tools-extra/pseudo/lib/GLR.cpp
+++ b/clang-tools-extra/pseudo/lib/GLR.cpp
@@ -24,156 +24,6 @@
 
 namespace clang {
 namespace pseudo {
-namespace {
-
-llvm::Optional<unsigned>
-findRecoveryEndpoint(RecoveryStrategy Strategy,
-                     const GSS::Node *RecoveryNode,
-                     const TokenStream &Tokens) {
-  assert(Strategy == RecoveryStrategy::Braces);
-  const ForestNode *LBrace = RecoveryNode->Payload;
-  assert(LBrace->kind() == ForestNode::Terminal &&
-         LBrace->symbol() == tokenSymbol(tok::l_brace));
-  if (const Token *RBrace = Tokens.tokens()[LBrace->startTokenIndex()].pair())
-    return Tokens.index(*RBrace);
-  return llvm::None;
-}
-
-} // namespace
-
-void glrRecover(llvm::ArrayRef<const GSS::Node *> OldHeads,
-                unsigned &TokenIndex, const TokenStream &Tokens,
-                const ParseParams &Params,
-                std::vector<const GSS::Node *> &NewHeads) {
-  LLVM_DEBUG(llvm::dbgs() << "Recovery at token " << TokenIndex << "...\n");
-  // Describes a possibility to recover by forcibly interpreting a range of
-  // tokens around the cursor as a nonterminal that we expected to see.
-  struct PlaceholderRecovery {
-    // The token prior to the nonterminal which is being recovered.
-    // This starts of the region we're skipping, so higher Position is better.
-    Token::Index Position;
-    // The nonterminal which will be created in order to recover.
-    SymbolID Symbol;
-    // The heuristic used to choose the bounds of the nonterminal to recover.
-    RecoveryStrategy Strategy;
-
-    // The GSS head where we are expecting the recovered nonterminal.
-    const GSS::Node *RecoveryNode;
-    // Payload of nodes on the way back from the OldHead to the recovery node.
-    // These represent the partial parse that is being discarded.
-    // They should become the children of the opaque recovery node.
-    //
-    // There may be multiple paths leading to the same recovery node, we choose
-    // one arbitrarily.
-    std::vector<const ForestNode *> Path;
-  };
-  std::vector<PlaceholderRecovery> Options;
-
-  // Find recovery options by walking up the stack.
-  //
-  // This is similar to exception handling: we walk up the "frames" of nested
-  // rules being parsed until we find one that has a "handler" which allows us
-  // to determine the node bounds without parsing it.
-  //
-  // Unfortunately there's a significant 
diff erence: the stack contains both
-  // "upward" nodes (ancestor parses) and "leftward" ones.
-  //  e.g. when parsing `int(2 + ?)`, the stack contains:
-  //    expr := expr + . expr     - which we're currently parsing
-  //    expr := type ( . expr )   - (up) we should recover this outer expr
-  //    expr := . type ( expr )   - (up+left) we should not recover this type!
-  //
-  // It's not obvious how to avoid collecting the latter as a recovery option.
-  // I think the distinction is ill-defined after merging items into states.
-  // For now, we have to take this into account when defining recovery rules.
-  // FIXME: find a more satisfying way to avoid such false recovery.
-  std::vector<const ForestNode *> Path;
-  llvm::DenseSet<const GSS::Node *> Seen;
-  auto DFS = [&](const GSS::Node *N, Token::Index NextTok, auto &DFS) {
-    if (!Seen.insert(N).second)
-      return;
-    for (auto Strategy : Params.Table.getRecovery(N->State)) {
-      Options.push_back(PlaceholderRecovery{
-          NextTok,
-          Strategy.Result,
-          Strategy.Strategy,
-          N,
-          Path,
-      });
-      LLVM_DEBUG(llvm::dbgs()
-                 << "Option: recover " << Params.G.symbolName(Strategy.Result)
-                 << " at token " << NextTok << "\n");
-    }
-    Path.push_back(N->Payload);
-    for (const GSS::Node *Parent : N->parents())
-      DFS(Parent, N->Payload->startTokenIndex(), DFS);
-    Path.pop_back();
-  };
-  for (auto *N : llvm::reverse(OldHeads))
-    DFS(N, TokenIndex, DFS);
-
-  // Now we select the option(s) we will use to recover.
-  //
-  // We prefer options starting further right, as these discard less code
-  // (e.g. we prefer to recover inner scopes rather than outer ones).
-  // The options also need to agree on an endpoint, so the parser has a
-  // consistent position afterwards.
-  //
-  // So conceptually we're sorting by the tuple (start, end), though we avoid
-  // computing `end` for options that can't be winners.
-
-  // Consider options starting further right first.
-  // Don't drop the others yet though, we may still use them if preferred 
fails.
-  llvm::stable_sort(Options, [&](const auto &L, const auto &R) {
-    return L.Position > R.Position;
-  });
-
-  assert(NewHeads.empty()); // We may repeatedly populate and clear it.
-  llvm::Optional<Token::Range> RecoveryRange;
-  for (const PlaceholderRecovery &Option : Options) {
-    // If this starts further right than options we've already found, then
-    // we'll never find anything better. Skip computing End for the rest.
-    if (RecoveryRange && Option.Position < RecoveryRange->Begin)
-      break;
-
-    auto End =
-        findRecoveryEndpoint(Option.Strategy, Option.RecoveryNode, Tokens);
-    // Only consider recovery that advances the parse.
-    if (!End || *End <= TokenIndex)
-      continue;
-    if (RecoveryRange) {
-      // If this is worse than our previous options, ignore it.
-      if (RecoveryRange->End < *End)
-        continue;
-      // If this is an improvement over our previous options, then drop them.
-      if (RecoveryRange->End > *End)
-        NewHeads.clear();
-    }
-    // Create recovery nodes and heads for them in the GSS. These may be
-    // discarded if a better recovery is later found, but this path isn't hot.
-    RecoveryRange = {Option.Position, *End};
-    const ForestNode &Placeholder =
-        Params.Forest.createOpaque(Option.Symbol, Option.Position);
-    const GSS::Node *NewHead = Params.GSStack.addNode(
-        Params.Table.getGoToState(Option.RecoveryNode->State, Option.Symbol),
-        &Placeholder, {Option.RecoveryNode});
-    NewHeads.push_back(NewHead);
-  }
-
-  // Advance the cursor, whether recovery succeeded or not.
-  if (RecoveryRange) {
-    LLVM_DEBUG({
-      llvm::dbgs() << "Recovered range=" << *RecoveryRange << ":";
-      for (const auto *Head : NewHeads)
-        llvm::dbgs() << " " << Params.G.symbolName(Head->Payload->symbol());
-      llvm::dbgs() << "\n";
-    });
-    TokenIndex = RecoveryRange->End;
-  } else {
-    LLVM_DEBUG(llvm::dbgs() << "Recovery failed after trying " << 
Options.size()
-                            << " strategies\n");
-    ++TokenIndex;
-  }
-}
 
 using StateID = LRTable::StateID;
 
@@ -181,9 +31,8 @@ llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const 
GSS::Node &N) {
   std::vector<std::string> ParentStates;
   for (const auto *Parent : N.parents())
     ParentStates.push_back(llvm::formatv("{0}", Parent->State));
-  OS << llvm::formatv("state {0}, parsed symbol {1}, parents {3}", N.State,
-                      N.Payload ? N.Payload->symbol() : 0,
-                      llvm::join(ParentStates, ", "));
+  OS << llvm::formatv("state {0}, parsed symbol {1}, parents {2}", N.State,
+                      N.Payload->symbol(), llvm::join(ParentStates, ", "));
   return OS;
 }
 
@@ -578,27 +427,15 @@ const ForestNode &glrParse(const TokenStream &Tokens, 
const ParseParams &Params,
     GSS.gc(std::move(Roots));
   };
   // Each iteration fully processes a single token.
-  for (unsigned I = 0; I < Terminals.size();) {
+  for (unsigned I = 0; I < Terminals.size(); ++I) {
     LLVM_DEBUG(llvm::dbgs() << llvm::formatv(
                    "Next token {0} (id={1})\n",
                    G.symbolName(Terminals[I].symbol()), 
Terminals[I].symbol()));
     // Consume the token.
     glrShift(Heads, Terminals[I], Params, NextHeads);
-
-    // If we weren't able to consume the token, try to skip over some tokens
-    // so we can keep parsing.
-    if (NextHeads.empty()) {
-      glrRecover(Heads, I, Tokens, Params, NextHeads);
-      if (NextHeads.empty())
-        // FIXME: Ensure the `_ := start-symbol` rules have a fallback
-        // error-recovery strategy attached. Then this condition can't happen.
-        return Params.Forest.createOpaque(StartSymbol, /*Token::Index=*/0);
-    } else
-      ++I;
-
     // Form nonterminals containing the token we just consumed.
-    SymbolID Lookahead =
-        I == Terminals.size() ? tokenSymbol(tok::eof) : Terminals[I].symbol();
+    SymbolID Lookahead = I + 1 == Terminals.size() ? tokenSymbol(tok::eof)
+                                                   : Terminals[I + 1].symbol();
     Reduce(NextHeads, Lookahead);
     // Prepare for the next token.
     std::swap(Heads, NextHeads);
@@ -607,35 +444,22 @@ const ForestNode &glrParse(const TokenStream &Tokens, 
const ParseParams &Params,
   }
   LLVM_DEBUG(llvm::dbgs() << llvm::formatv("Reached eof\n"));
 
-  // The parse was successful if we're in state `_ := start-symbol .`
   StateID AcceptState = Params.Table.getGoToState(StartState, StartSymbol);
-  auto SearchForAccept = [&](llvm::ArrayRef<const GSS::Node *> Heads) {
-    const ForestNode *Result = nullptr;
-    for (const auto *Head : Heads) {
-      if (Head->State == AcceptState) {
-        assert(Head->Payload->symbol() == StartSymbol);
-        assert(Result == nullptr && "multiple results!");
-        Result = Head->Payload;
-      }
+  const ForestNode *Result = nullptr;
+  for (const auto *Head : Heads) {
+    if (Head->State == AcceptState) {
+      assert(Head->Payload->symbol() == StartSymbol);
+      assert(Result == nullptr && "multiple results!");
+      Result = Head->Payload;
     }
-    return Result;
-  };
-  if (auto *Result = SearchForAccept(Heads))
-    return *Result;
-  // Failed to parse the input, attempt to run recovery.
-  // FIXME: this awkwardly repeats the recovery in the loop, when shift fails.
-  // More elegant is to include EOF in the token stream, and make the
-  // augmented rule: `_ := translation-unit EOF`. In this way recovery at EOF
-  // would not be a special case: it show up as a failure to shift the EOF
-  // token.
-  unsigned I = Terminals.size();
-  glrRecover(Heads, I, Tokens, Params, NextHeads);
-  Reduce(NextHeads, tokenSymbol(tok::eof));
-  if (auto *Result = SearchForAccept(NextHeads))
+  }
+  if (Result)
     return *Result;
-
   // We failed to parse the input, returning an opaque forest node for 
recovery.
-  // FIXME: as above, we can add fallback error handling so this is impossible.
+  //
+  // FIXME: We will need to invoke our generic error-recovery handlers when we
+  // reach EOF without reaching accept state, and involving the eof
+  // token in the above main for-loopmay be the best way to reuse the code).
   return Params.Forest.createOpaque(StartSymbol, /*Token::Index=*/0);
 }
 
@@ -646,10 +470,9 @@ void glrReduce(std::vector<const GSS::Node *> &Heads, 
SymbolID Lookahead,
 }
 
 const GSS::Node *GSS::addNode(LRTable::StateID State, const ForestNode *Symbol,
-
                               llvm::ArrayRef<const Node *> Parents) {
   Node *Result = new (allocate(Parents.size()))
-      Node({State, GCParity, static_cast<uint16_t>(Parents.size())});
+      Node({State, GCParity, static_cast<unsigned>(Parents.size())});
   Alive.push_back(Result);
   ++NodesCreated;
   Result->Payload = Symbol;

diff  --git a/clang-tools-extra/pseudo/lib/grammar/Grammar.cpp 
b/clang-tools-extra/pseudo/lib/grammar/Grammar.cpp
index dc4d95820264..8e3bcb7afb37 100644
--- a/clang-tools-extra/pseudo/lib/grammar/Grammar.cpp
+++ b/clang-tools-extra/pseudo/lib/grammar/Grammar.cpp
@@ -59,11 +59,8 @@ std::string Grammar::dumpRule(RuleID RID) const {
   llvm::raw_string_ostream OS(Result);
   const Rule &R = T->Rules[RID];
   OS << symbolName(R.Target) << " :=";
-  for (unsigned I = 0; I < R.Size; ++I) {
-    OS << " " << symbolName(R.Sequence[I]);
-    if (R.RecoveryIndex == I)
-      OS << " [recover=" << static_cast<unsigned>(R.Recovery) << "]";
-  }
+  for (SymbolID SID : R.seq())
+    OS << " " << symbolName(SID);
   if (R.Guard)
     OS << " [guard=" << T->AttributeValues[R.Guard] << "]";
   return Result;

diff  --git a/clang-tools-extra/pseudo/lib/grammar/GrammarBNF.cpp 
b/clang-tools-extra/pseudo/lib/grammar/GrammarBNF.cpp
index 281c08681c92..9fbc34da7155 100644
--- a/clang-tools-extra/pseudo/lib/grammar/GrammarBNF.cpp
+++ b/clang-tools-extra/pseudo/lib/grammar/GrammarBNF.cpp
@@ -106,17 +106,6 @@ class GrammarBuilder {
 
     assert(T->Rules.size() < (1 << RuleBits) &&
            "Too many rules to fit in RuleID bits!");
-    // Wherever RHS contains { foo }, mark foo for brace-recovery.
-    // FIXME: this should be grammar annotations instead.
-    for (auto &Rule : T->Rules) {
-      for (unsigned I = 2; I < Rule.Size; ++I)
-        if (Rule.Sequence[I] == tokenSymbol(tok::r_brace) &&
-            Rule.Sequence[I - 2] == tokenSymbol(tok::l_brace) &&
-            !isToken(Rule.Sequence[I - 1])) {
-          Rule.Recovery = RecoveryStrategy::Braces;
-          Rule.RecoveryIndex = I - 1;
-        }
-    }
     const auto &SymbolOrder = getTopologicalOrder(T.get());
     llvm::stable_sort(
         T->Rules, [&SymbolOrder](const Rule &Left, const Rule &Right) {

diff  --git a/clang-tools-extra/pseudo/lib/grammar/LRGraph.cpp 
b/clang-tools-extra/pseudo/lib/grammar/LRGraph.cpp
index 290a4f32f576..5312a675cdb8 100644
--- a/clang-tools-extra/pseudo/lib/grammar/LRGraph.cpp
+++ b/clang-tools-extra/pseudo/lib/grammar/LRGraph.cpp
@@ -120,20 +120,6 @@ nextAvailableKernelItems(const State &S, const Grammar &G) 
{
   return Results;
 }
 
-std::vector<std::pair<RecoveryStrategy, SymbolID>>
-availableRecovery(const State &S, const Grammar &G) {
-  std::vector<std::pair<RecoveryStrategy, SymbolID>> Result;
-  for (const Item &I : S.Items) {
-    const auto &Rule = G.lookupRule(I.rule());
-    if (I.dot() != Rule.RecoveryIndex)
-      continue;
-    Result.push_back({Rule.Recovery, Rule.seq()[Rule.RecoveryIndex]});
-  }
-  llvm::sort(Result);
-  Result.erase(std::unique(Result.begin(), Result.end()), Result.end());
-  return Result;
-}
-
 } // namespace
 
 std::string Item::dump(const Grammar &G) const {
@@ -144,10 +130,9 @@ std::string Item::dump(const Grammar &G) const {
       Results.push_back(G.symbolName(SID));
     return Results;
   };
-  return llvm::formatv("{0} := {1} • {2}{3}", G.symbolName(Rule.Target),
+  return llvm::formatv("{0} := {1} • {2}", G.symbolName(Rule.Target),
                        llvm::join(ToNames(Rule.seq().take_front(DotPos)), " "),
-                       llvm::join(ToNames(Rule.seq().drop_front(DotPos)), " "),
-                       Rule.RecoveryIndex == DotPos ? " [recovery]" : "")
+                       llvm::join(ToNames(Rule.seq().drop_front(DotPos)), " "))
       .str();
 }
 
@@ -196,11 +181,6 @@ LRGraph LRGraph::buildLR0(const Grammar &G) {
       Edges.push_back({Src, Dst, Label});
     }
 
-    void insertRecovery(StateID Src, RecoveryStrategy Strategy,
-                        SymbolID Result) {
-      Recoveries.push_back({Src, Strategy, Result});
-    }
-
     // Returns a state with the given id.
     const State &find(StateID ID) const {
       assert(ID < States.size());
@@ -214,10 +194,9 @@ LRGraph LRGraph::buildLR0(const Grammar &G) {
     LRGraph build() && {
       States.shrink_to_fit();
       Edges.shrink_to_fit();
-      Recoveries.shrink_to_fit();
       llvm::sort(StartStates);
       StartStates.shrink_to_fit();
-      return LRGraph(std::move(States), std::move(Edges), 
std::move(Recoveries),
+      return LRGraph(std::move(States), std::move(Edges),
                      std::move(StartStates));
     }
 
@@ -226,7 +205,6 @@ LRGraph LRGraph::buildLR0(const Grammar &G) {
     llvm::DenseMap<ItemSet, /*index of States*/ size_t> StatesIndex;
     std::vector<State> States;
     std::vector<Edge> Edges;
-    std::vector<Recovery> Recoveries;
     const Grammar &G;
     std::vector<std::pair<SymbolID, StateID>> StartStates;
   } Builder(G);
@@ -247,16 +225,15 @@ LRGraph LRGraph::buildLR0(const Grammar &G) {
   }
 
   while (!PendingStates.empty()) {
-    auto StateID = PendingStates.back();
+    auto CurrentStateID = PendingStates.back();
     PendingStates.pop_back();
-    for (auto Next : nextAvailableKernelItems(Builder.find(StateID), G)) {
+    for (auto Next :
+         nextAvailableKernelItems(Builder.find(CurrentStateID), G)) {
       auto Insert = Builder.insert(Next.second);
       if (Insert.second) // new state, insert to the pending queue.
         PendingStates.push_back(Insert.first);
-      Builder.insertEdge(StateID, Insert.first, Next.first);
+      Builder.insertEdge(CurrentStateID, Insert.first, Next.first);
     }
-    for (auto Recovery : availableRecovery(Builder.find(StateID), G))
-      Builder.insertRecovery(StateID, Recovery.first, Recovery.second);
   }
   return std::move(Builder).build();
 }

diff  --git a/clang-tools-extra/pseudo/lib/grammar/LRTableBuild.cpp 
b/clang-tools-extra/pseudo/lib/grammar/LRTableBuild.cpp
index 3d36042044ad..59ea4ce5e327 100644
--- a/clang-tools-extra/pseudo/lib/grammar/LRTableBuild.cpp
+++ b/clang-tools-extra/pseudo/lib/grammar/LRTableBuild.cpp
@@ -11,7 +11,6 @@
 #include "clang-pseudo/grammar/LRTable.h"
 #include "clang/Basic/TokenKinds.h"
 #include "llvm/ADT/SmallSet.h"
-#include "llvm/Support/raw_ostream.h"
 #include <cstdint>
 
 namespace llvm {
@@ -45,7 +44,6 @@ struct LRTable::Builder {
   llvm::DenseSet<Entry> Entries;
   llvm::DenseMap<StateID, llvm::SmallSet<RuleID, 4>> Reduces;
   std::vector<llvm::DenseSet<SymbolID>> FollowSets;
-  std::vector<LRGraph::Recovery> Recoveries;
 
   LRTable build(unsigned NumStates) && {
     // E.g. given the following parsing table with 3 states and 3 terminals:
@@ -90,26 +88,6 @@ struct LRTable::Builder {
     }
     Table.StartStates = std::move(StartStates);
 
-    // Error recovery entries: sort (no dups already), and build offset lookup.
-    llvm::sort(Recoveries,
-               [&](const LRGraph::Recovery &L, const LRGraph::Recovery &R) {
-                 return std::tie(L.Src, L.Result, L.Strategy) <
-                        std::tie(R.Src, R.Result, R.Strategy);
-               });
-    Table.Recoveries.reserve(Recoveries.size());
-    for (const auto &R : Recoveries)
-      Table.Recoveries.push_back({R.Strategy, R.Result});
-    Table.RecoveryOffset = std::vector<uint32_t>(NumStates + 1, 0);
-    SortedIndex = 0;
-    for (StateID State = 0; State < NumStates; ++State) {
-      Table.RecoveryOffset[State] = SortedIndex;
-      while (SortedIndex < Recoveries.size() &&
-             Recoveries[SortedIndex].Src == State)
-        SortedIndex++;
-    }
-    Table.RecoveryOffset[NumStates] = SortedIndex;
-    assert(SortedIndex == Recoveries.size());
-
     // Compile the follow sets into a bitmap.
     Table.FollowSets.resize(tok::NUM_TOKENS * FollowSets.size());
     for (SymbolID NT = 0; NT < FollowSets.size(); ++NT)
@@ -136,8 +114,7 @@ struct LRTable::Builder {
 };
 
 LRTable LRTable::buildForTests(const Grammar &G, llvm::ArrayRef<Entry> Entries,
-                               llvm::ArrayRef<ReduceEntry> Reduces,
-                               llvm::ArrayRef<RecoveryEntry> Recoveries) {
+                               llvm::ArrayRef<ReduceEntry> Reduces) {
   StateID MaxState = 0;
   for (const auto &Entry : Entries) {
     MaxState = std::max(MaxState, Entry.State);
@@ -151,8 +128,6 @@ LRTable LRTable::buildForTests(const Grammar &G, 
llvm::ArrayRef<Entry> Entries,
   for (const ReduceEntry &E : Reduces)
     Build.Reduces[E.State].insert(E.Rule);
   Build.FollowSets = followSets(G);
-  for (const auto &R : Recoveries)
-    Build.Recoveries.push_back({R.State, R.Strategy, R.Result});
   return std::move(Build).build(/*NumStates=*/MaxState + 1);
 }
 
@@ -160,7 +135,6 @@ LRTable LRTable::buildSLR(const Grammar &G) {
   auto Graph = LRGraph::buildLR0(G);
   Builder Build;
   Build.StartStates = Graph.startStates();
-  Build.Recoveries = Graph.recoveries();
   for (const auto &T : Graph.edges()) {
     Action Act = isToken(T.Label) ? Action::shift(T.Dst) : Action::goTo(T.Dst);
     Build.Entries.insert({T.Src, T.Label, Act});

diff  --git a/clang-tools-extra/pseudo/test/cxx/empty-member-spec.cpp 
b/clang-tools-extra/pseudo/test/cxx/empty-member-spec.cpp
index bcfdff192b99..6d7a6823d0bf 100644
--- a/clang-tools-extra/pseudo/test/cxx/empty-member-spec.cpp
+++ b/clang-tools-extra/pseudo/test/cxx/empty-member-spec.cpp
@@ -2,7 +2,7 @@
 class Foo {
 public:
 };
-// CHECK:      decl-specifier-seq~class-specifier := class-head { 
member-specification [recover=1] }
+// CHECK:      decl-specifier-seq~class-specifier := class-head { 
member-specification }
 // CHECK-NEXT: ├─class-head := class-key class-head-name
 // CHECK-NEXT: │ ├─class-key~CLASS := tok[0]
 // CHECK-NEXT: │ └─class-head-name~IDENTIFIER := tok[1]

diff  --git a/clang-tools-extra/pseudo/test/cxx/recovery-init-list.cpp 
b/clang-tools-extra/pseudo/test/cxx/recovery-init-list.cpp
deleted file mode 100644
index a8bbe88dadd8..000000000000
--- a/clang-tools-extra/pseudo/test/cxx/recovery-init-list.cpp
+++ /dev/null
@@ -1,13 +0,0 @@
-// RUN: clang-pseudo -grammar=%cxx-bnf-file -source=%s --print-forest | 
FileCheck %s
-auto x = { complete garbage };
-// CHECK:      translation-unit~simple-declaration
-// CHECK-NEXT: ├─decl-specifier-seq~AUTO := tok[0]
-// CHECK-NEXT: ├─init-declarator-list~init-declarator
-// CHECK-NEXT: │ ├─declarator~IDENTIFIER := tok[1]
-// CHECK-NEXT: │ └─initializer~brace-or-equal-initializer
-// CHECK-NEXT: │   ├─= := tok[2]
-// CHECK-NEXT: │   └─initializer-clause~braced-init-list
-// CHECK-NEXT: │     ├─{ := tok[3]
-// CHECK-NEXT: │     ├─initializer-list := <opaque>
-// CHECK-NEXT: │     └─} := tok[6]
-// CHECK-NEXT: └─; := tok[7]

diff  --git a/clang-tools-extra/pseudo/unittests/GLRTest.cpp 
b/clang-tools-extra/pseudo/unittests/GLRTest.cpp
index a37fda3fe2f5..846f2dbf1677 100644
--- a/clang-tools-extra/pseudo/unittests/GLRTest.cpp
+++ b/clang-tools-extra/pseudo/unittests/GLRTest.cpp
@@ -7,7 +7,6 @@
 
//===----------------------------------------------------------------------===//
 
 #include "clang-pseudo/GLR.h"
-#include "clang-pseudo/Bracket.h"
 #include "clang-pseudo/Token.h"
 #include "clang-pseudo/grammar/Grammar.h"
 #include "clang/Basic/LangOptions.h"
@@ -33,13 +32,11 @@ namespace {
 using Action = LRTable::Action;
 using testing::AllOf;
 using testing::ElementsAre;
-using testing::IsEmpty;
 using testing::UnorderedElementsAre;
 
 MATCHER_P(state, StateID, "") { return arg->State == StateID; }
 MATCHER_P(parsedSymbol, FNode, "") { return arg->Payload == FNode; }
 MATCHER_P(parsedSymbolID, SID, "") { return arg->Payload->symbol() == SID; }
-MATCHER_P(start, Start, "") { return arg->Payload->startTokenIndex() == Start; 
}
 
 testing::Matcher<const GSS::Node *>
 parents(llvm::ArrayRef<const GSS::Node *> Parents) {
@@ -241,9 +238,9 @@ TEST_F(GLRTest, ReduceJoiningWithMultipleBases) {
       /*State=*/1, /*ForestNode=*/CVQualifierNode, /*Parents=*/{GSSNode0});
   const auto *GSSNode2 = GSStack.addNode(
       /*State=*/2, /*ForestNode=*/CVQualifierNode, /*Parents=*/{GSSNode0});
-  const auto *GSSNode3 = GSStack.addNode(
-      /*State=*/3, /*ForestNode=*/ClassNameNode,
-      /*Parents=*/{GSSNode1});
+  const auto *GSSNode3 =
+      GSStack.addNode(/*State=*/3, /*ForestNode=*/ClassNameNode,
+                      /*Parents=*/{GSSNode1});
   const auto *GSSNode4 =
       GSStack.addNode(/*State=*/4, /*ForestNode=*/EnumNameNode,
                       /*Parents=*/{GSSNode2});
@@ -366,124 +363,6 @@ TEST_F(GLRTest, ReduceLookahead) {
   EXPECT_THAT(Heads, ElementsAre(GSSNode1));
 }
 
-TEST_F(GLRTest, Recover) {
-  // Recovery while parsing "word" inside braces.
-  //  Before:
-  //    0--1({)--2(?)
-  //  After recovering a `word` at state 1:
-  //    0--3(word)  // 3 is goto(1, word)
-  buildGrammar({"word"}, {});
-  LRTable Table = LRTable::buildForTests(
-      G, {{/*State=*/1, id("word"), Action::goTo(3)}}, /*Reduce=*/{},
-      /*Recovery=*/{{/*State=*/1, RecoveryStrategy::Braces, id("word")}});
-
-  auto *LBrace = &Arena.createTerminal(tok::l_brace, 0);
-  auto *Question1 = &Arena.createTerminal(tok::question, 1);
-  const auto *Root = GSStack.addNode(0, nullptr, {});
-  const auto *OpenedBraces = GSStack.addNode(1, LBrace, {Root});
-  const auto *AfterQuestion1 = GSStack.addNode(2, Question1, {OpenedBraces});
-
-  // Need a token stream with paired braces so the strategy works.
-  clang::LangOptions LOptions;
-  TokenStream Tokens = cook(lex("{ ? ? ? }", LOptions), LOptions);
-  pairBrackets(Tokens);
-  std::vector<const GSS::Node *> NewHeads;
-
-  unsigned TokenIndex = 2;
-  glrRecover({AfterQuestion1}, TokenIndex, Tokens, {G, Table, Arena, GSStack},
-             NewHeads);
-  EXPECT_EQ(TokenIndex, 4u) << "should skip ahead to matching brace";
-  EXPECT_THAT(NewHeads, ElementsAre(
-                            AllOf(state(3), parsedSymbolID(id("word")),
-                                  parents({OpenedBraces}), start(1u))));
-  EXPECT_EQ(NewHeads.front()->Payload->kind(), ForestNode::Opaque);
-
-  // Test recovery failure: omit closing brace so strategy fails
-  TokenStream NoRBrace = cook(lex("{ ? ? ? ?", LOptions), LOptions);
-  pairBrackets(NoRBrace);
-  NewHeads.clear();
-  TokenIndex = 2;
-  glrRecover({AfterQuestion1}, TokenIndex, NoRBrace,
-             {G, Table, Arena, GSStack}, NewHeads);
-  EXPECT_EQ(TokenIndex, 3u) << "should advance by 1 by default";
-  EXPECT_THAT(NewHeads, IsEmpty());
-}
-
-TEST_F(GLRTest, RecoverRightmost) {
-  // In a nested block structure, we recover at the innermost possible block.
-  //  Before:
-  //    0--1({)--1({)--1({)
-  //  After recovering a `block` at inside the second braces:
-  //    0--1({)--2(body)  // 2 is goto(1, body)
-  buildGrammar({"body"}, {});
-  LRTable Table = LRTable::buildForTests(
-      G, {{/*State=*/1, id("body"), Action::goTo(2)}}, /*Reduce=*/{},
-      /*Recovery=*/{{/*State=*/1, RecoveryStrategy::Braces, id("body")}});
-
-  clang::LangOptions LOptions;
-  // Innermost brace is unmatched, to test fallback to next brace.
-  TokenStream Tokens = cook(lex("{ { { ? ? } }", LOptions), LOptions);
-  Tokens.tokens()[0].Pair = 5;
-  Tokens.tokens()[1].Pair = 4;
-  Tokens.tokens()[4].Pair = 1;
-  Tokens.tokens()[5].Pair = 0;
-
-  auto *Brace1 = &Arena.createTerminal(tok::l_brace, 0);
-  auto *Brace2 = &Arena.createTerminal(tok::l_brace, 1);
-  auto *Brace3 = &Arena.createTerminal(tok::l_brace, 2);
-  const auto *Root = GSStack.addNode(0, nullptr, {});
-  const auto *In1 = GSStack.addNode(1, Brace1, {Root});
-  const auto *In2 = GSStack.addNode(1, Brace2, {In1});
-  const auto *In3 = GSStack.addNode(1, Brace3, {In2});
-
-  unsigned TokenIndex = 3;
-  std::vector<const GSS::Node *> NewHeads;
-  glrRecover({In3}, TokenIndex, Tokens, {G, Table, Arena, GSStack}, NewHeads);
-  EXPECT_EQ(TokenIndex, 5u);
-  EXPECT_THAT(NewHeads, ElementsAre(AllOf(state(2), parsedSymbolID(id("body")),
-                                          parents({In2}), start(2u))));
-}
-
-TEST_F(GLRTest, RecoverAlternatives) {
-  // Recovery inside braces with multiple equally good options
-  //  Before:
-  //    0--1({)
-  //  After recovering either `word` or `number` inside the braces:
-  //    0--1({)--2(word)   // 2 is goto(1, word)
-  //          └--3(number) // 3 is goto(1, number)
-  buildGrammar({"number", "word"}, {});
-  LRTable Table = LRTable::buildForTests(
-      G,
-      {
-          {/*State=*/1, id("number"), Action::goTo(2)},
-          {/*State=*/1, id("word"), Action::goTo(3)},
-      },
-      /*Reduce=*/{},
-      /*Recovery=*/
-      {
-          {/*State=*/1, RecoveryStrategy::Braces, id("number")},
-          {/*State=*/1, RecoveryStrategy::Braces, id("word")},
-      });
-  auto *LBrace = &Arena.createTerminal(tok::l_brace, 0);
-  const auto *Root = GSStack.addNode(0, nullptr, {});
-  const auto *OpenedBraces = GSStack.addNode(1, LBrace, {Root});
-
-  clang::LangOptions LOptions;
-  TokenStream Tokens = cook(lex("{ ? }", LOptions), LOptions);
-  pairBrackets(Tokens);
-  std::vector<const GSS::Node *> NewHeads;
-  unsigned TokenIndex = 1;
-
-  glrRecover({OpenedBraces}, TokenIndex, Tokens, {G, Table, Arena, GSStack},
-             NewHeads);
-  EXPECT_EQ(TokenIndex, 2u);
-  EXPECT_THAT(NewHeads,
-              UnorderedElementsAre(AllOf(state(2), 
parsedSymbolID(id("number")),
-                                         parents({OpenedBraces}), start(1u)),
-                                   AllOf(state(3), parsedSymbolID(id("word")),
-                                         parents({OpenedBraces}), start(1u))));
-}
-
 TEST_F(GLRTest, PerfectForestNodeSharing) {
   // Run the GLR on a simple grammar and test that we build exactly one forest
   // node per (SymbolID, token range).
@@ -552,40 +431,6 @@ TEST_F(GLRTest, GLRReduceOrder) {
                                      "[  0, end)     └─IDENTIFIER := 
tok[0]\n");
 }
 
-TEST_F(GLRTest, RecoveryEndToEnd) {
-  // Simple example of brace-based recovery showing:
-  //  - recovered region includes tokens both ahead of and behind the cursor
-  //  - multiple possible recovery rules
-  //  - recovery from outer scopes is rejected
-  build(R"bnf(
-    _ := block
-
-    block := { block }
-    block := { numbers }
-    numbers := NUMERIC_CONSTANT NUMERIC_CONSTANT
-  )bnf");
-  auto LRTable = LRTable::buildSLR(G);
-  clang::LangOptions LOptions;
-  TokenStream Tokens = cook(lex("{ { 42 ? } }", LOptions), LOptions);
-  pairBrackets(Tokens);
-
-  const ForestNode &Parsed =
-      glrParse(Tokens, {G, LRTable, Arena, GSStack}, id("block"));
-  EXPECT_EQ(Parsed.dumpRecursive(G),
-            "[  0, end) block := { block [recover=1] }\n"
-            "[  0,   1) ├─{ := tok[0]\n"
-            "[  1,   5) ├─block := <ambiguous>\n"
-            "[  1,   5) │ ├─block := { block [recover=1] }\n"
-            "[  1,   2) │ │ ├─{ := tok[1]\n"
-            "[  2,   4) │ │ ├─block := <opaque>\n"
-            "[  4,   5) │ │ └─} := tok[4]\n"
-            "[  1,   5) │ └─block := { numbers [recover=1] }\n"
-            "[  1,   2) │   ├─{ := tok[1]\n"
-            "[  2,   4) │   ├─numbers := <opaque>\n"
-            "[  4,   5) │   └─} := tok[4]\n"
-            "[  5, end) └─} := tok[5]\n");
-}
-
 TEST_F(GLRTest, NoExplicitAccept) {
   build(R"bnf(
     _ := test


        
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang-tools-extra] 743971f - Revert "[pseudo] Add error-recovery framework & brace-based recovery"

Reply via email to