This revision was landed with ongoing or failed builds.
This revision was automatically updated to reflect the committed changes.
Closed by commit rG7d8e2742d958: [pseudo] Define recovery strategy as grammar 
extension. (authored by sammccall).

Changed prior to commit:
  https://reviews.llvm.org/D129158?vs=442383&id=442541#toc

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D129158/new/

https://reviews.llvm.org/D129158

Files:
  clang-tools-extra/pseudo/include/clang-pseudo/Language.h
  clang-tools-extra/pseudo/include/clang-pseudo/grammar/Grammar.h
  clang-tools-extra/pseudo/include/clang-pseudo/grammar/LRGraph.h
  clang-tools-extra/pseudo/include/clang-pseudo/grammar/LRTable.h
  clang-tools-extra/pseudo/lib/GLR.cpp
  clang-tools-extra/pseudo/lib/cli/CLI.cpp
  clang-tools-extra/pseudo/lib/cxx/CXX.cpp
  clang-tools-extra/pseudo/lib/cxx/cxx.bnf
  clang-tools-extra/pseudo/lib/grammar/Grammar.cpp
  clang-tools-extra/pseudo/lib/grammar/GrammarBNF.cpp
  clang-tools-extra/pseudo/lib/grammar/LRGraph.cpp
  clang-tools-extra/pseudo/test/cxx/empty-member-spec.cpp
  clang-tools-extra/pseudo/test/cxx/recovery-init-list.cpp
  clang-tools-extra/pseudo/unittests/GLRTest.cpp

Index: clang-tools-extra/pseudo/unittests/GLRTest.cpp
===================================================================
--- clang-tools-extra/pseudo/unittests/GLRTest.cpp
+++ clang-tools-extra/pseudo/unittests/GLRTest.cpp
@@ -48,6 +48,17 @@
                            testing::UnorderedElementsAreArray(Parents));
 }
 
+Token::Index recoverBraces(Token::Index Begin, const TokenStream &Code) {
+  EXPECT_GT(Begin, 0u);
+  const Token &Left = Code.tokens()[Begin - 1];
+  EXPECT_EQ(Left.Kind, tok::l_brace);
+  if (const auto* Right = Left.pair()) {
+    EXPECT_EQ(Right->Kind, tok::r_brace);
+    return Code.index(*Right);
+  }
+  return Token::Invalid;
+}
+
 class GLRTest : public ::testing::Test {
 public:
   void build(llvm::StringRef GrammarBNF) {
@@ -375,11 +386,12 @@
   //    0--1({)--2(?)
   //  After recovering a `word` at state 1:
   //    0--3(word)  // 3 is goto(1, word)
-  buildGrammar({"word"}, {});
+  buildGrammar({"word", "top"}, {"top := { word [recover=Braces] }"});
   LRTable::Builder B(TestLang.G);
   B.Transition[{StateID{1}, id("word")}] = StateID{3};
-  B.Recoveries.push_back({StateID{1}, {RecoveryStrategy::Braces, id("word")}});
+  B.Recoveries.push_back({StateID{1}, {extensionID("Braces"), id("word")}});
   TestLang.Table = std::move(B).build();
+  TestLang.RecoveryStrategies.try_emplace(extensionID("Braces"), recoverBraces);
 
   auto *LBrace = &Arena.createTerminal(tok::l_brace, 0);
   auto *Question1 = &Arena.createTerminal(tok::question, 1);
@@ -418,11 +430,12 @@
   //    0--1({)--1({)--1({)
   //  After recovering a `block` at inside the second braces:
   //    0--1({)--2(body)  // 2 is goto(1, body)
-  buildGrammar({"body"}, {});
+  buildGrammar({"body", "top"}, {"top := { body [recover=Braces] }"});
   LRTable::Builder B(TestLang.G);
   B.Transition[{StateID{1}, id("body")}] = StateID{2};
-  B.Recoveries.push_back({StateID{1}, {RecoveryStrategy::Braces, id("body")}});
+  B.Recoveries.push_back({StateID{1}, {extensionID("Braces"), id("body")}});
   TestLang.Table = std::move(B).build();
+  TestLang.RecoveryStrategies.try_emplace(extensionID("Braces"), recoverBraces);
 
   clang::LangOptions LOptions;
   // Innermost brace is unmatched, to test fallback to next brace.
@@ -455,13 +468,17 @@
   //  After recovering either `word` or `number` inside the braces:
   //    0--1({)--2(word)   // 2 is goto(1, word)
   //          └--3(number) // 3 is goto(1, number)
-  buildGrammar({"number", "word"}, {});
+  buildGrammar({"number", "word", "top"},
+               {
+                   "top := { number [recover=Braces] }",
+                   "top := { word [recover=Braces] }",
+               });
   LRTable::Builder B(TestLang.G);
   B.Transition[{StateID{1}, id("number")}] = StateID{2};
   B.Transition[{StateID{1}, id("word")}] = StateID{3};
-  B.Recoveries.push_back(
-      {StateID{1}, {RecoveryStrategy::Braces, id("number")}});
-  B.Recoveries.push_back({StateID{1}, {RecoveryStrategy::Braces, id("word")}});
+  B.Recoveries.push_back({StateID{1}, {extensionID("Braces"), id("number")}});
+  B.Recoveries.push_back({StateID{1}, {extensionID("Braces"), id("word")}});
+  TestLang.RecoveryStrategies.try_emplace(extensionID("Braces"), recoverBraces);
   TestLang.Table = std::move(B).build();
   auto *LBrace = &Arena.createTerminal(tok::l_brace, 0);
   const auto *Root = GSStack.addNode(0, nullptr, {});
@@ -560,11 +577,12 @@
   build(R"bnf(
     _ := block
 
-    block := { block }
-    block := { numbers }
+    block := { block [recover=Braces] }
+    block := { numbers [recover=Braces] }
     numbers := NUMERIC_CONSTANT NUMERIC_CONSTANT
   )bnf");
   TestLang.Table = LRTable::buildSLR(TestLang.G);
+  TestLang.RecoveryStrategies.try_emplace(extensionID("Braces"), recoverBraces);
   clang::LangOptions LOptions;
   TokenStream Tokens = cook(lex("{ { 42 ? } }", LOptions), LOptions);
   pairBrackets(Tokens);
@@ -572,14 +590,14 @@
   const ForestNode &Parsed =
       glrParse({Tokens, Arena, GSStack}, id("block"), TestLang);
   EXPECT_EQ(Parsed.dumpRecursive(TestLang.G),
-            "[  0, end) block := { block [recover=1] }\n"
+            "[  0, end) block := { block [recover=Braces] }\n"
             "[  0,   1) ├─{ := tok[0]\n"
             "[  1,   5) ├─block := <ambiguous>\n"
-            "[  1,   5) │ ├─block := { block [recover=1] }\n"
+            "[  1,   5) │ ├─block := { block [recover=Braces] }\n"
             "[  1,   2) │ │ ├─{ := tok[1]\n"
             "[  2,   4) │ │ ├─block := <opaque>\n"
             "[  4,   5) │ │ └─} := tok[4]\n"
-            "[  1,   5) │ └─block := { numbers [recover=1] }\n"
+            "[  1,   5) │ └─block := { numbers [recover=Braces] }\n"
             "[  1,   2) │   ├─{ := tok[1]\n"
             "[  2,   4) │   ├─numbers := <opaque>\n"
             "[  4,   5) │   └─} := tok[4]\n"
Index: clang-tools-extra/pseudo/test/cxx/recovery-init-list.cpp
===================================================================
--- clang-tools-extra/pseudo/test/cxx/recovery-init-list.cpp
+++ clang-tools-extra/pseudo/test/cxx/recovery-init-list.cpp
@@ -1,4 +1,4 @@
-// RUN: clang-pseudo -grammar=%cxx-bnf-file -source=%s --print-forest | FileCheck %s
+// RUN: clang-pseudo -grammar=cxx -source=%s --print-forest | FileCheck %s
 auto x = { complete garbage };
 // CHECK:      translation-unit~simple-declaration
 // CHECK-NEXT: ├─decl-specifier-seq~AUTO := tok[0]
Index: clang-tools-extra/pseudo/test/cxx/empty-member-spec.cpp
===================================================================
--- clang-tools-extra/pseudo/test/cxx/empty-member-spec.cpp
+++ clang-tools-extra/pseudo/test/cxx/empty-member-spec.cpp
@@ -2,7 +2,7 @@
 class Foo {
 public:
 };
-// CHECK:      decl-specifier-seq~class-specifier := class-head { member-specification [recover=1] }
+// CHECK:      decl-specifier-seq~class-specifier := class-head { member-specification [recover=Brackets] }
 // CHECK-NEXT: ├─class-head := class-key class-head-name
 // CHECK-NEXT: │ ├─class-key~CLASS := tok[0]
 // CHECK-NEXT: │ └─class-head-name~IDENTIFIER := tok[1]
Index: clang-tools-extra/pseudo/lib/grammar/LRGraph.cpp
===================================================================
--- clang-tools-extra/pseudo/lib/grammar/LRGraph.cpp
+++ clang-tools-extra/pseudo/lib/grammar/LRGraph.cpp
@@ -120,9 +120,9 @@
   return Results;
 }
 
-std::vector<std::pair<RecoveryStrategy, SymbolID>>
+std::vector<std::pair<ExtensionID, SymbolID>>
 availableRecovery(const State &S, const Grammar &G) {
-  std::vector<std::pair<RecoveryStrategy, SymbolID>> Result;
+  std::vector<std::pair<ExtensionID, SymbolID>> Result;
   for (const Item &I : S.Items) {
     const auto &Rule = G.lookupRule(I.rule());
     if (I.dot() != Rule.RecoveryIndex)
@@ -196,8 +196,7 @@
       Edges.push_back({Src, Dst, Label});
     }
 
-    void insertRecovery(StateID Src, RecoveryStrategy Strategy,
-                        SymbolID Result) {
+    void insertRecovery(StateID Src, ExtensionID Strategy, SymbolID Result) {
       Recoveries.push_back({Src, Strategy, Result});
     }
 
Index: clang-tools-extra/pseudo/lib/grammar/GrammarBNF.cpp
===================================================================
--- clang-tools-extra/pseudo/lib/grammar/GrammarBNF.cpp
+++ clang-tools-extra/pseudo/lib/grammar/GrammarBNF.cpp
@@ -106,17 +106,6 @@
 
     assert(T->Rules.size() < (1 << RuleBits) &&
            "Too many rules to fit in RuleID bits!");
-    // Wherever RHS contains { foo }, mark foo for brace-recovery.
-    // FIXME: this should be grammar annotations instead.
-    for (auto &Rule : T->Rules) {
-      for (unsigned I = 2; I < Rule.Size; ++I)
-        if (Rule.Sequence[I] == tokenSymbol(tok::r_brace) &&
-            Rule.Sequence[I - 2] == tokenSymbol(tok::l_brace) &&
-            !isToken(Rule.Sequence[I - 1])) {
-          Rule.Recovery = RecoveryStrategy::Braces;
-          Rule.RecoveryIndex = I - 1;
-        }
-    }
     const auto &SymbolOrder = getTopologicalOrder(T.get());
     llvm::stable_sort(
         T->Rules, [&SymbolOrder](const Rule &Left, const Rule &Right) {
@@ -266,13 +255,18 @@
              "Didn't find the attribute in AttrValues!");
       return It - T.AttributeValues.begin();
     };
-    for (const auto &KV : Spec.Sequence.back().Attributes) {
-      if (KV.first == "guard") {
-        R.Guard = LookupExtensionID(KV.second);
-        continue;
+    for (unsigned I = 0; I < Spec.Sequence.size(); ++I) {
+      for (const auto &KV : Spec.Sequence[I].Attributes) {
+        if (KV.first == "guard") {
+          R.Guard = LookupExtensionID(KV.second);
+        } else if (KV.first == "recover") {
+          R.Recovery = LookupExtensionID(KV.second);
+          R.RecoveryIndex = I;
+        } else {
+          Diagnostics.push_back(
+              llvm::formatv("Unknown attribute '{0}'", KV.first).str());
+        }
       }
-      Diagnostics.push_back(
-          llvm::formatv("Unknown attribute '{0}'", KV.first).str());
     }
   }
 
Index: clang-tools-extra/pseudo/lib/grammar/Grammar.cpp
===================================================================
--- clang-tools-extra/pseudo/lib/grammar/Grammar.cpp
+++ clang-tools-extra/pseudo/lib/grammar/Grammar.cpp
@@ -62,7 +62,7 @@
   for (unsigned I = 0; I < R.Size; ++I) {
     OS << " " << symbolName(R.Sequence[I]);
     if (R.RecoveryIndex == I)
-      OS << " [recover=" << static_cast<unsigned>(R.Recovery) << "]";
+      OS << " [recover=" << T->AttributeValues[R.Recovery] << "]";
   }
   if (R.Guard)
     OS << " [guard=" << T->AttributeValues[R.Guard] << "]";
Index: clang-tools-extra/pseudo/lib/cxx/cxx.bnf
===================================================================
--- clang-tools-extra/pseudo/lib/cxx/cxx.bnf
+++ clang-tools-extra/pseudo/lib/cxx/cxx.bnf
@@ -287,7 +287,7 @@
 labeled-statement := CASE constant-expression : statement
 labeled-statement := DEFAULT : statement
 expression-statement := expression_opt ;
-compound-statement := { statement-seq_opt }
+compound-statement := { statement-seq_opt [recover=Brackets] }
 statement-seq := statement
 statement-seq := statement-seq statement
 selection-statement := IF CONSTEXPR_opt ( init-statement_opt condition ) statement
@@ -458,7 +458,9 @@
 initializer-clause := braced-init-list
 #! Allow mixed designated/non-designated init-list.
 # This is standard C, and accepted by clang and others as an extension.
-braced-init-list := { initializer-list ,_opt }
+# FIXME: Decouple recovery from is-there-a-trailing-comma!
+braced-init-list := { initializer-list [recover=Brackets] }
+braced-init-list := { initializer-list , }
 braced-init-list := { }
 initializer-list := initializer-list-item
 initializer-list := initializer-list , initializer-list-item
@@ -533,7 +535,7 @@
 private-module-fragment := module-keyword : PRIVATE ; declaration-seq_opt
 
 # gram.class
-class-specifier := class-head { member-specification_opt }
+class-specifier := class-head { member-specification_opt [recover=Brackets] }
 class-head := class-key class-head-name class-virt-specifier_opt base-clause_opt
 class-head := class-key base-clause_opt
 class-head-name := nested-name-specifier_opt class-name
Index: clang-tools-extra/pseudo/lib/cxx/CXX.cpp
===================================================================
--- clang-tools-extra/pseudo/lib/cxx/CXX.cpp
+++ clang-tools-extra/pseudo/lib/cxx/CXX.cpp
@@ -35,9 +35,28 @@
 }
 
 llvm::DenseMap<ExtensionID, RuleGuard> buildGuards() {
-  return llvm::DenseMap<ExtensionID, RuleGuard>(
-      {{(ExtensionID)Extension::Override, guardOverride},
-       {(ExtensionID)Extension::Final, guardFinal}});
+  return {
+      {(ExtensionID)Extension::Override, guardOverride},
+      {(ExtensionID)Extension::Final, guardFinal},
+  };
+}
+
+Token::Index recoverBrackets(Token::Index Begin, const TokenStream &Tokens) {
+  assert(Begin > 0);
+  const Token &Left = Tokens.tokens()[Begin - 1];
+  assert(Left.Kind == tok::l_brace || Left.Kind == tok::l_paren ||
+         Left.Kind == tok::l_square);
+  if (const Token *Right = Left.pair()) {
+    assert(Tokens.index(*Right) > Begin);
+    return Tokens.index(*Right);
+  }
+  return Token::Invalid;
+}
+
+llvm::DenseMap<ExtensionID, RecoveryStrategy> buildRecoveryStrategies() {
+  return {
+      {(ExtensionID)Extension::Brackets, recoverBrackets},
+  };
 }
 
 } // namespace
@@ -48,8 +67,12 @@
     auto G = Grammar::parseBNF(CXXBNF, Diags);
     assert(Diags.empty());
     LRTable Table = LRTable::buildSLR(G);
-    const Language *PL =
-        new Language{std::move(G), std::move(Table), buildGuards()};
+    const Language *PL = new Language{
+        std::move(G),
+        std::move(Table),
+        buildGuards(),
+        buildRecoveryStrategies(),
+    };
     return *PL;
   }();
   return CXXLanguage;
Index: clang-tools-extra/pseudo/lib/cli/CLI.cpp
===================================================================
--- clang-tools-extra/pseudo/lib/cli/CLI.cpp
+++ clang-tools-extra/pseudo/lib/cli/CLI.cpp
@@ -40,8 +40,12 @@
     for (const auto &Diag : Diags)
       llvm::errs() << Diag << "\n";
     auto Table = LRTable::buildSLR(G);
-    return new Language{std::move(G), std::move(Table),
-                        llvm::DenseMap<ExtensionID, RuleGuard>()};
+    return new Language{
+        std::move(G),
+        std::move(Table),
+        llvm::DenseMap<ExtensionID, RuleGuard>(),
+        llvm::DenseMap<ExtensionID, RecoveryStrategy>(),
+    };
   }();
   return *Lang;
 }
Index: clang-tools-extra/pseudo/lib/GLR.cpp
===================================================================
--- clang-tools-extra/pseudo/lib/GLR.cpp
+++ clang-tools-extra/pseudo/lib/GLR.cpp
@@ -27,15 +27,13 @@
 namespace pseudo {
 namespace {
 
-Token::Index findRecoveryEndpoint(RecoveryStrategy Strategy,
-                                  const GSS::Node *RecoveryNode,
-                                  const TokenStream &Tokens) {
-  assert(Strategy == RecoveryStrategy::Braces);
-  const ForestNode *LBrace = RecoveryNode->Payload;
-  assert(LBrace->kind() == ForestNode::Terminal &&
-         LBrace->symbol() == tokenSymbol(tok::l_brace));
-  if (const Token *RBrace = Tokens.tokens()[LBrace->startTokenIndex()].pair())
-    return Tokens.index(*RBrace);
+Token::Index findRecoveryEndpoint(ExtensionID Strategy, Token::Index Begin,
+                                  const TokenStream &Tokens,
+                                  const Language &Lang) {
+  assert(Strategy != 0);
+  assert(Begin > 0);
+  if (auto S = Lang.RecoveryStrategies.lookup(Strategy))
+    return S(Begin, Tokens);
   return Token::Invalid;
 }
 
@@ -55,7 +53,7 @@
     // The nonterminal which will be created in order to recover.
     SymbolID Symbol;
     // The heuristic used to choose the bounds of the nonterminal to recover.
-    RecoveryStrategy Strategy;
+    ExtensionID Strategy;
 
     // The GSS head where we are expecting the recovered nonterminal.
     const GSS::Node *RecoveryNode;
@@ -142,8 +140,8 @@
     if (RecoveryRange && Option.Position < RecoveryRange->Begin)
       break;
 
-    auto End =
-        findRecoveryEndpoint(Option.Strategy, Option.RecoveryNode, Params.Code);
+    auto End = findRecoveryEndpoint(Option.Strategy, Option.Position,
+                                    Params.Code, Lang);
     // Recovery may not take the parse backwards.
     if (End == Token::Invalid || End < TokenIndex)
       continue;
Index: clang-tools-extra/pseudo/include/clang-pseudo/grammar/LRTable.h
===================================================================
--- clang-tools-extra/pseudo/include/clang-pseudo/grammar/LRTable.h
+++ clang-tools-extra/pseudo/include/clang-pseudo/grammar/LRTable.h
@@ -68,7 +68,7 @@
   static constexpr unsigned StateBits = 13;
 
   struct Recovery {
-    RecoveryStrategy Strategy;
+    ExtensionID Strategy;
     SymbolID Result;
   };
 
Index: clang-tools-extra/pseudo/include/clang-pseudo/grammar/LRGraph.h
===================================================================
--- clang-tools-extra/pseudo/include/clang-pseudo/grammar/LRGraph.h
+++ clang-tools-extra/pseudo/include/clang-pseudo/grammar/LRGraph.h
@@ -144,7 +144,7 @@
   // has a Recovery { Src = S, Strategy=braces, Result=stmt-seq }.
   struct Recovery {
     StateID Src; // The state we are in when encountering the error.
-    RecoveryStrategy Strategy; // Heuristic choosing the tokens to match.
+    ExtensionID Strategy;      // Heuristic choosing the tokens to match.
     SymbolID Result;           // The symbol that is produced.
   };
 
Index: clang-tools-extra/pseudo/include/clang-pseudo/grammar/Grammar.h
===================================================================
--- clang-tools-extra/pseudo/include/clang-pseudo/grammar/Grammar.h
+++ clang-tools-extra/pseudo/include/clang-pseudo/grammar/Grammar.h
@@ -85,9 +85,6 @@
 inline constexpr SymbolID tokenSymbol(tok::TokenKind TK) {
   return TokenFlag | static_cast<SymbolID>(TK);
 }
-// Error recovery strategies.
-// FIXME: these should be provided as extensions instead.
-enum class RecoveryStrategy : uint8_t { None, Braces };
 
 // An extension is a piece of native code specific to a grammar that modifies
 // the behavior of annotated rules. One ExtensionID is assigned for each unique
@@ -133,7 +130,7 @@
   // everything between braces.
   // For now, only a single strategy at a single point is possible.
   uint8_t RecoveryIndex = -1;
-  RecoveryStrategy Recovery = RecoveryStrategy::None;
+  ExtensionID Recovery = 0;
 
   llvm::ArrayRef<SymbolID> seq() const {
     return llvm::ArrayRef<SymbolID>(Sequence, Size);
Index: clang-tools-extra/pseudo/include/clang-pseudo/Language.h
===================================================================
--- clang-tools-extra/pseudo/include/clang-pseudo/Language.h
+++ clang-tools-extra/pseudo/include/clang-pseudo/Language.h
@@ -9,6 +9,7 @@
 #ifndef CLANG_PSEUDO_GRAMMAR_LANGUAGE_H
 #define CLANG_PSEUDO_GRAMMAR_LANGUAGE_H
 
+#include "clang-pseudo/Token.h"
 #include "clang-pseudo/grammar/Grammar.h"
 #include "clang-pseudo/grammar/LRTable.h"
 
@@ -28,13 +29,25 @@
 using RuleGuard = llvm::function_ref<bool(
     llvm::ArrayRef<const ForestNode *> RHS, const TokenStream &)>;
 
+// A recovery strategy determines a region of code to skip when parsing fails.
+//
+// For example, given `class-def := CLASS IDENT { body [recover=Brackets] }`,
+// if parsing fails while attempting to parse `body`, we may skip up to the
+// matching `}` and assume everything between was a `body`.
+//
+// The provided index is the token where the skipped region begins.
+// Returns the (excluded) end of the range, or Token::Invalid for no recovery.
+using RecoveryStrategy =
+    llvm::function_ref<Token::Index(Token::Index Start, const TokenStream &)>;
+
 // Specify a language that can be parsed by the pseduoparser.
 struct Language {
   Grammar G;
   LRTable Table;
 
-  // Binding "guard" extension id to a piece of C++ code.
+  // Binding extension ids to corresponding implementations.
   llvm::DenseMap<ExtensionID, RuleGuard> Guards;
+  llvm::DenseMap<ExtensionID, RecoveryStrategy> RecoveryStrategies;
 
   // FIXME: add clang::LangOptions.
   // FIXME: add default start symbols.
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to