hokein updated this revision to Diff 441680.
hokein marked 5 inline comments as done.
hokein added a comment.

fix format.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D127448/new/

https://reviews.llvm.org/D127448

Files:
  clang-tools-extra/pseudo/fuzzer/Fuzzer.cpp
  clang-tools-extra/pseudo/gen/Main.cpp
  clang-tools-extra/pseudo/include/clang-pseudo/GLR.h
  clang-tools-extra/pseudo/include/clang-pseudo/Language.h
  clang-tools-extra/pseudo/include/clang-pseudo/cxx/CXX.h
  clang-tools-extra/pseudo/lib/GLR.cpp
  clang-tools-extra/pseudo/lib/cli/CLI.cpp
  clang-tools-extra/pseudo/lib/cxx/CMakeLists.txt
  clang-tools-extra/pseudo/lib/cxx/CXX.cpp
  clang-tools-extra/pseudo/lib/cxx/cxx.bnf
  clang-tools-extra/pseudo/test/cxx/contextual-keywords.cpp
  clang-tools-extra/pseudo/tool/ClangPseudo.cpp
  clang-tools-extra/pseudo/unittests/GLRTest.cpp

Index: clang-tools-extra/pseudo/unittests/GLRTest.cpp
===================================================================
--- clang-tools-extra/pseudo/unittests/GLRTest.cpp
+++ clang-tools-extra/pseudo/unittests/GLRTest.cpp
@@ -81,6 +81,13 @@
     ADD_FAILURE() << "No such symbol found: " << Name;
     return 0;
   }
+  ExtensionID extensionID(llvm::StringRef AttrValueName) const {
+    for (unsigned ID = 0; ID < TestLang.G.table().AttributeValues.size(); ++ID)
+      if (TestLang.G.table().AttributeValues[ID] == AttrValueName)
+        return static_cast<ExtensionID>(ID);
+    ADD_FAILURE() << "No such attribute value found: " << AttrValueName;
+    return 0; // Not found: fall back to the sentinel ID 0 ("no extension").
+  }
 
   RuleID ruleFor(llvm::StringRef NonterminalName) const {
     auto RuleRange =
@@ -133,7 +140,7 @@
   ForestNode &SemiTerminal = Arena.createTerminal(tok::semi, 0);
   std::vector<const GSS::Node *> NewHeads;
   glrShift({GSSNode1, GSSNode2, GSSNode3}, SemiTerminal,
-           {TestLang.G, TestLang.Table, Arena, GSStack}, NewHeads);
+           {emptyTokenStream(), Arena, GSStack}, TestLang, NewHeads);
 
   EXPECT_THAT(NewHeads,
               UnorderedElementsAre(AllOf(state(4), parsedSymbol(&SemiTerminal),
@@ -171,7 +178,7 @@
 
   std::vector<const GSS::Node *> Heads = {GSSNode1};
   glrReduce(Heads, tokenSymbol(tok::eof),
-            {TestLang.G, TestLang.Table, Arena, GSStack});
+            {emptyTokenStream(), Arena, GSStack}, TestLang);
   EXPECT_THAT(Heads, UnorderedElementsAre(
                          GSSNode1,
                          AllOf(state(2), parsedSymbolID(id("class-name")),
@@ -212,7 +219,8 @@
           {/*State=*/4, ruleFor("ptr-operator")},
       });
   std::vector<const GSS::Node *> Heads = {GSSNode4};
-  glrReduce(Heads, tokenSymbol(tok::eof), {TestLang.G, TestLang.Table, Arena, GSStack});
+  glrReduce(Heads, tokenSymbol(tok::eof), {emptyTokenStream(), Arena, GSStack},
+            TestLang);
 
   EXPECT_THAT(Heads, UnorderedElementsAre(
                          GSSNode4,
@@ -267,7 +275,8 @@
           {/*State=*/4, /* type-name := enum-name */ 1},
       });
   std::vector<const GSS::Node *> Heads = {GSSNode3, GSSNode4};
-  glrReduce(Heads, tokenSymbol(tok::eof), {TestLang.G, TestLang.Table, Arena, GSStack});
+  glrReduce(Heads, tokenSymbol(tok::eof), {emptyTokenStream(), Arena, GSStack},
+            TestLang);
 
   // Verify that the stack heads are joint at state 5 after reduces.
   EXPECT_THAT(Heads, UnorderedElementsAre(GSSNode3, GSSNode4,
@@ -325,7 +334,7 @@
                              });
   std::vector<const GSS::Node *> Heads = {GSSNode3, GSSNode4};
   glrReduce(Heads, tokenSymbol(tok::eof),
-            {TestLang.G, TestLang.Table, Arena, GSStack});
+            {emptyTokenStream(), Arena, GSStack}, TestLang);
 
   EXPECT_THAT(
       Heads, UnorderedElementsAre(GSSNode3, GSSNode4,
@@ -363,14 +372,16 @@
 
   // When the lookahead is +, reduce is performed.
   std::vector<const GSS::Node *> Heads = {GSSNode1};
-  glrReduce(Heads, tokenSymbol(tok::plus), {TestLang.G, TestLang.Table, Arena, GSStack});
+  glrReduce(Heads, tokenSymbol(tok::plus), {emptyTokenStream(), Arena, GSStack},
+            TestLang);
   EXPECT_THAT(Heads,
               ElementsAre(GSSNode1, AllOf(state(2), parsedSymbolID(id("term")),
                                           parents(Root))));
 
   // When the lookahead is -, reduce is not performed.
   Heads = {GSSNode1};
-  glrReduce(Heads, tokenSymbol(tok::minus), {TestLang.G, TestLang.Table, Arena, GSStack});
+  glrReduce(Heads, tokenSymbol(tok::minus),
+            {emptyTokenStream(), Arena, GSStack}, TestLang);
   EXPECT_THAT(Heads, ElementsAre(GSSNode1));
 }
 
@@ -396,7 +407,7 @@
   const TokenStream &Tokens = cook(lex("{ abc", LOptions), LOptions);
 
   const ForestNode &Parsed =
-      glrParse(Tokens, {TestLang.G, TestLang.Table, Arena, GSStack}, id("test"));
+      glrParse({Tokens, Arena, GSStack}, id("test"), TestLang);
   // Verify that there is no duplicated sequence node of `expr := IDENTIFIER`
   // in the forest, see the `#1` and `=#1` in the dump string.
   EXPECT_EQ(Parsed.dumpRecursive(TestLang.G),
@@ -433,7 +444,7 @@
   TestLang.Table = LRTable::buildSLR(TestLang.G);
 
   const ForestNode &Parsed =
-      glrParse(Tokens, {TestLang.G, TestLang.Table, Arena, GSStack}, id("test"));
+      glrParse({Tokens, Arena, GSStack}, id("test"), TestLang);
   EXPECT_EQ(Parsed.dumpRecursive(TestLang.G),
             "[  0, end) test := <ambiguous>\n"
             "[  0, end) ├─test := IDENTIFIER\n"
@@ -458,7 +469,7 @@
   TestLang.Table = LRTable::buildSLR(TestLang.G);
 
   const ForestNode &Parsed =
-      glrParse(Tokens, {TestLang.G, TestLang.Table, Arena, GSStack}, id("test"));
+      glrParse({Tokens, Arena, GSStack}, id("test"), TestLang);
   EXPECT_EQ(Parsed.dumpRecursive(TestLang.G),
             "[  0, end) test := IDENTIFIER test\n"
             "[  0,   1) ├─IDENTIFIER := tok[0]\n"
@@ -466,6 +477,39 @@
             "[  1, end)   └─IDENTIFIER := tok[1]\n");
 }
 
+TEST_F(GLRTest, GuardExtension) {
+  build(R"bnf(
+    _ := start
+
+    start := IDENTIFIER [guard=TestOnly]
+  )bnf");
+  // RuleGuard is a non-owning llvm::function_ref, so the callable is kept in a
+  // named local that outlives the parses rather than bound from a temporary.
+  auto OnlyTest = [&](llvm::ArrayRef<const ForestNode *> RHS,
+                      const TokenStream &Tokens) {
+    assert(RHS.size() == 1 &&
+           RHS.front()->symbol() == tokenSymbol(clang::tok::identifier));
+    return Tokens.tokens()[RHS.front()->startTokenIndex()].text() == "test";
+  };
+  TestLang.Guards.insert(
+      std::make_pair(extensionID("TestOnly"), RuleGuard(OnlyTest)));
+  clang::LangOptions LOptions;
+  TestLang.Table = LRTable::buildSLR(TestLang.G);
+
+  std::string Input = "test"; // Accepted by the guard: the rule is reduced.
+  const TokenStream &Succeeded = cook(lex(Input, LOptions), LOptions);
+  EXPECT_EQ(glrParse({Succeeded, Arena, GSStack}, id("start"), TestLang)
+                .dumpRecursive(TestLang.G),
+            "[  0, end) start := IDENTIFIER [guard=TestOnly]\n"
+            "[  0, end) └─IDENTIFIER := tok[0]\n");
+
+  Input = "notest"; // Rejected by the guard: the parse fails.
+  const TokenStream &Failed = cook(lex(Input, LOptions), LOptions);
+  EXPECT_EQ(glrParse({Failed, Arena, GSStack}, id("start"), TestLang)
+                .dumpRecursive(TestLang.G),
+            "[  0, end) start := <opaque>\n");
+}
+
 TEST(GSSTest, GC) {
   //      ┌-A-┬-AB
   //      ├-B-┘
Index: clang-tools-extra/pseudo/tool/ClangPseudo.cpp
===================================================================
--- clang-tools-extra/pseudo/tool/ClangPseudo.cpp
+++ clang-tools-extra/pseudo/tool/ClangPseudo.cpp
@@ -145,9 +145,8 @@
       return 2;
     }
     auto &Root =
-        glrParse(*ParseableStream,
-                 clang::pseudo::ParseParams{Lang.G, Lang.Table, Arena, GSS},
-                 *StartSymID);
+        glrParse(clang::pseudo::ParseParams{*ParseableStream, Arena, GSS},
+                 *StartSymID, Lang);
     if (PrintForest)
       llvm::outs() << Root.dumpRecursive(Lang.G, /*Abbreviated=*/true);
 
@@ -162,9 +161,8 @@
         return 2;
       }
       auto &Root =
-          glrParse(*ParseableStream,
-                   clang::pseudo::ParseParams{Lang.G, Lang.Table, Arena, GSS},
-                   *StartSymID);
+          glrParse(clang::pseudo::ParseParams{*ParseableStream, Arena, GSS},
+                   *StartSymID, Lang);
       if (PrintForest)
         llvm::outs() << Root.dumpRecursive(Lang.G, /*Abbreviated=*/true);
 
Index: clang-tools-extra/pseudo/test/cxx/contextual-keywords.cpp
===================================================================
--- /dev/null
+++ clang-tools-extra/pseudo/test/cxx/contextual-keywords.cpp
@@ -0,0 +1,9 @@
+// RUN: clang-pseudo -grammar=cxx -source=%s --print-forest | FileCheck %s
+// Verify that the contextual-{final,override} rules are guarded, so there is
+// no ambiguous parse for the virt-specifier.
+class Foo {
+    void foo1() override;
+// CHECK: virt-specifier-seq~IDENTIFIER := tok[7]
+    void foo2() final;
+// CHECK: virt-specifier-seq~IDENTIFIER := tok[13]
+};
Index: clang-tools-extra/pseudo/lib/cxx/cxx.bnf
===================================================================
--- clang-tools-extra/pseudo/lib/cxx/cxx.bnf
+++ clang-tools-extra/pseudo/lib/cxx/cxx.bnf
@@ -744,8 +744,8 @@
 
 #! Contextual keywords -- clang lexer always lexes them as identifier tokens.
 #! Placeholders for literal text in the grammar that lex as other things.
-contextual-override := IDENTIFIER
-contextual-final := IDENTIFIER
+contextual-override := IDENTIFIER [guard=Override]
+contextual-final := IDENTIFIER [guard=Final]
 contextual-zero := NUMERIC_CONSTANT
 module-keyword := IDENTIFIER
 import-keyword := IDENTIFIER
Index: clang-tools-extra/pseudo/lib/cxx/CXX.cpp
===================================================================
--- clang-tools-extra/pseudo/lib/cxx/CXX.cpp
+++ clang-tools-extra/pseudo/lib/cxx/CXX.cpp
@@ -7,6 +7,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "clang-pseudo/cxx/CXX.h"
+#include "clang-pseudo/Forest.h"
 #include "clang-pseudo/Language.h"
 #include "clang-pseudo/grammar/Grammar.h"
 #include "clang-pseudo/grammar/LRTable.h"
@@ -19,6 +20,26 @@
 static const char *CXXBNF =
 #include "CXXBNF.inc"
     ;
+
+// Guards: accept the lone IDENTIFIER only if it spells the contextual keyword.
+bool guardOverride(llvm::ArrayRef<const ForestNode *> RHS,
+                   const TokenStream &Tokens) {
+  assert(RHS.size() == 1 &&
+         RHS.front()->symbol() == tokenSymbol(clang::tok::identifier));
+  return Tokens.tokens()[RHS.front()->startTokenIndex()].text() == "override";
+}
+bool guardFinal(llvm::ArrayRef<const ForestNode *> RHS,
+                const TokenStream &Tokens) {
+  assert(RHS.size() == 1 &&
+         RHS.front()->symbol() == tokenSymbol(clang::tok::identifier));
+  return Tokens.tokens()[RHS.front()->startTokenIndex()].text() == "final";
+}
+llvm::DenseMap<ExtensionID, RuleGuard> buildGuards() {
+  return llvm::DenseMap<ExtensionID, RuleGuard>(
+      {{static_cast<ExtensionID>(Extension::Override), guardOverride},
+       {static_cast<ExtensionID>(Extension::Final), guardFinal}});
+}
+
 } // namespace
 
 const Language &getLanguage() {
@@ -27,10 +48,8 @@
     auto G = Grammar::parseBNF(CXXBNF, Diags);
     assert(Diags.empty());
     LRTable Table = LRTable::buildSLR(G);
-    const Language *PL = new Language{
-        std::move(G),
-        std::move(Table),
-    };
+    const Language *PL =
+        new Language{std::move(G), std::move(Table), buildGuards()};
     return *PL;
   }();
   return CXXLanguage;
Index: clang-tools-extra/pseudo/lib/cxx/CMakeLists.txt
===================================================================
--- clang-tools-extra/pseudo/lib/cxx/CMakeLists.txt
+++ clang-tools-extra/pseudo/lib/cxx/CMakeLists.txt
@@ -9,5 +9,6 @@
   cxx_gen
 
   LINK_LIBS
+  clangPseudo
   clangPseudoGrammar
   )
Index: clang-tools-extra/pseudo/lib/cli/CLI.cpp
===================================================================
--- clang-tools-extra/pseudo/lib/cli/CLI.cpp
+++ clang-tools-extra/pseudo/lib/cli/CLI.cpp
@@ -8,6 +8,7 @@
 
 #include "clang-pseudo/cli/CLI.h"
 #include "clang-pseudo/cxx/CXX.h"
+#include "clang-pseudo/grammar/Grammar.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/ErrorOr.h"
 #include "llvm/Support/MemoryBuffer.h"
@@ -39,7 +40,8 @@
     for (const auto &Diag : Diags)
       llvm::errs() << Diag << "\n";
     auto Table = LRTable::buildSLR(G);
-    return new Language{std::move(G), std::move(Table)};
+    return new Language{std::move(G), std::move(Table),
+                        llvm::DenseMap<ExtensionID, RuleGuard>()};
   }();
   return *Lang;
 }
Index: clang-tools-extra/pseudo/lib/GLR.cpp
===================================================================
--- clang-tools-extra/pseudo/lib/GLR.cpp
+++ clang-tools-extra/pseudo/lib/GLR.cpp
@@ -7,6 +7,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "clang-pseudo/GLR.h"
+#include "clang-pseudo/Language.h"
 #include "clang-pseudo/grammar/Grammar.h"
 #include "clang-pseudo/grammar/LRTable.h"
 #include "clang/Basic/TokenKinds.h"
@@ -51,16 +52,16 @@
 //       └---3---┘
 void glrShift(llvm::ArrayRef<const GSS::Node *> OldHeads,
               const ForestNode &NewTok, const ParseParams &Params,
-              std::vector<const GSS::Node *> &NewHeads) {
+              const Language &Lang, std::vector<const GSS::Node *> &NewHeads) {
   assert(NewTok.kind() == ForestNode::Terminal);
   LLVM_DEBUG(llvm::dbgs() << llvm::formatv("  Shift {0} ({1} active heads):\n",
-                                           Params.G.symbolName(NewTok.symbol()),
+                                           Lang.G.symbolName(NewTok.symbol()),
                                            OldHeads.size()));
 
   // We group pending shifts by their target state so we can merge them.
   llvm::SmallVector<std::pair<StateID, const GSS::Node *>, 8> Shifts;
   for (const auto *H : OldHeads)
-    if (auto S = Params.Table.getShiftState(H->State, NewTok.symbol()))
+    if (auto S = Lang.Table.getShiftState(H->State, NewTok.symbol()))
       Shifts.push_back({*S, H});
   llvm::stable_sort(Shifts, llvm::less_first{});
 
@@ -144,7 +145,7 @@
 // storage across calls).
 class GLRReduce {
   const ParseParams &Params;
-
+  const Language& Lang;
   // There are two interacting complications:
   // 1.  Performing one reduce can unlock new reduces on the newly-created head.
   // 2a. The ambiguous ForestNodes must be complete (have all sequence nodes).
@@ -230,7 +231,8 @@
 
   Sequence TempSequence;
 public:
-  GLRReduce(const ParseParams &Params) : Params(Params) {}
+  GLRReduce(const ParseParams &Params, const Language &Lang)
+      : Params(Params), Lang(Lang) {}
 
   void operator()(std::vector<const GSS::Node *> &Heads, SymbolID Lookahead) {
     assert(isToken(Lookahead));
@@ -249,10 +251,20 @@
   }
 
 private:
+  // True if the rule's guard (if any) accepts RHS; unbound guards accept all.
+  bool canReduce(ExtensionID GuardID, SequenceRef RHS) const {
+    if (!GuardID) // 0 is the sentinel meaning "no guard".
+      return true;
+    auto It = Lang.Guards.find(GuardID);
+    if (It != Lang.Guards.end())
+      return It->second(RHS.S, Params.Code);
+    LLVM_DEBUG(llvm::dbgs() << "  unimplemented guard " << GuardID << "\n");
+    return true;
+  }
   // pop walks up the parent chain(s) for a reduction from Head by to Rule.
   // Once we reach the end, record the bases and sequences.
   void pop(const GSS::Node *Head, RuleID RID, const Rule &Rule) {
-    LLVM_DEBUG(llvm::dbgs() << "  Pop " << Params.G.dumpRule(RID) << "\n");
+    LLVM_DEBUG(llvm::dbgs() << "  Pop " << Lang.G.dumpRule(RID) << "\n");
     Family F{/*Start=*/0, /*Symbol=*/Rule.Target, /*Rule=*/RID};
     TempSequence.resize_for_overwrite(Rule.Size);
     auto DFS = [&](const GSS::Node *N, unsigned I, auto &DFS) {
@@ -263,7 +275,8 @@
           for (const auto *B : N->parents())
             llvm::dbgs() << "    --> base at S" << B->State << "\n";
         });
-
+        if (!canReduce(Rule.Guard, TempSequence))
+          return;
         // Copy the chain to stable storage so it can be enqueued.
         if (SequenceStorageCount == SequenceStorage.size())
           SequenceStorage.emplace_back();
@@ -286,9 +299,9 @@
       if (popAndPushTrivial())
         continue;
       for (RuleID RID :
-           Params.Table.getReduceRules((*Heads)[NextPopHead]->State)) {
-        const auto &Rule = Params.G.lookupRule(RID);
-        if (Params.Table.canFollow(Rule.Target, Lookahead))
+           Lang.Table.getReduceRules((*Heads)[NextPopHead]->State)) {
+        const auto &Rule = Lang.G.lookupRule(RID);
+        if (Lang.Table.canFollow(Rule.Target, Lookahead))
           pop((*Heads)[NextPopHead], RID, Rule);
       }
     }
@@ -306,7 +319,7 @@
     assert(!Sequences.empty());
     Family F = Sequences.top().first;
 
-    LLVM_DEBUG(llvm::dbgs() << "  Push " << Params.G.symbolName(F.Symbol)
+    LLVM_DEBUG(llvm::dbgs() << "  Push " << Lang.G.symbolName(F.Symbol)
                             << " from token " << F.Start << "\n");
 
     // Grab the sequences and bases for this family.
@@ -320,7 +333,7 @@
       FamilySequences.emplace_back(Sequences.top().first.Rule, *Push.Seq);
       for (const GSS::Node *Base : Push.LastPop->parents())
         FamilyBases.emplace_back(
-            Params.Table.getGoToState(Base->State, F.Symbol), Base);
+            Lang.Table.getGoToState(Base->State, F.Symbol), Base);
 
       Sequences.pop();
     } while (!Sequences.empty() && Sequences.top().first == F);
@@ -335,7 +348,7 @@
         SequenceNodes.size() == 1
             ? SequenceNodes.front()
             : &Params.Forest.createAmbiguous(F.Symbol, SequenceNodes);
-    LLVM_DEBUG(llvm::dbgs() << "    --> " << Parsed->dump(Params.G) << "\n");
+    LLVM_DEBUG(llvm::dbgs() << "    --> " << Parsed->dump(Lang.G) << "\n");
 
     // Bases for this family, deduplicate them, and group by the goTo State.
     sortAndUnique(FamilyBases);
@@ -373,15 +386,15 @@
       return false;
     const GSS::Node *Head = Heads->back();
     llvm::Optional<RuleID> RID;
-    for (RuleID R : Params.Table.getReduceRules(Head->State)) {
+    for (RuleID R : Lang.Table.getReduceRules(Head->State)) {
       if (RID.hasValue())
         return false;
       RID = R;
     }
     if (!RID)
       return true; // no reductions available, but we've processed the head!
-    const auto &Rule = Params.G.lookupRule(*RID);
-    if (!Params.Table.canFollow(Rule.Target, Lookahead))
+    const auto &Rule = Lang.G.lookupRule(*RID);
+    if (!Lang.Table.canFollow(Rule.Target, Lookahead))
       return true; // reduction is not available
     const GSS::Node *Base = Head;
     TempSequence.resize_for_overwrite(Rule.Size);
@@ -391,9 +404,11 @@
       TempSequence[Rule.Size - 1 - I] = Base->Payload;
       Base = Base->parents().front();
     }
+    if (!canReduce(Rule.Guard, TempSequence))
+      return true; // reduction is not available
     const ForestNode *Parsed =
         &Params.Forest.createSequence(Rule.Target, *RID, TempSequence);
-    StateID NextState = Params.Table.getGoToState(Base->State, Rule.Target);
+    StateID NextState = Lang.Table.getGoToState(Base->State, Rule.Target);
     Heads->push_back(Params.GSStack.addNode(NextState, Parsed, {Base}));
     return true;
   }
@@ -401,16 +416,14 @@
 
 } // namespace
 
-const ForestNode &glrParse(const TokenStream &Tokens, const ParseParams &Params,
-                           SymbolID StartSymbol) {
-  GLRReduce Reduce(Params);
+const ForestNode &glrParse( const ParseParams &Params, SymbolID StartSymbol,
+                           const Language& Lang) {
+  GLRReduce Reduce(Params, Lang);
   assert(isNonterminal(StartSymbol) && "Start symbol must be a nonterminal");
-  llvm::ArrayRef<ForestNode> Terminals = Params.Forest.createTerminals(Tokens);
-  auto &G = Params.G;
-  (void)G;
+  llvm::ArrayRef<ForestNode> Terminals = Params.Forest.createTerminals(Params.Code);
   auto &GSS = Params.GSStack;
 
-  StateID StartState = Params.Table.getStartState(StartSymbol);
+  StateID StartState = Lang.Table.getStartState(StartSymbol);
   // Heads correspond to the parse of tokens [0, I), NextHeads to [0, I+1).
   std::vector<const GSS::Node *> Heads = {GSS.addNode(/*State=*/StartState,
                                                       /*ForestNode=*/nullptr,
@@ -430,9 +443,9 @@
   for (unsigned I = 0; I < Terminals.size(); ++I) {
     LLVM_DEBUG(llvm::dbgs() << llvm::formatv(
                    "Next token {0} (id={1})\n",
-                   G.symbolName(Terminals[I].symbol()), Terminals[I].symbol()));
+                  Lang.G.symbolName(Terminals[I].symbol()), Terminals[I].symbol()));
     // Consume the token.
-    glrShift(Heads, Terminals[I], Params, NextHeads);
+    glrShift(Heads, Terminals[I], Params, Lang, NextHeads);
     // Form nonterminals containing the token we just consumed.
     SymbolID Lookahead = I + 1 == Terminals.size() ? tokenSymbol(tok::eof)
                                                    : Terminals[I + 1].symbol();
@@ -444,7 +457,7 @@
   }
   LLVM_DEBUG(llvm::dbgs() << llvm::formatv("Reached eof\n"));
 
-  StateID AcceptState = Params.Table.getGoToState(StartState, StartSymbol);
+  StateID AcceptState = Lang.Table.getGoToState(StartState, StartSymbol);
   const ForestNode *Result = nullptr;
   for (const auto *Head : Heads) {
     if (Head->State == AcceptState) {
@@ -464,9 +477,9 @@
 }
 
 void glrReduce(std::vector<const GSS::Node *> &Heads, SymbolID Lookahead,
-               const ParseParams &Params) {
+               const ParseParams &Params, const Language &Lang) {
   // Create a new GLRReduce each time for tests, performance doesn't matter.
-  GLRReduce{Params}(Heads, Lookahead);
+  GLRReduce{Params, Lang}(Heads, Lookahead);
 }
 
 const GSS::Node *GSS::addNode(LRTable::StateID State, const ForestNode *Symbol,
Index: clang-tools-extra/pseudo/include/clang-pseudo/cxx/CXX.h
===================================================================
--- clang-tools-extra/pseudo/include/clang-pseudo/cxx/CXX.h
+++ clang-tools-extra/pseudo/include/clang-pseudo/cxx/CXX.h
@@ -37,6 +37,12 @@
 #undef NONTERMINAL
 };
 
+enum class Extension : ExtensionID {
+#define EXTENSION(X, Y) X = Y,
+#include "CXXSymbols.inc"
+#undef EXTENSION
+};
+
 // Returns the Language for the cxx.bnf grammar.
 const Language &getLanguage();
 
Index: clang-tools-extra/pseudo/include/clang-pseudo/Language.h
===================================================================
--- clang-tools-extra/pseudo/include/clang-pseudo/Language.h
+++ clang-tools-extra/pseudo/include/clang-pseudo/Language.h
@@ -14,12 +14,28 @@
 
 namespace clang {
 namespace pseudo {
+class ForestNode;
+class TokenStream;
+class LRTable;
+
+// A guard restricts when a grammar rule can be used.
+//
+// The GLR parser uses the guard to decide whether a rule reduction is
+// performed. For example, a guard may allow the rule
+// `virt-specifier := IDENTIFIER` only if the identifier's text is `override`.
+//
+// Return true if the guard is satisfied.
+using RuleGuard = llvm::function_ref<bool(
+    llvm::ArrayRef<const ForestNode *> RHS, const TokenStream &)>;
 
 // Specify a language that can be parsed by the pseduoparser.
 struct Language {
   Grammar G;
   LRTable Table;
 
+  // Maps each "guard" extension ID to the C++ code that implements it.
+  llvm::DenseMap<ExtensionID, RuleGuard> Guards;
+
   // FIXME: add clang::LangOptions.
   // FIXME: add default start symbols.
 };
Index: clang-tools-extra/pseudo/include/clang-pseudo/GLR.h
===================================================================
--- clang-tools-extra/pseudo/include/clang-pseudo/GLR.h
+++ clang-tools-extra/pseudo/include/clang-pseudo/GLR.h
@@ -30,6 +30,7 @@
 #define CLANG_PSEUDO_GLR_H
 
 #include "clang-pseudo/Forest.h"
+#include "clang-pseudo/Language.h"
 #include "clang-pseudo/grammar/Grammar.h"
 #include "clang-pseudo/grammar/LRTable.h"
 #include "llvm/Support/Allocator.h"
@@ -112,38 +113,35 @@
 llvm::raw_ostream &operator<<(llvm::raw_ostream &, const GSS::Node &);
 
 // Parameters for the GLR parsing.
-// FIXME: refine it with the ParseLang struct.
 struct ParseParams {
-  // The grammar of the language we're going to parse.
-  const Grammar &G;
-  // The LR table which GLR uses to parse the input, should correspond to the
-  // Grammar G.
-  const LRTable &Table;
+  // The token stream to be parsed.
+  const TokenStream &Code;
 
   // Arena for data structure used by the GLR algorithm.
   ForestArena &Forest;  // Storage for the output forest.
   GSS &GSStack;         // Storage for parsing stacks.
 };
+
 // Parses the given token stream as the start symbol with the GLR algorithm,
 // and returns a forest node of the start symbol.
 //
 // A rule `_ := StartSymbol` must exit for the chosen start symbol.
 //
 // If the parsing fails, we model it as an opaque node in the forest.
-const ForestNode &glrParse(const TokenStream &Code, const ParseParams &Params,
-                           SymbolID StartSymbol);
+const ForestNode &glrParse(const ParseParams &Params, SymbolID StartSymbol,
+                           const Language &Lang);
 
 // Shift a token onto all OldHeads, placing the results into NewHeads.
 //
 // Exposed for testing only.
 void glrShift(llvm::ArrayRef<const GSS::Node *> OldHeads,
               const ForestNode &NextTok, const ParseParams &Params,
-              std::vector<const GSS::Node *> &NewHeads);
+              const Language &Lang, std::vector<const GSS::Node *> &NewHeads);
 // Applies available reductions on Heads, appending resulting heads to the list.
 //
 // Exposed for testing only.
 void glrReduce(std::vector<const GSS::Node *> &Heads, SymbolID Lookahead,
-               const ParseParams &Params);
+               const ParseParams &Params, const Language &Lang);
 
 } // namespace pseudo
 } // namespace clang
Index: clang-tools-extra/pseudo/gen/Main.cpp
===================================================================
--- clang-tools-extra/pseudo/gen/Main.cpp
+++ clang-tools-extra/pseudo/gen/Main.cpp
@@ -79,6 +79,14 @@
 
   switch (Emit) {
   case EmitSymbolList:
+    Out.os() << R"cpp(
+#ifndef NONTERMINAL
+#define NONTERMINAL(X, Y)
+#endif
+#ifndef EXTENSION
+#define EXTENSION(X, Y)
+#endif
+    )cpp";
     for (clang::pseudo::SymbolID ID = 0; ID < G.table().Nonterminals.size();
          ++ID) {
       std::string Name = G.symbolName(ID).str();
@@ -86,6 +94,16 @@
       std::replace(Name.begin(), Name.end(), '-', '_');
       Out.os() << llvm::formatv("NONTERMINAL({0}, {1})\n", Name, ID);
     }
+    for (clang::pseudo::ExtensionID EID = 1 /*skip the sentinel 0 value*/;
+         EID < G.table().AttributeValues.size(); ++EID) {
+      llvm::StringRef Name = G.table().AttributeValues[EID];
+      assert(!Name.empty());
+      Out.os() << llvm::formatv("EXTENSION({0}, {1})\n", Name, EID);
+    }
+    Out.os() << R"cpp(
+#undef NONTERMINAL
+#undef EXTENSION
+    )cpp";
     break;
   case EmitGrammarContent:
     for (llvm::StringRef Line : llvm::split(GrammarText, '\n')) {
Index: clang-tools-extra/pseudo/fuzzer/Fuzzer.cpp
===================================================================
--- clang-tools-extra/pseudo/fuzzer/Fuzzer.cpp
+++ clang-tools-extra/pseudo/fuzzer/Fuzzer.cpp
@@ -43,9 +43,8 @@
     clang::pseudo::GSS GSS;
     const Language &Lang = getLanguageFromFlags();
     auto &Root =
-        glrParse(ParseableStream,
-                 clang::pseudo::ParseParams{Lang.G, Lang.Table, Arena, GSS},
-                 *Lang.G.findNonterminal("translation-unit"));
+        glrParse(clang::pseudo::ParseParams{ParseableStream, Arena, GSS},
+                 *Lang.G.findNonterminal("translation-unit"), Lang);
     if (Print)
       llvm::outs() << Root.dumpRecursive(Lang.G);
   }
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to