hokein created this revision.
hokein added a reviewer: sammccall.
Herald added a subscriber: mgorny.
Herald added a project: All.
hokein requested review of this revision.
Herald added subscribers: cfe-commits, alextsao1999.
Herald added a project: clang-tools-extra.

- define a common data structure ParseLang, which is the compiled result of the 
BNF grammar (output of clangPseudoCXX and clangPseudoCLI). It is defined in 
Language.h. The Language.h file is shared between different libraries;
- create a clangPseudoCLI lib which defines a `grammar` commandline flag and 
exposes a function to get the ParseLang. It supports --grammar=cxx and 
--grammar=/path/to/file.bnf. It is used in the clang-pseudo, fuzzer, and 
benchmark tools;
- implement two simple guards (contextual-override/final) for cxx.bnf;
- layering: clangPseudoCXX depends on clangPseudo (as the guard functions need 
to access the TokenStream);

TODO:

- comments are missing;
- figure out better name for ParseLang;
- add lit&unit tests for the cxx guards;


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D127448

Files:
  clang-tools-extra/pseudo/benchmarks/Benchmark.cpp
  clang-tools-extra/pseudo/benchmarks/CMakeLists.txt
  clang-tools-extra/pseudo/fuzzer/CMakeLists.txt
  clang-tools-extra/pseudo/fuzzer/Fuzzer.cpp
  clang-tools-extra/pseudo/gen/Main.cpp
  clang-tools-extra/pseudo/include/clang-pseudo/GLR.h
  clang-tools-extra/pseudo/include/clang-pseudo/Language.h
  clang-tools-extra/pseudo/include/clang-pseudo/cli/CLI.h
  clang-tools-extra/pseudo/include/clang-pseudo/cxx/CXX.h
  clang-tools-extra/pseudo/lib/CMakeLists.txt
  clang-tools-extra/pseudo/lib/GLR.cpp
  clang-tools-extra/pseudo/lib/cli/CLI.cpp
  clang-tools-extra/pseudo/lib/cli/CMakeLists.txt
  clang-tools-extra/pseudo/lib/cxx.bnf
  clang-tools-extra/pseudo/lib/cxx/CMakeLists.txt
  clang-tools-extra/pseudo/lib/cxx/CXX.cpp
  clang-tools-extra/pseudo/tool/CMakeLists.txt
  clang-tools-extra/pseudo/tool/ClangPseudo.cpp
  clang-tools-extra/pseudo/unittests/GLRTest.cpp

Index: clang-tools-extra/pseudo/unittests/GLRTest.cpp
===================================================================
--- clang-tools-extra/pseudo/unittests/GLRTest.cpp
+++ clang-tools-extra/pseudo/unittests/GLRTest.cpp
@@ -48,7 +48,15 @@
     std::vector<std::string> Diags;
     G = Grammar::parseBNF(GrammarBNF, Diags);
   }
-
+  // FIXME: move to TokenStream class.
+  TokenStream emptyTokenStream() {
+    TokenStream Empty;
+    Empty.finalize();
+    return Empty;
+  }
+  ParseLang getTestLang() {
+    return {*G, Table, Guards};
+  }
   void buildGrammar(std::vector<std::string> Nonterminals,
                     std::vector<std::string> Rules) {
     Nonterminals.push_back("_");
@@ -91,6 +99,8 @@
 
 protected:
   std::unique_ptr<Grammar> G;
+  LRTable Table;
+  llvm::DenseMap<ExtensionID, Guard> Guards;
   ForestArena Arena;
   GSS GSStack;
   std::vector<const GSS::Node*> NewHeadResults;
@@ -117,7 +127,7 @@
                                    /*Parents=*/{GSSNode0});
 
   buildGrammar({}, {}); // Create a fake empty grammar.
-  LRTable T = LRTable::buildForTests(G->table(), /*Entries=*/{});
+  Table = LRTable::buildForTests(G->table(), /*Entries=*/{});
 
   ForestNode &SemiTerminal = Arena.createTerminal(tok::semi, 0);
   std::vector<ParseStep> PendingShift = {
@@ -125,7 +135,7 @@
       {GSSNode3, Action::shift(5)},
       {GSSNode2, Action::shift(4)},
   };
-  glrShift(PendingShift, SemiTerminal, {*G, T, Arena, GSStack},
+  glrShift(PendingShift, SemiTerminal,  {getTestLang(), Arena, GSStack},
            captureNewHeads());
 
   EXPECT_THAT(NewHeadResults, testing::UnorderedElementsAre(
@@ -146,7 +156,7 @@
   buildGrammar({"class-name", "enum-name"},
                {"class-name := IDENTIFIER", "enum-name := IDENTIFIER"});
 
-  LRTable Table = LRTable::buildForTests(
+  Table = LRTable::buildForTests(
       G->table(), {{/*State=*/0, id("class-name"), Action::goTo(2)},
                    {/*State=*/0, id("enum-name"), Action::goTo(3)}});
 
@@ -158,7 +168,7 @@
   std::vector<ParseStep> PendingReduce = {
       {GSSNode1, Action::reduce(ruleFor("class-name"))},
       {GSSNode1, Action::reduce(ruleFor("enum-name"))}};
-  glrReduce(PendingReduce, {*G, Table, Arena, GSStack},
+  glrReduce(PendingReduce, {getTestLang(), Arena, GSStack}, emptyTokenStream(),
             captureNewHeads());
   EXPECT_THAT(NewHeadResults,
               testing::UnorderedElementsAre(
@@ -189,13 +199,13 @@
       /*State=*/4, &Arena.createTerminal(tok::star, /*TokenIndex=*/1),
       /*Parents=*/{GSSNode2, GSSNode3});
 
-  LRTable Table = LRTable::buildForTests(
+  Table = LRTable::buildForTests(
       G->table(),
       {{/*State=*/2, id("ptr-operator"), Action::goTo(/*NextState=*/5)},
        {/*State=*/3, id("ptr-operator"), Action::goTo(/*NextState=*/6)}});
   std::vector<ParseStep> PendingReduce = {
       {GSSNode4, Action::reduce(ruleFor("ptr-operator"))}};
-  glrReduce(PendingReduce, {*G, Table, Arena, GSStack},
+  glrReduce(PendingReduce, {getTestLang(), Arena, GSStack}, emptyTokenStream(),
             captureNewHeads());
 
   EXPECT_THAT(NewHeadResults,
@@ -238,7 +248,7 @@
       GSStack.addNode(/*State=*/4, /*ForestNode=*/EnumNameNode,
                       /*Parents=*/{GSSNode2});
 
-  LRTable Table = LRTable::buildForTests(
+  Table = LRTable::buildForTests(
       G->table(),
       {{/*State=*/1, id("type-name"), Action::goTo(/*NextState=*/5)},
        {/*State=*/2, id("type-name"), Action::goTo(/*NextState=*/5)}});
@@ -250,7 +260,7 @@
       {
           GSSNode4, Action::reduce(/*RuleID=*/1) // type-name := enum-name
       }};
-  glrReduce(PendingReduce, {*G, Table, Arena, GSStack},
+  glrReduce(PendingReduce, {getTestLang(), Arena, GSStack}, emptyTokenStream(),
             captureNewHeads());
 
   // Verify that the stack heads are joint at state 5 after reduces.
@@ -296,7 +306,7 @@
       GSStack.addNode(/*State=*/4, /*ForestNode=*/StartTerminal,
                       /*Parents=*/{GSSNode2});
 
-  LRTable Table = LRTable::buildForTests(
+  Table = LRTable::buildForTests(
       G->table(), {{/*State=*/0, id("pointer"), Action::goTo(5)}});
   // FIXME: figure out a way to get rid of the hard-coded reduce RuleID!
   std::vector<ParseStep> PendingReduce = {
@@ -306,7 +316,7 @@
       {
           GSSNode4, Action::reduce(/*RuleID=*/1) // pointer := enum-name *
       }};
-  glrReduce(PendingReduce, {*G, Table, Arena, GSStack},
+  glrReduce(PendingReduce,{getTestLang(), Arena, GSStack}, emptyTokenStream(),
             captureNewHeads());
 
   EXPECT_THAT(NewHeadResults, testing::UnorderedElementsAre(
@@ -340,12 +350,12 @@
     left-paren := {
     expr := IDENTIFIER
   )bnf");
+  Table = LRTable::buildSLR(*G);
   clang::LangOptions LOptions;
   const TokenStream &Tokens = cook(lex("{ abc", LOptions), LOptions);
-  auto LRTable = LRTable::buildSLR(*G);
 
   const ForestNode &Parsed =
-      glrParse(Tokens, {*G, LRTable, Arena, GSStack}, id("test"));
+      glrParse(Tokens, {getTestLang(), Arena, GSStack}, id("test"));
   // Verify that there is no duplicated sequence node of `expr := IDENTIFIER`
   // in the forest, see the `#1` and `=#1` in the dump string.
   EXPECT_EQ(Parsed.dumpRecursive(*G),
@@ -380,10 +390,10 @@
   )bnf");
   clang::LangOptions LOptions;
   const TokenStream &Tokens = cook(lex("IDENTIFIER", LOptions), LOptions);
-  auto LRTable = LRTable::buildSLR(*G);
+  Table = LRTable::buildSLR(*G);
 
   const ForestNode &Parsed =
-      glrParse(Tokens, {*G, LRTable, Arena, GSStack}, id("test"));
+      glrParse(Tokens, {getTestLang(), Arena, GSStack}, id("test"));
   EXPECT_EQ(Parsed.dumpRecursive(*G),
             "[  0, end) test := <ambiguous>\n"
             "[  0, end) ├─test := IDENTIFIER\n"
@@ -405,10 +415,10 @@
   // of the nonterminal `test` when the next token is `eof`, verify that the
   // parser stops at the right state.
   const TokenStream &Tokens = cook(lex("id id", LOptions), LOptions);
-  auto LRTable = LRTable::buildSLR(*G);
+  Table = LRTable::buildSLR(*G);
 
   const ForestNode &Parsed =
-      glrParse(Tokens, {*G, LRTable, Arena, GSStack}, id("test"));
+      glrParse(Tokens, {getTestLang(), Arena, GSStack}, id("test"));
   EXPECT_EQ(Parsed.dumpRecursive(*G),
             "[  0, end) test := IDENTIFIER test\n"
             "[  0,   1) ├─IDENTIFIER := tok[0]\n"
Index: clang-tools-extra/pseudo/tool/ClangPseudo.cpp
===================================================================
--- clang-tools-extra/pseudo/tool/ClangPseudo.cpp
+++ clang-tools-extra/pseudo/tool/ClangPseudo.cpp
@@ -10,6 +10,7 @@
 #include "clang-pseudo/DirectiveTree.h"
 #include "clang-pseudo/GLR.h"
 #include "clang-pseudo/Token.h"
+#include "clang-pseudo/cli/CLI.h"
 #include "clang-pseudo/grammar/Grammar.h"
 #include "clang-pseudo/grammar/LRGraph.h"
 #include "clang-pseudo/grammar/LRTable.h"
@@ -20,14 +21,11 @@
 #include "llvm/Support/MemoryBuffer.h"
 #include "llvm/Support/Signals.h"
 
-using clang::pseudo::Grammar;
 using clang::pseudo::TokenStream;
 using llvm::cl::desc;
 using llvm::cl::init;
 using llvm::cl::opt;
 
-static opt<std::string>
-    Grammar("grammar", desc("Parse and check a BNF grammar file."), init(""));
 static opt<bool> PrintGrammar("print-grammar", desc("Print the grammar."));
 static opt<bool> PrintGraph("print-graph",
                             desc("Print the LR graph for the grammar"));
@@ -93,42 +91,34 @@
     pairBrackets(*ParseableStream);
   }
 
-  if (Grammar.getNumOccurrences()) {
-    std::string Text = readOrDie(Grammar);
-    std::vector<std::string> Diags;
-    auto G = Grammar::parseBNF(Text, Diags);
-
-    if (!Diags.empty()) {
-      llvm::errs() << llvm::join(Diags, "\n");
-      return 2;
-    }
-    llvm::outs() << llvm::formatv("grammar file {0} is parsed successfully\n",
-                                  Grammar);
+  if (true) {
+    const auto &Lang = clang::pseudo::getParseLang();
     if (PrintGrammar)
-      llvm::outs() << G->dump();
+      llvm::outs() << Lang.G.dump();
     if (PrintGraph)
-      llvm::outs() << clang::pseudo::LRGraph::buildLR0(*G).dumpForTests(*G);
-    auto LRTable = clang::pseudo::LRTable::buildSLR(*G);
+      llvm::outs() << clang::pseudo::LRGraph::buildLR0(Lang.G).dumpForTests(
+          Lang.G);
+
     if (PrintTable)
-      llvm::outs() << LRTable.dumpForTests(*G);
+      llvm::outs() << Lang.Table.dumpForTests(Lang.G);
     if (PrintStatistics)
-      llvm::outs() << LRTable.dumpStatistics();
+      llvm::outs() << Lang.Table.dumpStatistics();
 
     if (ParseableStream) {
       clang::pseudo::ForestArena Arena;
       clang::pseudo::GSS GSS;
       llvm::Optional<clang::pseudo::SymbolID> StartSymID =
-          G->findNonterminal(StartSymbol);
+          Lang.G.findNonterminal(StartSymbol);
       if (!StartSymID) {
         llvm::errs() << llvm::formatv(
-            "The start symbol {0} doesn't exit in the grammar!\n", Grammar);
+            "The start symbol {0} doesn't exit in the grammar!\n", StartSymbol);
         return 2;
       }
-      auto &Root = glrParse(*ParseableStream,
-                            clang::pseudo::ParseParams{*G, LRTable, Arena, GSS},
-                            *StartSymID);
+      auto &Root =
+          glrParse(*ParseableStream,
+                   clang::pseudo::ParseParams{Lang, Arena, GSS}, *StartSymID);
       if (PrintForest)
-        llvm::outs() << Root.dumpRecursive(*G, /*Abbreviated=*/true);
+        llvm::outs() << Root.dumpRecursive(Lang.G, /*Abbreviated=*/true);
 
       if (PrintStatistics) {
         llvm::outs() << "Forest bytes: " << Arena.bytes()
Index: clang-tools-extra/pseudo/tool/CMakeLists.txt
===================================================================
--- clang-tools-extra/pseudo/tool/CMakeLists.txt
+++ clang-tools-extra/pseudo/tool/CMakeLists.txt
@@ -13,5 +13,6 @@
   PRIVATE
   clangPseudo
   clangPseudoGrammar
+  clangPseudoCLI
   )
 
Index: clang-tools-extra/pseudo/lib/cxx/CXX.cpp
===================================================================
--- clang-tools-extra/pseudo/lib/cxx/CXX.cpp
+++ clang-tools-extra/pseudo/lib/cxx/CXX.cpp
@@ -7,7 +7,10 @@
 //===----------------------------------------------------------------------===//
 
 #include "clang-pseudo/cxx/CXX.h"
+#include "clang-pseudo/Forest.h"
+#include "clang-pseudo/grammar/Grammar.h"
 #include "clang-pseudo/grammar/LRTable.h"
+#include <utility>
 
 namespace clang {
 namespace pseudo {
@@ -29,6 +32,30 @@
   return *Table;
 }
 
+namespace {
+bool guardOverride(llvm::ArrayRef<const ForestNode *> RHS,
+                   const TokenStream &Tokens) {
+  assert(RHS.size() == 1 &&
+         RHS.front()->symbol() == tokenSymbol(clang::tok::identifier));
+  return Tokens.tokens()[RHS.front()->startTokenIndex()].text() == "override";
+}
+bool guardFinal(llvm::ArrayRef<const ForestNode *> RHS,
+                const TokenStream &Tokens) {
+  assert(RHS.size() == 1 &&
+         RHS.front()->symbol() == tokenSymbol(clang::tok::identifier));
+  return Tokens.tokens()[RHS.front()->startTokenIndex()].text() == "final";
+}
+} // namespace
+
+const ParseLang &getLanguage() {
+  static llvm::DenseMap<ExtensionID, Guard> *Guards =
+      new llvm::DenseMap<ExtensionID, Guard>(
+          {{(ExtensionID)Extension::Override, guardOverride},
+           {(ExtensionID)Extension::Final, guardFinal}});
+  static ParseLang *L = new ParseLang{getGrammar(), getLRTable(), *Guards};
+  return *L;
+}
+
 } // namespace cxx
 } // namespace pseudo
 } // namespace clang
Index: clang-tools-extra/pseudo/lib/cxx.bnf
===================================================================
--- clang-tools-extra/pseudo/lib/cxx.bnf
+++ clang-tools-extra/pseudo/lib/cxx.bnf
@@ -739,8 +739,8 @@
 
 #! Contextual keywords -- clang lexer always lexes them as identifier tokens.
 #! Placeholders for literal text in the grammar that lex as other things.
-contextual-override := IDENTIFIER
-contextual-final := IDENTIFIER
+contextual-override := IDENTIFIER [guard=Override]
+contextual-final := IDENTIFIER [guard=Final]
 contextual-zero := NUMERIC_CONSTANT
 module-keyword := IDENTIFIER
 import-keyword := IDENTIFIER
Index: clang-tools-extra/pseudo/lib/cxx/CMakeLists.txt
===================================================================
--- clang-tools-extra/pseudo/lib/cxx/CMakeLists.txt
+++ clang-tools-extra/pseudo/lib/cxx/CMakeLists.txt
@@ -10,4 +10,5 @@
 
   LINK_LIBS
   clangPseudoGrammar
+  clangPseudo
   )
Index: clang-tools-extra/pseudo/lib/cli/CMakeLists.txt
===================================================================
--- clang-tools-extra/pseudo/lib/cli/CMakeLists.txt
+++ clang-tools-extra/pseudo/lib/cli/CMakeLists.txt
@@ -2,12 +2,10 @@
   Support
   )
 
-add_clang_library(clangPseudoCXX
-  CXX.cpp
-
-  DEPENDS
-  cxx_gen
+add_clang_library(clangPseudoCLI
+  CLI.cpp
 
   LINK_LIBS
   clangPseudoGrammar
+  clangPseudoCXX
   )
Index: clang-tools-extra/pseudo/lib/cli/CLI.cpp
===================================================================
--- /dev/null
+++ clang-tools-extra/pseudo/lib/cli/CLI.cpp
@@ -0,0 +1,51 @@
+
+
+#include "clang-pseudo/cxx/CXX.h"
+
+#include "clang-pseudo/Language.h"
+#include "clang-pseudo/grammar/LRTable.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/ErrorOr.h"
+#include "llvm/Support/MemoryBuffer.h"
+
+using llvm::cl::desc;
+using llvm::cl::init;
+using llvm::cl::opt;
+static opt<std::string>
+    Grammar("grammar",
+            desc("Specify a BNF grammar file path, or builtin language (cxx)."),
+            init("cxx"));
+
+namespace clang {
+namespace pseudo {
+
+const ParseLang &getParseLang() {
+  if (::Grammar == "cxx")
+    return cxx::getLanguage();
+
+  static ParseLang *PL = [&]() {
+    llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> GrammarText =
+        llvm::MemoryBuffer::getFile(::Grammar);
+    if (std::error_code EC = GrammarText.getError()) {
+      llvm::errs() << "Error: can't read grammar file '" << ::Grammar
+                   << "': " << EC.message() << "\n";
+      std::exit(1);
+    }
+    std::vector<std::string> Diags;
+    auto G = Grammar::parseBNF(GrammarText->get()->getBuffer(), Diags);
+    if (!Diags.empty()) {
+      for (const auto &Diag : Diags)
+        llvm::errs() << Diag << "\n";
+      std::exit(1);
+    }
+
+    LRTable *Table = new LRTable(LRTable::buildSLR(*G));
+    llvm::DenseMap<ExtensionID, Guard> *Guards =
+        new llvm::DenseMap<ExtensionID, Guard>();
+    return new ParseLang{*G.release(), *Table, *Guards};
+  }();
+  return *PL;
+}
+
+} // namespace pseudo
+} // namespace clang
\ No newline at end of file
Index: clang-tools-extra/pseudo/lib/GLR.cpp
===================================================================
--- clang-tools-extra/pseudo/lib/GLR.cpp
+++ clang-tools-extra/pseudo/lib/GLR.cpp
@@ -41,14 +41,14 @@
                            SymbolID StartSymbol) {
   assert(isNonterminal(StartSymbol) && "Start symbol must be a nonterminal");
   llvm::ArrayRef<ForestNode> Terminals = Params.Forest.createTerminals(Tokens);
-  auto &G = Params.G;
+  auto &G = Params.Lang.G;
   (void)G;
   auto &GSS = Params.GSStack;
 
   // Lists of active shift, reduce actions.
   std::vector<ParseStep> PendingShift, PendingReduce;
   auto AddSteps = [&](const GSS::Node *Head, SymbolID NextTok) {
-    for (const auto &Action : Params.Table.getActions(Head->State, NextTok)) {
+    for (const auto &Action : Params.Lang.Table.getActions(Head->State, NextTok)) {
       switch (Action.kind()) {
       case LRTable::Action::Shift:
         PendingShift.push_back({Head, Action});
@@ -61,7 +61,7 @@
       }
     }
   };
-  StateID StartState = Params.Table.getStartState(StartSymbol);
+  StateID StartState = Params.Lang.Table.getStartState(StartSymbol);
   std::vector<const GSS::Node *> NewHeads = {
       GSS.addNode(/*State=*/StartState,
                   /*ForestNode=*/nullptr, {})};
@@ -84,7 +84,7 @@
     for (const auto *Head : NewHeads)
       AddSteps(Head, Terminal.symbol());
     NewHeads.clear();
-    glrReduce(PendingReduce, Params,
+    glrReduce(PendingReduce, Params, Tokens,
               [&](const GSS::Node * NewHead) {
                 // A reduce will enable more steps.
                 AddSteps(NewHead, Terminal.symbol());
@@ -98,10 +98,10 @@
   for (const auto *Heads : NewHeads)
     AddSteps(Heads, tokenSymbol(tok::eof));
 
-  StateID AcceptState = Params.Table.getGoToState(StartState, StartSymbol);
+  StateID AcceptState = Params.Lang.Table.getGoToState(StartState, StartSymbol);
   // Collect new heads created from the final reduce.
   std::vector<const GSS::Node*> Heads;
-  glrReduce(PendingReduce, Params, [&](const GSS::Node *NewHead) {
+  glrReduce(PendingReduce, Params, Tokens, [&](const GSS::Node *NewHead) {
     Heads.push_back(NewHead);
     // A reduce will enable more steps.
     AddSteps(NewHead, tokenSymbol(tok::eof));
@@ -147,7 +147,7 @@
                       }) &&
          "Pending shift actions must be shift actions");
   LLVM_DEBUG(llvm::dbgs() << llvm::formatv("  Shift {0} ({1} active heads):\n",
-                                           Params.G.symbolName(NewTok.symbol()),
+                                           Params.Lang.G.symbolName(NewTok.symbol()),
                                            PendingShift.size()));
 
   // We group pending shifts by their target state so we can merge them.
@@ -232,6 +232,7 @@
 //                  2 by`enum-name := class-name STAR`:
 //     0--5(pointer)       // 5 is goto(0, pointer)
 void glrReduce(std::vector<ParseStep> &PendingReduce, const ParseParams &Params,
+               const TokenStream& Tokens,
                NewHeadCallback NewHeadCB) {
   // There are two interacting complications:
   // 1.  Performing one reduce can unlock new reduces on the newly-created head.
@@ -294,12 +295,18 @@
   // Pop walks up the parent chain(s) for a reduction from Head by to Rule.
   // Once we reach the end, record the bases and sequences.
   auto Pop = [&](const GSS::Node *Head, RuleID RID) {
-    LLVM_DEBUG(llvm::dbgs() << "  Pop " << Params.G.dumpRule(RID) << "\n");
-    const auto &Rule = Params.G.lookupRule(RID);
+    LLVM_DEBUG(llvm::dbgs() << "  Pop " << Params.Lang.G.dumpRule(RID) << "\n");
+    const auto &Rule = Params.Lang.G.lookupRule(RID);
     Family F{/*Start=*/0, /*Symbol=*/Rule.Target, /*Rule=*/RID};
     TempSequence.resize_for_overwrite(Rule.Size);
     auto DFS = [&](const GSS::Node *N, unsigned I, auto &DFS) {
       if (I == Rule.Size) {
+        if (Rule.Guard) {
+          auto It = Params.Lang.Guards.find(Rule.Guard);
+          assert(It != Params.Lang.Guards.end() && "missing guard!");
+          if (!It->getSecond()(TempSequence, Tokens))
+            return;
+        }
         F.Start = TempSequence.front()->startTokenIndex();
         LLVM_DEBUG(llvm::dbgs() << "    --> base at S" << N->State << "\n");
         Sequences.emplace(F, PushSpec{N, TempSequence});
@@ -331,7 +338,7 @@
   while (!Sequences.empty()) {
     Family F = Sequences.top().first;
 
-    LLVM_DEBUG(llvm::dbgs() << "  Push " << Params.G.symbolName(F.Symbol)
+    LLVM_DEBUG(llvm::dbgs() << "  Push " << Params.Lang.G.symbolName(F.Symbol)
                             << " from token " << F.Start << "\n");
 
     // Grab the sequences and bases for this family.
@@ -344,7 +351,7 @@
       FamilySequences.emplace_back(Sequences.top().first.Rule,
                                    Sequences.top().second.Seq);
       FamilyBases.emplace_back(
-          Params.Table.getGoToState(Sequences.top().second.Base->State,
+          Params.Lang.Table.getGoToState(Sequences.top().second.Base->State,
                                     F.Symbol),
           Sequences.top().second.Base);
 
@@ -362,7 +369,7 @@
         SequenceNodes.size() == 1
             ? SequenceNodes.front()
             : &Params.Forest.createAmbiguous(F.Symbol, SequenceNodes);
-    LLVM_DEBUG(llvm::dbgs() << "    --> " << Parsed->dump(Params.G) << "\n");
+    LLVM_DEBUG(llvm::dbgs() << "    --> " << Parsed->dump(Params.Lang.G) << "\n");
 
     // Bases for this family, deduplicate them, and group by the goTo State.
     sortAndUnique(FamilyBases);
Index: clang-tools-extra/pseudo/lib/CMakeLists.txt
===================================================================
--- clang-tools-extra/pseudo/lib/CMakeLists.txt
+++ clang-tools-extra/pseudo/lib/CMakeLists.txt
@@ -1,3 +1,4 @@
+add_subdirectory(cli)
 add_subdirectory(cxx)
 add_subdirectory(grammar)
 
Index: clang-tools-extra/pseudo/include/clang-pseudo/cxx/CXX.h
===================================================================
--- clang-tools-extra/pseudo/include/clang-pseudo/cxx/CXX.h
+++ clang-tools-extra/pseudo/include/clang-pseudo/cxx/CXX.h
@@ -23,6 +23,7 @@
 #ifndef CLANG_PSEUDO_CXX_CXX_H
 #define CLANG_PSEUDO_CXX_CXX_H
 
+#include "clang-pseudo/Language.h"
 #include "clang-pseudo/grammar/Grammar.h"
 
 namespace clang {
@@ -38,11 +39,19 @@
 #undef NONTERMINAL
 };
 
+enum class Extension : ExtensionID {
+#define EXTENSION(X, Y) X = Y,
+#include "CXXSymbols.inc"
+#undef EXTENSION
+};
+
 // Returns the C++ grammar.
 const Grammar &getGrammar();
 // Returns the corresponding LRTable for the C++ grammar.
 const LRTable &getLRTable();
 
+const ParseLang &getLanguage();
+
 } // namespace cxx
 
 } // namespace pseudo
Index: clang-tools-extra/pseudo/include/clang-pseudo/cli/CLI.h
===================================================================
--- /dev/null
+++ clang-tools-extra/pseudo/include/clang-pseudo/cli/CLI.h
@@ -0,0 +1,12 @@
+
+#ifndef CLANG_PSEUDO_CLI_CLI_H
+#define CLANG_PSEUDO_CLI_CLI_H
+
+namespace clang {
+namespace pseudo {
+struct ParseLang;
+const ParseLang &getParseLang();
+} // namespace pseudo
+} // namespace clang
+
+#endif // CLANG_PSEUDO_CLI_CLI_H
Index: clang-tools-extra/pseudo/include/clang-pseudo/Language.h
===================================================================
--- /dev/null
+++ clang-tools-extra/pseudo/include/clang-pseudo/Language.h
@@ -0,0 +1,31 @@
+
+#ifndef CLANG_PSEUDO_LANGUAGE_H
+#define CLANG_PSEUDO_LANGUAGE_H
+
+#include "clang-pseudo/grammar/Grammar.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/STLFunctionalExtras.h"
+
+namespace clang {
+namespace pseudo {
+
+class ForestNode;
+class TokenStream;
+class LRTable;
+
+using Guard = llvm::function_ref<bool(llvm::ArrayRef<const ForestNode *> RHS,
+                                      const TokenStream &)>;
+
+struct ParseLang {
+  const Grammar &G;
+  const LRTable &Table;
+  const llvm::DenseMap<ExtensionID, Guard> &Guards;
+
+  // FIXME: add clang::LangOptions.
+  // FIXME: add default start symbols.
+};
+
+} // namespace pseudo
+} // namespace clang
+
+#endif // CLANG_PSEUDO_LANGUAGE_H
\ No newline at end of file
Index: clang-tools-extra/pseudo/include/clang-pseudo/GLR.h
===================================================================
--- clang-tools-extra/pseudo/include/clang-pseudo/GLR.h
+++ clang-tools-extra/pseudo/include/clang-pseudo/GLR.h
@@ -30,6 +30,7 @@
 #define CLANG_PSEUDO_GLR_H
 
 #include "clang-pseudo/Forest.h"
+#include "clang-pseudo/Language.h"
 #include "clang-pseudo/grammar/Grammar.h"
 #include "clang-pseudo/grammar/LRTable.h"
 #include "llvm/Support/Allocator.h"
@@ -113,12 +114,7 @@
 
 // Parameters for the GLR parsing.
 struct ParseParams {
-  // The grammar of the language we're going to parse.
-  const Grammar &G;
-  // The LR table which GLR uses to parse the input, should correspond to the
-  // Grammar G.
-  const LRTable &Table;
-
+  const ParseLang &Lang;
   // Arena for data structure used by the GLR algorithm.
   ForestArena &Forest;  // Storage for the output forest.
   GSS &GSStack;         // Storage for parsing stacks.
@@ -159,7 +155,7 @@
 //
 // Exposed for testing only.
 void glrReduce(std::vector<ParseStep> &PendingReduce, const ParseParams &Params,
-               NewHeadCallback NewHeadCB);
+               const TokenStream &Tokens, NewHeadCallback NewHeadCB);
 
 } // namespace pseudo
 } // namespace clang
Index: clang-tools-extra/pseudo/gen/Main.cpp
===================================================================
--- clang-tools-extra/pseudo/gen/Main.cpp
+++ clang-tools-extra/pseudo/gen/Main.cpp
@@ -79,6 +79,14 @@
 
   switch (Emit) {
   case EmitSymbolList:
+    Out.os() << R"cpp(
+#ifndef NONTERMINAL
+#define NONTERMINAL(X, Y)
+#endif
+#ifndef EXTENSION
+#define EXTENSION(X, Y)
+#endif
+    )cpp";
     for (clang::pseudo::SymbolID ID = 0; ID < G->table().Nonterminals.size();
          ++ID) {
       std::string Name = G->symbolName(ID).str();
@@ -86,6 +94,16 @@
       std::replace(Name.begin(), Name.end(), '-', '_');
       Out.os() << llvm::formatv("NONTERMINAL({0}, {1})\n", Name, ID);
     }
+    for (clang::pseudo::ExtensionID AID = 1 /*skip the sentinel 0 value*/;
+         AID < G->table().AttributeValues.size(); ++AID) {
+      llvm::StringRef Name = G->table().AttributeValues[AID];
+      assert(!Name.empty());
+      Out.os() << llvm::formatv("EXTENSION({0}, {1})\n", Name, AID);
+    }
+    Out.os() << R"cpp(
+#undef NONTERMINAL
+#undef EXTENSION
+    )cpp";
     break;
   case EmitGrammarContent:
     for (llvm::StringRef Line : llvm::split(GrammarText, '\n')) {
Index: clang-tools-extra/pseudo/fuzzer/Fuzzer.cpp
===================================================================
--- clang-tools-extra/pseudo/fuzzer/Fuzzer.cpp
+++ clang-tools-extra/pseudo/fuzzer/Fuzzer.cpp
@@ -10,6 +10,7 @@
 #include "clang-pseudo/Forest.h"
 #include "clang-pseudo/GLR.h"
 #include "clang-pseudo/Token.h"
+#include "clang-pseudo/cli/CLI.h"
 #include "clang-pseudo/grammar/Grammar.h"
 #include "clang-pseudo/grammar/LRTable.h"
 #include "clang/Basic/LangOptions.h"
@@ -24,28 +25,11 @@
 
 class Fuzzer {
   clang::LangOptions LangOpts = clang::pseudo::genericLangOpts();
-  std::unique_ptr<Grammar> G;
   LRTable T;
   bool Print;
 
 public:
-  Fuzzer(llvm::StringRef GrammarPath, bool Print) : Print(Print) {
-    llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> GrammarText =
-        llvm::MemoryBuffer::getFile(GrammarPath);
-    if (std::error_code EC = GrammarText.getError()) {
-      llvm::errs() << "Error: can't read grammar file '" << GrammarPath
-                   << "': " << EC.message() << "\n";
-      std::exit(1);
-    }
-    std::vector<std::string> Diags;
-    G = Grammar::parseBNF(GrammarText->get()->getBuffer(), Diags);
-    if (!Diags.empty()) {
-      for (const auto &Diag : Diags)
-        llvm::errs() << Diag << "\n";
-      std::exit(1);
-    }
-    T = LRTable::buildSLR(*G);
-  }
+  Fuzzer(bool Print) : Print(Print) {}
 
   void operator()(llvm::StringRef Code) {
     std::string CodeStr = Code.str(); // Must be null-terminated.
@@ -58,11 +42,11 @@
 
     clang::pseudo::ForestArena Arena;
     clang::pseudo::GSS GSS;
-    auto &Root =
-        glrParse(ParseableStream, clang::pseudo::ParseParams{*G, T, Arena, GSS},
-                 *G->findNonterminal("translation-unit"));
+    auto &Root = glrParse(
+        ParseableStream, clang::pseudo::ParseParams{getParseLang(), Arena, GSS},
+        *getParseLang().G.findNonterminal("translation-unit"));
     if (Print)
-      llvm::outs() << Root.dumpRecursive(*G);
+      llvm::outs() << Root.dumpRecursive(getParseLang().G);
   }
 };
 
@@ -78,13 +62,9 @@
 //  -grammar=<file> (required) - path to cxx.bnf
 //  -print                     - used for testing the fuzzer
 int LLVMFuzzerInitialize(int *Argc, char ***Argv) {
-  llvm::StringRef GrammarFile;
   bool PrintForest = false;
   auto ConsumeArg = [&](llvm::StringRef Arg) -> bool {
-    if (Arg.consume_front("-grammar=")) {
-      GrammarFile = Arg;
-      return true;
-    } else if (Arg == "-print") {
+    if (Arg == "-print") {
       PrintForest = true;
       return true;
     }
@@ -92,11 +72,7 @@
   };
   *Argc = std::remove_if(*Argv + 1, *Argv + *Argc, ConsumeArg) - *Argv;
 
-  if (GrammarFile.empty()) {
-    fprintf(stderr, "Fuzzer needs -grammar=/path/to/cxx.bnf\n");
-    exit(1);
-  }
-  clang::pseudo::Fuzz = new clang::pseudo::Fuzzer(GrammarFile, PrintForest);
+  clang::pseudo::Fuzz = new clang::pseudo::Fuzzer(PrintForest);
   return 0;
 }
 
Index: clang-tools-extra/pseudo/fuzzer/CMakeLists.txt
===================================================================
--- clang-tools-extra/pseudo/fuzzer/CMakeLists.txt
+++ clang-tools-extra/pseudo/fuzzer/CMakeLists.txt
@@ -11,5 +11,6 @@
 target_link_libraries(clang-pseudo-fuzzer
   PRIVATE
   clangPseudo
+  clangPseudoCLI
   clangPseudoGrammar
   )
Index: clang-tools-extra/pseudo/benchmarks/CMakeLists.txt
===================================================================
--- clang-tools-extra/pseudo/benchmarks/CMakeLists.txt
+++ clang-tools-extra/pseudo/benchmarks/CMakeLists.txt
@@ -3,6 +3,7 @@
 target_link_libraries(ClangPseudoBenchmark
   PRIVATE
   clangPseudo
+  clangPseudoCLI
   clangPseudoGrammar
   LLVMSupport
   )
Index: clang-tools-extra/pseudo/benchmarks/Benchmark.cpp
===================================================================
--- clang-tools-extra/pseudo/benchmarks/Benchmark.cpp
+++ clang-tools-extra/pseudo/benchmarks/Benchmark.cpp
@@ -25,6 +25,7 @@
 #include "clang-pseudo/Forest.h"
 #include "clang-pseudo/GLR.h"
 #include "clang-pseudo/Token.h"
+#include "clang-pseudo/cli/CLI.h"
 #include "clang-pseudo/grammar/Grammar.h"
 #include "clang-pseudo/grammar/LRTable.h"
 #include "clang/Basic/LangOptions.h"
@@ -39,9 +40,6 @@
 using llvm::cl::opt;
 using llvm::cl::Required;
 
-static opt<std::string> GrammarFile("grammar",
-                                    desc("Parse and check a BNF grammar file."),
-                                    Required);
 static opt<std::string> Source("source", desc("Source file"), Required);
 
 namespace clang {
@@ -49,11 +47,9 @@
 namespace bench {
 namespace {
 
-const std::string *GrammarText = nullptr;
 const std::string *SourceText = nullptr;
-const Grammar *G = nullptr;
 
-void setupGrammarAndSource() {
+void setupSource() {
   auto ReadFile = [](llvm::StringRef FilePath) -> std::string {
     llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> GrammarText =
         llvm::MemoryBuffer::getFile(FilePath);
@@ -64,22 +60,12 @@
     }
     return GrammarText.get()->getBuffer().str();
   };
-  GrammarText = new std::string(ReadFile(GrammarFile));
   SourceText = new std::string(ReadFile(Source));
-  std::vector<std::string> Diags;
-  G = Grammar::parseBNF(*GrammarText, Diags).release();
 }
 
-static void parseBNF(benchmark::State &State) {
-  std::vector<std::string> Diags;
-  for (auto _ : State)
-    Grammar::parseBNF(*GrammarText, Diags);
-}
-BENCHMARK(parseBNF);
-
 static void buildSLR(benchmark::State &State) {
   for (auto _ : State)
-    LRTable::buildSLR(*G);
+    LRTable::buildSLR(getParseLang().G);
 }
 BENCHMARK(buildSLR);
 
@@ -129,13 +115,13 @@
 BENCHMARK(preprocess);
 
 static void glrParse(benchmark::State &State) {
-  LRTable Table = clang::pseudo::LRTable::buildSLR(*G);
-  SymbolID StartSymbol = *G->findNonterminal("translation-unit");
+  SymbolID StartSymbol = *getParseLang().G.findNonterminal("translation-unit");
   TokenStream Stream = lexAndPreprocess();
   for (auto _ : State) {
     pseudo::ForestArena Forest;
     pseudo::GSS GSS;
-    pseudo::glrParse(Stream, ParseParams{*G, Table, Forest, GSS}, StartSymbol);
+    pseudo::glrParse(Stream, ParseParams{getParseLang(), Forest, GSS},
+                     StartSymbol);
   }
   State.SetBytesProcessed(static_cast<uint64_t>(State.iterations()) *
                           SourceText->size());
@@ -143,14 +129,13 @@
 BENCHMARK(glrParse);
 
 static void full(benchmark::State &State) {
-  LRTable Table = clang::pseudo::LRTable::buildSLR(*G);
-  SymbolID StartSymbol = *G->findNonterminal("translation-unit");
+  SymbolID StartSymbol = *getParseLang().G.findNonterminal("translation-unit");
   for (auto _ : State) {
     TokenStream Stream = lexAndPreprocess();
     pseudo::ForestArena Forest;
     pseudo::GSS GSS;
-    pseudo::glrParse(lexAndPreprocess(), ParseParams{*G, Table, Forest, GSS},
-                     StartSymbol);
+    pseudo::glrParse(lexAndPreprocess(),
+                     ParseParams{getParseLang(), Forest, GSS}, StartSymbol);
   }
   State.SetBytesProcessed(static_cast<uint64_t>(State.iterations()) *
                           SourceText->size());
@@ -165,7 +150,7 @@
 int main(int argc, char *argv[]) {
   benchmark::Initialize(&argc, argv);
   llvm::cl::ParseCommandLineOptions(argc, argv);
-  clang::pseudo::bench::setupGrammarAndSource();
+  clang::pseudo::bench::setupSource();
   benchmark::RunSpecifiedBenchmarks();
   return 0;
 }
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to