hokein created this revision. hokein added a reviewer: sammccall. Herald added a subscriber: mgorny. Herald added a project: All. hokein requested review of this revision. Herald added subscribers: cfe-commits, alextsao1999. Herald added a project: clang-tools-extra.
- define a common data structure ParseLang which is a compiled result of the bnf grammar (output of clangPseudoCXX and clangPseudoCLI). It is defined in Language.h. The Language.h file is shared with different libraries; - create a clangPseudoCLI lib which defines a `grammar` command-line flag and exposes a function to get the ParseLang. It supports --grammar=cxx, --grammar=/path/to/file.bnf. It is used in clang-pseudo, fuzzer, and benchmark tools; - implement two simple guards (contextual-override/final) for cxx.bnf; - layering: clangPseudoCXX depends on clangPseudo (as the guard functions need to access the TokenStream); TODO: - comments are missing; - figure out a better name for ParseLang; - add lit&unit tests for the cxx guards; Repository: rG LLVM Github Monorepo https://reviews.llvm.org/D127448 Files: clang-tools-extra/pseudo/benchmarks/Benchmark.cpp clang-tools-extra/pseudo/benchmarks/CMakeLists.txt clang-tools-extra/pseudo/fuzzer/CMakeLists.txt clang-tools-extra/pseudo/fuzzer/Fuzzer.cpp clang-tools-extra/pseudo/gen/Main.cpp clang-tools-extra/pseudo/include/clang-pseudo/GLR.h clang-tools-extra/pseudo/include/clang-pseudo/Language.h clang-tools-extra/pseudo/include/clang-pseudo/cli/CLI.h clang-tools-extra/pseudo/include/clang-pseudo/cxx/CXX.h clang-tools-extra/pseudo/lib/CMakeLists.txt clang-tools-extra/pseudo/lib/GLR.cpp clang-tools-extra/pseudo/lib/cli/CLI.cpp clang-tools-extra/pseudo/lib/cli/CMakeLists.txt clang-tools-extra/pseudo/lib/cxx.bnf clang-tools-extra/pseudo/lib/cxx/CMakeLists.txt clang-tools-extra/pseudo/lib/cxx/CXX.cpp clang-tools-extra/pseudo/tool/CMakeLists.txt clang-tools-extra/pseudo/tool/ClangPseudo.cpp clang-tools-extra/pseudo/unittests/GLRTest.cpp
Index: clang-tools-extra/pseudo/unittests/GLRTest.cpp =================================================================== --- clang-tools-extra/pseudo/unittests/GLRTest.cpp +++ clang-tools-extra/pseudo/unittests/GLRTest.cpp @@ -48,7 +48,15 @@ std::vector<std::string> Diags; G = Grammar::parseBNF(GrammarBNF, Diags); } - + // FIXME: move to TokenStream class. + TokenStream emptyTokenStream() { + TokenStream Empty; + Empty.finalize(); + return Empty; + } + ParseLang getTestLang() { + return {*G, Table, Guards}; + } void buildGrammar(std::vector<std::string> Nonterminals, std::vector<std::string> Rules) { Nonterminals.push_back("_"); @@ -91,6 +99,8 @@ protected: std::unique_ptr<Grammar> G; + LRTable Table; + llvm::DenseMap<ExtensionID, Guard> Guards; ForestArena Arena; GSS GSStack; std::vector<const GSS::Node*> NewHeadResults; @@ -117,7 +127,7 @@ /*Parents=*/{GSSNode0}); buildGrammar({}, {}); // Create a fake empty grammar. - LRTable T = LRTable::buildForTests(G->table(), /*Entries=*/{}); + Table = LRTable::buildForTests(G->table(), /*Entries=*/{}); ForestNode &SemiTerminal = Arena.createTerminal(tok::semi, 0); std::vector<ParseStep> PendingShift = { @@ -125,7 +135,7 @@ {GSSNode3, Action::shift(5)}, {GSSNode2, Action::shift(4)}, }; - glrShift(PendingShift, SemiTerminal, {*G, T, Arena, GSStack}, + glrShift(PendingShift, SemiTerminal, {getTestLang(), Arena, GSStack}, captureNewHeads()); EXPECT_THAT(NewHeadResults, testing::UnorderedElementsAre( @@ -146,7 +156,7 @@ buildGrammar({"class-name", "enum-name"}, {"class-name := IDENTIFIER", "enum-name := IDENTIFIER"}); - LRTable Table = LRTable::buildForTests( + Table = LRTable::buildForTests( G->table(), {{/*State=*/0, id("class-name"), Action::goTo(2)}, {/*State=*/0, id("enum-name"), Action::goTo(3)}}); @@ -158,7 +168,7 @@ std::vector<ParseStep> PendingReduce = { {GSSNode1, Action::reduce(ruleFor("class-name"))}, {GSSNode1, Action::reduce(ruleFor("enum-name"))}}; - glrReduce(PendingReduce, {*G, Table, Arena, GSStack}, + 
glrReduce(PendingReduce, {getTestLang(), Arena, GSStack}, emptyTokenStream(), captureNewHeads()); EXPECT_THAT(NewHeadResults, testing::UnorderedElementsAre( @@ -189,13 +199,13 @@ /*State=*/4, &Arena.createTerminal(tok::star, /*TokenIndex=*/1), /*Parents=*/{GSSNode2, GSSNode3}); - LRTable Table = LRTable::buildForTests( + Table = LRTable::buildForTests( G->table(), {{/*State=*/2, id("ptr-operator"), Action::goTo(/*NextState=*/5)}, {/*State=*/3, id("ptr-operator"), Action::goTo(/*NextState=*/6)}}); std::vector<ParseStep> PendingReduce = { {GSSNode4, Action::reduce(ruleFor("ptr-operator"))}}; - glrReduce(PendingReduce, {*G, Table, Arena, GSStack}, + glrReduce(PendingReduce, {getTestLang(), Arena, GSStack}, emptyTokenStream(), captureNewHeads()); EXPECT_THAT(NewHeadResults, @@ -238,7 +248,7 @@ GSStack.addNode(/*State=*/4, /*ForestNode=*/EnumNameNode, /*Parents=*/{GSSNode2}); - LRTable Table = LRTable::buildForTests( + Table = LRTable::buildForTests( G->table(), {{/*State=*/1, id("type-name"), Action::goTo(/*NextState=*/5)}, {/*State=*/2, id("type-name"), Action::goTo(/*NextState=*/5)}}); @@ -250,7 +260,7 @@ { GSSNode4, Action::reduce(/*RuleID=*/1) // type-name := enum-name }}; - glrReduce(PendingReduce, {*G, Table, Arena, GSStack}, + glrReduce(PendingReduce, {getTestLang(), Arena, GSStack}, emptyTokenStream(), captureNewHeads()); // Verify that the stack heads are joint at state 5 after reduces. @@ -296,7 +306,7 @@ GSStack.addNode(/*State=*/4, /*ForestNode=*/StartTerminal, /*Parents=*/{GSSNode2}); - LRTable Table = LRTable::buildForTests( + Table = LRTable::buildForTests( G->table(), {{/*State=*/0, id("pointer"), Action::goTo(5)}}); // FIXME: figure out a way to get rid of the hard-coded reduce RuleID! 
std::vector<ParseStep> PendingReduce = { @@ -306,7 +316,7 @@ { GSSNode4, Action::reduce(/*RuleID=*/1) // pointer := enum-name * }}; - glrReduce(PendingReduce, {*G, Table, Arena, GSStack}, + glrReduce(PendingReduce,{getTestLang(), Arena, GSStack}, emptyTokenStream(), captureNewHeads()); EXPECT_THAT(NewHeadResults, testing::UnorderedElementsAre( @@ -340,12 +350,12 @@ left-paren := { expr := IDENTIFIER )bnf"); + Table = LRTable::buildSLR(*G); clang::LangOptions LOptions; const TokenStream &Tokens = cook(lex("{ abc", LOptions), LOptions); - auto LRTable = LRTable::buildSLR(*G); const ForestNode &Parsed = - glrParse(Tokens, {*G, LRTable, Arena, GSStack}, id("test")); + glrParse(Tokens, {getTestLang(), Arena, GSStack}, id("test")); // Verify that there is no duplicated sequence node of `expr := IDENTIFIER` // in the forest, see the `#1` and `=#1` in the dump string. EXPECT_EQ(Parsed.dumpRecursive(*G), @@ -380,10 +390,10 @@ )bnf"); clang::LangOptions LOptions; const TokenStream &Tokens = cook(lex("IDENTIFIER", LOptions), LOptions); - auto LRTable = LRTable::buildSLR(*G); + Table = LRTable::buildSLR(*G); const ForestNode &Parsed = - glrParse(Tokens, {*G, LRTable, Arena, GSStack}, id("test")); + glrParse(Tokens, {getTestLang(), Arena, GSStack}, id("test")); EXPECT_EQ(Parsed.dumpRecursive(*G), "[ 0, end) test := <ambiguous>\n" "[ 0, end) ├─test := IDENTIFIER\n" @@ -405,10 +415,10 @@ // of the nonterminal `test` when the next token is `eof`, verify that the // parser stops at the right state. 
const TokenStream &Tokens = cook(lex("id id", LOptions), LOptions); - auto LRTable = LRTable::buildSLR(*G); + Table = LRTable::buildSLR(*G); const ForestNode &Parsed = - glrParse(Tokens, {*G, LRTable, Arena, GSStack}, id("test")); + glrParse(Tokens, {getTestLang(), Arena, GSStack}, id("test")); EXPECT_EQ(Parsed.dumpRecursive(*G), "[ 0, end) test := IDENTIFIER test\n" "[ 0, 1) ├─IDENTIFIER := tok[0]\n" Index: clang-tools-extra/pseudo/tool/ClangPseudo.cpp =================================================================== --- clang-tools-extra/pseudo/tool/ClangPseudo.cpp +++ clang-tools-extra/pseudo/tool/ClangPseudo.cpp @@ -10,6 +10,7 @@ #include "clang-pseudo/DirectiveTree.h" #include "clang-pseudo/GLR.h" #include "clang-pseudo/Token.h" +#include "clang-pseudo/cli/CLI.h" #include "clang-pseudo/grammar/Grammar.h" #include "clang-pseudo/grammar/LRGraph.h" #include "clang-pseudo/grammar/LRTable.h" @@ -20,14 +21,11 @@ #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/Signals.h" -using clang::pseudo::Grammar; using clang::pseudo::TokenStream; using llvm::cl::desc; using llvm::cl::init; using llvm::cl::opt; -static opt<std::string> - Grammar("grammar", desc("Parse and check a BNF grammar file."), init("")); static opt<bool> PrintGrammar("print-grammar", desc("Print the grammar.")); static opt<bool> PrintGraph("print-graph", desc("Print the LR graph for the grammar")); @@ -93,42 +91,34 @@ pairBrackets(*ParseableStream); } - if (Grammar.getNumOccurrences()) { - std::string Text = readOrDie(Grammar); - std::vector<std::string> Diags; - auto G = Grammar::parseBNF(Text, Diags); - - if (!Diags.empty()) { - llvm::errs() << llvm::join(Diags, "\n"); - return 2; - } - llvm::outs() << llvm::formatv("grammar file {0} is parsed successfully\n", - Grammar); + if (true) { + const auto &Lang = clang::pseudo::getParseLang(); if (PrintGrammar) - llvm::outs() << G->dump(); + llvm::outs() << Lang.G.dump(); if (PrintGraph) - llvm::outs() << 
clang::pseudo::LRGraph::buildLR0(*G).dumpForTests(*G); - auto LRTable = clang::pseudo::LRTable::buildSLR(*G); + llvm::outs() << clang::pseudo::LRGraph::buildLR0(Lang.G).dumpForTests( + Lang.G); + if (PrintTable) - llvm::outs() << LRTable.dumpForTests(*G); + llvm::outs() << Lang.Table.dumpForTests(Lang.G); if (PrintStatistics) - llvm::outs() << LRTable.dumpStatistics(); + llvm::outs() << Lang.Table.dumpStatistics(); if (ParseableStream) { clang::pseudo::ForestArena Arena; clang::pseudo::GSS GSS; llvm::Optional<clang::pseudo::SymbolID> StartSymID = - G->findNonterminal(StartSymbol); + Lang.G.findNonterminal(StartSymbol); if (!StartSymID) { llvm::errs() << llvm::formatv( - "The start symbol {0} doesn't exit in the grammar!\n", Grammar); + "The start symbol {0} doesn't exit in the grammar!\n", StartSymbol); return 2; } - auto &Root = glrParse(*ParseableStream, - clang::pseudo::ParseParams{*G, LRTable, Arena, GSS}, - *StartSymID); + auto &Root = + glrParse(*ParseableStream, + clang::pseudo::ParseParams{Lang, Arena, GSS}, *StartSymID); if (PrintForest) - llvm::outs() << Root.dumpRecursive(*G, /*Abbreviated=*/true); + llvm::outs() << Root.dumpRecursive(Lang.G, /*Abbreviated=*/true); if (PrintStatistics) { llvm::outs() << "Forest bytes: " << Arena.bytes() Index: clang-tools-extra/pseudo/tool/CMakeLists.txt =================================================================== --- clang-tools-extra/pseudo/tool/CMakeLists.txt +++ clang-tools-extra/pseudo/tool/CMakeLists.txt @@ -13,5 +13,6 @@ PRIVATE clangPseudo clangPseudoGrammar + clangPseudoCLI ) Index: clang-tools-extra/pseudo/lib/cxx/CXX.cpp =================================================================== --- clang-tools-extra/pseudo/lib/cxx/CXX.cpp +++ clang-tools-extra/pseudo/lib/cxx/CXX.cpp @@ -7,7 +7,10 @@ //===----------------------------------------------------------------------===// #include "clang-pseudo/cxx/CXX.h" +#include "clang-pseudo/Forest.h" +#include "clang-pseudo/grammar/Grammar.h" #include 
"clang-pseudo/grammar/LRTable.h" +#include <utility> namespace clang { namespace pseudo { @@ -29,6 +32,30 @@ return *Table; } +namespace { +bool guardOverride(llvm::ArrayRef<const ForestNode *> RHS, + const TokenStream &Tokens) { + assert(RHS.size() == 1 && + RHS.front()->symbol() == tokenSymbol(clang::tok::identifier)); + return Tokens.tokens()[RHS.front()->startTokenIndex()].text() == "override"; +} +bool guardFinal(llvm::ArrayRef<const ForestNode *> RHS, + const TokenStream &Tokens) { + assert(RHS.size() == 1 && + RHS.front()->symbol() == tokenSymbol(clang::tok::identifier)); + return Tokens.tokens()[RHS.front()->startTokenIndex()].text() == "final"; +} +} // namespace + +const ParseLang &getLanguage() { + static llvm::DenseMap<ExtensionID, Guard> *Guards = + new llvm::DenseMap<ExtensionID, Guard>( + {{(ExtensionID)Extension::Override, guardOverride}, + {(ExtensionID)Extension::Final, guardFinal}}); + static ParseLang *L = new ParseLang{getGrammar(), getLRTable(), *Guards}; + return *L; +} + } // namespace cxx } // namespace pseudo } // namespace clang Index: clang-tools-extra/pseudo/lib/cxx.bnf =================================================================== --- clang-tools-extra/pseudo/lib/cxx.bnf +++ clang-tools-extra/pseudo/lib/cxx.bnf @@ -739,8 +739,8 @@ #! Contextual keywords -- clang lexer always lexes them as identifier tokens. #! Placeholders for literal text in the grammar that lex as other things. 
-contextual-override := IDENTIFIER -contextual-final := IDENTIFIER +contextual-override := IDENTIFIER [guard=Override] +contextual-final := IDENTIFIER [guard=Final] contextual-zero := NUMERIC_CONSTANT module-keyword := IDENTIFIER import-keyword := IDENTIFIER Index: clang-tools-extra/pseudo/lib/cxx/CMakeLists.txt =================================================================== --- clang-tools-extra/pseudo/lib/cxx/CMakeLists.txt +++ clang-tools-extra/pseudo/lib/cxx/CMakeLists.txt @@ -10,4 +10,5 @@ LINK_LIBS clangPseudoGrammar + clangPseudo ) Index: clang-tools-extra/pseudo/lib/cli/CMakeLists.txt =================================================================== --- clang-tools-extra/pseudo/lib/cli/CMakeLists.txt +++ clang-tools-extra/pseudo/lib/cli/CMakeLists.txt @@ -2,12 +2,10 @@ Support ) -add_clang_library(clangPseudoCXX - CXX.cpp - - DEPENDS - cxx_gen +add_clang_library(clangPseudoCLI + CLI.cpp LINK_LIBS clangPseudoGrammar + clangPseudoCXX ) Index: clang-tools-extra/pseudo/lib/cli/CLI.cpp =================================================================== --- /dev/null +++ clang-tools-extra/pseudo/lib/cli/CLI.cpp @@ -0,0 +1,51 @@ + + +#include "clang-pseudo/cxx/CXX.h" + +#include "clang-pseudo/Language.h" +#include "clang-pseudo/grammar/LRTable.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/ErrorOr.h" +#include "llvm/Support/MemoryBuffer.h" + +using llvm::cl::desc; +using llvm::cl::init; +using llvm::cl::opt; +static opt<std::string> + Grammar("grammar", + desc("Specify a BNF grammar file path, or builtin language (cxx)."), + init("cxx")); + +namespace clang { +namespace pseudo { + +const ParseLang &getParseLang() { + if (::Grammar == "cxx") + return cxx::getLanguage(); + + static ParseLang *PL = [&]() { + llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> GrammarText = + llvm::MemoryBuffer::getFile(::Grammar); + if (std::error_code EC = GrammarText.getError()) { + llvm::errs() << "Error: can't read grammar file '" << ::Grammar + << "': " 
<< EC.message() << "\n"; + std::exit(1); + } + std::vector<std::string> Diags; + auto G = Grammar::parseBNF(GrammarText->get()->getBuffer(), Diags); + if (!Diags.empty()) { + for (const auto &Diag : Diags) + llvm::errs() << Diag << "\n"; + std::exit(1); + } + + LRTable *Table = new LRTable(LRTable::buildSLR(*G)); + llvm::DenseMap<ExtensionID, Guard> *Guards = + new llvm::DenseMap<ExtensionID, Guard>(); + return new ParseLang{*G.release(), *Table, *Guards}; + }(); + return *PL; +} + +} // namespace pseudo +} // namespace clang \ No newline at end of file Index: clang-tools-extra/pseudo/lib/GLR.cpp =================================================================== --- clang-tools-extra/pseudo/lib/GLR.cpp +++ clang-tools-extra/pseudo/lib/GLR.cpp @@ -41,14 +41,14 @@ SymbolID StartSymbol) { assert(isNonterminal(StartSymbol) && "Start symbol must be a nonterminal"); llvm::ArrayRef<ForestNode> Terminals = Params.Forest.createTerminals(Tokens); - auto &G = Params.G; + auto &G = Params.Lang.G; (void)G; auto &GSS = Params.GSStack; // Lists of active shift, reduce actions. std::vector<ParseStep> PendingShift, PendingReduce; auto AddSteps = [&](const GSS::Node *Head, SymbolID NextTok) { - for (const auto &Action : Params.Table.getActions(Head->State, NextTok)) { + for (const auto &Action : Params.Lang.Table.getActions(Head->State, NextTok)) { switch (Action.kind()) { case LRTable::Action::Shift: PendingShift.push_back({Head, Action}); @@ -61,7 +61,7 @@ } } }; - StateID StartState = Params.Table.getStartState(StartSymbol); + StateID StartState = Params.Lang.Table.getStartState(StartSymbol); std::vector<const GSS::Node *> NewHeads = { GSS.addNode(/*State=*/StartState, /*ForestNode=*/nullptr, {})}; @@ -84,7 +84,7 @@ for (const auto *Head : NewHeads) AddSteps(Head, Terminal.symbol()); NewHeads.clear(); - glrReduce(PendingReduce, Params, + glrReduce(PendingReduce, Params, Tokens, [&](const GSS::Node * NewHead) { // A reduce will enable more steps. 
AddSteps(NewHead, Terminal.symbol()); @@ -98,10 +98,10 @@ for (const auto *Heads : NewHeads) AddSteps(Heads, tokenSymbol(tok::eof)); - StateID AcceptState = Params.Table.getGoToState(StartState, StartSymbol); + StateID AcceptState = Params.Lang.Table.getGoToState(StartState, StartSymbol); // Collect new heads created from the final reduce. std::vector<const GSS::Node*> Heads; - glrReduce(PendingReduce, Params, [&](const GSS::Node *NewHead) { + glrReduce(PendingReduce, Params, Tokens, [&](const GSS::Node *NewHead) { Heads.push_back(NewHead); // A reduce will enable more steps. AddSteps(NewHead, tokenSymbol(tok::eof)); @@ -147,7 +147,7 @@ }) && "Pending shift actions must be shift actions"); LLVM_DEBUG(llvm::dbgs() << llvm::formatv(" Shift {0} ({1} active heads):\n", - Params.G.symbolName(NewTok.symbol()), + Params.Lang.G.symbolName(NewTok.symbol()), PendingShift.size())); // We group pending shifts by their target state so we can merge them. @@ -232,6 +232,7 @@ // 2 by`enum-name := class-name STAR`: // 0--5(pointer) // 5 is goto(0, pointer) void glrReduce(std::vector<ParseStep> &PendingReduce, const ParseParams &Params, + const TokenStream& Tokens, NewHeadCallback NewHeadCB) { // There are two interacting complications: // 1. Performing one reduce can unlock new reduces on the newly-created head. @@ -294,12 +295,18 @@ // Pop walks up the parent chain(s) for a reduction from Head by to Rule. // Once we reach the end, record the bases and sequences. 
auto Pop = [&](const GSS::Node *Head, RuleID RID) { - LLVM_DEBUG(llvm::dbgs() << " Pop " << Params.G.dumpRule(RID) << "\n"); - const auto &Rule = Params.G.lookupRule(RID); + LLVM_DEBUG(llvm::dbgs() << " Pop " << Params.Lang.G.dumpRule(RID) << "\n"); + const auto &Rule = Params.Lang.G.lookupRule(RID); Family F{/*Start=*/0, /*Symbol=*/Rule.Target, /*Rule=*/RID}; TempSequence.resize_for_overwrite(Rule.Size); auto DFS = [&](const GSS::Node *N, unsigned I, auto &DFS) { if (I == Rule.Size) { + if (Rule.Guard) { + auto It = Params.Lang.Guards.find(Rule.Guard); + assert(It != Params.Lang.Guards.end() && "missing guard!"); + if (!It->getSecond()(TempSequence, Tokens)) + return; + } F.Start = TempSequence.front()->startTokenIndex(); LLVM_DEBUG(llvm::dbgs() << " --> base at S" << N->State << "\n"); Sequences.emplace(F, PushSpec{N, TempSequence}); @@ -331,7 +338,7 @@ while (!Sequences.empty()) { Family F = Sequences.top().first; - LLVM_DEBUG(llvm::dbgs() << " Push " << Params.G.symbolName(F.Symbol) + LLVM_DEBUG(llvm::dbgs() << " Push " << Params.Lang.G.symbolName(F.Symbol) << " from token " << F.Start << "\n"); // Grab the sequences and bases for this family. @@ -344,7 +351,7 @@ FamilySequences.emplace_back(Sequences.top().first.Rule, Sequences.top().second.Seq); FamilyBases.emplace_back( - Params.Table.getGoToState(Sequences.top().second.Base->State, + Params.Lang.Table.getGoToState(Sequences.top().second.Base->State, F.Symbol), Sequences.top().second.Base); @@ -362,7 +369,7 @@ SequenceNodes.size() == 1 ? SequenceNodes.front() : &Params.Forest.createAmbiguous(F.Symbol, SequenceNodes); - LLVM_DEBUG(llvm::dbgs() << " --> " << Parsed->dump(Params.G) << "\n"); + LLVM_DEBUG(llvm::dbgs() << " --> " << Parsed->dump(Params.Lang.G) << "\n"); // Bases for this family, deduplicate them, and group by the goTo State. 
sortAndUnique(FamilyBases); Index: clang-tools-extra/pseudo/lib/CMakeLists.txt =================================================================== --- clang-tools-extra/pseudo/lib/CMakeLists.txt +++ clang-tools-extra/pseudo/lib/CMakeLists.txt @@ -1,3 +1,4 @@ +add_subdirectory(cli) add_subdirectory(cxx) add_subdirectory(grammar) Index: clang-tools-extra/pseudo/include/clang-pseudo/cxx/CXX.h =================================================================== --- clang-tools-extra/pseudo/include/clang-pseudo/cxx/CXX.h +++ clang-tools-extra/pseudo/include/clang-pseudo/cxx/CXX.h @@ -23,6 +23,7 @@ #ifndef CLANG_PSEUDO_CXX_CXX_H #define CLANG_PSEUDO_CXX_CXX_H +#include "clang-pseudo/Language.h" #include "clang-pseudo/grammar/Grammar.h" namespace clang { @@ -38,11 +39,19 @@ #undef NONTERMINAL }; +enum class Extension : ExtensionID { +#define EXTENSION(X, Y) X = Y, +#include "CXXSymbols.inc" +#undef EXTENSION +}; + // Returns the C++ grammar. const Grammar &getGrammar(); // Returns the corresponding LRTable for the C++ grammar. 
const LRTable &getLRTable(); +const ParseLang &getLanguage(); + } // namespace cxx } // namespace pseudo Index: clang-tools-extra/pseudo/include/clang-pseudo/cli/CLI.h =================================================================== --- /dev/null +++ clang-tools-extra/pseudo/include/clang-pseudo/cli/CLI.h @@ -0,0 +1,12 @@ + +#ifndef CLANG_PSEUDO_CLI_CLI_H +#define CLANG_PSEUDO_CLI_CLI_H + +namespace clang { +namespace pseudo { +struct ParseLang; +const ParseLang &getParseLang(); +} // namespace pseudo +} // namespace clang + +#endif // CLANG_PSEUDO_CLI_CLI_H Index: clang-tools-extra/pseudo/include/clang-pseudo/Language.h =================================================================== --- /dev/null +++ clang-tools-extra/pseudo/include/clang-pseudo/Language.h @@ -0,0 +1,31 @@ + +#ifndef CLANG_PSEUDO_LANGUAGE_H +#define CLANG_PSEUDO_LANGUAGE_H + +#include "clang-pseudo/grammar/Grammar.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/STLFunctionalExtras.h" + +namespace clang { +namespace pseudo { + +class ForestNode; +class TokenStream; +class LRTable; + +using Guard = llvm::function_ref<bool(llvm::ArrayRef<const ForestNode *> RHS, + const TokenStream &)>; + +struct ParseLang { + const Grammar &G; + const LRTable &Table; + const llvm::DenseMap<ExtensionID, Guard> &Guards; + + // FIXME: add clang::LangOptions. + // FIXME: add default start symbols. 
+}; + +} // namespace pseudo +} // namespace clang + +#endif // CLANG_PSEUDO_LANGUAGE_H \ No newline at end of file Index: clang-tools-extra/pseudo/include/clang-pseudo/GLR.h =================================================================== --- clang-tools-extra/pseudo/include/clang-pseudo/GLR.h +++ clang-tools-extra/pseudo/include/clang-pseudo/GLR.h @@ -30,6 +30,7 @@ #define CLANG_PSEUDO_GLR_H #include "clang-pseudo/Forest.h" +#include "clang-pseudo/Language.h" #include "clang-pseudo/grammar/Grammar.h" #include "clang-pseudo/grammar/LRTable.h" #include "llvm/Support/Allocator.h" @@ -113,12 +114,7 @@ // Parameters for the GLR parsing. struct ParseParams { - // The grammar of the language we're going to parse. - const Grammar &G; - // The LR table which GLR uses to parse the input, should correspond to the - // Grammar G. - const LRTable &Table; - + const ParseLang &Lang; // Arena for data structure used by the GLR algorithm. ForestArena &Forest; // Storage for the output forest. GSS &GSStack; // Storage for parsing stacks. @@ -159,7 +155,7 @@ // // Exposed for testing only. 
void glrReduce(std::vector<ParseStep> &PendingReduce, const ParseParams &Params, - NewHeadCallback NewHeadCB); + const TokenStream &Tokens, NewHeadCallback NewHeadCB); } // namespace pseudo } // namespace clang Index: clang-tools-extra/pseudo/gen/Main.cpp =================================================================== --- clang-tools-extra/pseudo/gen/Main.cpp +++ clang-tools-extra/pseudo/gen/Main.cpp @@ -79,6 +79,14 @@ switch (Emit) { case EmitSymbolList: + Out.os() << R"cpp( +#ifndef NONTERMINAL +#define NONTERMINAL(X, Y) +#endif +#ifndef EXTENSION +#define EXTENSION(X, Y) +#endif + )cpp"; for (clang::pseudo::SymbolID ID = 0; ID < G->table().Nonterminals.size(); ++ID) { std::string Name = G->symbolName(ID).str(); @@ -86,6 +94,16 @@ std::replace(Name.begin(), Name.end(), '-', '_'); Out.os() << llvm::formatv("NONTERMINAL({0}, {1})\n", Name, ID); } + for (clang::pseudo::ExtensionID AID = 1 /*skip the sentinel 0 value*/; + AID < G->table().AttributeValues.size(); ++AID) { + llvm::StringRef Name = G->table().AttributeValues[AID]; + assert(!Name.empty()); + Out.os() << llvm::formatv("EXTENSION({0}, {1})\n", Name, AID); + } + Out.os() << R"cpp( +#undef NONTERMINAL +#undef EXTENSION + )cpp"; break; case EmitGrammarContent: for (llvm::StringRef Line : llvm::split(GrammarText, '\n')) { Index: clang-tools-extra/pseudo/fuzzer/Fuzzer.cpp =================================================================== --- clang-tools-extra/pseudo/fuzzer/Fuzzer.cpp +++ clang-tools-extra/pseudo/fuzzer/Fuzzer.cpp @@ -10,6 +10,7 @@ #include "clang-pseudo/Forest.h" #include "clang-pseudo/GLR.h" #include "clang-pseudo/Token.h" +#include "clang-pseudo/cli/CLI.h" #include "clang-pseudo/grammar/Grammar.h" #include "clang-pseudo/grammar/LRTable.h" #include "clang/Basic/LangOptions.h" @@ -24,28 +25,11 @@ class Fuzzer { clang::LangOptions LangOpts = clang::pseudo::genericLangOpts(); - std::unique_ptr<Grammar> G; LRTable T; bool Print; public: - Fuzzer(llvm::StringRef GrammarPath, bool Print) : 
Print(Print) { - llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> GrammarText = - llvm::MemoryBuffer::getFile(GrammarPath); - if (std::error_code EC = GrammarText.getError()) { - llvm::errs() << "Error: can't read grammar file '" << GrammarPath - << "': " << EC.message() << "\n"; - std::exit(1); - } - std::vector<std::string> Diags; - G = Grammar::parseBNF(GrammarText->get()->getBuffer(), Diags); - if (!Diags.empty()) { - for (const auto &Diag : Diags) - llvm::errs() << Diag << "\n"; - std::exit(1); - } - T = LRTable::buildSLR(*G); - } + Fuzzer(bool Print) : Print(Print) {} void operator()(llvm::StringRef Code) { std::string CodeStr = Code.str(); // Must be null-terminated. @@ -58,11 +42,11 @@ clang::pseudo::ForestArena Arena; clang::pseudo::GSS GSS; - auto &Root = - glrParse(ParseableStream, clang::pseudo::ParseParams{*G, T, Arena, GSS}, - *G->findNonterminal("translation-unit")); + auto &Root = glrParse( + ParseableStream, clang::pseudo::ParseParams{getParseLang(), Arena, GSS}, + *getParseLang().G.findNonterminal("translation-unit")); if (Print) - llvm::outs() << Root.dumpRecursive(*G); + llvm::outs() << Root.dumpRecursive(getParseLang().G); } }; @@ -78,13 +62,9 @@ // -grammar=<file> (required) - path to cxx.bnf // -print - used for testing the fuzzer int LLVMFuzzerInitialize(int *Argc, char ***Argv) { - llvm::StringRef GrammarFile; bool PrintForest = false; auto ConsumeArg = [&](llvm::StringRef Arg) -> bool { - if (Arg.consume_front("-grammar=")) { - GrammarFile = Arg; - return true; - } else if (Arg == "-print") { + if (Arg == "-print") { PrintForest = true; return true; } @@ -92,11 +72,7 @@ }; *Argc = std::remove_if(*Argv + 1, *Argv + *Argc, ConsumeArg) - *Argv; - if (GrammarFile.empty()) { - fprintf(stderr, "Fuzzer needs -grammar=/path/to/cxx.bnf\n"); - exit(1); - } - clang::pseudo::Fuzz = new clang::pseudo::Fuzzer(GrammarFile, PrintForest); + clang::pseudo::Fuzz = new clang::pseudo::Fuzzer(PrintForest); return 0; } Index: 
clang-tools-extra/pseudo/fuzzer/CMakeLists.txt =================================================================== --- clang-tools-extra/pseudo/fuzzer/CMakeLists.txt +++ clang-tools-extra/pseudo/fuzzer/CMakeLists.txt @@ -11,5 +11,6 @@ target_link_libraries(clang-pseudo-fuzzer PRIVATE clangPseudo + clangPseudoCLI clangPseudoGrammar ) Index: clang-tools-extra/pseudo/benchmarks/CMakeLists.txt =================================================================== --- clang-tools-extra/pseudo/benchmarks/CMakeLists.txt +++ clang-tools-extra/pseudo/benchmarks/CMakeLists.txt @@ -3,6 +3,7 @@ target_link_libraries(ClangPseudoBenchmark PRIVATE clangPseudo + clangPseudoCLI clangPseudoGrammar LLVMSupport ) Index: clang-tools-extra/pseudo/benchmarks/Benchmark.cpp =================================================================== --- clang-tools-extra/pseudo/benchmarks/Benchmark.cpp +++ clang-tools-extra/pseudo/benchmarks/Benchmark.cpp @@ -25,6 +25,7 @@ #include "clang-pseudo/Forest.h" #include "clang-pseudo/GLR.h" #include "clang-pseudo/Token.h" +#include "clang-pseudo/cli/CLI.h" #include "clang-pseudo/grammar/Grammar.h" #include "clang-pseudo/grammar/LRTable.h" #include "clang/Basic/LangOptions.h" @@ -39,9 +40,6 @@ using llvm::cl::opt; using llvm::cl::Required; -static opt<std::string> GrammarFile("grammar", - desc("Parse and check a BNF grammar file."), - Required); static opt<std::string> Source("source", desc("Source file"), Required); namespace clang { @@ -49,11 +47,9 @@ namespace bench { namespace { -const std::string *GrammarText = nullptr; const std::string *SourceText = nullptr; -const Grammar *G = nullptr; -void setupGrammarAndSource() { +void setupSource() { auto ReadFile = [](llvm::StringRef FilePath) -> std::string { llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> GrammarText = llvm::MemoryBuffer::getFile(FilePath); @@ -64,22 +60,12 @@ } return GrammarText.get()->getBuffer().str(); }; - GrammarText = new std::string(ReadFile(GrammarFile)); SourceText = new 
std::string(ReadFile(Source)); - std::vector<std::string> Diags; - G = Grammar::parseBNF(*GrammarText, Diags).release(); } -static void parseBNF(benchmark::State &State) { - std::vector<std::string> Diags; - for (auto _ : State) - Grammar::parseBNF(*GrammarText, Diags); -} -BENCHMARK(parseBNF); - static void buildSLR(benchmark::State &State) { for (auto _ : State) - LRTable::buildSLR(*G); + LRTable::buildSLR(getParseLang().G); } BENCHMARK(buildSLR); @@ -129,13 +115,13 @@ BENCHMARK(preprocess); static void glrParse(benchmark::State &State) { - LRTable Table = clang::pseudo::LRTable::buildSLR(*G); - SymbolID StartSymbol = *G->findNonterminal("translation-unit"); + SymbolID StartSymbol = *getParseLang().G.findNonterminal("translation-unit"); TokenStream Stream = lexAndPreprocess(); for (auto _ : State) { pseudo::ForestArena Forest; pseudo::GSS GSS; - pseudo::glrParse(Stream, ParseParams{*G, Table, Forest, GSS}, StartSymbol); + pseudo::glrParse(Stream, ParseParams{getParseLang(), Forest, GSS}, + StartSymbol); } State.SetBytesProcessed(static_cast<uint64_t>(State.iterations()) * SourceText->size()); @@ -143,14 +129,13 @@ BENCHMARK(glrParse); static void full(benchmark::State &State) { - LRTable Table = clang::pseudo::LRTable::buildSLR(*G); - SymbolID StartSymbol = *G->findNonterminal("translation-unit"); + SymbolID StartSymbol = *getParseLang().G.findNonterminal("translation-unit"); for (auto _ : State) { TokenStream Stream = lexAndPreprocess(); pseudo::ForestArena Forest; pseudo::GSS GSS; - pseudo::glrParse(lexAndPreprocess(), ParseParams{*G, Table, Forest, GSS}, - StartSymbol); + pseudo::glrParse(lexAndPreprocess(), + ParseParams{getParseLang(), Forest, GSS}, StartSymbol); } State.SetBytesProcessed(static_cast<uint64_t>(State.iterations()) * SourceText->size()); @@ -165,7 +150,7 @@ int main(int argc, char *argv[]) { benchmark::Initialize(&argc, argv); llvm::cl::ParseCommandLineOptions(argc, argv); - clang::pseudo::bench::setupGrammarAndSource(); + 
clang::pseudo::bench::setupSource(); benchmark::RunSpecifiedBenchmarks(); return 0; }
_______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits