hokein updated this revision to Diff 430506. hokein added a comment. format fix
Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D125667/new/ https://reviews.llvm.org/D125667 Files: clang-tools-extra/pseudo/CMakeLists.txt clang-tools-extra/pseudo/gen/CMakeLists.txt clang-tools-extra/pseudo/gen/Main.cpp clang-tools-extra/pseudo/include/CMakeLists.txt clang-tools-extra/pseudo/include/clang-pseudo/cxx/CXX.h clang-tools-extra/pseudo/lib/CMakeLists.txt clang-tools-extra/pseudo/lib/Grammar.cpp clang-tools-extra/pseudo/lib/GrammarBNF.cpp clang-tools-extra/pseudo/lib/LRGraph.cpp clang-tools-extra/pseudo/lib/LRTable.cpp clang-tools-extra/pseudo/lib/LRTableBuild.cpp clang-tools-extra/pseudo/lib/cxx/CMakeLists.txt clang-tools-extra/pseudo/lib/cxx/CXX.cpp clang-tools-extra/pseudo/lib/grammar/CMakeLists.txt clang-tools-extra/pseudo/lib/grammar/Grammar.cpp clang-tools-extra/pseudo/lib/grammar/GrammarBNF.cpp clang-tools-extra/pseudo/lib/grammar/LRGraph.cpp clang-tools-extra/pseudo/lib/grammar/LRTable.cpp clang-tools-extra/pseudo/lib/grammar/LRTableBuild.cpp
Index: clang-tools-extra/pseudo/lib/LRTableBuild.cpp =================================================================== --- /dev/null +++ clang-tools-extra/pseudo/lib/LRTableBuild.cpp @@ -1,146 +0,0 @@ -//===--- LRTableBuild.cpp - Build a LRTable from LRGraph ---------*- C++-*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "clang-pseudo/Grammar.h" -#include "clang-pseudo/LRGraph.h" -#include "clang-pseudo/LRTable.h" -#include "clang/Basic/TokenKinds.h" -#include <cstdint> - -namespace llvm { -template <> struct DenseMapInfo<clang::pseudo::LRTable::Entry> { - using Entry = clang::pseudo::LRTable::Entry; - static inline Entry getEmptyKey() { - static Entry E{static_cast<clang::pseudo::SymbolID>(-1), 0, - clang::pseudo::LRTable::Action::sentinel()}; - return E; - } - static inline Entry getTombstoneKey() { - static Entry E{static_cast<clang::pseudo::SymbolID>(-2), 0, - clang::pseudo::LRTable::Action::sentinel()}; - return E; - } - static unsigned getHashValue(const Entry &I) { - return llvm::hash_combine(I.State, I.Symbol, I.Act.opaque()); - } - static bool isEqual(const Entry &LHS, const Entry &RHS) { - return LHS.State == RHS.State && LHS.Symbol == RHS.Symbol && - LHS.Act == RHS.Act; - } -}; -} // namespace llvm - -namespace clang { -namespace pseudo { - -class LRTable::Builder { -public: - Builder(llvm::ArrayRef<std::pair<SymbolID, StateID>> StartStates) - : StartStates(StartStates) {} - - bool insert(Entry E) { return Entries.insert(std::move(E)).second; } - LRTable build(const GrammarTable >) && { - // E.g. given the following parsing table with 3 states and 3 terminals: - // - // a b c - // +-------+----+-------+-+ - // |state0 | | s0,r0 | | - // |state1 | acc| | | - // |state2 | | r1 | | - // +-------+----+-------+-+ - // - // The final LRTable: - // - TerminalOffset: [a] = 0, [b] = 1, [c] = 4, [d] = 4 (d is a sentinel) - // - States: [ 1, 0, 0, 2] - // Actions: [ acc, s0, r0, r1] - // ~~~ corresponding range for terminal a - // ~~~~~~~~~~ corresponding range for terminal b - // First step, we sort all entries by (Symbol, State, Action). - std::vector<Entry> Sorted(Entries.begin(), Entries.end()); - llvm::sort(Sorted, [](const Entry &L, const Entry &R) { - return std::forward_as_tuple(L.Symbol, L.State, L.Act.opaque()) < - std::forward_as_tuple(R.Symbol, R.State, R.Act.opaque()); - }); - - LRTable Table; - Table.Actions.reserve(Sorted.size()); - Table.States.reserve(Sorted.size()); - // We are good to finalize the States and Actions. - for (const auto &E : Sorted) { - Table.Actions.push_back(E.Act); - Table.States.push_back(E.State); - } - // Initialize the terminal and nonterminal offset, all ranges are empty by - // default. - Table.TerminalOffset = std::vector<uint32_t>(GT.Terminals.size() + 1, 0); - Table.NontermOffset = std::vector<uint32_t>(GT.Nonterminals.size() + 1, 0); - size_t SortedIndex = 0; - for (SymbolID NonterminalID = 0; NonterminalID < Table.NontermOffset.size(); - ++NonterminalID) { - Table.NontermOffset[NonterminalID] = SortedIndex; - while (SortedIndex < Sorted.size() && - Sorted[SortedIndex].Symbol == NonterminalID) - ++SortedIndex; - } - for (size_t Terminal = 0; Terminal < Table.TerminalOffset.size(); - ++Terminal) { - Table.TerminalOffset[Terminal] = SortedIndex; - while (SortedIndex < Sorted.size() && - Sorted[SortedIndex].Symbol == - tokenSymbol(static_cast<tok::TokenKind>(Terminal))) - ++SortedIndex; - } - Table.StartStates = std::move(StartStates); - return Table; - } - -private: - llvm::DenseSet<Entry> Entries; - std::vector<std::pair<SymbolID, StateID>> StartStates; -}; - -LRTable LRTable::buildForTests(const GrammarTable >, - llvm::ArrayRef<Entry> Entries) { - Builder Build({}); - for (const Entry &E : Entries) - Build.insert(E); - return std::move(Build).build(GT); -} - -LRTable LRTable::buildSLR(const Grammar &G) { - auto Graph = LRGraph::buildLR0(G); - Builder Build(Graph.startStates()); - for (const auto &T : Graph.edges()) { - Action Act = isToken(T.Label) ? Action::shift(T.Dst) : Action::goTo(T.Dst); - Build.insert({T.Src, T.Label, Act}); - } - assert(Graph.states().size() <= (1 << StateBits) && - "Graph states execceds the maximum limit!"); - auto FollowSets = followSets(G); - for (StateID SID = 0; SID < Graph.states().size(); ++SID) { - for (const Item &I : Graph.states()[SID].Items) { - // If we've just parsed the start symbol, we can accept the input. - if (G.lookupRule(I.rule()).Target == G.underscore() && !I.hasNext()) { - Build.insert({SID, tokenSymbol(tok::eof), Action::accept(I.rule())}); - continue; - } - if (!I.hasNext()) { - // If we've reached the end of a rule A := ..., then we can reduce if - // the next token is in the follow set of A. - for (SymbolID Follow : FollowSets[G.lookupRule(I.rule()).Target]) { - assert(isToken(Follow)); - Build.insert({SID, Follow, Action::reduce(I.rule())}); - } - } - } - } - return std::move(Build).build(G.table()); -} - -} // namespace pseudo -} // namespace clang Index: clang-tools-extra/pseudo/lib/LRTable.cpp =================================================================== --- /dev/null +++ clang-tools-extra/pseudo/lib/LRTable.cpp @@ -1,135 +0,0 @@ -//===--- LRTable.cpp - Parsing table for LR parsers --------------*- C++-*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "clang-pseudo/LRTable.h" -#include "clang-pseudo/Grammar.h" -#include "llvm/ADT/ArrayRef.h" -#include "llvm/ADT/STLExtras.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/FormatVariadic.h" -#include "llvm/Support/raw_ostream.h" - -namespace clang { -namespace pseudo { - -llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const LRTable::Action &A) { - switch (A.kind()) { - case LRTable::Action::Shift: - return OS << llvm::formatv("shift state {0}", A.getShiftState()); - case LRTable::Action::Reduce: - return OS << llvm::formatv("reduce by rule {0}", A.getReduceRule()); - case LRTable::Action::GoTo: - return OS << llvm::formatv("go to state {0}", A.getGoToState()); - case LRTable::Action::Accept: - return OS << "acc"; - case LRTable::Action::Sentinel: - llvm_unreachable("unexpected Sentinel action kind!"); - } - llvm_unreachable("unexpected action kind!"); -} - -std::string LRTable::dumpStatistics() const { - StateID NumOfStates = 0; - for (StateID It : States) - NumOfStates = std::max(It, NumOfStates); - return llvm::formatv(R"( -Statistics of the LR parsing table: - number of states: {0} - number of actions: {1} - size of the table (bytes): {2} -)", - NumOfStates, Actions.size(), bytes()) - .str(); -} - -std::string LRTable::dumpForTests(const Grammar &G) const { - std::string Result; - llvm::raw_string_ostream OS(Result); - StateID MaxState = 0; - for (StateID It : States) - MaxState = std::max(MaxState, It); - OS << "LRTable:\n"; - for (StateID S = 0; S <= MaxState; ++S) { - OS << llvm::formatv("State {0}\n", S); - for (uint16_t Terminal = 0; Terminal < NumTerminals; ++Terminal) { - SymbolID TokID = tokenSymbol(static_cast<tok::TokenKind>(Terminal)); - for (auto A : find(S, TokID)) { - if (A.kind() == LRTable::Action::Shift) - OS.indent(4) << llvm::formatv("'{0}': shift state {1}\n", - G.symbolName(TokID), A.getShiftState()); - else if (A.kind() == LRTable::Action::Reduce) - OS.indent(4) << llvm::formatv("'{0}': reduce by rule {1} '{2}'\n", - G.symbolName(TokID), A.getReduceRule(), - G.dumpRule(A.getReduceRule())); - else if (A.kind() == LRTable::Action::Accept) - OS.indent(4) << llvm::formatv("'{0}': accept\n", G.symbolName(TokID)); - } - } - for (SymbolID NontermID = 0; NontermID < G.table().Nonterminals.size(); - ++NontermID) { - if (find(S, NontermID).empty()) - continue; - OS.indent(4) << llvm::formatv("'{0}': go to state {1}\n", - G.symbolName(NontermID), - getGoToState(S, NontermID)); - } - } - return OS.str(); -} - -llvm::ArrayRef<LRTable::Action> LRTable::getActions(StateID State, - SymbolID Terminal) const { - assert(pseudo::isToken(Terminal) && "expect terminal symbol!"); - return find(State, Terminal); -} - -LRTable::StateID LRTable::getGoToState(StateID State, - SymbolID Nonterminal) const { - assert(pseudo::isNonterminal(Nonterminal) && "expected nonterminal symbol!"); - auto Result = find(State, Nonterminal); - assert(Result.size() == 1 && Result.front().kind() == Action::GoTo); - return Result.front().getGoToState(); -} - -llvm::ArrayRef<LRTable::Action> LRTable::find(StateID Src, SymbolID ID) const { - size_t Idx = isToken(ID) ? symbolToToken(ID) : ID; - assert(isToken(ID) ? Idx + 1 < TerminalOffset.size() - : Idx + 1 < NontermOffset.size()); - std::pair<size_t, size_t> TargetStateRange = - isToken(ID) ? std::make_pair(TerminalOffset[Idx], TerminalOffset[Idx + 1]) - : std::make_pair(NontermOffset[Idx], NontermOffset[Idx + 1]); - auto TargetedStates = - llvm::makeArrayRef(States.data() + TargetStateRange.first, - States.data() + TargetStateRange.second); - - assert(llvm::is_sorted(TargetedStates) && - "subrange of the StateIdx should be sorted!"); - const LRTable::StateID *Start = llvm::partition_point( - TargetedStates, [&Src](LRTable::StateID S) { return S < Src; }); - if (Start == TargetedStates.end()) - return {}; - const LRTable::StateID *End = Start; - while (End != TargetedStates.end() && *End == Src) - ++End; - return llvm::makeArrayRef(&Actions[Start - States.data()], - /*length=*/End - Start); -} - -LRTable::StateID LRTable::getStartState(SymbolID Target) const { - assert(llvm::is_sorted(StartStates) && "StartStates must be sorted!"); - auto It = llvm::partition_point( - StartStates, [Target](const std::pair<SymbolID, StateID> &X) { - return X.first < Target; - }); - assert(It != StartStates.end() && It->first == Target && - "target symbol doesn't have a start state!"); - return It->second; -} - -} // namespace pseudo -} // namespace clang Index: clang-tools-extra/pseudo/lib/LRGraph.cpp =================================================================== --- /dev/null +++ clang-tools-extra/pseudo/lib/LRGraph.cpp @@ -1,242 +0,0 @@ -//===--- LRGraph.cpp - -------------------------------------------*- C++-*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "clang-pseudo/LRGraph.h" -#include "clang-pseudo/Grammar.h" -#include "llvm/ADT/DenseSet.h" -#include "llvm/ADT/Hashing.h" -#include "llvm/ADT/STLExtras.h" -#include "llvm/ADT/StringExtras.h" -#include "llvm/Support/FormatVariadic.h" -#include "llvm/Support/raw_ostream.h" - -using ItemSet = std::vector<clang::pseudo::Item>; - -namespace llvm { -// Support clang::pseudo::Item as DenseMap keys. -template <> struct DenseMapInfo<ItemSet> { - static inline ItemSet getEmptyKey() { - return {DenseMapInfo<clang::pseudo::Item>::getEmptyKey()}; - } - static inline ItemSet getTombstoneKey() { - return {DenseMapInfo<clang::pseudo::Item>::getTombstoneKey()}; - } - static unsigned getHashValue(const ItemSet &I) { - return llvm::hash_combine_range(I.begin(), I.end()); - } - static bool isEqual(const ItemSet &LHS, const ItemSet &RHS) { - return LHS == RHS; - } -}; -} // namespace llvm - -namespace clang { -namespace pseudo { -namespace { - -struct SortByNextSymbol { - SortByNextSymbol(const Grammar &G) : G(G) {} - bool operator()(const Item &L, const Item &R) { - if (L.hasNext() && R.hasNext() && L.next(G) != R.next(G)) - return L.next(G) < R.next(G); - if (L.hasNext() != R.hasNext()) - return L.hasNext() < R.hasNext(); // a trailing dot is minimal. - return L < R; - } - const Grammar &G; -}; - -// Computes a closure of the given item set S: -// - extends the given S to contain all options for parsing next token; -// - nonterminals after a dot are recursively expanded into the begin-state -// of all production rules that produce that nonterminal; -// -// Given -// Grammar rules = [ _ := E, E := E - T, E := T, T := n, T := ( E ) ] -// Input = [ E := . T ] -// returns [ E := . T, T := . n, T := . ( E ) ] -State closure(ItemSet Queue, const Grammar &G) { - llvm::DenseSet<Item> InQueue = {Queue.begin(), Queue.end()}; - // We reuse the passed-by-value Queue as the final result, as it's already - // initialized to the right elements. - size_t ItIndex = 0; - while (ItIndex < Queue.size()) { - const Item &ExpandingItem = Queue[ItIndex]; - ++ItIndex; - if (!ExpandingItem.hasNext()) - continue; - - SymbolID NextSym = ExpandingItem.next(G); - if (pseudo::isToken(NextSym)) - continue; - auto RRange = G.table().Nonterminals[NextSym].RuleRange; - for (RuleID RID = RRange.Start; RID < RRange.End; ++RID) { - Item NewItem = Item::start(RID, G); - if (InQueue.insert(NewItem).second) // new - Queue.push_back(std::move(NewItem)); - } - } - Queue.shrink_to_fit(); - llvm::sort(Queue, SortByNextSymbol(G)); - return {std::move(Queue)}; -} - -// Returns all next (with a dot advanced) kernel item sets, partitioned by the -// advanced symbol. -// -// Given -// S = [ E := . a b, E := E . - T ] -// returns [ -// {id(a), [ E := a . b ]}, -// {id(-), [ E := E - . T ]} -// ] -std::vector<std::pair<SymbolID, ItemSet>> -nextAvailableKernelItems(const State &S, const Grammar &G) { - std::vector<std::pair<SymbolID, ItemSet>> Results; - llvm::ArrayRef<Item> AllItems = S.Items; - AllItems = AllItems.drop_while([](const Item &I) { return !I.hasNext(); }); - while (!AllItems.empty()) { - SymbolID AdvancedSymbol = AllItems.front().next(G); - auto Batch = AllItems.take_while([AdvancedSymbol, &G](const Item &I) { - assert(I.hasNext()); - return I.next(G) == AdvancedSymbol; - }); - assert(!Batch.empty()); - AllItems = AllItems.drop_front(Batch.size()); - - // Advance a dot over the Symbol. - ItemSet Next; - for (const Item &I : Batch) - Next.push_back(I.advance()); - // sort the set to keep order determinism for hash computation. - llvm::sort(Next); - Results.push_back({AdvancedSymbol, std::move(Next)}); - } - return Results; -} - -} // namespace - -std::string Item::dump(const Grammar &G) const { - const auto &Rule = G.lookupRule(RID); - auto ToNames = [&](llvm::ArrayRef<SymbolID> Syms) { - std::vector<llvm::StringRef> Results; - for (auto SID : Syms) - Results.push_back(G.symbolName(SID)); - return Results; - }; - return llvm::formatv("{0} := {1} ⢠{2}", G.symbolName(Rule.Target), - llvm::join(ToNames(Rule.seq().take_front(DotPos)), " "), - llvm::join(ToNames(Rule.seq().drop_front(DotPos)), " ")) - .str(); -} - -std::string State::dump(const Grammar &G, unsigned Indent) const { - std::string Result; - llvm::raw_string_ostream OS(Result); - for (const auto &Item : Items) - OS.indent(Indent) << llvm::formatv("{0}\n", Item.dump(G)); - return OS.str(); -} - -std::string LRGraph::dumpForTests(const Grammar &G) const { - std::string Result; - llvm::raw_string_ostream OS(Result); - OS << "States:\n"; - for (StateID ID = 0; ID < States.size(); ++ID) { - OS << llvm::formatv("State {0}\n", ID); - OS << States[ID].dump(G, /*Indent*/ 4); - } - for (const auto &E : Edges) { - OS << llvm::formatv("{0} ->[{1}] {2}\n", E.Src, G.symbolName(E.Label), - E.Dst); - } - return OS.str(); -} - -LRGraph LRGraph::buildLR0(const Grammar &G) { - class Builder { - public: - Builder(const Grammar &G) : G(G) {} - - // Adds a given state if not existed. - std::pair<StateID, /*inserted*/ bool> insert(ItemSet KernelItems) { - assert(llvm::is_sorted(KernelItems) && - "Item must be sorted before inserting to a hash map!"); - auto It = StatesIndex.find(KernelItems); - if (It != StatesIndex.end()) - return {It->second, false}; - States.push_back(closure(KernelItems, G)); - StateID NextStateID = States.size() - 1; - StatesIndex.insert({std::move(KernelItems), NextStateID}); - return {NextStateID, true}; - } - - void insertEdge(StateID Src, StateID Dst, SymbolID Label) { - Edges.push_back({Src, Dst, Label}); - } - - // Returns a state with the given id. - const State &find(StateID ID) const { - assert(ID < States.size()); - return States[ID]; - } - - void addStartState(SymbolID Sym, StateID State) { - StartStates.push_back({Sym, State}); - } - - LRGraph build() && { - States.shrink_to_fit(); - Edges.shrink_to_fit(); - llvm::sort(StartStates); - StartStates.shrink_to_fit(); - return LRGraph(std::move(States), std::move(Edges), - std::move(StartStates)); - } - - private: - // Key is the **kernel** item sets. - llvm::DenseMap<ItemSet, /*index of States*/ size_t> StatesIndex; - std::vector<State> States; - std::vector<Edge> Edges; - const Grammar &G; - std::vector<std::pair<SymbolID, StateID>> StartStates; - } Builder(G); - - std::vector<StateID> PendingStates; - // Initialize states with the start symbol. - auto RRange = G.table().Nonterminals[G.underscore()].RuleRange; - for (RuleID RID = RRange.Start; RID < RRange.End; ++RID) { - auto StartState = std::vector<Item>{Item::start(RID, G)}; - auto Result = Builder.insert(std::move(StartState)); - assert(Result.second && "State must be new"); - PendingStates.push_back(Result.first); - - const Rule &StartRule = G.lookupRule(RID); - assert(StartRule.Size == 1 && - "Start rule must have exactly one symbol in its body!"); - Builder.addStartState(StartRule.seq().front(), Result.first); - } - - while (!PendingStates.empty()) { - auto CurrentStateID = PendingStates.back(); - PendingStates.pop_back(); - for (auto Next : - nextAvailableKernelItems(Builder.find(CurrentStateID), G)) { - auto Insert = Builder.insert(Next.second); - if (Insert.second) // new state, insert to the pending queue. - PendingStates.push_back(Insert.first); - Builder.insertEdge(CurrentStateID, Insert.first, Next.first); - } - } - return std::move(Builder).build(); -} - -} // namespace pseudo -} // namespace clang Index: clang-tools-extra/pseudo/lib/GrammarBNF.cpp =================================================================== --- /dev/null +++ clang-tools-extra/pseudo/lib/GrammarBNF.cpp @@ -1,300 +0,0 @@ -//===--- GrammarBNF.cpp - build grammar from BNF files ----------*- C++-*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "clang-pseudo/Grammar.h" -#include "clang/Basic/TokenKinds.h" -#include "llvm/ADT/DenseSet.h" -#include "llvm/ADT/STLExtras.h" -#include "llvm/ADT/StringExtras.h" -#include "llvm/Support/FormatVariadic.h" -#include <memory> -#include <utility> - -namespace clang { -namespace pseudo { - -namespace { -static const llvm::StringRef OptSuffix = "_opt"; -static const llvm::StringRef StartSymbol = "_"; - -// Builds grammar from BNF files. -class GrammarBuilder { -public: - GrammarBuilder(std::vector<std::string> &Diagnostics) - : Diagnostics(Diagnostics) {} - - std::unique_ptr<Grammar> build(llvm::StringRef BNF) { - auto Specs = eliminateOptional(parse(BNF)); - - assert(llvm::all_of(Specs, - [](const RuleSpec &R) { - if (R.Target.endswith(OptSuffix)) - return false; - return llvm::all_of( - R.Sequence, [](const RuleSpec::Element &E) { - return !E.Symbol.endswith(OptSuffix); - }); - }) && - "Optional symbols should be eliminated!"); - - auto T = std::make_unique<GrammarTable>(); - - // Assemble the name->ID and ID->nonterminal name maps. - llvm::DenseSet<llvm::StringRef> UniqueNonterminals; - llvm::DenseMap<llvm::StringRef, SymbolID> SymbolIds; - for (uint16_t I = 0; I < NumTerminals; ++I) - SymbolIds.try_emplace(T->Terminals[I], tokenSymbol(tok::TokenKind(I))); - auto Consider = [&](llvm::StringRef Name) { - if (!SymbolIds.count(Name)) - UniqueNonterminals.insert(Name); - }; - for (const auto &Spec : Specs) { - Consider(Spec.Target); - for (const RuleSpec::Element &Elt : Spec.Sequence) - Consider(Elt.Symbol); - } - llvm::for_each(UniqueNonterminals, [&T](llvm::StringRef Name) { - T->Nonterminals.emplace_back(); - T->Nonterminals.back().Name = Name.str(); - }); - assert(T->Nonterminals.size() < (1 << (SymbolBits - 1)) && - "Too many nonterminals to fit in SymbolID bits!"); - llvm::sort(T->Nonterminals, [](const GrammarTable::Nonterminal &L, - const GrammarTable::Nonterminal &R) { - return L.Name < R.Name; - }); - // Build name -> ID maps for nonterminals. - for (SymbolID SID = 0; SID < T->Nonterminals.size(); ++SID) - SymbolIds.try_emplace(T->Nonterminals[SID].Name, SID); - - // Convert the rules. - T->Rules.reserve(Specs.size()); - std::vector<SymbolID> Symbols; - auto Lookup = [SymbolIds](llvm::StringRef Name) { - auto It = SymbolIds.find(Name); - assert(It != SymbolIds.end() && "Didn't find the symbol in SymbolIds!"); - return It->second; - }; - for (const auto &Spec : Specs) { - assert(Spec.Sequence.size() <= Rule::MaxElements); - Symbols.clear(); - for (const RuleSpec::Element &Elt : Spec.Sequence) - Symbols.push_back(Lookup(Elt.Symbol)); - T->Rules.push_back(Rule(Lookup(Spec.Target), Symbols)); - } - assert(T->Rules.size() < (1 << RuleBits) && - "Too many rules to fit in RuleID bits!"); - const auto &SymbolOrder = getTopologicalOrder(T.get()); - llvm::stable_sort( - T->Rules, [&SymbolOrder](const Rule &Left, const Rule &Right) { - // Sorted by the topological order of the nonterminal Target. - return SymbolOrder[Left.Target] < SymbolOrder[Right.Target]; - }); - for (SymbolID SID = 0; SID < T->Nonterminals.size(); ++SID) { - auto StartIt = llvm::partition_point(T->Rules, [&](const Rule &R) { - return SymbolOrder[R.Target] < SymbolOrder[SID]; - }); - RuleID Start = StartIt - T->Rules.begin(); - RuleID End = Start; - while (End < T->Rules.size() && T->Rules[End].Target == SID) - ++End; - T->Nonterminals[SID].RuleRange = {Start, End}; - } - auto G = std::make_unique<Grammar>(std::move(T)); - diagnoseGrammar(*G); - return G; - } - - // Gets topological order for nonterminal symbols. - // - // The topological order is defined as: if a *single* nonterminal A produces - // (or transitively) a nonterminal B (that said, there is a production rule - // B := A), then A is less than B. - // - // It returns the sort key for each symbol, the array is indexed by SymbolID. - std::vector<unsigned> getTopologicalOrder(GrammarTable *T) { - std::vector<std::pair<SymbolID, SymbolID>> Dependencies; - for (const auto &Rule : T->Rules) { - // if A := B, A depends on B. - if (Rule.Size == 1 && pseudo::isNonterminal(Rule.Sequence[0])) - Dependencies.push_back({Rule.Target, Rule.Sequence[0]}); - } - llvm::sort(Dependencies); - std::vector<SymbolID> Order; - // Each nonterminal state flows: NotVisited -> Visiting -> Visited. - enum State { - NotVisited, - Visiting, - Visited, - }; - std::vector<State> VisitStates(T->Nonterminals.size(), NotVisited); - std::function<void(SymbolID)> DFS = [&](SymbolID SID) -> void { - if (VisitStates[SID] == Visited) - return; - if (VisitStates[SID] == Visiting) { - Diagnostics.push_back( - llvm::formatv("The grammar contains a cycle involving symbol {0}", - T->Nonterminals[SID].Name)); - return; - } - VisitStates[SID] = Visiting; - for (auto It = llvm::lower_bound(Dependencies, - std::pair<SymbolID, SymbolID>{SID, 0}); - It != Dependencies.end() && It->first == SID; ++It) - DFS(It->second); - VisitStates[SID] = Visited; - Order.push_back(SID); - }; - for (SymbolID ID = 0; ID != T->Nonterminals.size(); ++ID) - DFS(ID); - std::vector<unsigned> Result(T->Nonterminals.size(), 0); - for (size_t I = 0; I < Order.size(); ++I) - Result[Order[I]] = I; - return Result; - } - -private: - // Text representation of a BNF grammar rule. - struct RuleSpec { - llvm::StringRef Target; - struct Element { - llvm::StringRef Symbol; // Name of the symbol - }; - std::vector<Element> Sequence; - - std::string toString() const { - std::vector<llvm::StringRef> Body; - for (const auto &E : Sequence) - Body.push_back(E.Symbol); - return llvm::formatv("{0} := {1}", Target, llvm::join(Body, " ")); - } - }; - - std::vector<RuleSpec> parse(llvm::StringRef Lines) { - std::vector<RuleSpec> Specs; - for (llvm::StringRef Line : llvm::split(Lines, '\n')) { - Line = Line.trim(); - // Strip anything coming after the '#' (comment). - Line = Line.take_while([](char C) { return C != '#'; }); - if (Line.empty()) - continue; - RuleSpec Rule; - if (parseLine(Line, Rule)) - Specs.push_back(std::move(Rule)); - } - return Specs; - } - - bool parseLine(llvm::StringRef Line, RuleSpec &Out) { - auto Parts = Line.split(":="); - if (Parts.first == Line) { // no separator in Line - Diagnostics.push_back( - llvm::formatv("Failed to parse '{0}': no separator :=", Line).str()); - return false; - } - - Out.Target = Parts.first.trim(); - Out.Sequence.clear(); - for (llvm::StringRef Chunk : llvm::split(Parts.second, ' ')) { - Chunk = Chunk.trim(); - if (Chunk.empty()) - continue; // skip empty - - Out.Sequence.push_back({Chunk}); - } - return true; - }; - - // Inlines all _opt symbols. - // For example, a rule E := id +_opt id, after elimination, we have two - // equivalent rules: - // 1) E := id + id - // 2) E := id id - std::vector<RuleSpec> eliminateOptional(llvm::ArrayRef<RuleSpec> Input) { - std::vector<RuleSpec> Results; - std::vector<RuleSpec::Element> Storage; - for (const auto &R : Input) { - eliminateOptionalTail( - R.Sequence, Storage, [&Results, &Storage, &R, this]() { - if (Storage.empty()) { - Diagnostics.push_back( - llvm::formatv("Rule '{0}' has a nullable RHS", R.toString())); - return; - } - Results.push_back({R.Target, Storage}); - }); - assert(Storage.empty()); - } - return Results; - } - void eliminateOptionalTail(llvm::ArrayRef<RuleSpec::Element> Elements, - std::vector<RuleSpec::Element> &Result, - llvm::function_ref<void()> CB) { - if (Elements.empty()) - return CB(); - auto Front = Elements.front(); - if (!Front.Symbol.endswith(OptSuffix)) { - Result.push_back(std::move(Front)); - eliminateOptionalTail(Elements.drop_front(1), Result, CB); - Result.pop_back(); - return; - } - // Enumerate two options: skip the opt symbol, or inline the symbol. - eliminateOptionalTail(Elements.drop_front(1), Result, CB); // skip - Front.Symbol = Front.Symbol.drop_back(OptSuffix.size()); // drop "_opt" - Result.push_back(std::move(Front)); - eliminateOptionalTail(Elements.drop_front(1), Result, CB); - Result.pop_back(); - } - - // Diagnoses the grammar and emit warnings if any. - void diagnoseGrammar(const Grammar &G) { - const auto &T = G.table(); - for (SymbolID SID = 0; SID < T.Nonterminals.size(); ++SID) { - auto Range = T.Nonterminals[SID].RuleRange; - if (Range.Start == Range.End) - Diagnostics.push_back( - llvm::formatv("No rules for nonterminal: {0}", G.symbolName(SID))); - llvm::StringRef NameRef = T.Nonterminals[SID].Name; - if (llvm::all_of(NameRef, llvm::isAlpha) && NameRef.upper() == NameRef) { - Diagnostics.push_back(llvm::formatv( - "Token-like name {0} is used as a nonterminal", G.symbolName(SID))); - } - } - for (RuleID RID = 0; RID + 1u < T.Rules.size(); ++RID) { - if (T.Rules[RID] == T.Rules[RID + 1]) - Diagnostics.push_back( - llvm::formatv("Duplicate rule: `{0}`", G.dumpRule(RID))); - // Warning for nullable nonterminals - if (T.Rules[RID].Size == 0) - Diagnostics.push_back( - llvm::formatv("Rule `{0}` has a nullable RHS", G.dumpRule(RID))); - } - // symbol-id -> used counts - std::vector<unsigned> UseCounts(T.Nonterminals.size(), 0); - for (const Rule &R : T.Rules) - for (SymbolID SID : R.seq()) - if (isNonterminal(SID)) - ++UseCounts[SID]; - for (SymbolID SID = 0; SID < UseCounts.size(); ++SID) - if (UseCounts[SID] == 0 && T.Nonterminals[SID].Name != StartSymbol) - Diagnostics.push_back( - llvm::formatv("Nonterminal never used: {0}", G.symbolName(SID))); - } - std::vector<std::string> &Diagnostics; -}; -} // namespace - -std::unique_ptr<Grammar> -Grammar::parseBNF(llvm::StringRef BNF, std::vector<std::string> &Diagnostics) { - Diagnostics.clear(); - return GrammarBuilder(Diagnostics).build(BNF); -} - -} // namespace pseudo -} // namespace clang Index: clang-tools-extra/pseudo/lib/Grammar.cpp =================================================================== --- /dev/null +++ clang-tools-extra/pseudo/lib/Grammar.cpp @@ -1,188 +0,0 @@ -//===--- Grammar.cpp - Grammar for clang pseudoparser -----------*- C++-*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "clang-pseudo/Grammar.h" -#include "clang/Basic/TokenKinds.h" -#include "llvm/ADT/ArrayRef.h" -#include "llvm/ADT/STLExtras.h" -#include "llvm/ADT/StringRef.h" -#include "llvm/Support/FormatVariadic.h" -#include "llvm/Support/raw_ostream.h" - -namespace clang { -namespace pseudo { - -Rule::Rule(SymbolID Target, llvm::ArrayRef<SymbolID> Sequence) - : Target(Target), Size(static_cast<uint8_t>(Sequence.size())) { - assert(Sequence.size() <= Rule::MaxElements); - llvm::copy(Sequence, this->Sequence); -} - -Grammar::Grammar(std::unique_ptr<GrammarTable> Table) : T(std::move(Table)) { - Underscore = findNonterminal("_", *T); -} - -llvm::ArrayRef<Rule> Grammar::rulesFor(SymbolID SID) const { - assert(isNonterminal(SID)); - const auto &R = T->Nonterminals[SID].RuleRange; - assert(R.End <= T->Rules.size()); - return llvm::makeArrayRef(&T->Rules[R.Start], R.End - R.Start); -} - -const Rule &Grammar::lookupRule(RuleID RID) const { - assert(RID < T->Rules.size()); - return T->Rules[RID]; -} - -llvm::StringRef Grammar::symbolName(SymbolID SID) const { - if (isToken(SID)) - return T->Terminals[symbolToToken(SID)]; - return T->Nonterminals[SID].Name; -} - -std::string Grammar::dumpRule(RuleID RID) const { - std::string Result; - llvm::raw_string_ostream OS(Result); - const Rule &R = T->Rules[RID]; - OS << symbolName(R.Target) << " :="; - for (SymbolID SID : R.seq()) - OS << " " << symbolName(SID); - return Result; -} - -std::string Grammar::dumpRules(SymbolID SID) const { - assert(isNonterminal(SID)); - std::string Result; - const auto &Range = T->Nonterminals[SID].RuleRange; - for (RuleID RID = Range.Start; RID < Range.End; ++RID) - Result.append(dumpRule(RID)).push_back('\n'); - return Result; -} - -std::string Grammar::dump() const { - std::string Result; - llvm::raw_string_ostream OS(Result); - OS << "Nonterminals:\n"; - for (SymbolID SID = 0; SID < T->Nonterminals.size(); ++SID) - OS << llvm::formatv(" {0} {1}\n", SID, symbolName(SID)); - OS << "Rules:\n"; - for (RuleID RID = 0; RID < T->Rules.size(); ++RID) - OS << llvm::formatv(" {0} {1}\n", RID, dumpRule(RID)); - return OS.str(); -} - -std::vector<llvm::DenseSet<SymbolID>> firstSets(const Grammar &G) { - std::vector<llvm::DenseSet<SymbolID>> FirstSets( - G.table().Nonterminals.size()); - auto ExpandFirstSet = [&FirstSets](SymbolID Target, SymbolID First) { - assert(isNonterminal(Target)); - if (isToken(First)) - return FirstSets[Target].insert(First).second; - bool Changed = false; - for (SymbolID SID : FirstSets[First]) - Changed |= FirstSets[Target].insert(SID).second; - return Changed; - }; - - // A rule S := T ... implies elements in FIRST(S): - // - if T is a terminal, FIRST(S) contains T - // - if T is a nonterminal, FIRST(S) contains FIRST(T) - // Since FIRST(T) may not have been fully computed yet, FIRST(S) itself may - // end up being incomplete. - // We iterate until we hit a fixed point. - // (This isn't particularly efficient, but table building isn't on the - // critical path). - bool Changed = true; - while (Changed) { - Changed = false; - for (const auto &R : G.table().Rules) - // We only need to consider the first element because symbols are - // non-nullable. - Changed |= ExpandFirstSet(R.Target, R.seq().front()); - } - return FirstSets; -} - -std::vector<llvm::DenseSet<SymbolID>> followSets(const Grammar &G) { - auto FirstSets = firstSets(G); - std::vector<llvm::DenseSet<SymbolID>> FollowSets( - G.table().Nonterminals.size()); - // Expand the follow set of a nonterminal symbol Y by adding all from the - // given symbol set. - auto ExpandFollowSet = [&FollowSets](SymbolID Y, - const llvm::DenseSet<SymbolID> &ToAdd) { - assert(isNonterminal(Y)); - bool Changed = false; - for (SymbolID F : ToAdd) - Changed |= FollowSets[Y].insert(F).second; - return Changed; - }; - // Follow sets is computed based on the following 3 rules, the computation - // is completed at a fixed point where there is no more new symbols can be - // added to any of the follow sets. - // - // Rule 1: add endmarker to the FOLLOW(S), where S is the start symbol of the - // augmented grammar, in our case it is '_'. - FollowSets[G.underscore()].insert(tokenSymbol(tok::eof)); - bool Changed = true; - while (Changed) { - Changed = false; - for (const auto &R : G.table().Rules) { - // Rule 2: for a rule X := ... Y Z, we add all symbols from FIRST(Z) to - // FOLLOW(Y). - for (size_t I = 0; I + 1 < R.seq().size(); ++I) { - if (isToken(R.seq()[I])) - continue; - // We only need to consider the next symbol because symbols are - // non-nullable. - SymbolID Next = R.seq()[I + 1]; - if (isToken(Next)) - // First set for a terminal is itself. - Changed |= ExpandFollowSet(R.seq()[I], {Next}); - else - Changed |= ExpandFollowSet(R.seq()[I], FirstSets[Next]); - } - // Rule 3: for a rule X := ... Z, we add all symbols from FOLLOW(X) to - // FOLLOW(Z). - SymbolID Z = R.seq().back(); - if (isNonterminal(Z)) - Changed |= ExpandFollowSet(Z, FollowSets[R.Target]); - } - } - return FollowSets; -} - -static llvm::ArrayRef<std::string> getTerminalNames() { - static const std::vector<std::string> *TerminalNames = []() { - static std::vector<std::string> TerminalNames; - TerminalNames.reserve(NumTerminals); - for (unsigned I = 0; I < NumTerminals; ++I) { - tok::TokenKind K = static_cast<tok::TokenKind>(I); - if (const auto *Punc = tok::getPunctuatorSpelling(K)) - TerminalNames.push_back(Punc); - else - TerminalNames.push_back(llvm::StringRef(tok::getTokenName(K)).upper()); - } - return &TerminalNames; - }(); - return *TerminalNames; -} -GrammarTable::GrammarTable() : Terminals(getTerminalNames()) {} - -SymbolID findNonterminal(llvm::StringRef Name, - const clang::pseudo::GrammarTable >) { - auto It = llvm::partition_point( - GT.Nonterminals, - [&](const GrammarTable::Nonterminal &X) { return X.Name < Name; }); - assert(It != GT.Nonterminals.end() && It->Name == Name && - "Given nonterminal must exist in the grammar!"); - return It - GT.Nonterminals.begin(); -} - -} // namespace pseudo -} // namespace clang Index: clang-tools-extra/pseudo/lib/grammar/CMakeLists.txt =================================================================== --- /dev/null +++ clang-tools-extra/pseudo/lib/grammar/CMakeLists.txt @@ -0,0 +1,15 @@ +set(LLVM_LINK_COMPONENTS Support) + +add_clang_library(clangPseudoGrammar + Grammar.cpp + GrammarBNF.cpp + LRGraph.cpp + LRTable.cpp + LRTableBuild.cpp + + # FIXME: can we get rid of the clangBasic dependency? We need it for the + # clang::tok::getTokenName and clang::tok::getPunctuatorSpelling functions, we + # could consider remimplement these functions. + LINK_LIBS + clangBasic + ) Index: clang-tools-extra/pseudo/lib/cxx/CXX.cpp =================================================================== --- /dev/null +++ clang-tools-extra/pseudo/lib/cxx/CXX.cpp @@ -0,0 +1,34 @@ +//===--- CXX.cpp - Define public interfaces for C++ grammar ---------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "clang-pseudo/cxx/CXX.h" +#include "clang-pseudo/LRTable.h" + +namespace clang { +namespace pseudo { +namespace cxx { + +static const char *CxxBNF = +#include "CXXBNF.inc" + ; + +const Grammar &getGrammar() { + static std::vector<std::string> Diags; + static std::unique_ptr<Grammar> G = Grammar::parseBNF(CxxBNF, Diags); + assert(Diags.empty()); + return *G; +} + +const LRTable &getLRTable() { + static LRTable Table = LRTable::buildSLR(getGrammar()); + return Table; +} + +} // namespace cxx +} // namespace pseudo +} // namespace clang Index: clang-tools-extra/pseudo/lib/cxx/CMakeLists.txt =================================================================== --- /dev/null +++ clang-tools-extra/pseudo/lib/cxx/CMakeLists.txt @@ -0,0 +1,9 @@ +add_clang_library(clangPseudoCXX + CXX.cpp + + DEPENDS + cxx_gen + + LINK_LIBS + clangPseudoGrammar + ) Index: clang-tools-extra/pseudo/lib/CMakeLists.txt =================================================================== --- clang-tools-extra/pseudo/lib/CMakeLists.txt +++ clang-tools-extra/pseudo/lib/CMakeLists.txt @@ -1,18 +1,17 @@ +add_subdirectory(cxx) +add_subdirectory(grammar) + set(LLVM_LINK_COMPONENTS Support) add_clang_library(clangPseudo DirectiveTree.cpp Forest.cpp GLR.cpp - Grammar.cpp - GrammarBNF.cpp Lex.cpp - LRGraph.cpp - LRTable.cpp - LRTableBuild.cpp Token.cpp LINK_LIBS clangBasic clangLex + clangPseudoGrammar ) Index: clang-tools-extra/pseudo/include/clang-pseudo/cxx/CXX.h =================================================================== --- /dev/null +++ clang-tools-extra/pseudo/include/clang-pseudo/cxx/CXX.h @@ -0,0 +1,51 @@ +//===--- CXX.h - Public interfaces for the C++ grammar -----------*- C++-*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines public interfaces for the C++ grammar +// (pseudo/lib/cxx.bnf). It provides a fast way to access core building pieces +// of the LR parser, e.g. Grammar, LRTable, rather than parsing the grammar +// file at the runtime. +// +// We do a compilation of the C++ BNF grammar at build time, and generate +// critical data sources. The implementation of the interfaces are based on the +// generated data sources. +// +// FIXME: not everything is fully compiled yet. The implementation of the +// interfaces are still parsing the grammar file at the runtime. +// +//===----------------------------------------------------------------------===// + +#ifndef CLANG_PSEUDO_CXX_CXX_H +#define CLANG_PSEUDO_CXX_CXX_H + +#include "clang-pseudo/Grammar.h" + +namespace clang { +namespace pseudo { +class LRTable; + +namespace cxx { +// Symbol represents nonterminal symbols in the C++ grammar. +// It provides a simple uniform way to access a particular nonterminal. +enum class Symbol : SymbolID { +#define NONTERMINAL(X, Y) X = Y, +#include "CXXSymbols.inc" +#undef NONTERMINAL +}; + +// Returns the C++ grammar. +const Grammar &getGrammar(); +// Returns the corresponding LRTable for the C++ grammar. +const LRTable &getLRTable(); + +} // namespace cxx + +} // namespace pseudo +} // namespace clang + +#endif // CLANG_PSEUDO_CXX_CXX_H Index: clang-tools-extra/pseudo/include/CMakeLists.txt =================================================================== --- /dev/null +++ clang-tools-extra/pseudo/include/CMakeLists.txt @@ -0,0 +1,29 @@ +# The cxx.bnf grammar file +set(cxx_bnf ${CMAKE_CURRENT_SOURCE_DIR}/../lib/cxx.bnf) + +# Generate inc files. +set(cxx_symbols_inc ${CMAKE_CURRENT_BINARY_DIR}/CXXSymbols.inc) +add_custom_command(OUTPUT ${cxx_symbols_inc} + COMMAND "${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/pseudo-gen" + --grammar ${cxx_bnf} + --emit-symbol-list + > ${cxx_symbols_inc} + COMMENT "Generating nonterminal symbol file for cxx grammar..." + DEPENDS pseudo-gen + VERBATIM) + +set(cxx_bnf_inc ${CMAKE_CURRENT_BINARY_DIR}/CXXBNF.inc) +add_custom_command(OUTPUT ${cxx_bnf_inc} + COMMAND "${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/pseudo-gen" + --grammar ${cxx_bnf} + --emit-grammar-content + > ${cxx_bnf_inc} + COMMENT "Generating bnf string file for cxx grammar..." + DEPENDS pseudo-gen + VERBATIM) + +# add_custom_command does not create a new target, we need to deine a target +# explicitly, so that other targets can depend on it. +add_custom_target(cxx_gen + DEPENDS ${cxx_symbols_inc} ${cxx_bnf_inc} + VERBATIM) Index: clang-tools-extra/pseudo/gen/Main.cpp =================================================================== --- /dev/null +++ clang-tools-extra/pseudo/gen/Main.cpp @@ -0,0 +1,85 @@ +//===--- Main.cpp - Compile BNF grammar -----------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This is a tool to compile a BNF grammar, it is used by the build system to +// generate a necessary data bits to statically construct core pieces (Grammar, +// LRTable etc) of the LR parser. +// +//===----------------------------------------------------------------------===// + +#include "clang-pseudo/Grammar.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/FormatVariadic.h" +#include "llvm/Support/MemoryBuffer.h" +#include <algorithm> + +using llvm::cl::desc; +using llvm::cl::init; +using llvm::cl::opt; +using llvm::cl::values; + +namespace { +enum EmitType { + EmitSymbolList, + EmitGrammarContent, +}; + +opt<std::string> Grammar("grammar", desc("Parse a BNF grammar file."), + init("")); +opt<EmitType> + Emit(desc("which information to emit:"), + values(clEnumValN(EmitSymbolList, "emit-symbol-list", + "Print nonterminal symbols (default)"), + clEnumValN(EmitGrammarContent, "emit-grammar-content", + "Print the BNF grammar content as a string"))); +std::string readOrDie(llvm::StringRef Path) { + llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> Text = + llvm::MemoryBuffer::getFile(Path); + if (std::error_code EC = Text.getError()) { + llvm::errs() << "Error: can't read grammar file '" << Path + << "': " << EC.message() << "\n"; + ::exit(1); + } + return Text.get()->getBuffer().str(); +} +} // namespace + +int main(int argc, char *argv[]) { + llvm::cl::ParseCommandLineOptions(argc, argv, ""); + if (!Grammar.getNumOccurrences()) { + llvm::errs() << "Grammar file must be provided!\n"; + return 1; + } + + std::string GrammarText = readOrDie(Grammar); + std::vector<std::string> Diags; + auto G = clang::pseudo::Grammar::parseBNF(GrammarText, Diags); + + if (!Diags.empty()) { + llvm::errs() << llvm::join(Diags, "\n"); + return 1; + } + switch (Emit) { + + case EmitSymbolList: + for (clang::pseudo::SymbolID ID = 0; ID < G->table().Nonterminals.size(); + ++ID) { + std::string Name = G->symbolName(ID).str(); + // translation-unit -> translation_unit + std::replace(Name.begin(), Name.end(), '-', '_'); + llvm::outs() << (llvm::formatv("NONTERMINAL({0}, {1})\n", Name, ID)); + } + break; + case EmitGrammarContent: + llvm::outs() << llvm::formatv("R\"bnf(\n{0})bnf\"\n", GrammarText); + break; + } + + return 0; +} Index: clang-tools-extra/pseudo/gen/CMakeLists.txt =================================================================== --- /dev/null +++ clang-tools-extra/pseudo/gen/CMakeLists.txt @@ -0,0 +1,10 @@ +set(LLVM_LINK_COMPONENTS Support) + +add_clang_executable(pseudo-gen + Main.cpp + ) + +target_link_libraries(pseudo-gen + PRIVATE + clangPseudoGrammar + ) Index: clang-tools-extra/pseudo/CMakeLists.txt =================================================================== --- clang-tools-extra/pseudo/CMakeLists.txt +++ clang-tools-extra/pseudo/CMakeLists.txt @@ -1,5 +1,7 @@ include_directories(include) include_directories(${CMAKE_CURRENT_BINARY_DIR}/include) +add_subdirectory(include) +add_subdirectory(gen) add_subdirectory(lib) add_subdirectory(tool) add_subdirectory(fuzzer)
_______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits