sammccall updated this revision to Diff 438693. sammccall added a comment. Deeper version of LR0, still prototype-quality
- use hashtables instead of binary search where appropriate - simplify GLR accordingly - add fast-path reduce to reclaim some of the performance (though "reclaim" is maybe wrong - this optimization can with hindsight apply to SLR1 also) Numbers to follow. Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D127357/new/ https://reviews.llvm.org/D127357 Files: clang-tools-extra/pseudo/benchmarks/Benchmark.cpp clang-tools-extra/pseudo/include/clang-pseudo/GLR.h clang-tools-extra/pseudo/include/clang-pseudo/LRTable.h clang-tools-extra/pseudo/lib/GLR.cpp clang-tools-extra/pseudo/lib/cxx/CXX.cpp clang-tools-extra/pseudo/lib/grammar/LRTable.cpp clang-tools-extra/pseudo/lib/grammar/LRTableBuild.cpp clang-tools-extra/pseudo/test/lr-build-basic.test clang-tools-extra/pseudo/test/lr-build-conflicts.test clang-tools-extra/pseudo/tool/ClangPseudo.cpp
Index: clang-tools-extra/pseudo/tool/ClangPseudo.cpp =================================================================== --- clang-tools-extra/pseudo/tool/ClangPseudo.cpp +++ clang-tools-extra/pseudo/tool/ClangPseudo.cpp @@ -108,7 +108,7 @@ llvm::outs() << G->dump(); if (PrintGraph) llvm::outs() << clang::pseudo::LRGraph::buildLR0(*G).dumpForTests(*G); - auto LRTable = clang::pseudo::LRTable::buildSLR(*G); + auto LRTable = clang::pseudo::LRTable::buildLR0(*G); if (PrintTable) llvm::outs() << LRTable.dumpForTests(*G); if (PrintStatistics) Index: clang-tools-extra/pseudo/test/lr-build-conflicts.test =================================================================== --- clang-tools-extra/pseudo/test/lr-build-conflicts.test +++ clang-tools-extra/pseudo/test/lr-build-conflicts.test @@ -35,12 +35,10 @@ # TABLE-NEXT: State 1 # TABLE-NEXT: '-': shift state 3 # TABLE-NEXT: State 2 -# TABLE-NEXT: 'EOF': reduce by rule 1 'expr := IDENTIFIER' -# TABLE-NEXT: '-': reduce by rule 1 'expr := IDENTIFIER' +# TABLE-NEXT: 'EOD': reduce by rule 1 'expr := IDENTIFIER' # TABLE-NEXT: State 3 # TABLE-NEXT: 'IDENTIFIER': shift state 2 # TABLE-NEXT: 'expr': go to state 4 # TABLE-NEXT: State 4 -# TABLE-NEXT: 'EOF': reduce by rule 0 'expr := expr - expr' +# TABLE-NEXT: 'EOD': reduce by rule 0 'expr := expr - expr' # TABLE-NEXT: '-': shift state 3 -# TABLE-NEXT: '-': reduce by rule 0 'expr := expr - expr' Index: clang-tools-extra/pseudo/test/lr-build-basic.test =================================================================== --- clang-tools-extra/pseudo/test/lr-build-basic.test +++ clang-tools-extra/pseudo/test/lr-build-basic.test @@ -23,6 +23,6 @@ # TABLE-NEXT: 'id': go to state 2 # TABLE-NEXT: State 1 # TABLE-NEXT: State 2 -# TABLE-NEXT: 'EOF': reduce by rule 1 'expr := id' +# TABLE-NEXT: 'EOD': reduce by rule 1 'expr := id' # TABLE-NEXT: State 3 -# TABLE-NEXT: 'EOF': reduce by rule 0 'id := IDENTIFIER' +# TABLE-NEXT: 'EOD': reduce by rule 0 'id := IDENTIFIER' Index: clang-tools-extra/pseudo/lib/grammar/LRTableBuild.cpp =================================================================== --- clang-tools-extra/pseudo/lib/grammar/LRTableBuild.cpp +++ clang-tools-extra/pseudo/lib/grammar/LRTableBuild.cpp @@ -9,35 +9,12 @@ #include "clang-pseudo/Grammar.h" #include "clang-pseudo/LRGraph.h" #include "clang-pseudo/LRTable.h" -#include "clang/Basic/TokenKinds.h" #include <cstdint> -namespace llvm { -template <> struct DenseMapInfo<clang::pseudo::LRTable::Entry> { - using Entry = clang::pseudo::LRTable::Entry; - static inline Entry getEmptyKey() { - static Entry E{static_cast<clang::pseudo::SymbolID>(-1), 0, - clang::pseudo::LRTable::Action::sentinel()}; - return E; - } - static inline Entry getTombstoneKey() { - static Entry E{static_cast<clang::pseudo::SymbolID>(-2), 0, - clang::pseudo::LRTable::Action::sentinel()}; - return E; - } - static unsigned getHashValue(const Entry &I) { - return llvm::hash_combine(I.State, I.Symbol, I.Act.opaque()); - } - static bool isEqual(const Entry &LHS, const Entry &RHS) { - return LHS.State == RHS.State && LHS.Symbol == RHS.Symbol && - LHS.Act == RHS.Act; - } -}; -} // namespace llvm - namespace clang { namespace pseudo { +#if 0 class LRTable::Builder { public: Builder(llvm::ArrayRef<std::pair<SymbolID, StateID>> StartStates) @@ -134,6 +111,41 @@ } return std::move(Build).build(G.table(), Graph.states().size()); } +#endif + +LRTable LRTable::buildLR0(const Grammar &G) { + auto Graph = LRGraph::buildLR0(G); + assert(Graph.states().size() <= (1 << StateBits) && + "Graph states execceds the maximum limit!"); + + std::vector<std::pair<StateID, RuleID>> Reduce; + llvm::DenseMap<StateSymbol, StateID> Shift; + llvm::DenseMap<StateSymbol, StateID> GoTo; + + for (const auto &T : Graph.edges()) { + (isToken(T.Label) ? Shift : GoTo) + .try_emplace(StateSymbol{T.Src, T.Label}, T.Dst); + } + + for (StateID SID = 0; SID < Graph.states().size(); ++SID) { + for (const Item &I : Graph.states()[SID].Items) { + if (!I.hasNext()) { + // If we've just parsed the start symbol, this means we successfully + // parse the input. We don't add the reduce action of `_ := + // start_symbol` in the LRTable (the GLR parser handles it + // specifically). + if (G.lookupRule(I.rule()).Target == G.underscore()) + continue; + // If we've reached the end of a rule A := ..., then we can reduce. + Reduce.push_back({SID, I.rule()}); + } + } + } + return LRTable( + std::move(Shift), + OffsetTable<StateID, RuleID>(std::move(Reduce), Graph.states().size()), + std::move(GoTo), Graph.startStates()); +} } // namespace pseudo } // namespace clang Index: clang-tools-extra/pseudo/lib/grammar/LRTable.cpp =================================================================== --- clang-tools-extra/pseudo/lib/grammar/LRTable.cpp +++ clang-tools-extra/pseudo/lib/grammar/LRTable.cpp @@ -17,28 +17,13 @@ namespace clang { namespace pseudo { -llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const LRTable::Action &A) { - switch (A.kind()) { - case LRTable::Action::Shift: - return OS << llvm::formatv("shift state {0}", A.getShiftState()); - case LRTable::Action::Reduce: - return OS << llvm::formatv("reduce by rule {0}", A.getReduceRule()); - case LRTable::Action::GoTo: - return OS << llvm::formatv("go to state {0}", A.getGoToState()); - case LRTable::Action::Sentinel: - llvm_unreachable("unexpected Sentinel action kind!"); - } - llvm_unreachable("unexpected action kind!"); -} - std::string LRTable::dumpStatistics() const { return llvm::formatv(R"( Statistics of the LR parsing table: - number of states: {0} - number of actions: {1} - size of the table (bytes): {2} + number of actions: shift={0} reduce={1} goto={2} + size of the table (bytes): {3} )", - StateOffset.size() - 1, Actions.size(), bytes()) + Shift.size(), Reduce.size(), Goto.size(), bytes()) .str(); } @@ -46,6 +31,7 @@ std::string Result; llvm::raw_string_ostream OS(Result); OS << "LRTable:\n"; +#if 0 for (StateID S = 0; S < StateOffset.size() - 1; ++S) { OS << llvm::formatv("State {0}\n", S); for (uint16_t Terminal = 0; Terminal < NumTerminals; ++Terminal) { @@ -69,43 +55,10 @@ getGoToState(S, NontermID)); } } +#endif return OS.str(); } -llvm::ArrayRef<LRTable::Action> LRTable::getActions(StateID State, - SymbolID Terminal) const { - assert(pseudo::isToken(Terminal) && "expect terminal symbol!"); - return find(State, Terminal); -} - -LRTable::StateID LRTable::getGoToState(StateID State, - SymbolID Nonterminal) const { - assert(pseudo::isNonterminal(Nonterminal) && "expected nonterminal symbol!"); - auto Result = find(State, Nonterminal); - assert(Result.size() == 1 && Result.front().kind() == Action::GoTo); - return Result.front().getGoToState(); -} - -llvm::ArrayRef<LRTable::Action> LRTable::find(StateID Src, SymbolID ID) const { - assert(Src + 1u < StateOffset.size()); - std::pair<size_t, size_t> Range = - std::make_pair(StateOffset[Src], StateOffset[Src + 1]); - auto SymbolRange = llvm::makeArrayRef(Symbols.data() + Range.first, - Symbols.data() + Range.second); - - assert(llvm::is_sorted(SymbolRange) && - "subrange of the Symbols should be sorted!"); - const LRTable::StateID *Start = - llvm::partition_point(SymbolRange, [&ID](SymbolID S) { return S < ID; }); - if (Start == SymbolRange.end()) - return {}; - const LRTable::StateID *End = Start; - while (End != SymbolRange.end() && *End == ID) - ++End; - return llvm::makeArrayRef(&Actions[Start - Symbols.data()], - /*length=*/End - Start); -} - LRTable::StateID LRTable::getStartState(SymbolID Target) const { assert(llvm::is_sorted(StartStates) && "StartStates must be sorted!"); auto It = llvm::partition_point( Index: clang-tools-extra/pseudo/lib/cxx/CXX.cpp =================================================================== --- clang-tools-extra/pseudo/lib/cxx/CXX.cpp +++ clang-tools-extra/pseudo/lib/cxx/CXX.cpp @@ -25,7 +25,7 @@ } const LRTable &getLRTable() { - static LRTable *Table = new LRTable(LRTable::buildSLR(getGrammar())); + static LRTable *Table = new LRTable(LRTable::buildLR0(getGrammar())); return *Table; } Index: clang-tools-extra/pseudo/lib/GLR.cpp =================================================================== --- clang-tools-extra/pseudo/lib/GLR.cpp +++ clang-tools-extra/pseudo/lib/GLR.cpp @@ -46,67 +46,35 @@ auto &GSS = Params.GSStack; // Lists of active shift, reduce actions. - std::vector<ParseStep> PendingShift, PendingReduce; - auto AddSteps = [&](const GSS::Node *Head, SymbolID NextTok) { - for (const auto &Action : Params.Table.getActions(Head->State, NextTok)) { - switch (Action.kind()) { - case LRTable::Action::Shift: - PendingShift.push_back({Head, Action}); - break; - case LRTable::Action::Reduce: - PendingReduce.push_back({Head, Action}); - break; - default: - llvm_unreachable("unexpected action kind!"); - } - } - }; StateID StartState = Params.Table.getStartState(StartSymbol); - std::vector<const GSS::Node *> NewHeads = { + std::vector<const GSS::Node *> Heads = { GSS.addNode(/*State=*/StartState, /*ForestNode=*/nullptr, {})}; + std::vector<const GSS::Node *> NextHeads; auto MaybeGC = [&, Roots(std::vector<const GSS::Node *>{}), I(0u)]() mutable { - assert(PendingShift.empty() && PendingReduce.empty() && - "Running GC at the wrong time!"); - + assert(NextHeads.empty() && "Running GC at the wrong time!"); if (++I != 20) // Run periodically to balance CPU and memory usage. return; I = 0; // We need to copy the list: Roots is consumed by the GC. - Roots = NewHeads; + Roots = Heads; GSS.gc(std::move(Roots)); }; for (const ForestNode &Terminal : Terminals) { LLVM_DEBUG(llvm::dbgs() << llvm::formatv("Next token {0} (id={1})\n", G.symbolName(Terminal.symbol()), Terminal.symbol())); - for (const auto *Head : NewHeads) - AddSteps(Head, Terminal.symbol()); - NewHeads.clear(); - glrReduce(PendingReduce, Params, - [&](const GSS::Node * NewHead) { - // A reduce will enable more steps. - AddSteps(NewHead, Terminal.symbol()); - }); - - glrShift(PendingShift, Terminal, Params, - [&](const GSS::Node *NewHead) { NewHeads.push_back(NewHead); }); + glrShift(Heads, Terminal, Params, NextHeads); + std::swap(Heads, NextHeads); + glrReduce(Heads, Params); + NextHeads.clear(); MaybeGC(); } - LLVM_DEBUG(llvm::dbgs() << llvm::formatv("Next is eof\n")); - for (const auto *Heads : NewHeads) - AddSteps(Heads, tokenSymbol(tok::eof)); + LLVM_DEBUG(llvm::dbgs() << llvm::formatv("Reached eof\n")); StateID AcceptState = Params.Table.getGoToState(StartState, StartSymbol); // Collect new heads created from the final reduce. - std::vector<const GSS::Node*> Heads; - glrReduce(PendingReduce, Params, [&](const GSS::Node *NewHead) { - Heads.push_back(NewHead); - // A reduce will enable more steps. - AddSteps(NewHead, tokenSymbol(tok::eof)); - }); - const ForestNode *Result = nullptr; for (const auto *Head : Heads) { if (Head->State == AcceptState) { @@ -138,42 +106,40 @@ // After the shift action, the GSS is: // 0---1---2---4 // â---3---â -void glrShift(std::vector<ParseStep> &PendingShift, const ForestNode &NewTok, - const ParseParams &Params, NewHeadCallback NewHeadCB) { +void glrShift(llvm::ArrayRef<const GSS::Node *> OldHeads, + const ForestNode &NewTok, const ParseParams &Params, + std::vector<const GSS::Node *> &NewHeads) { assert(NewTok.kind() == ForestNode::Terminal); - assert(llvm::all_of(PendingShift, - [](const ParseStep &Step) { - return Step.Action.kind() == LRTable::Action::Shift; - }) && - "Pending shift actions must be shift actions"); LLVM_DEBUG(llvm::dbgs() << llvm::formatv(" Shift {0} ({1} active heads):\n", Params.G.symbolName(NewTok.symbol()), - PendingShift.size())); + OldHeads.size())); // We group pending shifts by their target state so we can merge them. - llvm::stable_sort(PendingShift, [](const ParseStep &L, const ParseStep &R) { - return L.Action.getShiftState() < R.Action.getShiftState(); - }); - auto Rest = llvm::makeArrayRef(PendingShift); + llvm::SmallVector<std::pair<StateID, const GSS::Node *>, 8> Shifts; + for (const auto* H : OldHeads) + if (auto S = Params.Table.getShiftState(H->State, NewTok.symbol())) + Shifts.push_back({*S, H}); + llvm::stable_sort(Shifts, llvm::less_first{}); + + auto Rest = llvm::makeArrayRef(Shifts); llvm::SmallVector<const GSS::Node *> Parents; while (!Rest.empty()) { // Collect the batch of PendingShift that have compatible shift states. // Their heads become TempParents, the parents of the new GSS node. - StateID NextState = Rest.front().Action.getShiftState(); + StateID NextState = Rest.front().first; Parents.clear(); for (const auto &Base : Rest) { - if (Base.Action.getShiftState() != NextState) + if (Base.first != NextState) break; - Parents.push_back(Base.Head); + Parents.push_back(Base.second); } Rest = Rest.drop_front(Parents.size()); LLVM_DEBUG(llvm::dbgs() << llvm::formatv(" --> S{0} ({1} heads)\n", NextState, Parents.size())); - NewHeadCB(Params.GSStack.addNode(NextState, &NewTok, Parents)); + NewHeads.push_back(Params.GSStack.addNode(NextState, &NewTok, Parents)); } - PendingShift.clear(); } namespace { @@ -231,8 +197,8 @@ // After reducing 3 by `pointer := class-name STAR` and // 2 by`enum-name := class-name STAR`: // 0--5(pointer) // 5 is goto(0, pointer) -void glrReduce(std::vector<ParseStep> &PendingReduce, const ParseParams &Params, - NewHeadCallback NewHeadCB) { +void glrReduce(std::vector<const GSS::Node *> &Heads, + const ParseParams &Params) { // There are two interacting complications: // 1. Performing one reduce can unlock new reduces on the newly-created head. // 2a. The ambiguous ForestNodes must be complete (have all sequence nodes). @@ -311,10 +277,36 @@ }; DFS(Head, 0, DFS); }; + unsigned PoppedHeads = 0; + auto FastPath = [&]() -> bool { + if (!Sequences.empty() || Heads.size() != PoppedHeads + 1) + return false; + const GSS::Node *Head = Heads.back(); + auto Rules = Params.Table.getReduceRules(Head->State); + if (Rules.size() != 1) + return false; + const auto &Rule = Params.G.lookupRule(Rules.front()); + const GSS::Node *Base = Head; + TempSequence.resize_for_overwrite(Rule.Size); + for (unsigned I = 0; I < Rule.Size; ++I) { + if (Base->parents().size() != 1) + return false; + TempSequence[Rule.Size - 1 - I] = Base->Payload; + Base = Base->parents().front(); + } + const ForestNode *Parsed = + &Params.Forest.createSequence(Rule.Target, Rules.front(), TempSequence); + StateID NextState = Params.Table.getGoToState(Base->State, Rule.Target); + Heads.push_back(Params.GSStack.addNode(NextState, Parsed, {Base})); + return true; + }; auto PopPending = [&] { - for (const ParseStep &Pending : PendingReduce) - Pop(Pending.Head, Pending.Action.getReduceRule()); - PendingReduce.clear(); + for (; PoppedHeads < Heads.size(); ++PoppedHeads) { + if (FastPath()) + continue; + for (RuleID R : Params.Table.getReduceRules(Heads[PoppedHeads]->State)) + Pop(Heads[PoppedHeads], R); + } }; std::vector<std::pair</*Goto*/ StateID, const GSS::Node *>> FamilyBases; @@ -379,9 +371,7 @@ } BasesLeft = BasesLeft.drop_front(Parents.size()); - // Invoking the callback for new heads, a real GLR parser may add new - // reduces to the PendingReduce queue! - NewHeadCB(Params.GSStack.addNode(NextState, Parsed, Parents)); + Heads.push_back(Params.GSStack.addNode(NextState, Parsed, Parents)); } PopPending(); } Index: clang-tools-extra/pseudo/include/clang-pseudo/LRTable.h =================================================================== --- clang-tools-extra/pseudo/include/clang-pseudo/LRTable.h +++ clang-tools-extra/pseudo/include/clang-pseudo/LRTable.h @@ -38,9 +38,39 @@ #include "clang-pseudo/Grammar.h" #include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/Support/Capacity.h" +#include "llvm/Support/raw_ostream.h" #include <cstdint> #include <vector> +namespace clang { +namespace pseudo { +struct StateSymbol { + using StateID = uint16_t; // XXX + StateID State; + SymbolID Symbol; +}; +} // namespace pseudo +} // namespace clang +namespace llvm { +template <> struct llvm::DenseMapInfo<clang::pseudo::StateSymbol> { + using StateSymbol = clang::pseudo::StateSymbol; + static inline StateSymbol getEmptyKey() { + return StateSymbol{StateSymbol::StateID(-1), 0}; + } + static inline StateSymbol getTombstoneKey() { + return StateSymbol{StateSymbol::StateID(-1), 1}; + } + static unsigned getHashValue(const StateSymbol &Val) { + return (Val.State * 2754435761U) ^ Val.Symbol; // Knuth hash + } + static bool isEqual(const StateSymbol &LHS, const StateSymbol &RHS) { + return LHS.State == RHS.State && LHS.Symbol == RHS.Symbol; + } +}; +} // namespace llvm namespace clang { namespace pseudo { @@ -60,79 +90,28 @@ using StateID = uint16_t; static constexpr unsigned StateBits = 13; - // Action represents the terminal and nonterminal actions, it combines the - // entry of the ACTION and GOTO tables from the LR literature. - class Action { - public: - enum Kind : uint8_t { - Sentinel = 0, - // Terminal actions, corresponding to entries of ACTION table. - - // Shift to state n: move forward with the lookahead, and push state n - // onto the state stack. - // A shift is a forward transition, and the value n is the next state that - // the parser is to enter. - Shift, - // Reduce by a rule: pop the state stack. - Reduce, - - // NOTE: there are no typical accept actions in the LRtable, accept - // actions are handled specifically in the parser -- if the parser - // reaches to a target state (which is goto(StartState, StartSymbol)) at - // the EOF token after a reduce, this indicates the input has been parsed - // as the StartSymbol successfully. - - // Nonterminal actions, corresponding to entry of GOTO table. - - // Go to state n: push state n onto the state stack. - // Similar to Shift, but it is a nonterminal forward transition. - GoTo, - }; - - static Action goTo(StateID S) { return Action(GoTo, S); } - static Action shift(StateID S) { return Action(Shift, S); } - static Action reduce(RuleID RID) { return Action(Reduce, RID); } - static Action sentinel() { return Action(Sentinel, 0); } - - StateID getShiftState() const { - assert(kind() == Shift); - return Value; - } - StateID getGoToState() const { - assert(kind() == GoTo); - return Value; - } - RuleID getReduceRule() const { - assert(kind() == Reduce); - return Value; - } - Kind kind() const { return static_cast<Kind>(K); } - - bool operator==(const Action &L) const { return opaque() == L.opaque(); } - uint16_t opaque() const { return K << ValueBits | Value; }; - - private: - Action(Kind K1, unsigned Value) : K(K1), Value(Value) {} - static constexpr unsigned ValueBits = StateBits; - static constexpr unsigned KindBits = 3; - static_assert(ValueBits >= RuleBits, "Value must be able to store RuleID"); - static_assert(KindBits + ValueBits <= 16, - "Must be able to store kind and value efficiently"); - uint16_t K : KindBits; - // Either StateID or RuleID, depending on the Kind. - uint16_t Value : ValueBits; - }; - - // Returns all available actions for the given state on a terminal. - // Expected to be called by LR parsers. - llvm::ArrayRef<Action> getActions(StateID State, SymbolID Terminal) const; - // Returns the state after we reduce a nonterminal. - // Expected to be called by LR parsers. - StateID getGoToState(StateID State, SymbolID Nonterminal) const; - - // Looks up available actions. - // Returns empty if no available actions in the table. - llvm::ArrayRef<Action> find(StateID State, SymbolID Symbol) const; + // Return the list of reductions applicable from a given state. + // These correspond to items with the dot at the end. + llvm::ArrayRef<RuleID> getReduceRules(StateID State) const { + return Reduce.find(State); + } + // Return the state to transition to after shifting a token Terminal. + // Returns None if this terminal is not legal here. + llvm::Optional<StateID> getShiftState(StateID From, SymbolID Terminal) const { + auto It = Shift.find(StateSymbol{From, Terminal}); + if (It == Shift.end()) + return llvm::None; + return It->second; + } + // Return the state to transition to after reducing a symbol Nonterminal. + // REQUIRES: this nonterminal is legal here. + StateID getGoToState(StateID From, SymbolID Nonterminal) const { + auto It = Goto.find(StateSymbol{From, Nonterminal}); + if (It == Goto.end()) + llvm::errs() << "From=" << From << ", NT=" << Nonterminal << "\n"; + assert(It != Goto.end()); + return It->second; + } // Returns the state from which the LR parser should start to parse the input // tokens as the given StartSymbol. @@ -146,17 +125,18 @@ StateID getStartState(SymbolID StartSymbol) const; size_t bytes() const { - return sizeof(*this) + Actions.capacity() * sizeof(Action) + - Symbols.capacity() * sizeof(SymbolID) + - StateOffset.capacity() * sizeof(uint32_t); + return sizeof(*this) + llvm::capacity_in_bytes(Shift) + + llvm::capacity_in_bytes(Goto) + Reduce.bytes(); } std::string dumpStatistics() const; std::string dumpForTests(const Grammar &G) const; + static LRTable buildLR0(const Grammar &G); + +#if 0 // Build a SLR(1) parsing table. static LRTable buildSLR(const Grammar &G); - class Builder; // Represents an entry in the table, used for building the LRTable. struct Entry { @@ -166,27 +146,62 @@ }; // Build a specifid table for testing purposes. static LRTable buildForTests(const GrammarTable &, llvm::ArrayRef<Entry>); +#endif private: - // Conceptually the LR table is a multimap from (State, SymbolID) => Action. - // Our physical representation is quite different for compactness. - - // Index is StateID, value is the offset into Symbols/Actions - // where the entries for this state begin. - // Give a state id, the corresponding half-open range of Symbols/Actions is - // [StateOffset[id], StateOffset[id+1]). - std::vector<uint32_t> StateOffset; - // Parallel to Actions, the value is SymbolID (columns of the matrix). - // Grouped by the StateID, and only subranges are sorted. - std::vector<SymbolID> Symbols; - // A flat list of available actions, sorted by (State, SymbolID). - std::vector<Action> Actions; + // A multimap from K => V, where Ks form a dense range [0, n). + // A flat array stores the values: + // Values = [ values for k=0 | values for k=1 | values for k=2 | ... ] + // And another stores the index for each K: + // Keys = [ index for k=0, index for k=1, ... , Values.size()] + // Lookup[k] is Values[ Keys[k]...Keys[k+1] ] + template <typename K, typename V> struct OffsetTable { + std::vector<uint32_t> Keys; + std::vector<V> Values; + + OffsetTable(std::vector<std::pair<K, V>> &&Pairs, K Limit) { + llvm::stable_sort(Pairs, llvm::less_first{}); + Keys.reserve(Limit + 1); + Values.reserve(Pairs.size()); + unsigned I = 0; + for (K Key = 0; Key < Limit; ++Key) { + Keys.push_back(Values.size()); + while (I < Pairs.size() && Pairs[I].first == Key) + Values.push_back(Pairs[I++].second); + } + Keys.push_back(Values.size()); + assert(Values.size() == Pairs.size()); + assert(Keys.size() == Limit + 1); + } + + size_t bytes() const { + return sizeof(*this) + llvm::capacity_in_bytes(Keys) + + llvm::capacity_in_bytes(Values); + } + size_t size() const { return Values.size(); } + + llvm::ArrayRef<V> find(K Key) const { + return llvm::makeArrayRef(&Values[Keys[Key]], &Values[Keys[Key + 1]]); + } + }; + + llvm::DenseMap<StateSymbol, StateID> Shift; + OffsetTable<StateID, RuleID> Reduce; + llvm::DenseMap<StateSymbol, StateID> Goto; // A sorted table, storing the start state for each target parsing symbol. std::vector<std::pair<SymbolID, StateID>> StartStates; + + LRTable(llvm::DenseMap<StateSymbol, StateID> Shift, + OffsetTable<StateID, RuleID> Reduce, + llvm::DenseMap<StateSymbol, StateID> GoTo, + std::vector<std::pair<SymbolID, StateID>> StartStates) + : Shift(std::move(Shift)), Reduce(std::move(Reduce)), + Goto(std::move(GoTo)), StartStates(std::move(StartStates)) {} }; -llvm::raw_ostream &operator<<(llvm::raw_ostream &, const LRTable::Action &); } // namespace pseudo } // namespace clang +namespace llvm {} // namespace llvm + #endif // CLANG_PSEUDO_LRTABLE_H Index: clang-tools-extra/pseudo/include/clang-pseudo/GLR.h =================================================================== --- clang-tools-extra/pseudo/include/clang-pseudo/GLR.h +++ clang-tools-extra/pseudo/include/clang-pseudo/GLR.h @@ -132,34 +132,23 @@ const ForestNode &glrParse(const TokenStream &Code, const ParseParams &Params, SymbolID StartSymbol); -// An active stack head can have multiple available actions (reduce/reduce -// actions, reduce/shift actions). -// A step is any one action applied to any one stack head. -struct ParseStep { - // A specific stack head. - const GSS::Node *Head = nullptr; - // An action associated with the head. - LRTable::Action Action = LRTable::Action::sentinel(); -}; -// A callback is invoked whenever a new GSS head is created during the GLR -// parsing process (glrShift, or glrReduce). -using NewHeadCallback = std::function<void(const GSS::Node *)>; // Apply all PendingShift actions on a given GSS state, newly-created heads are // passed to the callback. // // When this function returns, PendingShift is empty. // // Exposed for testing only. -void glrShift(std::vector<ParseStep> &PendingShift, const ForestNode &NextTok, - const ParseParams &Params, NewHeadCallback NewHeadCB); +void glrShift(llvm::ArrayRef<const GSS::Node *> OldHeads, + const ForestNode &NextTok, const ParseParams &Params, + std::vector<const GSS::Node *> &NewHeads); // Applies PendingReduce actions, until no more reduce actions are available. // // When this function returns, PendingReduce is empty. Calls to NewHeadCB may // add elements to PendingReduce // // Exposed for testing only. -void glrReduce(std::vector<ParseStep> &PendingReduce, const ParseParams &Params, - NewHeadCallback NewHeadCB); +void glrReduce(std::vector<const GSS::Node *> &Heads, + const ParseParams &Params); } // namespace pseudo } // namespace clang Index: clang-tools-extra/pseudo/benchmarks/Benchmark.cpp =================================================================== --- clang-tools-extra/pseudo/benchmarks/Benchmark.cpp +++ clang-tools-extra/pseudo/benchmarks/Benchmark.cpp @@ -77,11 +77,11 @@ } BENCHMARK(parseBNF); -static void buildSLR(benchmark::State &State) { +static void buildLR0(benchmark::State &State) { for (auto _ : State) - LRTable::buildSLR(*G); + LRTable::buildLR0(*G); } -BENCHMARK(buildSLR); +BENCHMARK(buildLR0); TokenStream lexAndPreprocess() { clang::LangOptions LangOpts = genericLangOpts(); @@ -129,7 +129,7 @@ BENCHMARK(preprocess); static void glrParse(benchmark::State &State) { - LRTable Table = clang::pseudo::LRTable::buildSLR(*G); + LRTable Table = clang::pseudo::LRTable::buildLR0(*G); SymbolID StartSymbol = *G->findNonterminal("translation-unit"); TokenStream Stream = lexAndPreprocess(); for (auto _ : State) { @@ -143,7 +143,7 @@ BENCHMARK(glrParse); static void full(benchmark::State &State) { - LRTable Table = clang::pseudo::LRTable::buildSLR(*G); + LRTable Table = clang::pseudo::LRTable::buildLR0(*G); SymbolID StartSymbol = *G->findNonterminal("translation-unit"); for (auto _ : State) { TokenStream Stream = lexAndPreprocess();
_______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits