Author: Sam McCall Date: 2022-06-28T21:11:09+02:00 New Revision: 743971faaf84eaa0e6310bdc22c68f34c20330f1
URL: https://github.com/llvm/llvm-project/commit/743971faaf84eaa0e6310bdc22c68f34c20330f1 DIFF: https://github.com/llvm/llvm-project/commit/743971faaf84eaa0e6310bdc22c68f34c20330f1.diff LOG: Revert "[pseudo] Add error-recovery framework & brace-based recovery" This reverts commit a0f4c10ae227a62c2a63611e64eba83f0ff0f577. This commit hadn't been reviewed yet, and was unintentionally included on another branch. Added: Modified: clang-tools-extra/pseudo/include/clang-pseudo/GLR.h clang-tools-extra/pseudo/include/clang-pseudo/grammar/Grammar.h clang-tools-extra/pseudo/include/clang-pseudo/grammar/LRGraph.h clang-tools-extra/pseudo/include/clang-pseudo/grammar/LRTable.h clang-tools-extra/pseudo/lib/GLR.cpp clang-tools-extra/pseudo/lib/grammar/Grammar.cpp clang-tools-extra/pseudo/lib/grammar/GrammarBNF.cpp clang-tools-extra/pseudo/lib/grammar/LRGraph.cpp clang-tools-extra/pseudo/lib/grammar/LRTableBuild.cpp clang-tools-extra/pseudo/test/cxx/empty-member-spec.cpp clang-tools-extra/pseudo/unittests/GLRTest.cpp Removed: clang-tools-extra/pseudo/test/cxx/recovery-init-list.cpp ################################################################################ diff --git a/clang-tools-extra/pseudo/include/clang-pseudo/GLR.h b/clang-tools-extra/pseudo/include/clang-pseudo/GLR.h index edc4b2acc0a8..a3e8611de425 100644 --- a/clang-tools-extra/pseudo/include/clang-pseudo/GLR.h +++ b/clang-tools-extra/pseudo/include/clang-pseudo/GLR.h @@ -144,26 +144,6 @@ void glrShift(llvm::ArrayRef<const GSS::Node *> OldHeads, void glrReduce(std::vector<const GSS::Node *> &Heads, SymbolID Lookahead, const ParseParams &Params); -// Heuristically recover from a state where no further parsing is possible. -// -// OldHeads is the parse state at TokenIndex. -// This function consumes consumes zero or more tokens (advancing TokenIndex), -// and places any recovery states created in NewHeads. -// -// On failure, NewHeads is empty and TokenIndex is unchanged. -// -// WARNING: glrRecover acts as a "fallback shift". If it consumes no tokens, -// there is a risk of the parser falling into an infinite loop, creating an -// endless sequence of recovery nodes. -// Generally it is safe for recovery to match 0 tokens against sequence symbols -// like `statement-seq`, as the grammar won't permit another statement-seq -// immediately afterwards. However recovery strategies for `statement` should -// consume at least one token, as statements may be adjacent in the input. -void glrRecover(llvm::ArrayRef<const GSS::Node *> OldHeads, - unsigned &TokenIndex, const TokenStream &Tokens, - const ParseParams &Params, - std::vector<const GSS::Node *> &NewHeads); - } // namespace pseudo } // namespace clang diff --git a/clang-tools-extra/pseudo/include/clang-pseudo/grammar/Grammar.h b/clang-tools-extra/pseudo/include/clang-pseudo/grammar/Grammar.h index 7240f5adbb03..382da41397f0 100644 --- a/clang-tools-extra/pseudo/include/clang-pseudo/grammar/Grammar.h +++ b/clang-tools-extra/pseudo/include/clang-pseudo/grammar/Grammar.h @@ -81,12 +81,9 @@ inline tok::TokenKind symbolToToken(SymbolID SID) { assert(SID < NumTerminals); return static_cast<tok::TokenKind>(SID); } -inline constexpr SymbolID tokenSymbol(tok::TokenKind TK) { +inline SymbolID tokenSymbol(tok::TokenKind TK) { return TokenFlag | static_cast<SymbolID>(TK); } -// Error recovery strategies. -// FIXME: these should be provided as extensions instead. -enum class RecoveryStrategy : uint8_t { None, Braces }; // An extension is a piece of native code specific to a grammar that modifies // the behavior of annotated rules. One ExtensionID is assigned for each unique @@ -110,7 +107,7 @@ struct Rule { // length to 9 (this is the longest sequence in cxx grammar). static constexpr unsigned SizeBits = 4; static constexpr unsigned MaxElements = 9; - static_assert(MaxElements < (1 << SizeBits), "Exceeds the maximum limit"); + static_assert(MaxElements <= (1 << SizeBits), "Exceeds the maximum limit"); static_assert(SizeBits + SymbolBits <= 16, "Must be able to store symbol ID + size efficiently"); @@ -126,13 +123,6 @@ struct Rule { // being set for this rule. ExtensionID Guard = 0; - // Specifies the index within Sequence eligible for error recovery. - // Given stmt := { stmt-seq_opt }, if we fail to parse the stmt-seq then we - // should recover by finding the matching brace, and forcing stmt-seq to match - // everything between braces. - uint8_t RecoveryIndex = -1; - RecoveryStrategy Recovery = RecoveryStrategy::None; - llvm::ArrayRef<SymbolID> seq() const { return llvm::ArrayRef<SymbolID>(Sequence, Size); } diff --git a/clang-tools-extra/pseudo/include/clang-pseudo/grammar/LRGraph.h b/clang-tools-extra/pseudo/include/clang-pseudo/grammar/LRGraph.h index f5c2cc7a1623..1b5365327431 100644 --- a/clang-tools-extra/pseudo/include/clang-pseudo/grammar/LRGraph.h +++ b/clang-tools-extra/pseudo/include/clang-pseudo/grammar/LRGraph.h @@ -137,20 +137,8 @@ class LRGraph { SymbolID Label; }; - // A possible error recovery: choose to match some tokens against a symbol. - // - // e.g. a state that contains - // stmt := { . stmt-seq [recover=braces] } - // has a Recovery { Src = S, Strategy=braces, Result=stmt-seq }. - struct Recovery { - StateID Src; // The state we are in when encountering the error. - RecoveryStrategy Strategy; // Heuristic choosing the tokens to match. - SymbolID Result; // The symbol that is produced. - }; - llvm::ArrayRef<State> states() const { return States; } llvm::ArrayRef<Edge> edges() const { return Edges; } - llvm::ArrayRef<Recovery> recoveries() const { return Recoveries; } llvm::ArrayRef<std::pair<SymbolID, StateID>> startStates() const { return StartStates; } @@ -159,15 +147,12 @@ class LRGraph { private: LRGraph(std::vector<State> States, std::vector<Edge> Edges, - std::vector<Recovery> Recoveries, std::vector<std::pair<SymbolID, StateID>> StartStates) : States(std::move(States)), Edges(std::move(Edges)), - Recoveries(std::move(Recoveries)), StartStates(std::move(StartStates)) { - } + StartStates(std::move(StartStates)) {} std::vector<State> States; std::vector<Edge> Edges; - std::vector<Recovery> Recoveries; std::vector<std::pair<SymbolID, StateID>> StartStates; }; diff --git a/clang-tools-extra/pseudo/include/clang-pseudo/grammar/LRTable.h b/clang-tools-extra/pseudo/include/clang-pseudo/grammar/LRTable.h index c8e48dbaf309..70ce52924f11 100644 --- a/clang-tools-extra/pseudo/include/clang-pseudo/grammar/LRTable.h +++ b/clang-tools-extra/pseudo/include/clang-pseudo/grammar/LRTable.h @@ -121,14 +121,6 @@ class LRTable { uint16_t Value : ValueBits; }; - struct Recovery { - RecoveryStrategy Strategy; - SymbolID Result; - }; - - // Returns all available actions for the given state on a terminal. - // Expected to be called by LR parsers. - llvm::ArrayRef<Action> getActions(StateID State, SymbolID Terminal) const; // Returns the state after we reduce a nonterminal. // Expected to be called by LR parsers. // REQUIRES: Nonterminal is valid here. @@ -159,12 +151,6 @@ class LRTable { symbolToToken(Terminal)); } - // Looks up available recovery actions if we stopped parsing in this state. - llvm::ArrayRef<Recovery> getRecovery(StateID State) const { - return llvm::makeArrayRef(Recoveries.data() + RecoveryOffset[State], - Recoveries.data() + RecoveryOffset[State + 1]); - } - // Returns the state from which the LR parser should start to parse the input // tokens as the given StartSymbol. // @@ -202,15 +188,9 @@ class LRTable { StateID State; RuleID Rule; }; - struct RecoveryEntry { - StateID State; - RecoveryStrategy Strategy; - SymbolID Result; - }; - // Build a specified table for testing purposes. - static LRTable buildForTests(const Grammar &, llvm::ArrayRef<Entry>, - llvm::ArrayRef<ReduceEntry>, - llvm::ArrayRef<RecoveryEntry> = {}); + // Build a specifid table for testing purposes. + static LRTable buildForTests(const Grammar &G, llvm::ArrayRef<Entry>, + llvm::ArrayRef<ReduceEntry>); private: // Looks up actions stored in the generic table. @@ -242,11 +222,6 @@ class LRTable { // This is flattened by encoding the (SymbolID Nonterminal, tok::Kind Token) // as an index: Nonterminal * NUM_TOKENS + Token. llvm::BitVector FollowSets; - - // Recovery stores all recovery actions from all states. - // A given state has [RecoveryOffset[S], RecoveryOffset[S+1]). - std::vector<uint32_t> RecoveryOffset; - std::vector<Recovery> Recoveries; }; llvm::raw_ostream &operator<<(llvm::raw_ostream &, const LRTable::Action &); diff --git a/clang-tools-extra/pseudo/lib/GLR.cpp b/clang-tools-extra/pseudo/lib/GLR.cpp index ccb3cd770f7f..0b9cf46245a9 100644 --- a/clang-tools-extra/pseudo/lib/GLR.cpp +++ b/clang-tools-extra/pseudo/lib/GLR.cpp @@ -24,156 +24,6 @@ namespace clang { namespace pseudo { -namespace { - -llvm::Optional<unsigned> -findRecoveryEndpoint(RecoveryStrategy Strategy, - const GSS::Node *RecoveryNode, - const TokenStream &Tokens) { - assert(Strategy == RecoveryStrategy::Braces); - const ForestNode *LBrace = RecoveryNode->Payload; - assert(LBrace->kind() == ForestNode::Terminal && - LBrace->symbol() == tokenSymbol(tok::l_brace)); - if (const Token *RBrace = Tokens.tokens()[LBrace->startTokenIndex()].pair()) - return Tokens.index(*RBrace); - return llvm::None; -} - -} // namespace - -void glrRecover(llvm::ArrayRef<const GSS::Node *> OldHeads, - unsigned &TokenIndex, const TokenStream &Tokens, - const ParseParams &Params, - std::vector<const GSS::Node *> &NewHeads) { - LLVM_DEBUG(llvm::dbgs() << "Recovery at token " << TokenIndex << "...\n"); - // Describes a possibility to recover by forcibly interpreting a range of - // tokens around the cursor as a nonterminal that we expected to see. - struct PlaceholderRecovery { - // The token prior to the nonterminal which is being recovered. - // This starts of the region we're skipping, so higher Position is better. - Token::Index Position; - // The nonterminal which will be created in order to recover. - SymbolID Symbol; - // The heuristic used to choose the bounds of the nonterminal to recover. - RecoveryStrategy Strategy; - - // The GSS head where we are expecting the recovered nonterminal. - const GSS::Node *RecoveryNode; - // Payload of nodes on the way back from the OldHead to the recovery node. - // These represent the partial parse that is being discarded. - // They should become the children of the opaque recovery node. - // - // There may be multiple paths leading to the same recovery node, we choose - // one arbitrarily. - std::vector<const ForestNode *> Path; - }; - std::vector<PlaceholderRecovery> Options; - - // Find recovery options by walking up the stack. - // - // This is similar to exception handling: we walk up the "frames" of nested - // rules being parsed until we find one that has a "handler" which allows us - // to determine the node bounds without parsing it. - // - // Unfortunately there's a significant diff erence: the stack contains both - // "upward" nodes (ancestor parses) and "leftward" ones. - // e.g. when parsing `int(2 + ?)`, the stack contains: - // expr := expr + . expr - which we're currently parsing - // expr := type ( . expr ) - (up) we should recover this outer expr - // expr := . type ( expr ) - (up+left) we should not recover this type! - // - // It's not obvious how to avoid collecting the latter as a recovery option. - // I think the distinction is ill-defined after merging items into states. - // For now, we have to take this into account when defining recovery rules. - // FIXME: find a more satisfying way to avoid such false recovery. - std::vector<const ForestNode *> Path; - llvm::DenseSet<const GSS::Node *> Seen; - auto DFS = [&](const GSS::Node *N, Token::Index NextTok, auto &DFS) { - if (!Seen.insert(N).second) - return; - for (auto Strategy : Params.Table.getRecovery(N->State)) { - Options.push_back(PlaceholderRecovery{ - NextTok, - Strategy.Result, - Strategy.Strategy, - N, - Path, - }); - LLVM_DEBUG(llvm::dbgs() - << "Option: recover " << Params.G.symbolName(Strategy.Result) - << " at token " << NextTok << "\n"); - } - Path.push_back(N->Payload); - for (const GSS::Node *Parent : N->parents()) - DFS(Parent, N->Payload->startTokenIndex(), DFS); - Path.pop_back(); - }; - for (auto *N : llvm::reverse(OldHeads)) - DFS(N, TokenIndex, DFS); - - // Now we select the option(s) we will use to recover. - // - // We prefer options starting further right, as these discard less code - // (e.g. we prefer to recover inner scopes rather than outer ones). - // The options also need to agree on an endpoint, so the parser has a - // consistent position afterwards. - // - // So conceptually we're sorting by the tuple (start, end), though we avoid - // computing `end` for options that can't be winners. - - // Consider options starting further right first. - // Don't drop the others yet though, we may still use them if preferred fails. - llvm::stable_sort(Options, [&](const auto &L, const auto &R) { - return L.Position > R.Position; - }); - - assert(NewHeads.empty()); // We may repeatedly populate and clear it. - llvm::Optional<Token::Range> RecoveryRange; - for (const PlaceholderRecovery &Option : Options) { - // If this starts further right than options we've already found, then - // we'll never find anything better. Skip computing End for the rest. - if (RecoveryRange && Option.Position < RecoveryRange->Begin) - break; - - auto End = - findRecoveryEndpoint(Option.Strategy, Option.RecoveryNode, Tokens); - // Only consider recovery that advances the parse. - if (!End || *End <= TokenIndex) - continue; - if (RecoveryRange) { - // If this is worse than our previous options, ignore it. - if (RecoveryRange->End < *End) - continue; - // If this is an improvement over our previous options, then drop them. - if (RecoveryRange->End > *End) - NewHeads.clear(); - } - // Create recovery nodes and heads for them in the GSS. These may be - // discarded if a better recovery is later found, but this path isn't hot. - RecoveryRange = {Option.Position, *End}; - const ForestNode &Placeholder = - Params.Forest.createOpaque(Option.Symbol, Option.Position); - const GSS::Node *NewHead = Params.GSStack.addNode( - Params.Table.getGoToState(Option.RecoveryNode->State, Option.Symbol), - &Placeholder, {Option.RecoveryNode}); - NewHeads.push_back(NewHead); - } - - // Advance the cursor, whether recovery succeeded or not. - if (RecoveryRange) { - LLVM_DEBUG({ - llvm::dbgs() << "Recovered range=" << *RecoveryRange << ":"; - for (const auto *Head : NewHeads) - llvm::dbgs() << " " << Params.G.symbolName(Head->Payload->symbol()); - llvm::dbgs() << "\n"; - }); - TokenIndex = RecoveryRange->End; - } else { - LLVM_DEBUG(llvm::dbgs() << "Recovery failed after trying " << Options.size() - << " strategies\n"); - ++TokenIndex; - } -} using StateID = LRTable::StateID; @@ -181,9 +31,8 @@ llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const GSS::Node &N) { std::vector<std::string> ParentStates; for (const auto *Parent : N.parents()) ParentStates.push_back(llvm::formatv("{0}", Parent->State)); - OS << llvm::formatv("state {0}, parsed symbol {1}, parents {3}", N.State, - N.Payload ? N.Payload->symbol() : 0, - llvm::join(ParentStates, ", ")); + OS << llvm::formatv("state {0}, parsed symbol {1}, parents {2}", N.State, + N.Payload->symbol(), llvm::join(ParentStates, ", ")); return OS; } @@ -578,27 +427,15 @@ const ForestNode &glrParse(const TokenStream &Tokens, const ParseParams &Params, GSS.gc(std::move(Roots)); }; // Each iteration fully processes a single token. - for (unsigned I = 0; I < Terminals.size();) { + for (unsigned I = 0; I < Terminals.size(); ++I) { LLVM_DEBUG(llvm::dbgs() << llvm::formatv( "Next token {0} (id={1})\n", G.symbolName(Terminals[I].symbol()), Terminals[I].symbol())); // Consume the token. glrShift(Heads, Terminals[I], Params, NextHeads); - - // If we weren't able to consume the token, try to skip over some tokens - // so we can keep parsing. - if (NextHeads.empty()) { - glrRecover(Heads, I, Tokens, Params, NextHeads); - if (NextHeads.empty()) - // FIXME: Ensure the `_ := start-symbol` rules have a fallback - // error-recovery strategy attached. Then this condition can't happen. - return Params.Forest.createOpaque(StartSymbol, /*Token::Index=*/0); - } else - ++I; - // Form nonterminals containing the token we just consumed. - SymbolID Lookahead = - I == Terminals.size() ? tokenSymbol(tok::eof) : Terminals[I].symbol(); + SymbolID Lookahead = I + 1 == Terminals.size() ? tokenSymbol(tok::eof) + : Terminals[I + 1].symbol(); Reduce(NextHeads, Lookahead); // Prepare for the next token. std::swap(Heads, NextHeads); @@ -607,35 +444,22 @@ const ForestNode &glrParse(const TokenStream &Tokens, const ParseParams &Params, } LLVM_DEBUG(llvm::dbgs() << llvm::formatv("Reached eof\n")); - // The parse was successful if we're in state `_ := start-symbol .` StateID AcceptState = Params.Table.getGoToState(StartState, StartSymbol); - auto SearchForAccept = [&](llvm::ArrayRef<const GSS::Node *> Heads) { - const ForestNode *Result = nullptr; - for (const auto *Head : Heads) { - if (Head->State == AcceptState) { - assert(Head->Payload->symbol() == StartSymbol); - assert(Result == nullptr && "multiple results!"); - Result = Head->Payload; - } + const ForestNode *Result = nullptr; + for (const auto *Head : Heads) { + if (Head->State == AcceptState) { + assert(Head->Payload->symbol() == StartSymbol); + assert(Result == nullptr && "multiple results!"); + Result = Head->Payload; } - return Result; - }; - if (auto *Result = SearchForAccept(Heads)) - return *Result; - // Failed to parse the input, attempt to run recovery. - // FIXME: this awkwardly repeats the recovery in the loop, when shift fails. - // More elegant is to include EOF in the token stream, and make the - // augmented rule: `_ := translation-unit EOF`. In this way recovery at EOF - // would not be a special case: it show up as a failure to shift the EOF - // token. - unsigned I = Terminals.size(); - glrRecover(Heads, I, Tokens, Params, NextHeads); - Reduce(NextHeads, tokenSymbol(tok::eof)); - if (auto *Result = SearchForAccept(NextHeads)) + } + if (Result) return *Result; - // We failed to parse the input, returning an opaque forest node for recovery. - // FIXME: as above, we can add fallback error handling so this is impossible. + // + // FIXME: We will need to invoke our generic error-recovery handlers when we + // reach EOF without reaching accept state, and involving the eof + // token in the above main for-loopmay be the best way to reuse the code). return Params.Forest.createOpaque(StartSymbol, /*Token::Index=*/0); } @@ -646,10 +470,9 @@ void glrReduce(std::vector<const GSS::Node *> &Heads, SymbolID Lookahead, } const GSS::Node *GSS::addNode(LRTable::StateID State, const ForestNode *Symbol, - llvm::ArrayRef<const Node *> Parents) { Node *Result = new (allocate(Parents.size())) - Node({State, GCParity, static_cast<uint16_t>(Parents.size())}); + Node({State, GCParity, static_cast<unsigned>(Parents.size())}); Alive.push_back(Result); ++NodesCreated; Result->Payload = Symbol; diff --git a/clang-tools-extra/pseudo/lib/grammar/Grammar.cpp b/clang-tools-extra/pseudo/lib/grammar/Grammar.cpp index dc4d95820264..8e3bcb7afb37 100644 --- a/clang-tools-extra/pseudo/lib/grammar/Grammar.cpp +++ b/clang-tools-extra/pseudo/lib/grammar/Grammar.cpp @@ -59,11 +59,8 @@ std::string Grammar::dumpRule(RuleID RID) const { llvm::raw_string_ostream OS(Result); const Rule &R = T->Rules[RID]; OS << symbolName(R.Target) << " :="; - for (unsigned I = 0; I < R.Size; ++I) { - OS << " " << symbolName(R.Sequence[I]); - if (R.RecoveryIndex == I) - OS << " [recover=" << static_cast<unsigned>(R.Recovery) << "]"; - } + for (SymbolID SID : R.seq()) + OS << " " << symbolName(SID); if (R.Guard) OS << " [guard=" << T->AttributeValues[R.Guard] << "]"; return Result; diff --git a/clang-tools-extra/pseudo/lib/grammar/GrammarBNF.cpp b/clang-tools-extra/pseudo/lib/grammar/GrammarBNF.cpp index 281c08681c92..9fbc34da7155 100644 --- a/clang-tools-extra/pseudo/lib/grammar/GrammarBNF.cpp +++ b/clang-tools-extra/pseudo/lib/grammar/GrammarBNF.cpp @@ -106,17 +106,6 @@ class GrammarBuilder { assert(T->Rules.size() < (1 << RuleBits) && "Too many rules to fit in RuleID bits!"); - // Wherever RHS contains { foo }, mark foo for brace-recovery. - // FIXME: this should be grammar annotations instead. - for (auto &Rule : T->Rules) { - for (unsigned I = 2; I < Rule.Size; ++I) - if (Rule.Sequence[I] == tokenSymbol(tok::r_brace) && - Rule.Sequence[I - 2] == tokenSymbol(tok::l_brace) && - !isToken(Rule.Sequence[I - 1])) { - Rule.Recovery = RecoveryStrategy::Braces; - Rule.RecoveryIndex = I - 1; - } - } const auto &SymbolOrder = getTopologicalOrder(T.get()); llvm::stable_sort( T->Rules, [&SymbolOrder](const Rule &Left, const Rule &Right) { diff --git a/clang-tools-extra/pseudo/lib/grammar/LRGraph.cpp b/clang-tools-extra/pseudo/lib/grammar/LRGraph.cpp index 290a4f32f576..5312a675cdb8 100644 --- a/clang-tools-extra/pseudo/lib/grammar/LRGraph.cpp +++ b/clang-tools-extra/pseudo/lib/grammar/LRGraph.cpp @@ -120,20 +120,6 @@ nextAvailableKernelItems(const State &S, const Grammar &G) { return Results; } -std::vector<std::pair<RecoveryStrategy, SymbolID>> -availableRecovery(const State &S, const Grammar &G) { - std::vector<std::pair<RecoveryStrategy, SymbolID>> Result; - for (const Item &I : S.Items) { - const auto &Rule = G.lookupRule(I.rule()); - if (I.dot() != Rule.RecoveryIndex) - continue; - Result.push_back({Rule.Recovery, Rule.seq()[Rule.RecoveryIndex]}); - } - llvm::sort(Result); - Result.erase(std::unique(Result.begin(), Result.end()), Result.end()); - return Result; -} - } // namespace std::string Item::dump(const Grammar &G) const { @@ -144,10 +130,9 @@ std::string Item::dump(const Grammar &G) const { Results.push_back(G.symbolName(SID)); return Results; }; - return llvm::formatv("{0} := {1} • {2}{3}", G.symbolName(Rule.Target), + return llvm::formatv("{0} := {1} • {2}", G.symbolName(Rule.Target), llvm::join(ToNames(Rule.seq().take_front(DotPos)), " "), - llvm::join(ToNames(Rule.seq().drop_front(DotPos)), " "), - Rule.RecoveryIndex == DotPos ? " [recovery]" : "") + llvm::join(ToNames(Rule.seq().drop_front(DotPos)), " ")) .str(); } @@ -196,11 +181,6 @@ LRGraph LRGraph::buildLR0(const Grammar &G) { Edges.push_back({Src, Dst, Label}); } - void insertRecovery(StateID Src, RecoveryStrategy Strategy, - SymbolID Result) { - Recoveries.push_back({Src, Strategy, Result}); - } - // Returns a state with the given id. const State &find(StateID ID) const { assert(ID < States.size()); @@ -214,10 +194,9 @@ LRGraph LRGraph::buildLR0(const Grammar &G) { LRGraph build() && { States.shrink_to_fit(); Edges.shrink_to_fit(); - Recoveries.shrink_to_fit(); llvm::sort(StartStates); StartStates.shrink_to_fit(); - return LRGraph(std::move(States), std::move(Edges), std::move(Recoveries), + return LRGraph(std::move(States), std::move(Edges), std::move(StartStates)); } @@ -226,7 +205,6 @@ LRGraph LRGraph::buildLR0(const Grammar &G) { llvm::DenseMap<ItemSet, /*index of States*/ size_t> StatesIndex; std::vector<State> States; std::vector<Edge> Edges; - std::vector<Recovery> Recoveries; const Grammar &G; std::vector<std::pair<SymbolID, StateID>> StartStates; } Builder(G); @@ -247,16 +225,15 @@ LRGraph LRGraph::buildLR0(const Grammar &G) { } while (!PendingStates.empty()) { - auto StateID = PendingStates.back(); + auto CurrentStateID = PendingStates.back(); PendingStates.pop_back(); - for (auto Next : nextAvailableKernelItems(Builder.find(StateID), G)) { + for (auto Next : + nextAvailableKernelItems(Builder.find(CurrentStateID), G)) { auto Insert = Builder.insert(Next.second); if (Insert.second) // new state, insert to the pending queue. PendingStates.push_back(Insert.first); - Builder.insertEdge(StateID, Insert.first, Next.first); + Builder.insertEdge(CurrentStateID, Insert.first, Next.first); } - for (auto Recovery : availableRecovery(Builder.find(StateID), G)) - Builder.insertRecovery(StateID, Recovery.first, Recovery.second); } return std::move(Builder).build(); } diff --git a/clang-tools-extra/pseudo/lib/grammar/LRTableBuild.cpp b/clang-tools-extra/pseudo/lib/grammar/LRTableBuild.cpp index 3d36042044ad..59ea4ce5e327 100644 --- a/clang-tools-extra/pseudo/lib/grammar/LRTableBuild.cpp +++ b/clang-tools-extra/pseudo/lib/grammar/LRTableBuild.cpp @@ -11,7 +11,6 @@ #include "clang-pseudo/grammar/LRTable.h" #include "clang/Basic/TokenKinds.h" #include "llvm/ADT/SmallSet.h" -#include "llvm/Support/raw_ostream.h" #include <cstdint> namespace llvm { @@ -45,7 +44,6 @@ struct LRTable::Builder { llvm::DenseSet<Entry> Entries; llvm::DenseMap<StateID, llvm::SmallSet<RuleID, 4>> Reduces; std::vector<llvm::DenseSet<SymbolID>> FollowSets; - std::vector<LRGraph::Recovery> Recoveries; LRTable build(unsigned NumStates) && { // E.g. given the following parsing table with 3 states and 3 terminals: @@ -90,26 +88,6 @@ struct LRTable::Builder { } Table.StartStates = std::move(StartStates); - // Error recovery entries: sort (no dups already), and build offset lookup. - llvm::sort(Recoveries, - [&](const LRGraph::Recovery &L, const LRGraph::Recovery &R) { - return std::tie(L.Src, L.Result, L.Strategy) < - std::tie(R.Src, R.Result, R.Strategy); - }); - Table.Recoveries.reserve(Recoveries.size()); - for (const auto &R : Recoveries) - Table.Recoveries.push_back({R.Strategy, R.Result}); - Table.RecoveryOffset = std::vector<uint32_t>(NumStates + 1, 0); - SortedIndex = 0; - for (StateID State = 0; State < NumStates; ++State) { - Table.RecoveryOffset[State] = SortedIndex; - while (SortedIndex < Recoveries.size() && - Recoveries[SortedIndex].Src == State) - SortedIndex++; - } - Table.RecoveryOffset[NumStates] = SortedIndex; - assert(SortedIndex == Recoveries.size()); - // Compile the follow sets into a bitmap. Table.FollowSets.resize(tok::NUM_TOKENS * FollowSets.size()); for (SymbolID NT = 0; NT < FollowSets.size(); ++NT) @@ -136,8 +114,7 @@ struct LRTable::Builder { }; LRTable LRTable::buildForTests(const Grammar &G, llvm::ArrayRef<Entry> Entries, - llvm::ArrayRef<ReduceEntry> Reduces, - llvm::ArrayRef<RecoveryEntry> Recoveries) { + llvm::ArrayRef<ReduceEntry> Reduces) { StateID MaxState = 0; for (const auto &Entry : Entries) { MaxState = std::max(MaxState, Entry.State); @@ -151,8 +128,6 @@ LRTable LRTable::buildForTests(const Grammar &G, llvm::ArrayRef<Entry> Entries, for (const ReduceEntry &E : Reduces) Build.Reduces[E.State].insert(E.Rule); Build.FollowSets = followSets(G); - for (const auto &R : Recoveries) - Build.Recoveries.push_back({R.State, R.Strategy, R.Result}); return std::move(Build).build(/*NumStates=*/MaxState + 1); } @@ -160,7 +135,6 @@ LRTable LRTable::buildSLR(const Grammar &G) { auto Graph = LRGraph::buildLR0(G); Builder Build; Build.StartStates = Graph.startStates(); - Build.Recoveries = Graph.recoveries(); for (const auto &T : Graph.edges()) { Action Act = isToken(T.Label) ? Action::shift(T.Dst) : Action::goTo(T.Dst); Build.Entries.insert({T.Src, T.Label, Act}); diff --git a/clang-tools-extra/pseudo/test/cxx/empty-member-spec.cpp b/clang-tools-extra/pseudo/test/cxx/empty-member-spec.cpp index bcfdff192b99..6d7a6823d0bf 100644 --- a/clang-tools-extra/pseudo/test/cxx/empty-member-spec.cpp +++ b/clang-tools-extra/pseudo/test/cxx/empty-member-spec.cpp @@ -2,7 +2,7 @@ class Foo { public: }; -// CHECK: decl-specifier-seq~class-specifier := class-head { member-specification [recover=1] } +// CHECK: decl-specifier-seq~class-specifier := class-head { member-specification } // CHECK-NEXT: ├─class-head := class-key class-head-name // CHECK-NEXT: │ ├─class-key~CLASS := tok[0] // CHECK-NEXT: │ └─class-head-name~IDENTIFIER := tok[1] diff --git a/clang-tools-extra/pseudo/test/cxx/recovery-init-list.cpp b/clang-tools-extra/pseudo/test/cxx/recovery-init-list.cpp deleted file mode 100644 index a8bbe88dadd8..000000000000 --- a/clang-tools-extra/pseudo/test/cxx/recovery-init-list.cpp +++ /dev/null @@ -1,13 +0,0 @@ -// RUN: clang-pseudo -grammar=%cxx-bnf-file -source=%s --print-forest | FileCheck %s -auto x = { complete garbage }; -// CHECK: translation-unit~simple-declaration -// CHECK-NEXT: ├─decl-specifier-seq~AUTO := tok[0] -// CHECK-NEXT: ├─init-declarator-list~init-declarator -// CHECK-NEXT: │ ├─declarator~IDENTIFIER := tok[1] -// CHECK-NEXT: │ └─initializer~brace-or-equal-initializer -// CHECK-NEXT: │ ├─= := tok[2] -// CHECK-NEXT: │ └─initializer-clause~braced-init-list -// CHECK-NEXT: │ ├─{ := tok[3] -// CHECK-NEXT: │ ├─initializer-list := <opaque> -// CHECK-NEXT: │ └─} := tok[6] -// CHECK-NEXT: └─; := tok[7] diff --git a/clang-tools-extra/pseudo/unittests/GLRTest.cpp b/clang-tools-extra/pseudo/unittests/GLRTest.cpp index a37fda3fe2f5..846f2dbf1677 100644 --- a/clang-tools-extra/pseudo/unittests/GLRTest.cpp +++ b/clang-tools-extra/pseudo/unittests/GLRTest.cpp @@ -7,7 +7,6 @@ //===----------------------------------------------------------------------===// #include "clang-pseudo/GLR.h" -#include "clang-pseudo/Bracket.h" #include "clang-pseudo/Token.h" #include "clang-pseudo/grammar/Grammar.h" #include "clang/Basic/LangOptions.h" @@ -33,13 +32,11 @@ namespace { using Action = LRTable::Action; using testing::AllOf; using testing::ElementsAre; -using testing::IsEmpty; using testing::UnorderedElementsAre; MATCHER_P(state, StateID, "") { return arg->State == StateID; } MATCHER_P(parsedSymbol, FNode, "") { return arg->Payload == FNode; } MATCHER_P(parsedSymbolID, SID, "") { return arg->Payload->symbol() == SID; } -MATCHER_P(start, Start, "") { return arg->Payload->startTokenIndex() == Start; } testing::Matcher<const GSS::Node *> parents(llvm::ArrayRef<const GSS::Node *> Parents) { @@ -241,9 +238,9 @@ TEST_F(GLRTest, ReduceJoiningWithMultipleBases) { /*State=*/1, /*ForestNode=*/CVQualifierNode, /*Parents=*/{GSSNode0}); const auto *GSSNode2 = GSStack.addNode( /*State=*/2, /*ForestNode=*/CVQualifierNode, /*Parents=*/{GSSNode0}); - const auto *GSSNode3 = GSStack.addNode( - /*State=*/3, /*ForestNode=*/ClassNameNode, - /*Parents=*/{GSSNode1}); + const auto *GSSNode3 = + GSStack.addNode(/*State=*/3, /*ForestNode=*/ClassNameNode, + /*Parents=*/{GSSNode1}); const auto *GSSNode4 = GSStack.addNode(/*State=*/4, /*ForestNode=*/EnumNameNode, /*Parents=*/{GSSNode2}); @@ -366,124 +363,6 @@ TEST_F(GLRTest, ReduceLookahead) { EXPECT_THAT(Heads, ElementsAre(GSSNode1)); } -TEST_F(GLRTest, Recover) { - // Recovery while parsing "word" inside braces. - // Before: - // 0--1({)--2(?) - // After recovering a `word` at state 1: - // 0--3(word) // 3 is goto(1, word) - buildGrammar({"word"}, {}); - LRTable Table = LRTable::buildForTests( - G, {{/*State=*/1, id("word"), Action::goTo(3)}}, /*Reduce=*/{}, - /*Recovery=*/{{/*State=*/1, RecoveryStrategy::Braces, id("word")}}); - - auto *LBrace = &Arena.createTerminal(tok::l_brace, 0); - auto *Question1 = &Arena.createTerminal(tok::question, 1); - const auto *Root = GSStack.addNode(0, nullptr, {}); - const auto *OpenedBraces = GSStack.addNode(1, LBrace, {Root}); - const auto *AfterQuestion1 = GSStack.addNode(2, Question1, {OpenedBraces}); - - // Need a token stream with paired braces so the strategy works. - clang::LangOptions LOptions; - TokenStream Tokens = cook(lex("{ ? ? ? }", LOptions), LOptions); - pairBrackets(Tokens); - std::vector<const GSS::Node *> NewHeads; - - unsigned TokenIndex = 2; - glrRecover({AfterQuestion1}, TokenIndex, Tokens, {G, Table, Arena, GSStack}, - NewHeads); - EXPECT_EQ(TokenIndex, 4u) << "should skip ahead to matching brace"; - EXPECT_THAT(NewHeads, ElementsAre( - AllOf(state(3), parsedSymbolID(id("word")), - parents({OpenedBraces}), start(1u)))); - EXPECT_EQ(NewHeads.front()->Payload->kind(), ForestNode::Opaque); - - // Test recovery failure: omit closing brace so strategy fails - TokenStream NoRBrace = cook(lex("{ ? ? ? ?", LOptions), LOptions); - pairBrackets(NoRBrace); - NewHeads.clear(); - TokenIndex = 2; - glrRecover({AfterQuestion1}, TokenIndex, NoRBrace, - {G, Table, Arena, GSStack}, NewHeads); - EXPECT_EQ(TokenIndex, 3u) << "should advance by 1 by default"; - EXPECT_THAT(NewHeads, IsEmpty()); -} - -TEST_F(GLRTest, RecoverRightmost) { - // In a nested block structure, we recover at the innermost possible block. - // Before: - // 0--1({)--1({)--1({) - // After recovering a `block` at inside the second braces: - // 0--1({)--2(body) // 2 is goto(1, body) - buildGrammar({"body"}, {}); - LRTable Table = LRTable::buildForTests( - G, {{/*State=*/1, id("body"), Action::goTo(2)}}, /*Reduce=*/{}, - /*Recovery=*/{{/*State=*/1, RecoveryStrategy::Braces, id("body")}}); - - clang::LangOptions LOptions; - // Innermost brace is unmatched, to test fallback to next brace. - TokenStream Tokens = cook(lex("{ { { ? ? } }", LOptions), LOptions); - Tokens.tokens()[0].Pair = 5; - Tokens.tokens()[1].Pair = 4; - Tokens.tokens()[4].Pair = 1; - Tokens.tokens()[5].Pair = 0; - - auto *Brace1 = &Arena.createTerminal(tok::l_brace, 0); - auto *Brace2 = &Arena.createTerminal(tok::l_brace, 1); - auto *Brace3 = &Arena.createTerminal(tok::l_brace, 2); - const auto *Root = GSStack.addNode(0, nullptr, {}); - const auto *In1 = GSStack.addNode(1, Brace1, {Root}); - const auto *In2 = GSStack.addNode(1, Brace2, {In1}); - const auto *In3 = GSStack.addNode(1, Brace3, {In2}); - - unsigned TokenIndex = 3; - std::vector<const GSS::Node *> NewHeads; - glrRecover({In3}, TokenIndex, Tokens, {G, Table, Arena, GSStack}, NewHeads); - EXPECT_EQ(TokenIndex, 5u); - EXPECT_THAT(NewHeads, ElementsAre(AllOf(state(2), parsedSymbolID(id("body")), - parents({In2}), start(2u)))); -} - -TEST_F(GLRTest, RecoverAlternatives) { - // Recovery inside braces with multiple equally good options - // Before: - // 0--1({) - // After recovering either `word` or `number` inside the braces: - // 0--1({)--2(word) // 2 is goto(1, word) - // └--3(number) // 3 is goto(1, number) - buildGrammar({"number", "word"}, {}); - LRTable Table = LRTable::buildForTests( - G, - { - {/*State=*/1, id("number"), Action::goTo(2)}, - {/*State=*/1, id("word"), Action::goTo(3)}, - }, - /*Reduce=*/{}, - /*Recovery=*/ - { - {/*State=*/1, RecoveryStrategy::Braces, id("number")}, - {/*State=*/1, RecoveryStrategy::Braces, id("word")}, - }); - auto *LBrace = &Arena.createTerminal(tok::l_brace, 0); - const auto *Root = GSStack.addNode(0, nullptr, {}); - const auto *OpenedBraces = GSStack.addNode(1, LBrace, {Root}); - - clang::LangOptions LOptions; - TokenStream Tokens = cook(lex("{ ? }", LOptions), LOptions); - pairBrackets(Tokens); - std::vector<const GSS::Node *> NewHeads; - unsigned TokenIndex = 1; - - glrRecover({OpenedBraces}, TokenIndex, Tokens, {G, Table, Arena, GSStack}, - NewHeads); - EXPECT_EQ(TokenIndex, 2u); - EXPECT_THAT(NewHeads, - UnorderedElementsAre(AllOf(state(2), parsedSymbolID(id("number")), - parents({OpenedBraces}), start(1u)), - AllOf(state(3), parsedSymbolID(id("word")), - parents({OpenedBraces}), start(1u)))); -} - TEST_F(GLRTest, PerfectForestNodeSharing) { // Run the GLR on a simple grammar and test that we build exactly one forest // node per (SymbolID, token range). @@ -552,40 +431,6 @@ TEST_F(GLRTest, GLRReduceOrder) { "[ 0, end) └─IDENTIFIER := tok[0]\n"); } -TEST_F(GLRTest, RecoveryEndToEnd) { - // Simple example of brace-based recovery showing: - // - recovered region includes tokens both ahead of and behind the cursor - // - multiple possible recovery rules - // - recovery from outer scopes is rejected - build(R"bnf( - _ := block - - block := { block } - block := { numbers } - numbers := NUMERIC_CONSTANT NUMERIC_CONSTANT - )bnf"); - auto LRTable = LRTable::buildSLR(G); - clang::LangOptions LOptions; - TokenStream Tokens = cook(lex("{ { 42 ? } }", LOptions), LOptions); - pairBrackets(Tokens); - - const ForestNode &Parsed = - glrParse(Tokens, {G, LRTable, Arena, GSStack}, id("block")); - EXPECT_EQ(Parsed.dumpRecursive(G), - "[ 0, end) block := { block [recover=1] }\n" - "[ 0, 1) ├─{ := tok[0]\n" - "[ 1, 5) ├─block := <ambiguous>\n" - "[ 1, 5) │ ├─block := { block [recover=1] }\n" - "[ 1, 2) │ │ ├─{ := tok[1]\n" - "[ 2, 4) │ │ ├─block := <opaque>\n" - "[ 4, 5) │ │ └─} := tok[4]\n" - "[ 1, 5) │ └─block := { numbers [recover=1] }\n" - "[ 1, 2) │ ├─{ := tok[1]\n" - "[ 2, 4) │ ├─numbers := <opaque>\n" - "[ 4, 5) │ └─} := tok[4]\n" - "[ 5, end) └─} := tok[5]\n"); -} - TEST_F(GLRTest, NoExplicitAccept) { build(R"bnf( _ := test _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits