[PATCH] D126536: [pseudo] Add grammar annotations support.

Haojian Wu via Phabricator via cfe-commits Fri, 27 May 2022 06:28:16 -0700

hokein created this revision.
hokein added a reviewer: sammccall.
Herald added subscribers: mgrang, mgorny.
Herald added a project: All.
hokein requested review of this revision.
Herald added a subscriber: alextsao1999.
Herald added a project: clang-tools-extra.


- Add annotation handling ([key=value]) in the BNF grammar parser;
- Define and setup the API in the grammar for attributes;
- Implement a builtin guard for two simple c++ contexual-override/final use 
cases;


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D126536

Files:
  clang-tools-extra/pseudo/gen/Main.cpp
  clang-tools-extra/pseudo/include/clang-pseudo/GLR.h
  clang-tools-extra/pseudo/include/clang-pseudo/Grammar.h
  clang-tools-extra/pseudo/include/clang-pseudo/cxx/CXX.h
  clang-tools-extra/pseudo/lib/GLR.cpp
  clang-tools-extra/pseudo/lib/cxx.bnf
  clang-tools-extra/pseudo/lib/cxx/CMakeLists.txt
  clang-tools-extra/pseudo/lib/cxx/CXX.cpp
  clang-tools-extra/pseudo/lib/grammar/Grammar.cpp
  clang-tools-extra/pseudo/lib/grammar/GrammarBNF.cpp
  clang-tools-extra/pseudo/unittests/GLRTest.cpp
  clang-tools-extra/pseudo/unittests/GrammarTest.cpp

Index: clang-tools-extra/pseudo/unittests/GrammarTest.cpp
===================================================================
--- clang-tools-extra/pseudo/unittests/GrammarTest.cpp
+++ clang-tools-extra/pseudo/unittests/GrammarTest.cpp
@@ -99,6 +99,14 @@
   EXPECT_LT(ruleFor("x"), ruleFor("_"));
 }
 
+TEST_F(GrammarTest, Annotation) {
+  build(R"bnf(
+    _ := IDENTIFIER [guard=override]
+  )bnf");
+  ASSERT_TRUE(Diags.empty());
+  EXPECT_NE(G->lookupRule(ruleFor("_")).Guard, NoneAttribute);
+}
+
 TEST_F(GrammarTest, Diagnostics) {
   build(R"cpp(
     _ := ,_opt
@@ -110,17 +118,25 @@
     # cycle
     a := b
     b := a
+
+    _ := IDENTIFIER [guard=override;unknown=value]
+    _ := IDENTIFIER [guard=override] ;
+    _ := [guard=override] ;
   )cpp");
 
   EXPECT_EQ(G->underscore(), id("_"));
-  EXPECT_THAT(Diags, UnorderedElementsAre(
-                         "Rule '_ := ,_opt' has a nullable RHS",
-                         "Rule 'null := ' has a nullable RHS",
-                         "No rules for nonterminal: undefined-sym",
-                         "Failed to parse 'invalid': no separator :=",
-                         "Token-like name IDENFIFIE is used as a nonterminal",
-                         "No rules for nonterminal: IDENFIFIE",
-                         "The grammar contains a cycle involving symbol a"));
+  EXPECT_THAT(
+      Diags, UnorderedElementsAre(
+                 "Rule '_ := ,_opt' has a nullable RHS",
+                 "Rule 'null := ' has a nullable RHS",
+                 "No rules for nonterminal: undefined-sym",
+                 "Failed to parse 'invalid': no separator :=",
+                 "Token-like name IDENFIFIE is used as a nonterminal",
+                 "No rules for nonterminal: IDENFIFIE",
+                 "The grammar contains a cycle involving symbol a",
+                 "Unknown attribute key 'unknown'",
+                 "Attribute 'guard' can only be specified at the end of a rule",
+                 "Attributes are not allowed at the beginning of the rule"));
 }
 
 TEST_F(GrammarTest, FirstAndFollowSets) {
Index: clang-tools-extra/pseudo/unittests/GLRTest.cpp
===================================================================
--- clang-tools-extra/pseudo/unittests/GLRTest.cpp
+++ clang-tools-extra/pseudo/unittests/GLRTest.cpp
@@ -158,7 +158,7 @@
   std::vector<ParseStep> PendingReduce = {
       {GSSNode1, Action::reduce(ruleFor("class-name"))},
       {GSSNode1, Action::reduce(ruleFor("enum-name"))}};
-  glrReduce(PendingReduce, {*G, Table, Arena, GSStack},
+  glrReduce(PendingReduce, TokenStream(), {*G, Table, Arena, GSStack},
             captureNewHeads());
   EXPECT_THAT(NewHeadResults,
               testing::UnorderedElementsAre(
@@ -195,7 +195,7 @@
        {/*State=*/3, id("ptr-operator"), Action::goTo(/*NextState=*/6)}});
   std::vector<ParseStep> PendingReduce = {
       {GSSNode4, Action::reduce(ruleFor("ptr-operator"))}};
-  glrReduce(PendingReduce, {*G, Table, Arena, GSStack},
+  glrReduce(PendingReduce, TokenStream(), {*G, Table, Arena, GSStack},
             captureNewHeads());
 
   EXPECT_THAT(NewHeadResults,
@@ -250,7 +250,7 @@
       {
           GSSNode4, Action::reduce(/*RuleID=*/1) // type-name := enum-name
       }};
-  glrReduce(PendingReduce, {*G, Table, Arena, GSStack},
+  glrReduce(PendingReduce, TokenStream(), {*G, Table, Arena, GSStack},
             captureNewHeads());
 
   // Verify that the stack heads are joint at state 5 after reduces.
@@ -306,7 +306,7 @@
       {
           GSSNode4, Action::reduce(/*RuleID=*/1) // pointer := enum-name *
       }};
-  glrReduce(PendingReduce, {*G, Table, Arena, GSStack},
+  glrReduce(PendingReduce, TokenStream(), {*G, Table, Arena, GSStack},
             captureNewHeads());
 
   EXPECT_THAT(NewHeadResults, testing::UnorderedElementsAre(
Index: clang-tools-extra/pseudo/lib/grammar/GrammarBNF.cpp
===================================================================
--- clang-tools-extra/pseudo/lib/grammar/GrammarBNF.cpp
+++ clang-tools-extra/pseudo/lib/grammar/GrammarBNF.cpp
@@ -47,16 +47,28 @@
     // Assemble the name->ID and ID->nonterminal name maps.
     llvm::DenseSet<llvm::StringRef> UniqueNonterminals;
     llvm::DenseMap<llvm::StringRef, SymbolID> SymbolIds;
+
+    llvm::DenseSet<llvm::StringRef> UniqueAttrKeys;
+    llvm::DenseSet<llvm::StringRef> UniqueAttrValues = {""};
+    llvm::DenseMap<llvm::StringRef, AttributeID> AttrValueIDs;
+
     for (uint16_t I = 0; I < NumTerminals; ++I)
       SymbolIds.try_emplace(T->Terminals[I], tokenSymbol(tok::TokenKind(I)));
     auto Consider = [&](llvm::StringRef Name) {
       if (!SymbolIds.count(Name))
         UniqueNonterminals.insert(Name);
     };
+    auto ConsiderAttrValue = [&](llvm::StringRef AttrValueName) {
+      if (!AttrValueIDs.count(AttrValueName))
+        UniqueAttrValues.insert(AttrValueName);
+    };
     for (const auto &Spec : Specs) {
       Consider(Spec.Target);
-      for (const RuleSpec::Element &Elt : Spec.Sequence)
+      for (const RuleSpec::Element &Elt : Spec.Sequence) {
         Consider(Elt.Symbol);
+        for (const auto& Attr : Elt.Attributes)
+          ConsiderAttrValue(Attr.second);
+      }
     }
     llvm::for_each(UniqueNonterminals, [&T](llvm::StringRef Name) {
       T->Nonterminals.emplace_back();
@@ -68,6 +80,13 @@
                                    const GrammarTable::Nonterminal &R) {
       return L.Name < R.Name;
     });
+
+    llvm::for_each(UniqueAttrValues, [&T](llvm::StringRef Name) {
+      T->Attributes.emplace_back();
+      T->Attributes.back() = Name.str();
+    });
+    llvm::sort(T->Attributes);
+
     // Build name -> ID maps for nonterminals.
     for (SymbolID SID = 0; SID < T->Nonterminals.size(); ++SID)
       SymbolIds.try_emplace(T->Nonterminals[SID].Name, SID);
@@ -75,17 +94,42 @@
     // Convert the rules.
     T->Rules.reserve(Specs.size());
     std::vector<SymbolID> Symbols;
+    std::vector<AttributeID> Attributes;
     auto Lookup = [SymbolIds](llvm::StringRef Name) {
       auto It = SymbolIds.find(Name);
       assert(It != SymbolIds.end() && "Didn't find the symbol in SymbolIds!");
       return It->second;
     };
+    auto LookupAttr = [](llvm::StringRef Name,
+                         llvm::ArrayRef<std::string> SortedArray) {
+      assert(llvm::is_sorted(SortedArray) && "Array is not sorted!");
+      const auto *It = llvm::partition_point(
+          SortedArray, [&](llvm::StringRef X) { return X < Name; });
+      assert(It != SortedArray.end() && *It == Name &&
+             "Didn't find the symbol in AttrValues!");
+      return It - SortedArray.begin();
+    };
     for (const auto &Spec : Specs) {
       assert(Spec.Sequence.size() <= Rule::MaxElements);
       Symbols.clear();
-      for (const RuleSpec::Element &Elt : Spec.Sequence)
+      AttributeID Guard = NoneAttribute;
+      for (const RuleSpec::Element &Elt : Spec.Sequence) {
         Symbols.push_back(Lookup(Elt.Symbol));
-      T->Rules.push_back(Rule(Lookup(Spec.Target), Symbols));
+
+        for (const auto& AttrKeyAndValue : Elt.Attributes) {
+          switch(AttrKeyAndValue.first) {
+          case AttributeKey::Guard:
+            if (&Elt != &Spec.Sequence.back()) {
+              Diagnostics.push_back("Attribute 'guard' can only be specified "
+                                    "at the end of a rule");
+              continue;
+            }
+            Guard = LookupAttr(AttrKeyAndValue.second, T->Attributes);
+            break;
+          }
+        }
+      }
+      T->Rules.push_back(Rule(Lookup(Spec.Target), Symbols, Guard));
     }
     assert(T->Rules.size() < (1 << RuleBits) &&
            "Too many rules to fit in RuleID bits!");
@@ -164,6 +208,9 @@
     llvm::StringRef Target;
     struct Element {
       llvm::StringRef Symbol; // Name of the symbol
+      // Attributes that are associated to the sequence symbol or rule.
+      std::vector<std::pair<AttributeKey, llvm::StringRef/*Value*/>>
+          Attributes;
     };
     std::vector<Element> Sequence;
 
@@ -204,11 +251,46 @@
       Chunk = Chunk.trim();
       if (Chunk.empty())
         continue; // skip empty
+      if (Chunk.startswith("[") && Chunk.endswith("]")) {
+        if (Out.Sequence.empty()) {
+          Diagnostics.push_back(
+              "Attributes are not allowed at the beginning of the rule");
+          continue;
+        }
+        parseAttribute(Chunk, Out.Sequence.back().Attributes);
+        continue;
+      }
 
       Out.Sequence.push_back({Chunk});
     }
     return true;
-  };
+  }
+
+  bool parseAttribute(
+      llvm::StringRef Content,
+      std::vector<std::pair<AttributeKey, llvm::StringRef>> &Out) {
+    assert(Content.startswith("[") && Content.endswith("]"));
+    for (llvm::StringRef Attr :
+         llvm::split(Content.drop_front().drop_back(), ';')) {
+      auto KeyAndValue = Attr.split('=');
+      if (KeyAndValue.first == Attr) { // no separator in Line
+        Diagnostics.push_back(
+            llvm::formatv("Failed to parse attribute '{0}': no separator =",
+                          Attr)
+                .str());
+        return false;
+      }
+      if (KeyAndValue.first == "guard") {
+        Out.push_back({AttributeKey::Guard, KeyAndValue.second.trim()});
+        continue;
+      }
+
+      Diagnostics.push_back(
+          llvm::formatv("Unknown attribute key '{0}'", KeyAndValue.first)
+              .str());
+    }
+    return true;
+  }
 
   // Inlines all _opt symbols.
   // For example, a rule E := id +_opt id, after elimination, we have two
Index: clang-tools-extra/pseudo/lib/grammar/Grammar.cpp
===================================================================
--- clang-tools-extra/pseudo/lib/grammar/Grammar.cpp
+++ clang-tools-extra/pseudo/lib/grammar/Grammar.cpp
@@ -17,8 +17,10 @@
 namespace clang {
 namespace pseudo {
 
-Rule::Rule(SymbolID Target, llvm::ArrayRef<SymbolID> Sequence)
-    : Target(Target), Size(static_cast<uint8_t>(Sequence.size())) {
+Rule::Rule(SymbolID Target, llvm::ArrayRef<SymbolID> Sequence,
+           AttributeID Guard)
+    : Target(Target), Size(static_cast<uint8_t>(Sequence.size())),
+      Guard(Guard) {
   assert(Sequence.size() <= Rule::MaxElements);
   llvm::copy(Sequence, this->Sequence);
 }
@@ -61,6 +63,8 @@
   OS << symbolName(R.Target) << " :=";
   for (SymbolID SID : R.seq())
     OS << " " << symbolName(SID);
+  if (R.Guard)
+    OS << " [guard=" << T->Attributes[R.Guard] << "]";
   return Result;
 }
 
Index: clang-tools-extra/pseudo/lib/cxx/CXX.cpp
===================================================================
--- clang-tools-extra/pseudo/lib/cxx/CXX.cpp
+++ clang-tools-extra/pseudo/lib/cxx/CXX.cpp
@@ -7,7 +7,10 @@
 //===----------------------------------------------------------------------===//
 
 #include "clang-pseudo/cxx/CXX.h"
+#include "clang-pseudo/Forest.h"
+#include "clang-pseudo/Grammar.h"
 #include "clang-pseudo/LRTable.h"
+#include "clang/Basic/TokenKinds.h"
 
 namespace clang {
 namespace pseudo {
@@ -29,6 +32,23 @@
   return *Table;
 }
 
+bool guard(RuleID RID, llvm::ArrayRef<const ForestNode *> RHS,
+           const TokenStream &Tokens, const Grammar &G) {
+  auto &Rule = G.lookupRule(RID);
+  assert(Rule.Guard != NoneAttribute);
+  if (Rule.Guard == static_cast<AttributeID>(Atrribute::Override)) {
+    assert(RHS.size() == 1 &&
+           RHS.front()->symbol() == tokenSymbol(clang::tok::identifier));
+    return Tokens.tokens()[RHS.front()->startTokenIndex()].text() == "override";
+  }
+  if (Rule.Guard == static_cast<AttributeID>(Atrribute::Final)) {
+    assert(RHS.size() == 1 &&
+           RHS.front()->symbol() == tokenSymbol(clang::tok::identifier));
+    return Tokens.tokens()[RHS.front()->startTokenIndex()].text() == "final";
+  }
+  return true;
+}
+
 } // namespace cxx
 } // namespace pseudo
 } // namespace clang
Index: clang-tools-extra/pseudo/lib/cxx/CMakeLists.txt
===================================================================
--- clang-tools-extra/pseudo/lib/cxx/CMakeLists.txt
+++ clang-tools-extra/pseudo/lib/cxx/CMakeLists.txt
@@ -6,4 +6,5 @@
 
   LINK_LIBS
   clangPseudoGrammar
+  clangPseudo
   )
Index: clang-tools-extra/pseudo/lib/cxx.bnf
===================================================================
--- clang-tools-extra/pseudo/lib/cxx.bnf
+++ clang-tools-extra/pseudo/lib/cxx.bnf
@@ -739,8 +739,8 @@
 
 #! Contextual keywords -- clang lexer always lexes them as identifier tokens.
 #! Placeholders for literal text in the grammar that lex as other things.
-contextual-override := IDENTIFIER
-contextual-final := IDENTIFIER
+contextual-override := IDENTIFIER [guard=Override]
+contextual-final := IDENTIFIER [guard=Final]
 contextual-zero := NUMERIC_CONSTANT
 module-keyword := IDENTIFIER
 import-keyword := IDENTIFIER
Index: clang-tools-extra/pseudo/lib/GLR.cpp
===================================================================
--- clang-tools-extra/pseudo/lib/GLR.cpp
+++ clang-tools-extra/pseudo/lib/GLR.cpp
@@ -72,11 +72,10 @@
     for (const auto *Head : NewHeads)
       AddSteps(Head, Terminal.symbol());
     NewHeads.clear();
-    glrReduce(PendingReduce, Params,
-              [&](const GSS::Node * NewHead) {
-                // A reduce will enable more steps.
-                AddSteps(NewHead, Terminal.symbol());
-              });
+    glrReduce(PendingReduce, Tokens, Params, [&](const GSS::Node *NewHead) {
+      // A reduce will enable more steps.
+      AddSteps(NewHead, Terminal.symbol());
+    });
 
     glrShift(PendingShift, Terminal, Params,
              [&](const GSS::Node *NewHead) { NewHeads.push_back(NewHead); });
@@ -84,11 +83,10 @@
   LLVM_DEBUG(llvm::dbgs() << llvm::formatv("Next is eof\n"));
   for (const auto *Heads : NewHeads)
     AddSteps(Heads, tokenSymbol(tok::eof));
-  glrReduce(PendingReduce, Params,
-            [&](const GSS::Node * NewHead) {
-              // A reduce will enable more steps.
-              AddSteps(NewHead, tokenSymbol(tok::eof));
-            });
+  glrReduce(PendingReduce, Tokens, Params, [&](const GSS::Node *NewHead) {
+    // A reduce will enable more steps.
+    AddSteps(NewHead, tokenSymbol(tok::eof));
+  });
 
   if (!PendingAccept.empty()) {
     LLVM_DEBUG({
@@ -211,8 +209,8 @@
 //   After reducing 3 by `pointer := class-name STAR` and
 //                  2 by`enum-name := class-name STAR`:
 //     0--5(pointer)       // 5 is goto(0, pointer)
-void glrReduce(std::vector<ParseStep> &PendingReduce, const ParseParams &Params,
-               NewHeadCallback NewHeadCB) {
+void glrReduce(std::vector<ParseStep> &PendingReduce, const TokenStream &Tokens,
+               const ParseParams &Params, NewHeadCallback NewHeadCB) {
   // There are two interacting complications:
   // 1.  Performing one reduce can unlock new reduces on the newly-created head.
   // 2a. The ambiguous ForestNodes must be complete (have all sequence nodes).
@@ -282,6 +280,10 @@
     TempSequence.resize_for_overwrite(Rule.Size);
     auto DFS = [&](const GSS::Node *N, unsigned I, auto &DFS) {
       if (I == Rule.Size) {
+        if (Rule.Guard && Params.Guard &&
+            !Params.Guard(RID, TempSequence, Tokens, Params.G))
+          return;
+
         F.Start = TempSequence.front()->startTokenIndex();
         Bases.emplace(F, N);
         LLVM_DEBUG(llvm::dbgs() << "    --> base at S" << N->State << "\n");
Index: clang-tools-extra/pseudo/include/clang-pseudo/cxx/CXX.h
===================================================================
--- clang-tools-extra/pseudo/include/clang-pseudo/cxx/CXX.h
+++ clang-tools-extra/pseudo/include/clang-pseudo/cxx/CXX.h
@@ -28,13 +28,25 @@
 namespace clang {
 namespace pseudo {
 class LRTable;
+class ForestNode;
+class TokenStream;
 
 namespace cxx {
 // Symbol represents nonterminal symbols in the C++ grammar.
 // It provides a simple uniform way to access a particular nonterminal.
 enum class Symbol : SymbolID {
 #define NONTERMINAL(X, Y) X = Y,
+#define ATTRIBUTE(X, Y)
 #include "CXXSymbols.inc"
+#undef ATTRIBUTE
+#undef NONTERMINAL
+};
+
+enum class Atrribute : AttributeID {
+#define NONTERMINAL(X, Y)
+#define ATTRIBUTE(X, Y) X = Y,
+#include "CXXSymbols.inc"
+#undef ATTRIBUTE
 #undef NONTERMINAL
 };
 
@@ -43,6 +55,14 @@
 // Returns the corresponding LRTable for the C++ grammar.
 const LRTable &getLRTable();
 
+// Implements a C++ specific "guard" attribute.
+// It determines whether a reduction of a rule is performed in the GLR parser.
+//
+// Returns false if the reduction is not performed (this parsing branch in GLR
+// will die).
+bool guard(RuleID RID, llvm::ArrayRef<const ForestNode *> RHS,
+           const TokenStream &Tokens, const Grammar &G);
+
 } // namespace cxx
 
 } // namespace pseudo
Index: clang-tools-extra/pseudo/include/clang-pseudo/Grammar.h
===================================================================
--- clang-tools-extra/pseudo/include/clang-pseudo/Grammar.h
+++ clang-tools-extra/pseudo/include/clang-pseudo/Grammar.h
@@ -19,6 +19,19 @@
 //  production rules. A rule is of BNF form (AAA := BBB CCC). A symbol is either
 //  nonterminal or terminal, identified by a SymbolID.
 //
+//  Attributes have the syntax form of [guard=value;key=value], they are
+//  associated with a grammar symbol (on the right-hand side of the symbol) or
+//  a grammar rule (at the end of the rule body).
+//
+//  Attributes provide a way to inject custom code into the general GLR
+//  parser. For example,  we have a rule in the C++ grammar:
+//
+//    contextual-override := IDENTIFIER [guard=Override]
+//
+//  This rule is guarded -- the reduction of this rule will be conducted by the
+//  GLR parser only if the IDENTIFIER content is `override` (see guard
+//  implementationin CXX.h).
+//
 //  Notions about the BNF grammar:
 //  - "_" is the start symbol of the augmented grammar;
 //  - single-line comment is supported, starting with a #
@@ -69,6 +82,20 @@
   return TokenFlag | static_cast<SymbolID>(TK);
 }
 
+// An attribute is specified in the grammar like [guard=value].
+// Defines the built-in attribute keys.
+enum class AttributeKey : uint8_t {
+  // A guard controls whether a reduction of a rule will be conducted by the GLR
+  // parser.
+  Guard,
+};
+// An AttributeID uniquely identifies an attribute in a grammar.
+// It is the index into a table of attribute values.
+// NOTE: value among attributes must be unique even within different keys!
+using AttributeID = uint16_t;
+// A sentinel attributeID indicating no attributes, by default.
+static constexpr AttributeID NoneAttribute = 0;
+
 // A RuleID uniquely identifies a production rule in a grammar.
 // It is an index into a table of rules.
 using RuleID = uint16_t;
@@ -79,7 +106,8 @@
 //   expression := a b c
 //   ^Target       ^Sequence
 struct Rule {
-  Rule(SymbolID Target, llvm::ArrayRef<SymbolID> Seq);
+  Rule(SymbolID Target, llvm::ArrayRef<SymbolID> Seq,
+       AttributeID Guard = NoneAttribute);
 
   // We occupy 4 bits for the sequence, in theory, it can be at most 2^4 tokens
   // long, however, we're stricter in order to reduce the size, we limit the max
@@ -96,11 +124,13 @@
   uint8_t Size : SizeBits; // Size of the Sequence
   SymbolID Sequence[MaxElements];
 
+  AttributeID Guard = NoneAttribute;
+
   llvm::ArrayRef<SymbolID> seq() const {
     return llvm::ArrayRef<SymbolID>(Sequence, Size);
   }
   friend bool operator==(const Rule &L, const Rule &R) {
-    return L.Target == R.Target && L.seq() == R.seq();
+    return L.Target == R.Target && L.seq() == R.seq() && L.Guard == R.Guard;
   }
 };
 
@@ -186,6 +216,9 @@
   // A table of nonterminals, sorted by name.
   // SymbolID is the index of the table.
   std::vector<Nonterminal> Nonterminals;
+  // A table of attribute values, sorted by name.
+  // AttributeID is the index of the table.
+  std::vector<std::string> Attributes;
 };
 
 } // namespace pseudo
Index: clang-tools-extra/pseudo/include/clang-pseudo/GLR.h
===================================================================
--- clang-tools-extra/pseudo/include/clang-pseudo/GLR.h
+++ clang-tools-extra/pseudo/include/clang-pseudo/GLR.h
@@ -110,6 +110,10 @@
 };
 llvm::raw_ostream &operator<<(llvm::raw_ostream &, const GSS::Node &);
 
+using ReduceGuard =
+    std::function<bool(RuleID RID, llvm::ArrayRef<const ForestNode *> RHS,
+                       const TokenStream &Tokens, const Grammar &G)>;
+
 // Parameters for the GLR parsing.
 struct ParseParams {
   // The grammar of the language we're going to parse.
@@ -121,6 +125,8 @@
   // Arena for data structure used by the GLR algorithm.
   ForestArena &Forest;  // Storage for the output forest.
   GSS &GSStack;         // Storage for parsing stacks.
+
+  ReduceGuard Guard; // Guard for reducing.
 };
 // Parses the given token stream as the start symbol with the GLR algorithm,
 // and returns a forest node of the start symbol.
@@ -157,8 +163,8 @@
 // add elements to PendingReduce
 //
 // Exposed for testing only.
-void glrReduce(std::vector<ParseStep> &PendingReduce, const ParseParams &Params,
-               NewHeadCallback NewHeadCB);
+void glrReduce(std::vector<ParseStep> &PendingReduce, const TokenStream &Tokens,
+               const ParseParams &Params, NewHeadCallback NewHeadCB);
 
 } // namespace pseudo
 } // namespace clang
Index: clang-tools-extra/pseudo/gen/Main.cpp
===================================================================
--- clang-tools-extra/pseudo/gen/Main.cpp
+++ clang-tools-extra/pseudo/gen/Main.cpp
@@ -86,6 +86,12 @@
       std::replace(Name.begin(), Name.end(), '-', '_');
       Out.os() << llvm::formatv("NONTERMINAL({0}, {1})\n", Name, ID);
     }
+    for (clang::pseudo::AttributeID AID = 1 /*skip the sentinel 0 value*/;
+         AID < G->table().Attributes.size(); ++AID) {
+      llvm::StringRef Name = G->table().Attributes[AID];
+      assert(!Name.empty());
+      Out.os() << llvm::formatv("ATTRIBUTE({0}, {1})\n", Name, AID);
+    }
     break;
   case EmitGrammarContent:
     for (llvm::StringRef Line : llvm::split(GrammarText, '\n')) {

_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D126536: [pseudo] Add grammar annotations support.

Reply via email to