sammccall created this revision.
sammccall added a reviewer: hokein.
Herald added a project: All.
sammccall requested review of this revision.
Herald added subscribers: cfe-commits, alextsao1999.
Herald added a project: clang-tools-extra.

After this, NUMERIC_CONSTANT and strings should parse only one way.

There are 8 types of literals, and 24 valid (literal, TokenKind) pairs.
This means adding 8 new named guards (or 24, if we want to assert the token).

It seems fairly clear to me at this point that the guard names are unnecessary
indirection: the guards are in fact coupled to the rule signature.

(Also add the zero guard I forgot in the previous patch.)


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D130066

Files:
  clang-tools-extra/pseudo/gen/Main.cpp
  clang-tools-extra/pseudo/include/clang-pseudo/Language.h
  clang-tools-extra/pseudo/include/clang-pseudo/grammar/Grammar.h
  clang-tools-extra/pseudo/lib/GLR.cpp
  clang-tools-extra/pseudo/lib/cxx/CXX.cpp
  clang-tools-extra/pseudo/lib/cxx/cxx.bnf
  clang-tools-extra/pseudo/lib/grammar/Grammar.cpp
  clang-tools-extra/pseudo/lib/grammar/GrammarBNF.cpp
  clang-tools-extra/pseudo/test/cxx/literals.cpp
  clang-tools-extra/pseudo/test/cxx/mixed-designator.cpp
  clang-tools-extra/pseudo/tool/ClangPseudo.cpp
  clang-tools-extra/pseudo/unittests/GLRTest.cpp
  clang-tools-extra/pseudo/unittests/GrammarTest.cpp

Index: clang-tools-extra/pseudo/unittests/GrammarTest.cpp
===================================================================
--- clang-tools-extra/pseudo/unittests/GrammarTest.cpp
+++ clang-tools-extra/pseudo/unittests/GrammarTest.cpp
@@ -102,16 +102,11 @@
 TEST_F(GrammarTest, Annotation) {
   build(R"bnf(
     _ := x
-
-    x := y [guard=value]
-    y := IDENTIFIER [guard=final]
-
+    x := IDENTIFIER [guard]
   )bnf");
-  ASSERT_TRUE(Diags.empty());
-  EXPECT_EQ(G.lookupRule(ruleFor("_")).Guard, 0);
-  EXPECT_GT(G.lookupRule(ruleFor("x")).Guard, 0);
-  EXPECT_GT(G.lookupRule(ruleFor("y")).Guard, 0);
-  EXPECT_NE(G.lookupRule(ruleFor("x")).Guard, G.lookupRule(ruleFor("y")).Guard);
+  ASSERT_THAT(Diags, IsEmpty());
+  EXPECT_FALSE(G.lookupRule(ruleFor("_")).Guarded);
+  EXPECT_TRUE(G.lookupRule(ruleFor("x")).Guarded);
 }
 
 TEST_F(GrammarTest, MangleName) {
Index: clang-tools-extra/pseudo/unittests/GLRTest.cpp
===================================================================
--- clang-tools-extra/pseudo/unittests/GLRTest.cpp
+++ clang-tools-extra/pseudo/unittests/GLRTest.cpp
@@ -631,10 +631,10 @@
   build(R"bnf(
     _ := start
 
-    start := IDENTIFIER [guard=TestOnly]
+    start := IDENTIFIER [guard]
   )bnf");
   TestLang.Guards.try_emplace(
-      extensionID("TestOnly"),
+      ruleFor("start"),
       [&](llvm::ArrayRef<const ForestNode *> RHS, const TokenStream &Tokens) {
         assert(RHS.size() == 1 &&
                RHS.front()->symbol() == tokenSymbol(clang::tok::identifier));
@@ -647,7 +647,7 @@
   const TokenStream &Succeeded = cook(lex(Input, LOptions), LOptions);
   EXPECT_EQ(glrParse({Succeeded, Arena, GSStack}, id("start"), TestLang)
                 .dumpRecursive(TestLang.G),
-            "[  0, end) start := IDENTIFIER [guard=TestOnly]\n"
+            "[  0, end) start := IDENTIFIER [guard]\n"
             "[  0, end) └─IDENTIFIER := tok[0]\n");
 
   Input = "notest";
Index: clang-tools-extra/pseudo/tool/ClangPseudo.cpp
===================================================================
--- clang-tools-extra/pseudo/tool/ClangPseudo.cpp
+++ clang-tools-extra/pseudo/tool/ClangPseudo.cpp
@@ -45,6 +45,8 @@
                     desc("Strip directives and select conditional sections"));
 static opt<bool> PrintStatistics("print-statistics", desc("Print GLR parser statistics"));
 static opt<bool> PrintForest("print-forest", desc("Print parse forest"));
+static opt<bool> ForestAbbrev("forest-abbrev", desc("Abbreviate parse forest"),
+                              init(true));
 static opt<std::string> StartSymbol("start-symbol",
                                     desc("specify the start symbol to parse"),
                                     init("translation-unit"));
@@ -148,7 +150,7 @@
         glrParse(clang::pseudo::ParseParams{*ParseableStream, Arena, GSS},
                  *StartSymID, Lang);
     if (PrintForest)
-      llvm::outs() << Root.dumpRecursive(Lang.G, /*Abbreviated=*/true);
+      llvm::outs() << Root.dumpRecursive(Lang.G, /*Abbreviated=*/ForestAbbrev);
 
     if (PrintStatistics) {
       llvm::outs() << "Forest bytes: " << Arena.bytes()
Index: clang-tools-extra/pseudo/test/cxx/mixed-designator.cpp
===================================================================
--- clang-tools-extra/pseudo/test/cxx/mixed-designator.cpp
+++ clang-tools-extra/pseudo/test/cxx/mixed-designator.cpp
@@ -5,16 +5,16 @@
 // CHECK-NEXT: ├─{ := tok[3]
 // CHECK-NEXT: ├─initializer-list
 // CHECK-NEXT: │ ├─initializer-list
-// CHECK-NEXT: │ │ ├─initializer-list~literal
-// CHECK:      │ │ ├─, := tok[5]
+// CHECK-NEXT: │ │ ├─initializer-list~NUMERIC_CONSTANT
+// CHECK-NEXT: │ │ ├─, := tok[5]
 // CHECK-NEXT: │ │ └─initializer-list-item
 // CHECK-NEXT: │ │   ├─designator
 // CHECK-NEXT: │ │   │ ├─. := tok[6]
 // CHECK-NEXT: │ │   │ └─IDENTIFIER := tok[7]
 // CHECK-NEXT: │ │   └─brace-or-equal-initializer
 // CHECK-NEXT: │ │     ├─= := tok[8]
-// CHECK-NEXT: │ │     └─initializer-clause~literal
-// CHECK:      │ ├─, := tok[10]
+// CHECK-NEXT: │ │     └─initializer-clause~NUMERIC_CONSTANT
+// CHECK-NEXT: │ ├─, := tok[10]
 // CHECK-NEXT: │ └─initializer-list-item
 // CHECK-NEXT: │   ├─designator
 // CHECK-NEXT: │   │ ├─[ := tok[11]
@@ -22,6 +22,6 @@
 // CHECK-NEXT: │   │ └─] := tok[13]
 // CHECK-NEXT: │   └─brace-or-equal-initializer~braced-init-list
 // CHECK-NEXT: │     ├─{ := tok[14]
-// CHECK-NEXT: │     ├─initializer-list~literal
+// CHECK-NEXT: │     ├─initializer-list~NUMERIC_CONSTANT
 // CHECK:      │     └─} := tok[16]
 // CHECK-NEXT: └─} := tok[17]
Index: clang-tools-extra/pseudo/test/cxx/literals.cpp
===================================================================
--- /dev/null
+++ clang-tools-extra/pseudo/test/cxx/literals.cpp
@@ -0,0 +1,43 @@
+// RUN: clang-pseudo -grammar=cxx -source=%s --print-forest -forest-abbrev=0 | FileCheck %s --implicit-check-not=ambiguous
+auto list = {
+  0,      // CHECK: := integer-literal
+  0b1011, // CHECK: := integer-literal
+  0777,   // CHECK: := integer-literal
+  42_u,   // CHECK: := user-defined-integer-literal
+  0LL,    // CHECK: := integer-literal
+  0h,     // CHECK: := user-defined-integer-literal
+  0.,     // CHECK: := floating-point-literal
+  .2,     // CHECK: := floating-point-literal
+  2e1,    // CHECK: := floating-point-literal
+  0x42d,  // CHECK: := integer-literal
+  0x42_d, // CHECK: := user-defined-integer-literal
+  0x42ds, // CHECK: := user-defined-integer-literal
+  0x1.2p2,// CHECK: := floating-point-literal
+  
+  "",               // CHECK: literal := string-literal
+  L"",              // CHECK: literal := string-literal
+  u8"",             // CHECK: literal := string-literal
+  u"",              // CHECK: literal := string-literal
+  U"",              // CHECK: literal := string-literal
+  R"()",            // CHECK: literal := string-literal
+  uR"()",           // CHECK: literal := string-literal
+  "a" "b",          // CHECK: literal := string-literal
+  u8"a" "b",        // CHECK: literal := string-literal
+  u"a" u"b",        // CHECK: literal := string-literal
+  "a"_u "b",        // CHECK: user-defined-literal := user-defined-string-literal
+  "a"_u u"b",       // CHECK: user-defined-literal := user-defined-string-literal
+  R"(a)" "\n",      // CHECK: literal := string-literal
+  R"c(a)c"_u u"\n", // CHECK: user-defined-literal := user-defined-string-literal
+
+  'a',      // CHECK: := character-literal
+  'abc',    // CHECK: := character-literal
+  'abcdef', // CHECK: := character-literal
+  u'a',     // CHECK: := character-literal
+  U'a',     // CHECK: := character-literal
+  L'a',     // CHECK: := character-literal
+  L'abc',   // CHECK: := character-literal
+  U'\u1234',// CHECK: := character-literal
+  '\u1234', // CHECK: := character-literal
+  u'a'_u,   // CHECK: := user-defined-character-literal
+};
+
Index: clang-tools-extra/pseudo/lib/grammar/GrammarBNF.cpp
===================================================================
--- clang-tools-extra/pseudo/lib/grammar/GrammarBNF.cpp
+++ clang-tools-extra/pseudo/lib/grammar/GrammarBNF.cpp
@@ -258,7 +258,7 @@
     for (unsigned I = 0; I < Spec.Sequence.size(); ++I) {
       for (const auto &KV : Spec.Sequence[I].Attributes) {
         if (KV.first == "guard") {
-          R.Guard = LookupExtensionID(KV.second);
+          R.Guarded = true;
         } else if (KV.first == "recover") {
           R.Recovery = LookupExtensionID(KV.second);
           R.RecoveryIndex = I;
Index: clang-tools-extra/pseudo/lib/grammar/Grammar.cpp
===================================================================
--- clang-tools-extra/pseudo/lib/grammar/Grammar.cpp
+++ clang-tools-extra/pseudo/lib/grammar/Grammar.cpp
@@ -86,8 +86,8 @@
     if (R.RecoveryIndex == I)
       OS << " [recover=" << T->AttributeValues[R.Recovery] << "]";
   }
-  if (R.Guard)
-    OS << " [guard=" << T->AttributeValues[R.Guard] << "]";
+  if (R.Guarded)
+    OS << " [guard]";
   return Result;
 }
 
Index: clang-tools-extra/pseudo/lib/cxx/cxx.bnf
===================================================================
--- clang-tools-extra/pseudo/lib/cxx/cxx.bnf
+++ clang-tools-extra/pseudo/lib/cxx/cxx.bnf
@@ -704,18 +704,18 @@
 literal := boolean-literal
 literal := pointer-literal
 literal := user-defined-literal
-integer-literal := NUMERIC_CONSTANT
-character-literal := CHAR_CONSTANT
-character-literal := WIDE_CHAR_CONSTANT
-character-literal := UTF8_CHAR_CONSTANT
-character-literal := UTF16_CHAR_CONSTANT
-character-literal := UTF32_CHAR_CONSTANT
-floating-point-literal := NUMERIC_CONSTANT
-string-literal-chunk := STRING_LITERAL
-string-literal-chunk := WIDE_STRING_LITERAL
-string-literal-chunk := UTF8_STRING_LITERAL
-string-literal-chunk := UTF16_STRING_LITERAL
-string-literal-chunk := UTF32_STRING_LITERAL
+integer-literal := NUMERIC_CONSTANT [guard]
+character-literal := CHAR_CONSTANT [guard]
+character-literal := WIDE_CHAR_CONSTANT [guard]
+character-literal := UTF8_CHAR_CONSTANT [guard]
+character-literal := UTF16_CHAR_CONSTANT [guard]
+character-literal := UTF32_CHAR_CONSTANT [guard]
+floating-point-literal := NUMERIC_CONSTANT [guard]
+string-literal-chunk := STRING_LITERAL [guard]
+string-literal-chunk := WIDE_STRING_LITERAL [guard]
+string-literal-chunk := UTF8_STRING_LITERAL [guard]
+string-literal-chunk := UTF16_STRING_LITERAL [guard]
+string-literal-chunk := UTF32_STRING_LITERAL [guard]
 #! Technically, string concatenation happens at phase 6 which is before parsing,
 #! so it doesn't belong to the grammar. However, we extend the grammar to
 #! support it, to make the pseudoparser fully functional on practical code.
@@ -725,33 +725,33 @@
 user-defined-literal := user-defined-floating-point-literal
 user-defined-literal := user-defined-string-literal
 user-defined-literal := user-defined-character-literal
-user-defined-integer-literal := NUMERIC_CONSTANT
-user-defined-string-literal-chunk := STRING_LITERAL
-user-defined-string-literal-chunk := WIDE_STRING_LITERAL
-user-defined-string-literal-chunk := UTF8_STRING_LITERAL
-user-defined-string-literal-chunk := UTF16_STRING_LITERAL
-user-defined-string-literal-chunk := UTF32_STRING_LITERAL
+user-defined-integer-literal := NUMERIC_CONSTANT [guard]
+user-defined-string-literal-chunk := STRING_LITERAL [guard]
+user-defined-string-literal-chunk := WIDE_STRING_LITERAL [guard]
+user-defined-string-literal-chunk := UTF8_STRING_LITERAL [guard]
+user-defined-string-literal-chunk := UTF16_STRING_LITERAL [guard]
+user-defined-string-literal-chunk := UTF32_STRING_LITERAL [guard]
 user-defined-string-literal := user-defined-string-literal-chunk
 user-defined-string-literal := string-literal-chunk user-defined-string-literal
 user-defined-string-literal := user-defined-string-literal string-literal-chunk
-user-defined-floating-point-literal := NUMERIC_CONSTANT
-user-defined-character-literal := CHAR_CONSTANT
-user-defined-character-literal := WIDE_CHAR_CONSTANT
-user-defined-character-literal := UTF8_CHAR_CONSTANT
-user-defined-character-literal := UTF16_CHAR_CONSTANT
-user-defined-character-literal := UTF32_CHAR_CONSTANT
+user-defined-floating-point-literal := NUMERIC_CONSTANT [guard]
+user-defined-character-literal := CHAR_CONSTANT [guard]
+user-defined-character-literal := WIDE_CHAR_CONSTANT [guard]
+user-defined-character-literal := UTF8_CHAR_CONSTANT [guard]
+user-defined-character-literal := UTF16_CHAR_CONSTANT [guard]
+user-defined-character-literal := UTF32_CHAR_CONSTANT [guard]
 boolean-literal := FALSE
 boolean-literal := TRUE
 pointer-literal := NULLPTR
 
 #! Contextual keywords -- clang lexer always lexes them as identifier tokens.
 #! Placeholders for literal text in the grammar that lex as other things.
-contextual-override := IDENTIFIER [guard=Override]
-contextual-final := IDENTIFIER [guard=Final]
-contextual-zero := NUMERIC_CONSTANT [guard=Zero]
-module-keyword := IDENTIFIER [guard=Module]
-import-keyword := IDENTIFIER [guard=Import]
-export-keyword := IDENTIFIER [guard=Export]
+contextual-override := IDENTIFIER [guard]
+contextual-final := IDENTIFIER [guard]
+contextual-zero := NUMERIC_CONSTANT [guard]
+module-keyword := IDENTIFIER [guard]
+import-keyword := IDENTIFIER [guard]
+export-keyword := IDENTIFIER [guard]
 
 #! greatergreater token -- clang lexer always lexes it as a single token, we
 #! split it into two tokens to make the GLR parser aware of the nested-template
Index: clang-tools-extra/pseudo/lib/cxx/CXX.cpp
===================================================================
--- clang-tools-extra/pseudo/lib/cxx/CXX.cpp
+++ clang-tools-extra/pseudo/lib/cxx/CXX.cpp
@@ -11,6 +11,9 @@
 #include "clang-pseudo/Language.h"
 #include "clang-pseudo/grammar/Grammar.h"
 #include "clang-pseudo/grammar/LRTable.h"
+#include "clang/Basic/CharInfo.h"
+#include "clang/Basic/TokenKinds.h"
+#include "llvm/ADT/StringSwitch.h"
 #include <utility>
 
 namespace clang {
@@ -21,38 +24,152 @@
 #include "CXXBNF.inc"
     ;
 
-bool guardOverride(llvm::ArrayRef<const ForestNode *> RHS,
-                   const TokenStream &Tokens) {
-  assert(RHS.size() == 1 &&
-         RHS.front()->symbol() == tokenSymbol(clang::tok::identifier));
-  return Tokens.tokens()[RHS.front()->startTokenIndex()].text() == "override";
+// RHS is expected to contain a single terminal.
+// Returns the corresponding token.
+const Token &onlyToken(tok::TokenKind Kind,
+                       const ArrayRef<const ForestNode *> RHS,
+                       const TokenStream &Tokens) {
+  assert(RHS.size() == 1 && RHS.front()->symbol() == tokenSymbol(Kind));
+  return Tokens.tokens()[RHS.front()->startTokenIndex()];
 }
-bool guardFinal(llvm::ArrayRef<const ForestNode *> RHS,
-                const TokenStream &Tokens) {
-  assert(RHS.size() == 1 &&
-         RHS.front()->symbol() == tokenSymbol(clang::tok::identifier));
-  return Tokens.tokens()[RHS.front()->startTokenIndex()].text() == "final";
-}
-bool guardModule(llvm::ArrayRef<const ForestNode *> RHS,
-                 const TokenStream &Tokens) {
-  return Tokens.tokens()[RHS.front()->startTokenIndex()].text() == "module";
-}
-bool guardImport(llvm::ArrayRef<const ForestNode *> RHS,
-                 const TokenStream &Tokens) {
-  return Tokens.tokens()[RHS.front()->startTokenIndex()].text() == "import";
-}
-bool guardExport(llvm::ArrayRef<const ForestNode *> RHS,
-                 const TokenStream &Tokens) {
-  return Tokens.tokens()[RHS.front()->startTokenIndex()].text() == "export";
+
+#define TOKEN_GUARD(kind, cond)                                                \
+  [](llvm::ArrayRef<const ForestNode *> RHS, const TokenStream &Tokens) {      \
+    const Token &Tok = onlyToken(tok::kind, RHS, Tokens);                      \
+    return cond;                                                               \
+  }
+
+bool isStringUD(const Token &Tok) { return !Tok.text().endswith("\""); }
+bool isCharUD(const Token &Tok) { return !Tok.text().endswith("'"); }
+
+// Combinable flags describing numbers.
+// Clang has just one numeric_constant token kind; the grammar has 4.
+enum NumericKind {
+  Integer = 0,
+  Floating = 1 << 0,
+  UserDefined = 1 << 1,
+};
+// Determine the kind of numeric_constant we have.
+// We can assume it's something valid, as it has been lexed.
+// FIXME: is this expensive enough that we should set flags on the token
+// and reuse them rather than computing it for each guard?
+unsigned numKind(const Token &Tok) {
+  assert(Tok.Kind == tok::numeric_constant);
+  llvm::StringRef Text = Tok.text();
+  if (Text.size() <= 1)
+    return Integer;
+  bool Hex =
+      Text.size() > 2 && Text[0] == '0' && (Text[1] == 'x' || Text[1] == 'X');
+  uint8_t K = Integer;
+
+  for (char C : Text) {
+    switch (C) {
+    case '.':
+      K |= Floating;
+      break;
+    case 'e':
+    case 'E':
+      if (!Hex)
+        K |= Floating;
+      break;
+    case 'p':
+    case 'P':
+      if (Hex)
+        K |= Floating;
+      break;
+    case '_':
+      K |= UserDefined;
+      break;
+    default:
+      break;
+    }
+  }
+
+  // We would be done here, but there are stdlib UDLs that lack _.
+  // We must distinguish these from the builtin suffixes.
+  unsigned LastLetter = Text.size();
+  while (LastLetter > 0 && isLetter(Text[LastLetter - 1]))
+    --LastLetter;
+  if (LastLetter == Text.size()) // Common case
+    return NumericKind(K);
+  // Trailing d/e/f are not part of the suffix in hex numbers.
+  while (Hex && LastLetter < Text.size() && isHexDigit(Text[LastLetter]))
+    ++LastLetter;
+  return llvm::StringSwitch<int, unsigned>(Text.substr(LastLetter))
+      // std::chrono
+      .Cases("h", "min", "s", "ms", "us", "ns", "d", "y", K | UserDefined)
+      // complex
+      .Cases("il", "i", "if", K | UserDefined)
+      .Default(K);
 }
 
 llvm::DenseMap<ExtensionID, RuleGuard> buildGuards() {
   return {
-      {(ExtensionID)Extension::Override, guardOverride},
-      {(ExtensionID)Extension::Final, guardFinal},
-      {(ExtensionID)Extension::Import, guardImport},
-      {(ExtensionID)Extension::Export, guardExport},
-      {(ExtensionID)Extension::Module, guardModule},
+      {(RuleID)Rule::contextual_override_0identifier,
+       TOKEN_GUARD(identifier, Tok.text() == "override")},
+      {(RuleID)Rule::contextual_final_0identifier,
+       TOKEN_GUARD(identifier, Tok.text() == "final")},
+      {(RuleID)Rule::import_keyword_0identifier,
+       TOKEN_GUARD(identifier, Tok.text() == "import")},
+      {(RuleID)Rule::export_keyword_0identifier,
+       TOKEN_GUARD(identifier, Tok.text() == "export")},
+      {(RuleID)Rule::module_keyword_0identifier,
+       TOKEN_GUARD(identifier, Tok.text() == "module")},
+      {(RuleID)Rule::contextual_zero_0numeric_constant,
+       TOKEN_GUARD(numeric_constant, Tok.text() == "0")},
+
+      // The grammar distinguishes (only) user-defined vs plain string literals,
+      // where the clang lexer distinguishes (only) encoding types.
+      {(RuleID)Rule::user_defined_string_literal_chunk_0string_literal,
+       TOKEN_GUARD(string_literal, isStringUD(Tok))},
+      {(RuleID)Rule::user_defined_string_literal_chunk_0utf8_string_literal,
+       TOKEN_GUARD(utf8_string_literal, isStringUD(Tok))},
+      {(RuleID)Rule::user_defined_string_literal_chunk_0utf16_string_literal,
+       TOKEN_GUARD(utf16_string_literal, isStringUD(Tok))},
+      {(RuleID)Rule::user_defined_string_literal_chunk_0utf32_string_literal,
+       TOKEN_GUARD(utf32_string_literal, isStringUD(Tok))},
+      {(RuleID)Rule::user_defined_string_literal_chunk_0wide_string_literal,
+       TOKEN_GUARD(wide_string_literal, isStringUD(Tok))},
+      {(RuleID)Rule::string_literal_chunk_0string_literal,
+       TOKEN_GUARD(string_literal, !isStringUD(Tok))},
+      {(RuleID)Rule::string_literal_chunk_0utf8_string_literal,
+       TOKEN_GUARD(utf8_string_literal, !isStringUD(Tok))},
+      {(RuleID)Rule::string_literal_chunk_0utf16_string_literal,
+       TOKEN_GUARD(utf16_string_literal, !isStringUD(Tok))},
+      {(RuleID)Rule::string_literal_chunk_0utf32_string_literal,
+       TOKEN_GUARD(utf32_string_literal, !isStringUD(Tok))},
+      {(RuleID)Rule::string_literal_chunk_0wide_string_literal,
+       TOKEN_GUARD(wide_string_literal, !isStringUD(Tok))},
+      // And the same for chars.
+      {(RuleID)Rule::user_defined_character_literal_0char_constant,
+       TOKEN_GUARD(char_constant, isCharUD(Tok))},
+      {(RuleID)Rule::user_defined_character_literal_0utf8_char_constant,
+       TOKEN_GUARD(utf8_char_constant, isCharUD(Tok))},
+      {(RuleID)Rule::user_defined_character_literal_0utf16_char_constant,
+       TOKEN_GUARD(utf16_char_constant, isCharUD(Tok))},
+      {(RuleID)Rule::user_defined_character_literal_0utf32_char_constant,
+       TOKEN_GUARD(utf32_char_constant, isCharUD(Tok))},
+      {(RuleID)Rule::user_defined_character_literal_0wide_char_constant,
+       TOKEN_GUARD(wide_char_constant, isCharUD(Tok))},
+      {(RuleID)Rule::character_literal_0char_constant,
+       TOKEN_GUARD(char_constant, !isCharUD(Tok))},
+      {(RuleID)Rule::character_literal_0utf8_char_constant,
+       TOKEN_GUARD(utf8_char_constant, !isCharUD(Tok))},
+      {(RuleID)Rule::character_literal_0utf16_char_constant,
+       TOKEN_GUARD(utf16_char_constant, !isCharUD(Tok))},
+      {(RuleID)Rule::character_literal_0utf32_char_constant,
+       TOKEN_GUARD(utf32_char_constant, !isCharUD(Tok))},
+      {(RuleID)Rule::character_literal_0wide_char_constant,
+       TOKEN_GUARD(wide_char_constant, !isCharUD(Tok))},
+      // clang just has one NUMERIC_CONSTANT token for {ud,plain}x{float,int}
+      {(RuleID)Rule::user_defined_integer_literal_0numeric_constant,
+       TOKEN_GUARD(numeric_constant, numKind(Tok) == (Integer | UserDefined))},
+      {(RuleID)Rule::user_defined_floating_point_literal_0numeric_constant,
+       TOKEN_GUARD(numeric_constant, numKind(Tok) == (Floating | UserDefined))},
+      {(RuleID)Rule::integer_literal_0numeric_constant,
+       TOKEN_GUARD(numeric_constant, numKind(Tok) == Integer)},
+      {(RuleID)Rule::floating_point_literal_0numeric_constant,
+       TOKEN_GUARD(numeric_constant, numKind(Tok) == Floating)},
   };
 }
 
Index: clang-tools-extra/pseudo/lib/GLR.cpp
===================================================================
--- clang-tools-extra/pseudo/lib/GLR.cpp
+++ clang-tools-extra/pseudo/lib/GLR.cpp
@@ -416,11 +416,11 @@
   }
 
 private:
-  bool canReduce(ExtensionID GuardID, RuleID RID,
+  bool canReduce(const Rule &R, RuleID RID,
                  llvm::ArrayRef<const ForestNode *> RHS) const {
-    if (!GuardID)
+    if (!R.Guarded)
       return true;
-    if (auto Guard = Lang.Guards.lookup(GuardID))
+    if (auto Guard = Lang.Guards.lookup(RID))
       return Guard(RHS, Params.Code);
     LLVM_DEBUG(llvm::dbgs()
                << llvm::formatv("missing guard implementation for rule {0}\n",
@@ -441,7 +441,7 @@
           for (const auto *B : N->parents())
             llvm::dbgs() << "    --> base at S" << B->State << "\n";
         });
-        if (!canReduce(Rule.Guard, RID, TempSequence))
+        if (!canReduce(Rule, RID, TempSequence))
           return;
         // Copy the chain to stable storage so it can be enqueued.
         if (SequenceStorageCount == SequenceStorage.size())
@@ -572,7 +572,7 @@
       TempSequence[Rule.Size - 1 - I] = Base->Payload;
       Base = Base->parents().front();
     }
-    if (!canReduce(Rule.Guard, *RID, TempSequence))
+    if (!canReduce(Rule, *RID, TempSequence))
       return true; // reduction is not available
     const ForestNode *Parsed =
         &Params.Forest.createSequence(Rule.Target, *RID, TempSequence);
Index: clang-tools-extra/pseudo/include/clang-pseudo/grammar/Grammar.h
===================================================================
--- clang-tools-extra/pseudo/include/clang-pseudo/grammar/Grammar.h
+++ clang-tools-extra/pseudo/include/clang-pseudo/grammar/Grammar.h
@@ -118,11 +118,8 @@
   uint8_t Size : SizeBits; // Size of the Sequence
   SymbolID Sequence[MaxElements];
 
-  // A guard extension controls whether a reduction of a rule will be conducted
-  // by the GLR parser.
-  // 0 is sentinel unset extension ID, indicating there is no guard extension
-  // being set for this rule.
-  ExtensionID Guard = 0;
+  // A guarded rule has extra logic to determine whether the RHS is eligible.
+  bool Guarded = false;
 
   // Specifies the index within Sequence eligible for error recovery.
   // Given stmt := { stmt-seq_opt }, if we fail to parse the stmt-seq then we
@@ -136,7 +133,7 @@
     return llvm::ArrayRef<SymbolID>(Sequence, Size);
   }
   friend bool operator==(const Rule &L, const Rule &R) {
-    return L.Target == R.Target && L.seq() == R.seq() && L.Guard == R.Guard;
+    return L.Target == R.Target && L.seq() == R.seq() && L.Guarded == R.Guarded;
   }
 };
 
Index: clang-tools-extra/pseudo/include/clang-pseudo/Language.h
===================================================================
--- clang-tools-extra/pseudo/include/clang-pseudo/Language.h
+++ clang-tools-extra/pseudo/include/clang-pseudo/Language.h
@@ -46,7 +46,7 @@
   LRTable Table;
 
   // Binding extension ids to corresponding implementations.
-  llvm::DenseMap<ExtensionID, RuleGuard> Guards;
+  llvm::DenseMap<RuleID, RuleGuard> Guards;
   llvm::DenseMap<ExtensionID, RecoveryStrategy> RecoveryStrategies;
 
   // FIXME: add clang::LangOptions.
Index: clang-tools-extra/pseudo/gen/Main.cpp
===================================================================
--- clang-tools-extra/pseudo/gen/Main.cpp
+++ clang-tools-extra/pseudo/gen/Main.cpp
@@ -99,7 +99,8 @@
     for (clang::pseudo::ExtensionID EID = 1 /*skip the sentinel 0 value*/;
          EID < G.table().AttributeValues.size(); ++EID) {
       llvm::StringRef Name = G.table().AttributeValues[EID];
-      assert(!Name.empty());
+      if (Name.empty())
+        continue;
       Out.os() << llvm::formatv("EXTENSION({0}, {1})\n", Name, EID);
     }
     Out.os() << R"cpp(
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to