sammccall created this revision.
sammccall added a reviewer: hokein.
Herald added a project: All.
sammccall requested review of this revision.
Herald added subscribers: cfe-commits, alextsao1999.
Herald added a project: clang-tools-extra.

- declaration recovery strategy: search for likely declaration boundaries
- change decl-sequence to right-recursive to simplify this
- also use for class members (already right-recursive)
- refactor recovery strategy to use struct parameter and expose cursor position


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D130460

Files:
  clang-tools-extra/pseudo/include/clang-pseudo/Language.h
  clang-tools-extra/pseudo/include/clang-pseudo/Token.h
  clang-tools-extra/pseudo/lib/GLR.cpp
  clang-tools-extra/pseudo/lib/cxx/CXX.cpp
  clang-tools-extra/pseudo/lib/cxx/cxx.bnf
  clang-tools-extra/pseudo/test/cxx/recovery-declarations.cpp
  clang-tools-extra/pseudo/unittests/GLRTest.cpp

Index: clang-tools-extra/pseudo/unittests/GLRTest.cpp
===================================================================
--- clang-tools-extra/pseudo/unittests/GLRTest.cpp
+++ clang-tools-extra/pseudo/unittests/GLRTest.cpp
@@ -48,13 +48,13 @@
                            testing::UnorderedElementsAreArray(Parents));
 }
 
-Token::Index recoverBraces(Token::Index Begin, const TokenStream &Code) {
-  EXPECT_GT(Begin, 0u);
-  const Token &Left = Code.tokens()[Begin - 1];
+Token::Index recoverBraces(const RecoveryParams &P) {
+  EXPECT_GT(P.Begin, 0u);
+  const Token &Left = P.Tokens.tokens()[P.Begin - 1];
   EXPECT_EQ(Left.Kind, tok::l_brace);
   if (const auto* Right = Left.pair()) {
     EXPECT_EQ(Right->Kind, tok::r_brace);
-    return Code.index(*Right);
+    return P.Tokens.index(*Right);
   }
   return Token::Invalid;
 }
Index: clang-tools-extra/pseudo/test/cxx/recovery-declarations.cpp
===================================================================
--- /dev/null
+++ clang-tools-extra/pseudo/test/cxx/recovery-declarations.cpp
@@ -0,0 +1,22 @@
+// RUN: clang-pseudo -grammar=cxx -source=%s --print-forest | FileCheck %s
+
+this is not a valid declaration;
+// CHECK: declaration := <opaque>
+
+int x[0];
+// CHECK:      simple-declaration := decl-specifier-seq init-declarator-list ;
+// CHECK-NEXT: ├─decl-specifier-seq~INT := tok[7]
+// CHECK-NEXT: ├─init-declarator-list~noptr-declarator := noptr-declarator [ constant-expression ]
+
+also_not_ok ??? @@@ (;
+// CHECK: declaration := <opaque>
+
+class Y{};
+// CHECK:      simple-declaration := decl-specifier-seq ;
+// CHECK-NEXT: ├─decl-specifier-seq~class-specifier := class-head { }
+
+// FIXME: support recovery of incomplete scopes
+// namespace foo {
+
+class class class class function
+// CHECK: declaration := <opaque>
Index: clang-tools-extra/pseudo/lib/cxx/cxx.bnf
===================================================================
--- clang-tools-extra/pseudo/lib/cxx/cxx.bnf
+++ clang-tools-extra/pseudo/lib/cxx/cxx.bnf
@@ -309,8 +309,7 @@
 declaration-statement := block-declaration
 
 # gram.dcl
-declaration-seq := declaration
-declaration-seq := declaration-seq declaration
+declaration-seq := declaration [recover=NextDeclaration] declaration-seq_opt
 declaration := block-declaration
 declaration := nodeclspec-function-declaration
 declaration := function-definition
@@ -535,13 +534,12 @@
 module-partition := : module-name-qualifier_opt IDENTIFIER
 module-name-qualifier := IDENTIFIER .
 module-name-qualifier := module-name-qualifier IDENTIFIER .
-export-declaration := EXPORT declaration
-export-declaration := EXPORT ( declaration-seq_opt )
+export-declaration := EXPORT { declaration-seq_opt }
 export-declaration := export-keyword module-import-declaration
-module-import-declaration := import-keyword module-name
-module-import-declaration := import-keyword module-partition
+module-import-declaration := import-keyword module-name ;
+module-import-declaration := import-keyword module-partition ;
 # FIXME: we don't have header-name in the grammar. Handle these in PP?
-# module-import-declaration := import-keyword header-name
+# module-import-declaration := import-keyword header-name ;
 global-module-fragment := module-keyword ; declaration-seq_opt
 private-module-fragment := module-keyword : PRIVATE ; declaration-seq_opt
 
@@ -554,7 +552,7 @@
 class-key := CLASS
 class-key := STRUCT
 class-key := UNION
-member-specification := member-declaration member-specification_opt
+member-specification := member-declaration [recover=NextDeclaration] member-specification_opt
 member-specification := access-specifier : member-specification_opt
 member-declaration := decl-specifier-seq_opt member-declarator-list_opt ;
 member-declaration := function-definition
Index: clang-tools-extra/pseudo/lib/cxx/CXX.cpp
===================================================================
--- clang-tools-extra/pseudo/lib/cxx/CXX.cpp
+++ clang-tools-extra/pseudo/lib/cxx/CXX.cpp
@@ -252,21 +252,122 @@
 #undef SYMBOL_GUARD
 }
 
-Token::Index recoverBrackets(Token::Index Begin, const TokenStream &Tokens) {
-  assert(Begin > 0);
-  const Token &Left = Tokens.tokens()[Begin - 1];
+Token::Index recoverBrackets(const RecoveryParams &P) {
+  assert(P.Begin > 0);
+  const Token &Left = P.Tokens.tokens()[P.Begin - 1];
   assert(Left.Kind == tok::l_brace || Left.Kind == tok::l_paren ||
          Left.Kind == tok::l_square);
   if (const Token *Right = Left.pair()) {
-    assert(Tokens.index(*Right) > Begin - 1);
-    return Tokens.index(*Right);
+    assert(P.Tokens.index(*Right) > P.Begin - 1);
+    return P.Tokens.index(*Right);
   }
   return Token::Invalid;
 }
 
+Token::Index recoverNextDeclaration(const RecoveryParams &P) {
+  if (P.Begin == P.Tokens.tokens().size())
+    return Token::Invalid;
+
+  // Declarations that don't end in semicolons:
+  //   function-definition
+  //   EXPORT {...}
+  //   EXTERN string-literal {...}
+  //   INLINE_opt NAMESPACE IDENTIFIER {...}
+  //   INLINE_opt NAMESPACE {...}
+  //   NAMESPACE enclosing-namespace-specifier :: INLINE_opt IDENTIFIER {...}
+  // Functions are hard to do anything about:
+  //  - if the declarator is good and the braces are present, parsing succeeds
+  //  - if the declarator is bad, it's hard to know it's a function
+  //  - if the braces are missing, we don't know where to recover
+  // So We only try to detect the others.
+  bool AcceptBraces = false;
+  auto ConsiderForBraces = [&](tok::TokenKind K) {
+    if (K == tok::kw_export || K == tok::kw_extern || K == tok::kw_namespace)
+      AcceptBraces = true;
+  };
+
+  // Some tokens are pretty good indicators of a declaration starting, when
+  // they appear at the beginning of a line.
+  auto LikelyStartsDeclaration = [&](tok::TokenKind K) -> bool {
+    switch (K) {
+    // Things you might be declaring.
+    case tok::kw_template:
+    case tok::kw_using:
+    case tok::kw_namespace:
+    case tok::kw_typedef:
+    case tok::kw_class:
+    case tok::kw_struct:
+    case tok::kw_union:
+    // Types & other decl-specifiers
+    case tok::kw_void:
+    case tok::kw_auto:
+    case tok::kw_int:
+    case tok::kw_char:
+    case tok::kw_float:
+    case tok::kw_double:
+    case tok::kw_long:
+    case tok::kw_signed:
+    case tok::kw_unsigned:
+    case tok::kw_const:
+    case tok::kw_volatile:
+    case tok::kw_static:
+    case tok::kw_extern:
+    // Modules
+    case tok::kw_import:
+    case tok::kw_export:
+    // Class protection
+    case tok::kw_public:
+    case tok::kw_private:
+    case tok::kw_protected:
+      return true;
+    default:
+      return false;
+    }
+  };
+
+  // The indentation level where the declaration started hints at where a new
+  // sibling declaration might start.
+  unsigned OrigIndent = P.Tokens.tokens()[P.Begin].Indent;
+
+  // Walk over tokens at the appropriate bracket nesting level.
+  for (const Token *T = &P.Tokens.tokens()[P.Begin].next(); T->Kind != tok::eof;
+       T = &T->next()) {
+    // Handle (paired) brackets.
+    if (const Token *B = T->pair()) {
+      // Closing bracket not paired within the recovery range.
+      if (B < T) {
+        assert(P.Tokens.index(*B) < P.Begin && "Why didn't we skip this one?");
+        assert(P.Tokens.index(*T) >= P.Cursor &&
+               "How did we parse an unmatched closing bracket?!");
+        return P.Tokens.index(*T);
+      }
+      // Opening bracket: is this a semicolonless statement?
+      if (T->Kind == tok::l_brace && AcceptBraces &&
+          P.Tokens.index(*B) >= P.Cursor)
+        return P.Tokens.index(*B);
+      // Skip over opening/closing bracket pair.
+      T = B;
+      continue;
+    }
+
+    // Semicolons terminate statements.
+    if (T->Kind == tok::semi)
+      return P.Tokens.index(*T) + 1;
+    ConsiderForBraces(T->Kind);
+
+    // Stop if it looks like a new declaration is starting on the next line.
+    if (T->Indent == OrigIndent && T->Line != T->prev().Line &&
+        LikelyStartsDeclaration(T->Kind))
+      return P.Tokens.index(*T);
+  }
+  // If we reached EOF, then assume the declaration runs right to EOF.
+  return P.Tokens.tokens().size();
+}
+
 llvm::DenseMap<ExtensionID, RecoveryStrategy> buildRecoveryStrategies() {
   return {
       {(ExtensionID)Extension::Brackets, recoverBrackets},
+      {(ExtensionID)Extension::NextDeclaration, recoverNextDeclaration},
   };
 }
 
Index: clang-tools-extra/pseudo/lib/GLR.cpp
===================================================================
--- clang-tools-extra/pseudo/lib/GLR.cpp
+++ clang-tools-extra/pseudo/lib/GLR.cpp
@@ -28,12 +28,12 @@
 namespace {
 
 Token::Index findRecoveryEndpoint(ExtensionID Strategy, Token::Index Begin,
+                                  Token::Index Cursor,
                                   const TokenStream &Tokens,
                                   const Language &Lang) {
   assert(Strategy != 0);
-  assert(Begin > 0);
   if (auto S = Lang.RecoveryStrategies.lookup(Strategy))
-    return S(Begin, Tokens);
+    return S({Begin, Cursor, Tokens});
   return Token::Invalid;
 }
 
@@ -141,7 +141,7 @@
       break;
 
     auto End = findRecoveryEndpoint(Option.Strategy, Option.Position,
-                                    Params.Code, Lang);
+                                    TokenIndex, Params.Code, Lang);
     // Recovery may not take the parse backwards.
     if (End == Token::Invalid || End < TokenIndex)
       continue;
Index: clang-tools-extra/pseudo/include/clang-pseudo/Token.h
===================================================================
--- clang-tools-extra/pseudo/include/clang-pseudo/Token.h
+++ clang-tools-extra/pseudo/include/clang-pseudo/Token.h
@@ -82,6 +82,11 @@
     assert(Kind != tok::eof);
     return *(this + 1);
   }
+  /// Returns the previous token in the stream. this may not be a sentinel.
+  const Token &prev() const {
+    assert(Kind != tok::eof);
+    return *(this - 1);
+  }
   /// Returns the next token in the stream, skipping over comments.
   const Token &nextNC() const {
     const Token *T = this;
Index: clang-tools-extra/pseudo/include/clang-pseudo/Language.h
===================================================================
--- clang-tools-extra/pseudo/include/clang-pseudo/Language.h
+++ clang-tools-extra/pseudo/include/clang-pseudo/Language.h
@@ -34,6 +34,14 @@
 // Return true if the guard is satisfied.
 using RuleGuard = llvm::function_ref<bool(const GuardParams &)>;
 
+struct RecoveryParams {
+  // The position in the stream where the node being recovered will start.
+  Token::Index Begin;
+  // The position in the stream where the parse failed.
+  // Begin <= Cursor <= End (for a valid recovery).
+  Token::Index Cursor;
+  const TokenStream &Tokens;
+};
 // A recovery strategy determines a region of code to skip when parsing fails.
 //
 // For example, given `class-def := CLASS IDENT { body [recover=Brackets] }`,
@@ -43,7 +51,7 @@
 // The provided index is the token where the skipped region begins.
 // Returns the (excluded) end of the range, or Token::Invalid for no recovery.
 using RecoveryStrategy =
-    llvm::function_ref<Token::Index(Token::Index Start, const TokenStream &)>;
+    llvm::function_ref<Token::Index(const RecoveryParams &)>;
 
 // Specify a language that can be parsed by the pseduoparser.
 struct Language {
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to