v.g.vassilev created this revision.
v.g.vassilev added reviewers: rsmith, rjmccall, lhames, sgraenitz.
Herald added a subscriber: StephenFan.
Herald added a project: All.
v.g.vassilev requested review of this revision.

This patch teaches clang to parse statements at global scope, which allows, for example:

  ./bin/clang-repl
  clang-repl> int i = 12;
  clang-repl> ++i;
  clang-repl> extern "C" int printf(const char*,...);
  clang-repl> printf("%d\n", i);
  13
  clang-repl> quit

Conceptually, the patch groups the "top-level" statements that may appear
between two top-level declarations into a block that is emitted as part of the
global init function.

The current implementation in CodeGen is a placeholder that allows us to discuss
the best approach to this use case. CodeGen works with "GlobalDecl"s, and it is
awkward to model a block of statements that way. Currently, each statement is
attached as the initializer of a global VarDecl; however, that granularity is
not desirable.


Repository:
  rC Clang

https://reviews.llvm.org/D127284

Files:
  clang/include/clang/AST/ASTConsumer.h
  clang/include/clang/Parse/Parser.h
  clang/lib/CodeGen/CodeGenAction.cpp
  clang/lib/CodeGen/ModuleBuilder.cpp
  clang/lib/Interpreter/IncrementalParser.cpp
  clang/lib/Parse/ParseDecl.cpp
  clang/lib/Parse/ParseTentative.cpp
  clang/lib/Parse/Parser.cpp
  clang/test/Interpreter/execute-stmts.cpp

Index: clang/test/Interpreter/execute-stmts.cpp
===================================================================
--- /dev/null
+++ clang/test/Interpreter/execute-stmts.cpp
@@ -0,0 +1,18 @@
+// REQUIRES: host-supports-jit
+// UNSUPPORTED: system-aix
+// RUN: cat %s | clang-repl | FileCheck %s
+
+int i = 12;
+++i;
+extern "C" int printf(const char*,...);
+printf("i = %d\n", i);
+// CHECK: i = 13
+
+// FIXME: isConstructorDeclarator thinks these are ctors:
+// namespace Ns { void f(){} }
+// Ns::f();
+
+// void g() {}
+// g();
+
+quit
Index: clang/lib/Parse/Parser.cpp
===================================================================
--- clang/lib/Parse/Parser.cpp
+++ clang/lib/Parse/Parser.cpp
@@ -582,13 +582,14 @@
 ///
 /// Note that in C, it is an error if there is no first declaration.
 bool Parser::ParseFirstTopLevelDecl(DeclGroupPtrTy &Result,
-                                    Sema::ModuleImportState &ImportState) {
+                                    Sema::ModuleImportState &ImportState,
+                                    StmtVector *Stmts) {
   Actions.ActOnStartOfTranslationUnit();
 
   // For C++20 modules, a module decl must be the first in the TU.  We also
   // need to track module imports.
   ImportState = Sema::ModuleImportState::FirstDecl;
-  bool NoTopLevelDecls = ParseTopLevelDecl(Result, ImportState);
+  bool NoTopLevelDecls = ParseTopLevelDecl(Result, ImportState, Stmts);
 
   // C11 6.9p1 says translation units must have at least one top-level
   // declaration. C++ doesn't have this restriction. We also don't want to
@@ -609,7 +610,8 @@
 ///           declaration
 /// [C++20]   module-import-declaration
 bool Parser::ParseTopLevelDecl(DeclGroupPtrTy &Result,
-                               Sema::ModuleImportState &ImportState) {
+                               Sema::ModuleImportState &ImportState,
+                               StmtVector *Stmts /*=nullptr*/) {
   DestroyTemplateIdAnnotationsRAIIObj CleanupRAII(*this);
 
   // Skip over the EOF token, flagging end of previous input for incremental
@@ -724,6 +726,23 @@
   ParsedAttributes attrs(AttrFactory);
   MaybeParseCXX11Attributes(attrs);
 
+  // FIXME: Remove the incremental processing pre-condition and verify clang
+  // still can pass its test suite, which will harden `isDeclarationStatement`.
+  // It is known to have several weaknesses, for example in
+  // isConstructorDeclarator, infinite loop in c-index-test, etc..
+  // Parse a block of top-level-stmts.
+  while (PP.isIncrementalProcessingEnabled() && Stmts &&
+         !isDeclarationStatement()) {
+    //isStmtExpr ? ParsedStmtContext::InStmtExpr : ParsedStmtContext()
+    ParsedStmtContext SubStmtCtx = ParsedStmtContext();
+    auto R = ParseStatementOrDeclaration(*Stmts, SubStmtCtx);
+    if (!R.isUsable())
+      return true;
+    Stmts->push_back(R.get());
+    if (Tok.is(tok::eof))
+      return false;
+  }
+
   Result = ParseExternalDeclaration(attrs);
   // An empty Result might mean a line with ';' or some parsing error, ignore
   // it.
Index: clang/lib/Parse/ParseTentative.cpp
===================================================================
--- clang/lib/Parse/ParseTentative.cpp
+++ clang/lib/Parse/ParseTentative.cpp
@@ -22,6 +22,7 @@
 ///
 ///         declaration-statement:
 ///           block-declaration
+///           template-declaration
 ///
 ///         block-declaration:
 ///           simple-declaration
@@ -46,12 +47,24 @@
 ///           'using' 'namespace' '::'[opt] nested-name-specifier[opt]
 ///                 namespace-name ';'
 ///
+///         template-declaration:
+///           'template' declaration
+///           'template' concept-definition
+///
 bool Parser::isCXXDeclarationStatement() {
   switch (Tok.getKind()) {
-    // asm-definition
+  // OpenCL
+  case tok::kw___kernel:
+    return getLangOpts().OpenCL;
+
   case tok::kw_asm:
+  case tok::kw_export:
+  case tok::kw_import:
     // namespace-alias-definition
+  case tok::kw_inline: // inline namespace
   case tok::kw_namespace:
+    // template-declaration
+  case tok::kw_template:
     // using-declaration
     // using-directive
   case tok::kw_using:
@@ -59,7 +72,20 @@
   case tok::kw_static_assert:
   case tok::kw__Static_assert:
     return true;
+
+  case tok::identifier:
+    if (getLangOpts().CPlusPlus) {
+      // FIXME: This is quite broken as it considers f() from void f(){}; f();
+      // to be a constructor declarator...
+      if (isConstructorDeclarator(/*Unqualified=*/false))
+        return true;
+      // Check if this is a dtor.
+      if (!TryAnnotateTypeOrScopeToken() && Tok.is(tok::annot_cxxscope) &&
+          NextToken().is(tok::tilde))
+        return true;
+    }
     // simple-declaration
+    LLVM_FALLTHROUGH;
   default:
     return isCXXSimpleDeclaration(/*AllowForRangeDecl=*/false);
   }
@@ -1397,6 +1423,12 @@
   case tok::kw_volatile:
     return TPResult::True;
 
+    // ObjC
+  case tok::at:
+    if (getLangOpts().ObjC)
+      return TPResult::True;
+    LLVM_FALLTHROUGH;
+
     // OpenCL address space qualifiers
   case tok::kw_private:
     if (!getLangOpts().OpenCL)
Index: clang/lib/Parse/ParseDecl.cpp
===================================================================
--- clang/lib/Parse/ParseDecl.cpp
+++ clang/lib/Parse/ParseDecl.cpp
@@ -5243,11 +5243,22 @@
   switch (Tok.getKind()) {
   default: return false;
 
+  // ObjC
+  case tok::at:
+    return getLangOpts().ObjC;
+
+  // OpenCL
+  case tok::kw___kernel:
+    return getLangOpts().OpenCL;
+
   // OpenCL 2.0 and later define this keyword.
   case tok::kw_pipe:
     return getLangOpts().OpenCL &&
            getLangOpts().getOpenCLCompatibleVersion() >= 200;
 
+  case tok::kw_asm:
+    return true;
+
   case tok::identifier:   // foo::bar
     // Unfortunate hack to support "Class.factoryMethod" notation.
     if (getLangOpts().ObjC && NextToken().is(tok::period))
@@ -5500,6 +5511,7 @@
   }
   ConsumeParen();
 
+  // FIXME: this returns true for NS::f();
   // A right parenthesis, or ellipsis followed by a right parenthesis signals
   // that we have a constructor.
   if (Tok.is(tok::r_paren) ||
Index: clang/lib/Interpreter/IncrementalParser.cpp
===================================================================
--- clang/lib/Interpreter/IncrementalParser.cpp
+++ clang/lib/Interpreter/IncrementalParser.cpp
@@ -165,11 +165,17 @@
 
   Parser::DeclGroupPtrTy ADecl;
   Sema::ModuleImportState ImportState;
-  for (bool AtEOF = P->ParseFirstTopLevelDecl(ADecl, ImportState); !AtEOF;
-       AtEOF = P->ParseTopLevelDecl(ADecl, ImportState)) {
+  Parser::StmtVector Stmts;
+  for (bool AtEOF = P->ParseFirstTopLevelDecl(ADecl, ImportState, &Stmts);
+       !AtEOF; AtEOF = P->ParseTopLevelDecl(ADecl, ImportState, &Stmts)) {
     // If we got a null return and something *was* parsed, ignore it.  This
     // is due to a top-level semicolon, an action override, or a parse error
     // skipping something.
+    if (!Stmts.empty() && !Consumer->HandleTopLevelStmts(Stmts))
+      return llvm::make_error<llvm::StringError>("Parsing failed. "
+                                                 "The consumer rejected a stmt",
+                                                 std::error_code());
+
     if (ADecl && !Consumer->HandleTopLevelDecl(ADecl.get()))
       return llvm::make_error<llvm::StringError>("Parsing failed. "
                                                  "The consumer rejected a decl",
Index: clang/lib/CodeGen/ModuleBuilder.cpp
===================================================================
--- clang/lib/CodeGen/ModuleBuilder.cpp
+++ clang/lib/CodeGen/ModuleBuilder.cpp
@@ -168,7 +168,30 @@
       Builder->HandleCXXStaticMemberVarInstantiation(VD);
     }
 
+    bool HandleTopLevelStmts(const llvm::SmallVectorImpl<Stmt*> &Stmts) override {
+      if (Diags.hasErrorOccurred())
+        return false; // Abort parsing.
+
+      // FIXME: We should reimplement this in a proper way where we append the
+      // statements to the global init function.
+      static unsigned id = 0;
+      // Make sure to emit all elements of a Decl.
+      for (Stmt *S : Stmts) {
+        Expr *E = cast<Expr>(S);
+        IdentifierInfo* name = &Ctx->Idents.get("_stmt" + std::to_string(id++));
+        VarDecl *VD = VarDecl::Create(*Ctx, Ctx->getTranslationUnitDecl(),
+                                      E->getBeginLoc(), SourceLocation(),
+                                      name, E->getType(), /*TSI*/nullptr,
+                                      SC_None);
+        VD->setInit(E);
+        HandleTopLevelDecl(DeclGroupRef(VD));
+      }
+
+      return true;
+    }
+
     bool HandleTopLevelDecl(DeclGroupRef DG) override {
+      // FIXME: Why not return false and abort parsing?
       if (Diags.hasErrorOccurred())
         return true;
 
Index: clang/lib/CodeGen/CodeGenAction.cpp
===================================================================
--- clang/lib/CodeGen/CodeGenAction.cpp
+++ clang/lib/CodeGen/CodeGenAction.cpp
@@ -237,6 +237,9 @@
 
       return true;
     }
+    bool HandleTopLevelStmts(const llvm::SmallVectorImpl<Stmt*> &Stmts) override {
+      return Gen->HandleTopLevelStmts(Stmts);
+    }
 
     void HandleInlineFunctionDefinition(FunctionDecl *D) override {
       PrettyStackTraceDecl CrashInfo(D, SourceLocation(),
Index: clang/include/clang/Parse/Parser.h
===================================================================
--- clang/include/clang/Parse/Parser.h
+++ clang/include/clang/Parse/Parser.h
@@ -462,6 +462,11 @@
 
   typedef Sema::FullExprArg FullExprArg;
 
+  /// A SmallVector of statements, with stack size 32 (as that is the only one
+  /// used.)
+  typedef SmallVector<Stmt*, 32> StmtVector;
+
+
   // Parsing methods.
 
   /// Initialize - Warm up the parser.
@@ -470,12 +475,14 @@
 
   /// Parse the first top-level declaration in a translation unit.
   bool ParseFirstTopLevelDecl(DeclGroupPtrTy &Result,
-                              Sema::ModuleImportState &ImportState);
+                              Sema::ModuleImportState &ImportState,
+                              StmtVector *Stmts = nullptr);
 
   /// ParseTopLevelDecl - Parse one top-level declaration. Returns true if
   /// the EOF was encountered.
   bool ParseTopLevelDecl(DeclGroupPtrTy &Result,
-                         Sema::ModuleImportState &ImportState);
+                         Sema::ModuleImportState &ImportState,
+                         StmtVector *Stmts = nullptr);
   bool ParseTopLevelDecl() {
     DeclGroupPtrTy Result;
     Sema::ModuleImportState IS = Sema::ModuleImportState::NotACXX20Module;
@@ -2059,9 +2066,6 @@
   //===--------------------------------------------------------------------===//
   // C99 6.8: Statements and Blocks.
 
-  /// A SmallVector of statements, with stack size 32 (as that is the only one
-  /// used.)
-  typedef SmallVector<Stmt*, 32> StmtVector;
   /// A SmallVector of expressions, with stack size 12 (the maximum used.)
   typedef SmallVector<Expr*, 12> ExprVector;
   /// A SmallVector of types.
Index: clang/include/clang/AST/ASTConsumer.h
===================================================================
--- clang/include/clang/AST/ASTConsumer.h
+++ clang/include/clang/AST/ASTConsumer.h
@@ -13,6 +13,10 @@
 #ifndef LLVM_CLANG_AST_ASTCONSUMER_H
 #define LLVM_CLANG_AST_ASTCONSUMER_H
 
+namespace llvm {
+  template <typename T> class SmallVectorImpl;
+}
+
 namespace clang {
   class ASTContext;
   class CXXMethodDecl;
@@ -22,6 +26,7 @@
   class ASTMutationListener;
   class ASTDeserializationListener; // layering violation because void* is ugly
   class SemaConsumer; // layering violation required for safe SemaConsumer
+  class Stmt;
   class TagDecl;
   class VarDecl;
   class FunctionDecl;
@@ -46,6 +51,15 @@
   /// ASTContext.
   virtual void Initialize(ASTContext &Context) {}
 
+  /// HandleTopLevelStmts - Handle the specified top-level statements. This is
+  /// called by the parser to process every top-level Stmt* in incremental
+  /// compilation mode.
+  ///
+  /// \returns true to continue parsing, or false to abort parsing.
+  virtual bool HandleTopLevelStmts(const llvm::SmallVectorImpl<Stmt*>& Stmts) {
+    return true;
+  }
+
   /// HandleTopLevelDecl - Handle the specified top-level declaration.  This is
   /// called by the parser to process every top-level Decl*.
   ///
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to