junaire created this revision. Herald added a project: All. junaire requested review of this revision. Herald added a project: clang. Herald added a subscriber: cfe-commits.
This patch is the first part of the below RFC: https://discourse.llvm.org/t/rfc-handle-execution-results-in-clang-repl/68493 It adds an annotation token which will replace the original EOF token when we are in the incremental C++ mode. In addition, when we're parsing an ExprStmt and there's a missing semicolon after the expression, we set a marker in the annotation token and continue parsing. Eventually, we propagate this info in ParseTopLevelStmtDecl and are able to mark this Decl as something we want to do value printing for. Below is an example: clang-repl> int x = 42; clang-repl> x // `x` is a TopLevelStmtDecl and without a semicolon, we should set // its IsSemiMissing bit so we can do something interesting in // ASTConsumer::HandleTopLevelDecl. The idea of using an annotation token was proposed by Richard Smith, thanks! Signed-off-by: Jun Zhang <j...@junz.org> Repository: rG LLVM Github Monorepo https://reviews.llvm.org/D148997 Files: clang/include/clang/AST/Decl.h clang/include/clang/Basic/TokenKinds.def clang/include/clang/Parse/Parser.h clang/lib/CodeGen/CodeGenModule.cpp clang/lib/Frontend/PrintPreprocessedOutput.cpp clang/lib/Interpreter/IncrementalParser.cpp clang/lib/Lex/PPLexerChange.cpp clang/lib/Parse/ParseCXXInlineMethods.cpp clang/lib/Parse/ParseDecl.cpp clang/lib/Parse/ParseStmt.cpp clang/lib/Parse/Parser.cpp
Index: clang/lib/Parse/Parser.cpp =================================================================== --- clang/lib/Parse/Parser.cpp +++ clang/lib/Parse/Parser.cpp @@ -320,6 +320,7 @@ case tok::annot_module_begin: case tok::annot_module_end: case tok::annot_module_include: + case tok::annot_repl_input_end: // Stop before we change submodules. They generally indicate a "good" // place to pick up parsing again (except in the special case where // we're trying to skip to EOF). @@ -616,8 +617,8 @@ // Skip over the EOF token, flagging end of previous input for incremental // processing - if (PP.isIncrementalProcessingEnabled() && Tok.is(tok::eof)) - ConsumeToken(); + if (PP.isIncrementalProcessingEnabled() && Tok.is(tok::annot_repl_input_end)) + ConsumeAnnotationToken(); Result = nullptr; switch (Tok.getKind()) { @@ -697,6 +698,7 @@ return false; case tok::eof: + case tok::annot_repl_input_end: // Check whether -fmax-tokens= was reached. if (PP.getMaxTokens() != 0 && PP.getTokenCount() > PP.getMaxTokens()) { PP.Diag(Tok.getLocation(), diag::warn_max_tokens_total) Index: clang/lib/Parse/ParseStmt.cpp =================================================================== --- clang/lib/Parse/ParseStmt.cpp +++ clang/lib/Parse/ParseStmt.cpp @@ -543,9 +543,22 @@ return ParseCaseStatement(StmtCtx, /*MissingCase=*/true, Expr); } - // Otherwise, eat the semicolon. - ExpectAndConsumeSemi(diag::err_expected_semi_after_expr); - return handleExprStmt(Expr, StmtCtx); + Token *CurTok = nullptr; + // If we're parsing an ExprStmt and the last semicolon is missing and the + // incremental externsion is enabled and we're reaching the end, consider we + // want to do value printing. Note we shouldn't eat the token since the + // callback need it. + if (PP.isIncrementalProcessingEnabled() && Tok.is(tok::annot_repl_input_end)) + CurTok = &Tok; + else + // Otherwise, eat the semicolon. 
+ ExpectAndConsumeSemi(diag::err_expected_semi_after_expr); + + StmtResult R = handleExprStmt(Expr, StmtCtx); + if (!R.isInvalid() && CurTok) + CurTok->setAnnotationValue(R.get()); + + return R; } /// ParseSEHTryBlockCommon Index: clang/lib/Parse/ParseDecl.cpp =================================================================== --- clang/lib/Parse/ParseDecl.cpp +++ clang/lib/Parse/ParseDecl.cpp @@ -2069,6 +2069,7 @@ case tok::annot_module_begin: case tok::annot_module_end: case tok::annot_module_include: + case tok::annot_repl_input_end: return; default: @@ -5453,6 +5454,13 @@ SmallVector<Decl *, 2> DeclsInGroup; DeclsInGroup.push_back(Actions.ActOnTopLevelStmtDecl(R.get())); + + if (Tok.is(tok::annot_repl_input_end) && + Tok.getAnnotationValue() != nullptr) { + ConsumeAnnotationToken(); + cast<TopLevelStmtDecl>(DeclsInGroup.back())->setValuePrinting(); + } + // Currently happens for things like -fms-extensions and use `__if_exists`. for (Stmt *S : Stmts) DeclsInGroup.push_back(Actions.ActOnTopLevelStmtDecl(S)); Index: clang/lib/Parse/ParseCXXInlineMethods.cpp =================================================================== --- clang/lib/Parse/ParseCXXInlineMethods.cpp +++ clang/lib/Parse/ParseCXXInlineMethods.cpp @@ -836,6 +836,7 @@ case tok::annot_module_begin: case tok::annot_module_end: case tok::annot_module_include: + case tok::annot_repl_input_end: // Ran out of tokens. return false; @@ -1242,6 +1243,7 @@ case tok::annot_module_begin: case tok::annot_module_end: case tok::annot_module_include: + case tok::annot_repl_input_end: // Ran out of tokens. return false; Index: clang/lib/Lex/PPLexerChange.cpp =================================================================== --- clang/lib/Lex/PPLexerChange.cpp +++ clang/lib/Lex/PPLexerChange.cpp @@ -535,13 +535,19 @@ return LeavingSubmodule; } } - // If this is the end of the main file, form an EOF token. 
assert(CurLexer && "Got EOF but no current lexer set!"); const char *EndPos = getCurLexerEndPos(); Result.startToken(); CurLexer->BufferPtr = EndPos; - CurLexer->FormTokenWithChars(Result, EndPos, tok::eof); + + if (isIncrementalProcessingEnabled()) { + CurLexer->FormTokenWithChars(Result, EndPos, tok::annot_repl_input_end); + Result.setAnnotationEndLoc(Result.getLocation()); + Result.setAnnotationValue(nullptr); + } else { + CurLexer->FormTokenWithChars(Result, EndPos, tok::eof); + } if (isCodeCompletionEnabled()) { // Inserting the code-completion point increases the source buffer by 1, Index: clang/lib/Interpreter/IncrementalParser.cpp =================================================================== --- clang/lib/Interpreter/IncrementalParser.cpp +++ clang/lib/Interpreter/IncrementalParser.cpp @@ -158,8 +158,8 @@ LastPTU.TUPart = C.getTranslationUnitDecl(); // Skip previous eof due to last incremental input. - if (P->getCurToken().is(tok::eof)) { - P->ConsumeToken(); + if (P->getCurToken().is(tok::annot_repl_input_end)) { + P->ConsumeAnyToken(); // FIXME: Clang does not call ExitScope on finalizing the regular TU, we // might want to do that around HandleEndOfTranslationUnit. P->ExitScope(); @@ -259,12 +259,12 @@ Token Tok; do { PP.Lex(Tok); - } while (Tok.isNot(tok::eof)); + } while (Tok.isNot(tok::annot_repl_input_end)); } Token AssertTok; PP.Lex(AssertTok); - assert(AssertTok.is(tok::eof) && + assert(AssertTok.is(tok::annot_repl_input_end) && "Lexer must be EOF when starting incremental parse!"); if (CodeGenerator *CG = getCodeGen(Act.get())) { Index: clang/lib/Frontend/PrintPreprocessedOutput.cpp =================================================================== --- clang/lib/Frontend/PrintPreprocessedOutput.cpp +++ clang/lib/Frontend/PrintPreprocessedOutput.cpp @@ -663,7 +663,8 @@ // them. 
if (Tok.is(tok::eof) || (Tok.isAnnotation() && !Tok.is(tok::annot_header_unit) && - !Tok.is(tok::annot_module_begin) && !Tok.is(tok::annot_module_end))) + !Tok.is(tok::annot_module_begin) && !Tok.is(tok::annot_module_end) && + !Tok.is(tok::annot_repl_input_end))) return; // EmittedDirectiveOnThisLine takes priority over RequireSameLine. @@ -819,6 +820,9 @@ // -traditional-cpp the lexer keeps /all/ whitespace, including comments. PP.Lex(Tok); continue; + } else if (Tok.is(tok::annot_repl_input_end)) { + PP.Lex(Tok); + continue; } else if (Tok.is(tok::eod)) { // Don't print end of directive tokens, since they are typically newlines // that mess up our line tracking. These come from unknown pre-processor Index: clang/lib/CodeGen/CodeGenModule.cpp =================================================================== --- clang/lib/CodeGen/CodeGenModule.cpp +++ clang/lib/CodeGen/CodeGenModule.cpp @@ -7216,8 +7216,14 @@ } void CodeGenModule::moveLazyEmissionStates(CodeGenModule *NewBuilder) { - assert(DeferredDeclsToEmit.empty() && - "Should have emitted all decls deferred to emit."); + // FIXME: Re-enable the assertions once we fix regular codegen to not leave + // weak references behind. + // The code example also leaves entries in WeakRefReferences in regular clang. 
+ // #include <memory> + // auto p = std::make_shared<int>(42); + // + // assert(DeferredDeclsToEmit.empty() && + // "Should have emitted all decls deferred to emit."); assert(NewBuilder->DeferredDecls.empty() && "Newly created module should not have deferred decls"); NewBuilder->DeferredDecls = std::move(DeferredDecls); Index: clang/include/clang/Parse/Parser.h =================================================================== --- clang/include/clang/Parse/Parser.h +++ clang/include/clang/Parse/Parser.h @@ -18,6 +18,7 @@ #include "clang/Basic/OpenMPKinds.h" #include "clang/Basic/OperatorPrecedence.h" #include "clang/Basic/Specifiers.h" +#include "clang/Basic/TokenKinds.h" #include "clang/Lex/CodeCompletionHandler.h" #include "clang/Lex/Preprocessor.h" #include "clang/Sema/DeclSpec.h" @@ -692,7 +693,8 @@ bool isEofOrEom() { tok::TokenKind Kind = Tok.getKind(); return Kind == tok::eof || Kind == tok::annot_module_begin || - Kind == tok::annot_module_end || Kind == tok::annot_module_include; + Kind == tok::annot_module_end || Kind == tok::annot_module_include || + Kind == tok::annot_repl_input_end; } /// Checks if the \p Level is valid for use in a fold expression. Index: clang/include/clang/Basic/TokenKinds.def =================================================================== --- clang/include/clang/Basic/TokenKinds.def +++ clang/include/clang/Basic/TokenKinds.def @@ -942,6 +942,9 @@ // into the name of a header unit. ANNOTATION(header_unit) +// Annotation for end of input in clang-repl. 
+ANNOTATION(repl_input_end) + #undef PRAGMA_ANNOTATION #undef ANNOTATION #undef TESTING_KEYWORD Index: clang/include/clang/AST/Decl.h =================================================================== --- clang/include/clang/AST/Decl.h +++ clang/include/clang/AST/Decl.h @@ -4324,6 +4324,7 @@ friend class ASTDeclWriter; Stmt *Statement = nullptr; + bool IsSemiMissing = false; TopLevelStmtDecl(DeclContext *DC, SourceLocation L, Stmt *S) : Decl(TopLevelStmt, DC, L), Statement(S) {} @@ -4337,6 +4338,12 @@ SourceRange getSourceRange() const override LLVM_READONLY; Stmt *getStmt() { return Statement; } const Stmt *getStmt() const { return Statement; } + void setStmt(Stmt *S) { + assert(IsSemiMissing && "Operation supported for printing values only!"); + Statement = S; + } + bool isValuePrinting() const { return IsSemiMissing; } + void setValuePrinting(bool Missing = true) { IsSemiMissing = Missing; } static bool classof(const Decl *D) { return classofKind(D->getKind()); } static bool classofKind(Kind K) { return K == TopLevelStmt; }
_______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits