https://github.com/yronglin created https://github.com/llvm/llvm-project/pull/107168
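The two patches below implement P1857R3 (Modules Dependency Discovery): import and module become directive-introducing tokens when they start a logical line (or follow an export that starts the line) and are followed by an identifier, by '<', '"' or ':' for import, or by ';' for module. A rough source-level sketch of that classification (illustrative only, not an excerpt from the patches):

  // Converted to directive-introducing tokens under the rules quoted in patch 2:
  module;                 // 'module' at line start, next token is ';'
  export module m:part;   // 'module' follows 'export' at line start, next token is an identifier
  import :part;           // 'import' at line start, next token is ':'
  import <vector>;        // 'import' at line start, next token is '<'
  import "header.h";      // 'import' at line start, next token is a quoted header-name

  // Left as ordinary identifiers:
  n = import + 1;         // 'import' does not start the logical line
  import = 1;             // next token is '=', which is not in the follow set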
None >From fb028015b8f86f87b6d1643e91c21531c768a198 Mon Sep 17 00:00:00 2001 From: yronglin <yronglin...@gmail.com> Date: Sat, 17 Aug 2024 16:54:26 +0800 Subject: [PATCH 1/2] [Clang] Add peekNextPPToken, makes peek next token without side-effects Signed-off-by: yronglin <yronglin...@gmail.com> --- clang/include/clang/Lex/Lexer.h | 10 ++++---- clang/include/clang/Lex/Preprocessor.h | 8 ++++++- clang/include/clang/Lex/TokenLexer.h | 7 +++--- clang/lib/Lex/Lexer.cpp | 21 +++++++++-------- clang/lib/Lex/PPMacroExpansion.cpp | 32 ++++++++++++-------------- clang/lib/Lex/TokenLexer.cpp | 10 ++++---- 6 files changed, 46 insertions(+), 42 deletions(-) diff --git a/clang/include/clang/Lex/Lexer.h b/clang/include/clang/Lex/Lexer.h index b6ecc7e5ded9e2..1e665c13b392f2 100644 --- a/clang/include/clang/Lex/Lexer.h +++ b/clang/include/clang/Lex/Lexer.h @@ -124,7 +124,7 @@ class Lexer : public PreprocessorLexer { //===--------------------------------------------------------------------===// // Context that changes as the file is lexed. // NOTE: any state that mutates when in raw mode must have save/restore code - // in Lexer::isNextPPTokenLParen. + // in Lexer::peekNextPPToken. // BufferPtr - Current pointer into the buffer. This is the next character // to be lexed. @@ -629,10 +629,10 @@ class Lexer : public PreprocessorLexer { BufferPtr = TokEnd; } - /// isNextPPTokenLParen - Return 1 if the next unexpanded token will return a - /// tok::l_paren token, 0 if it is something else and 2 if there are no more - /// tokens in the buffer controlled by this lexer. - unsigned isNextPPTokenLParen(); + /// peekNextPPToken - Return std::nullopt if there are no more tokens in the + /// buffer controlled by this lexer, otherwise return the next unexpanded + /// token. + std::optional<Token> peekNextPPToken(); //===--------------------------------------------------------------------===// // Lexer character reading interfaces. diff --git a/clang/include/clang/Lex/Preprocessor.h b/clang/include/clang/Lex/Preprocessor.h index 1307659e27d137..0ab138974aeb20 100644 --- a/clang/include/clang/Lex/Preprocessor.h +++ b/clang/include/clang/Lex/Preprocessor.h @@ -2650,10 +2650,16 @@ class Preprocessor { void removeCachedMacroExpandedTokensOfLastLexer(); + /// Peek the next token. If so, return the token, if not, this + /// method should have no observable side-effect on the lexed tokens. + std::optional<Token> peekNextPPToken(); + /// Determine whether the next preprocessor token to be /// lexed is a '('. If so, consume the token and return true, if not, this /// method should have no observable side-effect on the lexed tokens. - bool isNextPPTokenLParen(); + bool isNextPPTokenLParen() { + return peekNextPPToken().value_or(Token{}).is(tok::l_paren); + } /// After reading "MACRO(", this method is invoked to read all of the formal /// arguments specified for the macro invocation. Returns null on error. diff --git a/clang/include/clang/Lex/TokenLexer.h b/clang/include/clang/Lex/TokenLexer.h index 4d229ae6106743..777b4e6266c714 100644 --- a/clang/include/clang/Lex/TokenLexer.h +++ b/clang/include/clang/Lex/TokenLexer.h @@ -139,10 +139,9 @@ class TokenLexer { void Init(const Token *TokArray, unsigned NumToks, bool DisableMacroExpansion, bool OwnsTokens, bool IsReinject); - /// If the next token lexed will pop this macro off the - /// expansion stack, return 2. If the next unexpanded token is a '(', return - /// 1, otherwise return 0. 
- unsigned isNextTokenLParen() const; + /// If the next token lexed will pop this macro off the expansion stack, + /// return std::nullopt, otherwise return the next unexpanded token. + std::optional<Token> peekNextPPToken() const; /// Lex and return a token from this macro stream. bool Lex(Token &Tok); diff --git a/clang/lib/Lex/Lexer.cpp b/clang/lib/Lex/Lexer.cpp index ef1e1f4bd9aeb4..af533b3874cf5d 100644 --- a/clang/lib/Lex/Lexer.cpp +++ b/clang/lib/Lex/Lexer.cpp @@ -3193,18 +3193,19 @@ bool Lexer::LexEndOfFile(Token &Result, const char *CurPtr) { return PP->HandleEndOfFile(Result, isPragmaLexer()); } -/// isNextPPTokenLParen - Return 1 if the next unexpanded token lexed from -/// the specified lexer will return a tok::l_paren token, 0 if it is something -/// else and 2 if there are no more tokens in the buffer controlled by the -/// lexer. -unsigned Lexer::isNextPPTokenLParen() { +/// peekNextPPToken - Return std::nullopt if there are no more tokens in the +/// buffer controlled by this lexer, otherwise return the next unexpanded +/// token. +std::optional<Token> Lexer::peekNextPPToken() { assert(!LexingRawMode && "How can we expand a macro from a skipping buffer?"); if (isDependencyDirectivesLexer()) { if (NextDepDirectiveTokenIndex == DepDirectives.front().Tokens.size()) - return 2; - return DepDirectives.front().Tokens[NextDepDirectiveTokenIndex].is( - tok::l_paren); + return std::nullopt; + Token Result; + (void)convertDependencyDirectiveToken( + DepDirectives.front().Tokens[NextDepDirectiveTokenIndex], Result); + return Result; } // Switch to 'skipping' mode. This will ensure that we can lex a token @@ -3233,8 +3234,8 @@ unsigned Lexer::isNextPPTokenLParen() { LexingRawMode = false; if (Tok.is(tok::eof)) - return 2; - return Tok.is(tok::l_paren); + return std::nullopt; + return Tok; } /// Find the end of a version control conflict marker. diff --git a/clang/lib/Lex/PPMacroExpansion.cpp b/clang/lib/Lex/PPMacroExpansion.cpp index 1d671ab72b0c03..0daa7fa96f89b1 100644 --- a/clang/lib/Lex/PPMacroExpansion.cpp +++ b/clang/lib/Lex/PPMacroExpansion.cpp @@ -437,42 +437,40 @@ static bool isTrivialSingleTokenExpansion(const MacroInfo *MI, return !llvm::is_contained(MI->params(), II); } -/// isNextPPTokenLParen - Determine whether the next preprocessor token to be -/// lexed is a '('. If so, consume the token and return true, if not, this -/// method should have no observable side-effect on the lexed tokens. -bool Preprocessor::isNextPPTokenLParen() { +/// isNextPPTokenLParen - Peek the next token. If so, return the token, if not, +/// this method should have no observable side-effect on the lexed tokens. +std::optional<Token> Preprocessor::peekNextPPToken() { // Do some quick tests for rejection cases. - unsigned Val; + std::optional<Token> Val; if (CurLexer) - Val = CurLexer->isNextPPTokenLParen(); + Val = CurLexer->peekNextPPToken(); else - Val = CurTokenLexer->isNextTokenLParen(); + Val = CurTokenLexer->peekNextPPToken(); - if (Val == 2) { + if (!Val) { // We have run off the end. If it's a source file we don't // examine enclosing ones (C99 5.1.1.2p4). Otherwise walk up the // macro stack. 
if (CurPPLexer) - return false; + return std::nullopt; for (const IncludeStackInfo &Entry : llvm::reverse(IncludeMacroStack)) { if (Entry.TheLexer) - Val = Entry.TheLexer->isNextPPTokenLParen(); + Val = Entry.TheLexer->peekNextPPToken(); else - Val = Entry.TheTokenLexer->isNextTokenLParen(); + Val = Entry.TheTokenLexer->peekNextPPToken(); - if (Val != 2) + if (Val) break; // Ran off the end of a source file? if (Entry.ThePPLexer) - return false; + return std::nullopt; } } - // Okay, if we know that the token is a '(', lex it and return. Otherwise we - // have found something that isn't a '(' or we found the end of the - // translation unit. In either case, return false. - return Val == 1; + // Okay, we found the token and return. Otherwise we found the end of the + // translation unit. + return Val; } /// HandleMacroExpandedIdentifier - If an identifier token is read that is to be diff --git a/clang/lib/Lex/TokenLexer.cpp b/clang/lib/Lex/TokenLexer.cpp index 856d5682727fe3..0eca09ef93da92 100644 --- a/clang/lib/Lex/TokenLexer.cpp +++ b/clang/lib/Lex/TokenLexer.cpp @@ -922,13 +922,13 @@ bool TokenLexer::pasteTokens(Token &LHSTok, ArrayRef<Token> TokenStream, } /// isNextTokenLParen - If the next token lexed will pop this macro off the -/// expansion stack, return 2. If the next unexpanded token is a '(', return -/// 1, otherwise return 0. -unsigned TokenLexer::isNextTokenLParen() const { +/// expansion stack, return std::nullopt, otherwise return the next unexpanded +/// token. +std::optional<Token> TokenLexer::peekNextPPToken() const { // Out of tokens? if (isAtEnd()) - return 2; - return Tokens[CurTokenIdx].is(tok::l_paren); + return std::nullopt; + return Tokens[CurTokenIdx]; } /// isParsingPreprocessorDirective - Return true if we are in the middle of a >From b7b4e2549a5fb723b455768f2bcf75bfab6cab04 Mon Sep 17 00:00:00 2001 From: yronglin <yronglin...@gmail.com> Date: Wed, 4 Sep 2024 00:57:52 +0800 Subject: [PATCH 2/2] [C++20][Modules] Implement P1857R3 Modules Dependency Discovery Signed-off-by: yronglin <yronglin...@gmail.com> --- .../include/clang/Basic/DiagnosticLexKinds.td | 6 +- .../clang/Basic/DiagnosticParseKinds.td | 4 +- clang/include/clang/Basic/IdentifierTable.h | 27 +- clang/include/clang/Basic/TokenKinds.def | 6 + clang/include/clang/Lex/Preprocessor.h | 87 +++++- clang/include/clang/Lex/Token.h | 7 + clang/include/clang/Parse/Parser.h | 4 - clang/lib/Basic/IdentifierTable.cpp | 4 +- clang/lib/Lex/DependencyDirectivesScanner.cpp | 28 +- clang/lib/Lex/Lexer.cpp | 34 +- clang/lib/Lex/PPDirectives.cpp | 214 ++++++++++++- clang/lib/Lex/Preprocessor.cpp | 293 +++++------------- clang/lib/Lex/TokenLexer.cpp | 3 +- clang/lib/Parse/Parser.cpp | 65 +--- .../Lex/DependencyDirectivesScannerTest.cpp | 10 +- 15 files changed, 483 insertions(+), 309 deletions(-) diff --git a/clang/include/clang/Basic/DiagnosticLexKinds.td b/clang/include/clang/Basic/DiagnosticLexKinds.td index 12d7b8c0205ee9..afd56fb44b2e71 100644 --- a/clang/include/clang/Basic/DiagnosticLexKinds.td +++ b/clang/include/clang/Basic/DiagnosticLexKinds.td @@ -477,7 +477,7 @@ def warn_cxx98_compat_variadic_macro : Warning< def ext_named_variadic_macro : Extension< "named variadic macros are a GNU extension">, InGroup<VariadicMacros>; def err_embedded_directive : Error< - "embedding a #%0 directive within macro arguments is not supported">; + "embedding a %select{#|C++ }0%1 directive within macro arguments is not supported">; def ext_embedded_directive : Extension< "embedding a directive within macro arguments has undefined 
behavior">, InGroup<DiagGroup<"embedded-directive">>; @@ -952,6 +952,10 @@ def warn_module_conflict : Warning< InGroup<ModuleConflict>; // C++20 modules +def err_module_decl_in_header : Error< + "module declaration must not come from an #include directive">; +def err_pp_cond_span_module_decl : Error< + "preprocessor conditionals shall not span a module declaration">; def err_header_import_semi_in_macro : Error< "semicolon terminating header import declaration cannot be produced " "by a macro">; diff --git a/clang/include/clang/Basic/DiagnosticParseKinds.td b/clang/include/clang/Basic/DiagnosticParseKinds.td index 0b8ab4bf092509..c8654f12cb349a 100644 --- a/clang/include/clang/Basic/DiagnosticParseKinds.td +++ b/clang/include/clang/Basic/DiagnosticParseKinds.td @@ -1695,8 +1695,8 @@ def ext_bit_int : Extension< } // end of Parse Issue category. let CategoryName = "Modules Issue" in { -def err_unexpected_module_decl : Error< - "module declaration can only appear at the top level">; +def err_unexpected_module_or_import_decl : Error< + "%select{module|import}0 declaration can only appear at the top level">; def err_module_expected_ident : Error< "expected a module name after '%select{module|import}0'">; def err_attribute_not_module_attr : Error< diff --git a/clang/include/clang/Basic/IdentifierTable.h b/clang/include/clang/Basic/IdentifierTable.h index ae9ebd9f59154e..9423676351e378 100644 --- a/clang/include/clang/Basic/IdentifierTable.h +++ b/clang/include/clang/Basic/IdentifierTable.h @@ -180,6 +180,10 @@ class alignas(IdentifierInfoAlignment) IdentifierInfo { LLVM_PREFERRED_TYPE(bool) unsigned IsModulesImport : 1; + // True if this is the 'module' contextual keyword. + LLVM_PREFERRED_TYPE(bool) + unsigned IsModulesDecl : 1; + // True if this is a mangled OpenMP variant name. LLVM_PREFERRED_TYPE(bool) unsigned IsMangledOpenMPVariantName : 1; @@ -196,7 +200,7 @@ class alignas(IdentifierInfoAlignment) IdentifierInfo { LLVM_PREFERRED_TYPE(bool) unsigned IsFinal : 1; - // 22 bits left in a 64-bit word. + // 21 bits left in a 64-bit word. // Managed by the language front-end. void *FETokenInfo = nullptr; @@ -212,8 +216,8 @@ class alignas(IdentifierInfoAlignment) IdentifierInfo { IsCPPOperatorKeyword(false), NeedsHandleIdentifier(false), IsFromAST(false), ChangedAfterLoad(false), FEChangedAfterLoad(false), RevertedTokenID(false), OutOfDate(false), IsModulesImport(false), - IsMangledOpenMPVariantName(false), IsDeprecatedMacro(false), - IsRestrictExpansion(false), IsFinal(false) {} + IsModulesDecl(false), IsMangledOpenMPVariantName(false), + IsDeprecatedMacro(false), IsRestrictExpansion(false), IsFinal(false) {} public: IdentifierInfo(const IdentifierInfo &) = delete; @@ -520,6 +524,18 @@ class alignas(IdentifierInfoAlignment) IdentifierInfo { RecomputeNeedsHandleIdentifier(); } + /// Determine whether this is the contextual keyword \c module. + bool isModulesDeclaration() const { return IsModulesDecl; } + + /// Set whether this identifier is the contextual keyword \c module. + void setModulesDeclaration(bool I) { + IsModulesDecl = I; + if (I) + NeedsHandleIdentifier = true; + else + RecomputeNeedsHandleIdentifier(); + } + /// Determine whether this is the mangled name of an OpenMP variant. bool isMangledOpenMPVariantName() const { return IsMangledOpenMPVariantName; } @@ -737,10 +753,11 @@ class IdentifierTable { // contents. II->Entry = &Entry; - // If this is the 'import' contextual keyword, mark it as such. + // If this is the 'import' or 'module' contextual keyword, mark it as such. 
if (Name == "import") II->setModulesImport(true); - + else if (Name == "module") + II->setModulesDeclaration(true); return *II; } diff --git a/clang/include/clang/Basic/TokenKinds.def b/clang/include/clang/Basic/TokenKinds.def index 212c1f6ff3a124..6416920539db53 100644 --- a/clang/include/clang/Basic/TokenKinds.def +++ b/clang/include/clang/Basic/TokenKinds.def @@ -129,6 +129,9 @@ PPKEYWORD(pragma) // C23 & C++26 #embed PPKEYWORD(embed) +// C++20 Module Directive +PPKEYWORD(module) + // GNU Extensions. PPKEYWORD(import) PPKEYWORD(include_next) @@ -1014,6 +1017,9 @@ ANNOTATION(module_include) ANNOTATION(module_begin) ANNOTATION(module_end) +// Annotations for C++, Clang and Objective-C named modules. +ANNOTATION(module_name) + // Annotation for a header_name token that has been looked up and transformed // into the name of a header unit. ANNOTATION(header_unit) diff --git a/clang/include/clang/Lex/Preprocessor.h b/clang/include/clang/Lex/Preprocessor.h index 0ab138974aeb20..a9acde7ac51df3 100644 --- a/clang/include/clang/Lex/Preprocessor.h +++ b/clang/include/clang/Lex/Preprocessor.h @@ -128,6 +128,70 @@ enum class EmbedResult { Empty = 2, // Corresponds to __STDC_EMBED_EMPTY__ }; +/// Represents module or partition name token sequance. +/// +/// module-name: +/// module-name-qualifier[opt] identifier +/// +/// partition-name: [C++20] +/// : module-name-qualifier[opt] identifier +/// +/// module-name-qualifier +/// module-name-qualifier[opt] identifier . +/// +/// This class can only be created by the preprocessor and guarantees that the +/// two source array being contiguous in memory and only contains 3 kind of +/// tokens (identifier, '.' and ':'). And only available when the preprocessor +/// returns annot_module_name token. +/// +/// For exmaple: +/// +/// export module m.n:c.d +/// +/// The module name array has 3 tokens ['m', '.', 'n']. +/// The partition name array has 4 tokens [':', 'c', '.', 'd']. +/// +/// When import a partition in a named module fragment (Eg. import :part1;), +/// the module name array will be empty, and the partition name array has 2 +/// tokens. +/// +/// When we meet a private-module-fragment (Eg. module :private;), preprocessor +/// will not return a annot_module_name token, but will return 2 separate tokens +/// [':', 'kw_private']. +class ModuleNameInfo { + friend class Preprocessor; + ArrayRef<Token> ModuleName; + ArrayRef<Token> PartitionName; + + ModuleNameInfo(ArrayRef<Token> AnnotToks, std::optional<unsigned> ColonIndex); + +public: + /// Return the contiguous token array. + ArrayRef<Token> getTokens() const { + if (ModuleName.empty()) + return PartitionName; + if (PartitionName.empty()) + return ModuleName; + return ArrayRef(ModuleName.begin(), PartitionName.end()); + } + bool hasModuleName() const { return !ModuleName.empty(); } + bool hasPartitionName() const { return !PartitionName.empty(); } + ArrayRef<Token> getModuleName() const { return ModuleName; } + ArrayRef<Token> getPartitionName() const { return PartitionName; } + Token getColonToken() const { + assert(hasPartitionName() && "Do not have a partition name"); + return getPartitionName().front(); + } + + /// Under the standard C++ Modules, the dot is just part of the module name, + /// and not a real hierarchy separator. Flatten such module names now. 
+ std::string getFlatName() const; + + void buildNamedModuleIdPath( + Preprocessor &P, + SmallVectorImpl<std::pair<IdentifierInfo *, SourceLocation>> &Path) const; +}; + /// Engages in a tight little dance with the lexer to efficiently /// preprocess tokens. /// @@ -336,6 +400,15 @@ class Preprocessor { /// Whether the last token we lexed was an '@'. bool LastTokenWasAt = false; + + struct ExportContextualKeywordInfo { + Token ExportTok; + bool TokAtPhysicalStartOfLine; + }; + + /// Whether the last token we lexed was an 'export' keyword. + std::optional<ExportContextualKeywordInfo> LastTokenWasExportKeyword = + std::nullopt; /// A position within a C++20 import-seq. class StdCXXImportSeq { @@ -1767,6 +1840,17 @@ class Preprocessor { std::optional<LexEmbedParametersResult> LexEmbedParameters(Token &Current, bool ForHasEmbed); + bool LexModuleNameOrHeaderName(Token &Result, bool IsImport); + /// Callback invoked when the lexer sees one of export, import or module token + /// at the start of a line. + /// + /// This consumes the import, module directive, modifies the + /// lexer/preprocessor state, and advances the lexer(s) so that the next token + /// read is the correct one. + bool HandleModuleContextualKeyword(Token &Result, bool TokAtPhysicalStartOfLine); + + void HandleModuleDirective(Token &ModuleOrImportKeyword); + void LexAfterModuleImport(SmallVectorImpl<Token> &Suffix, bool IsImport); bool LexAfterModuleImport(Token &Result); void CollectPpImportSuffix(SmallVectorImpl<Token> &Toks); @@ -2344,7 +2428,7 @@ class Preprocessor { /// /// \return The location of the end of the directive (the terminating /// newline). - SourceLocation CheckEndOfDirective(const char *DirType, + SourceLocation CheckEndOfDirective(StringRef DirType, bool EnableMacros = false); /// Read and discard all tokens remaining on the current line until @@ -2785,6 +2869,7 @@ class Preprocessor { void HandleIncludeNextDirective(SourceLocation HashLoc, Token &Tok); void HandleIncludeMacrosDirective(SourceLocation HashLoc, Token &Tok); void HandleImportDirective(SourceLocation HashLoc, Token &Tok); + void HandleCXXModuleOrImportDirective(Token &KeywordTok); void HandleMicrosoftImportDirective(Token &Tok); public: diff --git a/clang/include/clang/Lex/Token.h b/clang/include/clang/Lex/Token.h index 4f29fb7d114159..8e81207ddf8d7d 100644 --- a/clang/include/clang/Lex/Token.h +++ b/clang/include/clang/Lex/Token.h @@ -231,6 +231,9 @@ class Token { PtrData = const_cast<char*>(Ptr); } + template <class T> T getAnnotationValueAs() const { + return static_cast<T>(getAnnotationValue()); + } void *getAnnotationValue() const { assert(isAnnotation() && "Used AnnotVal on non-annotation token"); return PtrData; @@ -289,6 +292,10 @@ class Token { /// Return the ObjC keyword kind. tok::ObjCKeywordKind getObjCKeywordID() const; + /// Return true if we have an C++20 Modules contextual keyword(export, import + /// or module). + bool isModuleContextualKeyword(bool AllowExport = true) const; + bool isSimpleTypeSpecifier(const LangOptions &LangOpts) const; /// Return true if this token has trigraphs or escaped newlines in it. diff --git a/clang/include/clang/Parse/Parser.h b/clang/include/clang/Parse/Parser.h index a7513069ff5da0..2bb1af2e01b527 100644 --- a/clang/include/clang/Parse/Parser.h +++ b/clang/include/clang/Parse/Parser.h @@ -165,10 +165,6 @@ class Parser : public CodeCompletionHandler { mutable IdentifierInfo *Ident_GNU_final; mutable IdentifierInfo *Ident_override; - // C++2a contextual keywords. 
- mutable IdentifierInfo *Ident_import; - mutable IdentifierInfo *Ident_module; - // C++ type trait keywords that can be reverted to identifiers and still be // used as type traits. llvm::SmallDenseMap<IdentifierInfo *, tok::TokenKind> RevertibleTypeTraits; diff --git a/clang/lib/Basic/IdentifierTable.cpp b/clang/lib/Basic/IdentifierTable.cpp index c9c9d927a5902e..10ed86d5d5990c 100644 --- a/clang/lib/Basic/IdentifierTable.cpp +++ b/clang/lib/Basic/IdentifierTable.cpp @@ -326,8 +326,9 @@ void IdentifierTable::AddKeywords(const LangOptions &LangOpts) { if (LangOpts.IEEE128) AddKeyword("__ieee128", tok::kw___float128, KEYALL, LangOpts, *this); - // Add the 'import' contextual keyword. + // Add the 'import' and 'module' contextual keyword. get("import").setModulesImport(true); + get("module").setModulesDeclaration(true); } /// Checks if the specified token kind represents a keyword in the @@ -456,6 +457,7 @@ tok::PPKeywordKind IdentifierInfo::getPPKeywordID() const { CASE( 6, 'd', 'f', define); CASE( 6, 'i', 'n', ifndef); CASE( 6, 'i', 'p', import); + CASE( 6, 'm', 'd', module); CASE( 6, 'p', 'a', pragma); CASE( 7, 'd', 'f', defined); diff --git a/clang/lib/Lex/DependencyDirectivesScanner.cpp b/clang/lib/Lex/DependencyDirectivesScanner.cpp index 088d1cc96e3a21..58e92977f99cc2 100644 --- a/clang/lib/Lex/DependencyDirectivesScanner.cpp +++ b/clang/lib/Lex/DependencyDirectivesScanner.cpp @@ -497,21 +497,32 @@ bool Scanner::lexModuleDirectiveBody(DirectiveKind Kind, const char *&First, const char *DirectiveLoc = Input.data() + CurDirToks.front().Offset; for (;;) { const dependency_directives_scan::Token &Tok = lexToken(First, End); - if (Tok.is(tok::eof)) + if (Tok.isOneOf(tok::eof, tok::eod)) return reportError( DirectiveLoc, diag::err_dep_source_scanner_missing_semi_after_at_import); if (Tok.is(tok::semi)) break; } + + // Skip extra tokens after semi in C++20 Modules directive. + bool IsCXXModules = Kind == DirectiveKind::cxx_export_import_decl || + Kind == DirectiveKind::cxx_export_module_decl || + Kind == DirectiveKind::cxx_import_decl || + Kind == DirectiveKind::cxx_module_decl; + if (IsCXXModules) + lexPPDirectiveBody(First, End); pushDirective(Kind); skipWhitespace(First, End); if (First == End) return false; - if (!isVerticalWhitespace(*First)) - return reportError( - DirectiveLoc, diag::err_dep_source_scanner_unexpected_tokens_at_import); - skipNewline(First, End); + if (!IsCXXModules) { + if (!isVerticalWhitespace(*First)) + return reportError( + DirectiveLoc, + diag::err_dep_source_scanner_unexpected_tokens_at_import); + skipNewline(First, End); + } return false; } @@ -846,8 +857,8 @@ bool Scanner::lexPPLine(const char *&First, const char *const End) { if (*First == '@') return lexAt(First, End); - if (*First == 'i' || *First == 'e' || *First == 'm') - return lexModule(First, End); + // if (!LangOpts.CPlusPlusModules && (*First == 'i' || *First == 'e' || *First == 'm')) + // return lexModule(First, End); if (*First == '_') { if (isNextIdentifierOrSkipLine("_Pragma", First, End)) @@ -860,7 +871,8 @@ bool Scanner::lexPPLine(const char *&First, const char *const End) { TheLexer.setParsingPreprocessorDirective(true); auto ScEx2 = make_scope_exit( [&]() { TheLexer.setParsingPreprocessorDirective(false); }); - + if (*First == 'i' || *First == 'e' || *First == 'm') + return lexModule(First, End); // Lex '#'. 
const dependency_directives_scan::Token &HashTok = lexToken(First, End); if (HashTok.is(tok::hashhash)) { diff --git a/clang/lib/Lex/Lexer.cpp b/clang/lib/Lex/Lexer.cpp index af533b3874cf5d..9f363e70c66541 100644 --- a/clang/lib/Lex/Lexer.cpp +++ b/clang/lib/Lex/Lexer.cpp @@ -74,6 +74,19 @@ tok::ObjCKeywordKind Token::getObjCKeywordID() const { return specId ? specId->getObjCKeywordID() : tok::objc_not_keyword; } +/// Return true if we have an C++20 Modules contextual keyword(export, import +/// or module). +bool Token::isModuleContextualKeyword(bool AllowExport) const { + if (AllowExport && is(tok::kw_export)) + return true; + if (isOneOf(tok::kw_import, tok::kw_module)) + return true; + if (isNot(tok::identifier)) + return false; + const auto *II = getIdentifierInfo(); + return II->isModulesImport() || II->isModulesDeclaration(); +} + /// Determine whether the token kind starts a simple-type-specifier. bool Token::isSimpleTypeSpecifier(const LangOptions &LangOpts) const { switch (getKind()) { @@ -3996,11 +4009,17 @@ bool Lexer::LexTokenInternal(Token &Result, bool TokAtPhysicalStartOfLine) { case 'h': case 'i': case 'j': case 'k': case 'l': case 'm': case 'n': case 'o': case 'p': case 'q': case 'r': case 's': case 't': /*'u'*/ case 'v': case 'w': case 'x': case 'y': case 'z': - case '_': + case '_': { // Notify MIOpt that we read a non-whitespace/non-comment token. MIOpt.ReadToken(); - return LexIdentifierContinue(Result, CurPtr); - + bool returnedToken = LexIdentifierContinue(Result, CurPtr); + if (returnedToken && Result.isModuleContextualKeyword() && + LangOpts.CPlusPlusModules && + PP->HandleModuleContextualKeyword(Result, TokAtPhysicalStartOfLine) && + !LexingRawMode && !Is_PragmaLexer) + goto HandleDirective; + return returnedToken; + } case '$': // $ in identifiers. if (LangOpts.DollarIdents) { if (!isLexingRawMode()) @@ -4484,8 +4503,8 @@ bool Lexer::LexTokenInternal(Token &Result, bool TokAtPhysicalStartOfLine) { HandleDirective: // We parsed a # character and it's the start of a preprocessing directive. - - FormTokenWithChars(Result, CurPtr, tok::hash); + if (!Result.isOneOf(tok::kw_import, tok::kw_module)) + FormTokenWithChars(Result, CurPtr, tok::hash); PP->HandleDirective(Result); if (PP->hadModuleLoaderFatalFailure()) @@ -4559,6 +4578,11 @@ bool Lexer::LexDependencyDirectiveToken(Token &Result) { Result.setRawIdentifierData(TokPtr); if (!isLexingRawMode()) { const IdentifierInfo *II = PP->LookUpIdentifierInfo(Result); + if (Result.isModuleContextualKeyword() && + PP->HandleModuleContextualKeyword(Result, Result.isAtStartOfLine())) { + PP->HandleDirective(Result); + return false; + } if (II->isHandleIdentifierCase()) return PP->HandleIdentifier(Result); } diff --git a/clang/lib/Lex/PPDirectives.cpp b/clang/lib/Lex/PPDirectives.cpp index 4e77df9ec444c7..b40de9d9184048 100644 --- a/clang/lib/Lex/PPDirectives.cpp +++ b/clang/lib/Lex/PPDirectives.cpp @@ -413,7 +413,7 @@ void Preprocessor::ReadMacroName(Token &MacroNameTok, MacroUse isDefineUndef, /// true, then we consider macros that expand to zero tokens as being ok. /// /// Returns the location of the end of the directive. -SourceLocation Preprocessor::CheckEndOfDirective(const char *DirType, +SourceLocation Preprocessor::CheckEndOfDirective(StringRef DirType, bool EnableMacros) { Token Tmp; // Lex unexpanded tokens for most directives: macros might expand to zero @@ -1219,9 +1219,14 @@ void Preprocessor::HandleDirective(Token &Result) { // Save the '#' token in case we need to return it later. 
Token SavedHash = Result; + bool IsCXX20ImportOrModuleDirective = + getLangOpts().CPlusPlusModules && + Result.isModuleContextualKeyword(/*AllowExport=*/false); + // Read the next token, the directive flavor. This isn't expanded due to // C99 6.10.3p8. - LexUnexpandedToken(Result); + if (!IsCXX20ImportOrModuleDirective) + LexUnexpandedToken(Result); // C99 6.10.3p11: Is this preprocessor directive in macro invocation? e.g.: // #define A(x) #x @@ -1240,7 +1245,9 @@ void Preprocessor::HandleDirective(Token &Result) { case tok::pp___include_macros: case tok::pp_pragma: case tok::pp_embed: - Diag(Result, diag::err_embedded_directive) << II->getName(); + case tok::pp_module: + Diag(Result, diag::err_embedded_directive) + << IsCXX20ImportOrModuleDirective << II->getName(); Diag(*ArgMacro, diag::note_macro_expansion_here) << ArgMacro->getIdentifierInfo(); DiscardUntilEndOfDirective(); @@ -1331,9 +1338,12 @@ void Preprocessor::HandleDirective(Token &Result) { // C99 6.10.6 - Pragma Directive. case tok::pp_pragma: return HandlePragmaDirective({PIK_HashPragma, SavedHash.getLocation()}); - + case tok::pp_module: + return HandleCXXModuleOrImportDirective(Result); // GNU Extensions. case tok::pp_import: + if (IsCXX20ImportOrModuleDirective) + return HandleCXXModuleOrImportDirective(Result); return HandleImportDirective(SavedHash.getLocation(), Result); case tok::pp_include_next: return HandleIncludeNextDirective(SavedHash.getLocation(), Result); @@ -4012,3 +4022,199 @@ void Preprocessor::HandleEmbedDirective(SourceLocation HashLoc, Token &EmbedTok, *Params); HandleEmbedDirectiveImpl(HashLoc, *Params, BinaryContents); } + +void Preprocessor::LexAfterModuleImport(SmallVectorImpl<Token> &Suffix, bool IsImport) { + Token Result; + Suffix.clear(); +Retry: + if (IsImport && getLangOpts().CPlusPlusModules) { + (void) LexHeaderName(Result); + if (Result.is(tok::colon) && ModuleDeclState.isNamedModule()) { + std::string Name = ModuleDeclState.getPrimaryName().str(); + Name += ":"; + NamedModuleImportPath.push_back( + {getIdentifierInfo(Name), Result.getLocation()}); + } + } else { + Lex(Result); + } + + Suffix.push_back(Result); + if (Result.isOneOf(tok::eof, tok::eod)) + return; + if (Result.is(tok::code_completion)) + goto Retry; + + // Lex a module name + if (Result.isOneOf(tok::identifier, tok::colon)) { + bool ExpectsIdentifier = Result.is(tok::colon); + if (Result.is(tok::identifier)) + NamedModuleImportPath.push_back( + std::make_pair(Result.getIdentifierInfo(), Result.getLocation())); + while (true) { + Lex(Result); + Suffix.push_back(Result); + if (ExpectsIdentifier && Result.is(tok::identifier)) { + ExpectsIdentifier = false; + // We expected to see an identifier here, and we did; continue handling + // identifiers. + NamedModuleImportPath.push_back( + std::make_pair(Result.getIdentifierInfo(), Result.getLocation())); + continue; + } + + if (!ExpectsIdentifier && Result.is(tok::period)) { + ExpectsIdentifier = true; + continue; + } + + // Module partition only allowed in C++20 Modules. + // FIXME: Should we accept partition here for error recovery? + if (!ExpectsIdentifier && Result.is(tok::colon)) { + ExpectsIdentifier = true; + continue; + } + break; + } + + // Under the standard C++ Modules, the dot is just part of the module name, + // and not a real hierarchy separator. Flatten such module names now. + // + // FIXME: Is this the right level to be performing this transformation? 
+ std::string FlatModuleName; + if (getLangOpts().CPlusPlusModules) { + for (auto &Piece : NamedModuleImportPath) { + // If the FlatModuleName ends with colon, it implies it is a partition. + if (!FlatModuleName.empty() && FlatModuleName.back() != ':') + FlatModuleName += "."; + FlatModuleName += Piece.first->getName(); + } + SourceLocation FirstPathLoc = NamedModuleImportPath[0].second; + NamedModuleImportPath.clear(); + NamedModuleImportPath.push_back( + std::make_pair(getIdentifierInfo(FlatModuleName), FirstPathLoc)); + } + } + + // Consume the pp-import-suffix and expand any macros in it now, if we're not + // at the semicolon already. + SourceLocation SemiLoc = Result.getLocation(); + if (Result.isNot(tok::semi)) { + CollectPpImportSuffix(Suffix); + if (Suffix.back().isNot(tok::semi)) + return; + SemiLoc = Suffix.back().getLocation(); + } + + if (!IsImport) + return; + + if (getLangOpts().CPlusPlusModules && Suffix.front().is(tok::colon) && + !ModuleDeclState.isNamedModule()) + return; + + // C++2a [cpp.module]p1: + // The ';' preprocessing-token terminating a pp-import shall not have + // been produced by macro replacement. + if (getLangOpts().CPlusPlusModules && SemiLoc.isMacroID()) + Diag(SemiLoc, diag::err_header_import_semi_in_macro); + + // Check for a header-name. + if (IsImport && Result.is(tok::header_name)) { + // Reconstitute the import token. + Token ImportTok; + ImportTok.startToken(); + ImportTok.setKind(tok::kw_import); + ImportTok.setLocation(ModuleImportLoc); + ImportTok.setIdentifierInfo(getIdentifierInfo("import")); + ImportTok.setLength(6); + + auto Action = HandleHeaderIncludeOrImport( + /*HashLoc*/ SourceLocation(), ImportTok, Result, SemiLoc); + switch (Action.Kind) { + case ImportAction::None: + break; + + case ImportAction::ModuleBegin: + // Let the parser know we're textually entering the module. + Suffix.emplace_back(); + Suffix.back().startToken(); + Suffix.back().setKind(tok::annot_module_begin); + Suffix.back().setLocation(SemiLoc); + Suffix.back().setAnnotationEndLoc(SemiLoc); + Suffix.back().setAnnotationValue(Action.ModuleForHeader); + [[fallthrough]]; + + case ImportAction::ModuleImport: + case ImportAction::HeaderUnitImport: + case ImportAction::SkippedModuleImport: + // We chose to import (or textually enter) the file. Convert the + // header-name token into a header unit annotation token. + Suffix[0].setKind(tok::annot_header_unit); + Suffix[0].setAnnotationEndLoc(Suffix[0].getLocation()); + Suffix[0].setAnnotationValue(Action.ModuleForHeader); + // FIXME: Call the moduleImport callback? + break; + case ImportAction::Failure: + assert(TheModuleLoader.HadFatalFailure && + "This should be an early exit only to a fatal error"); + Result.setKind(tok::eof); + Suffix.clear(); + Suffix.push_back(Result); + CurLexer->cutOffLexing(); + return; + } + return; + } + + // Check module name + Module *Imported = nullptr; + // We don't/shouldn't load the standard c++20 modules when preprocessing. + if (getLangOpts().Modules && !isInImportingCXXNamedModules()) { + Imported = TheModuleLoader.loadModule(ModuleImportLoc, + NamedModuleImportPath, + Module::Hidden, + /*IsInclusionDirective=*/false); + if (Imported) + makeModuleVisible(Imported, SemiLoc); + } + + if (Callbacks) + Callbacks->moduleImport(ModuleImportLoc, NamedModuleImportPath, Imported); +} + +void Preprocessor::HandleCXXModuleOrImportDirective(Token &ModuleTok) { + assert(ModuleTok.isOneOf(tok::kw_import, tok::kw_module)); + SourceLocation DirectiveStartLoc = + LastTokenWasExportKeyword + ? 
LastTokenWasExportKeyword->ExportTok.getLocation() + : ModuleTok.getLocation(); + bool IsImport = ModuleTok.is(tok::kw_import); + SmallVector<Token, 32> Suffix; + LexAfterModuleImport(Suffix, IsImport); + + if (!Suffix.empty() && Suffix.back().is(tok::semi)) + CheckEndOfDirective(ModuleTok.getIdentifierInfo()->getName()); + + if (ModuleTok.is(tok::kw_module)) { + if (!IncludeMacroStack.empty()) { + Diag(DirectiveStartLoc, diag::err_module_decl_in_header) + << SourceRange(DirectiveStartLoc, Suffix.back().getLocation()); + } + + if (CurPPLexer->getConditionalStackDepth() != 0) { + Diag(DirectiveStartLoc, diag::err_pp_cond_span_module_decl) + << SourceRange(DirectiveStartLoc, Suffix.back().getLocation()); + } + } + + unsigned NumTokens = Suffix.size() + 1; + // Allocate a holding buffer for a sequence of tokens and introduce it into + // the token stream. + auto ToksCopy = std::make_unique<Token[]>(NumTokens); + ToksCopy[0] = ModuleTok; + std::copy(Suffix.begin(), Suffix.end(), &ToksCopy[1]); + EnterTokenStream(std::move(ToksCopy), NumTokens, + /*DisableMacroExpansion*/ true, /*IsReinject*/ false); + return; +} diff --git a/clang/lib/Lex/Preprocessor.cpp b/clang/lib/Lex/Preprocessor.cpp index f0b4593e0cc22e..eb694ce41566fb 100644 --- a/clang/lib/Lex/Preprocessor.cpp +++ b/clang/lib/Lex/Preprocessor.cpp @@ -905,6 +905,7 @@ void Preprocessor::Lex(Token &Result) { // This token is injected to represent the translation of '#include "a.h"' // into "import a.h;". Mimic the notional ';'. case tok::annot_module_include: + case tok::annot_repl_input_end: case tok::semi: TrackGMFState.handleSemi(); StdCXXImportSeqState.handleSemi(); @@ -925,27 +926,20 @@ void Preprocessor::Lex(Token &Result) { case tok::period: ModuleDeclState.handlePeriod(); break; - case tok::identifier: - // Check "import" and "module" when there is no open bracket. The two - // identifiers are not meaningful with open brackets. 
+ case tok::kw_module: + TrackGMFState.handleModule(StdCXXImportSeqState.afterTopLevelSeq()); + ModuleDeclState.handleModule(); + break; + case tok::kw_import: if (StdCXXImportSeqState.atTopLevel()) { - if (Result.getIdentifierInfo()->isModulesImport()) { - TrackGMFState.handleImport(StdCXXImportSeqState.afterTopLevelSeq()); - StdCXXImportSeqState.handleImport(); - if (StdCXXImportSeqState.afterImportSeq()) { - ModuleImportLoc = Result.getLocation(); - NamedModuleImportPath.clear(); - IsAtImport = false; - ModuleImportExpectsIdentifier = true; - CurLexerCallback = CLK_LexAfterModuleImport; - } - break; - } else if (Result.getIdentifierInfo() == getIdentifierInfo("module")) { - TrackGMFState.handleModule(StdCXXImportSeqState.afterTopLevelSeq()); - ModuleDeclState.handleModule(); - break; - } + TrackGMFState.handleImport(StdCXXImportSeqState.afterTopLevelSeq()); + StdCXXImportSeqState.handleImport(); } + // ModuleImportLoc = Result.getLocation(); + // NamedModuleImportPath.clear(); + // IsAtImport = false; + break; + case tok::identifier: ModuleDeclState.handleIdentifier(Result.getIdentifierInfo()); if (ModuleDeclState.isModuleCandidate()) break; @@ -964,6 +958,8 @@ void Preprocessor::Lex(Token &Result) { } LastTokenWasAt = Result.is(tok::at); + if (Result.isNot(tok::kw_export)) + LastTokenWasExportKeyword.reset(); --LexLevel; if ((LexLevel == 0 || PreprocessToken) && @@ -1111,7 +1107,7 @@ void Preprocessor::CollectPpImportSuffix(SmallVectorImpl<Token> &Toks) { if (BracketDepth == 0) return; break; - + case tok::eod: case tok::eof: return; @@ -1121,213 +1117,82 @@ void Preprocessor::CollectPpImportSuffix(SmallVectorImpl<Token> &Toks) { } } - -/// Lex a token following the 'import' contextual keyword. -/// -/// pp-import: [C++20] -/// import header-name pp-import-suffix[opt] ; -/// import header-name-tokens pp-import-suffix[opt] ; -/// [ObjC] @ import module-name ; -/// [Clang] import module-name ; -/// -/// header-name-tokens: -/// string-literal -/// < [any sequence of preprocessing-tokens other than >] > -/// -/// module-name: -/// module-name-qualifier[opt] identifier -/// -/// module-name-qualifier -/// module-name-qualifier[opt] identifier . +/// P1857R3: Modules Dependency Discovery /// -/// We respond to a pp-import by importing macros from the named module. -bool Preprocessor::LexAfterModuleImport(Token &Result) { - // Figure out what kind of lexer we actually have. - recomputeCurLexerKind(); +/// At the start of phase 4 an import or module token is treated as starting a +/// directive and are converted to their respective keywords iff: +/// • After skipping horizontal whitespace are +/// • at the start of a logical line, or +/// • preceded by an 'export' at the start of the logical line. +/// • Are followed by an identifier pp token (before macro expansion), or +/// • <, ", or : (but not ::) pp tokens for 'import', or +/// • ; for 'module' +/// Otherwise the token is treated as an identifier. +bool Preprocessor::HandleModuleContextualKeyword( + Token &Result, bool TokAtPhysicalStartOfLine) { + if (!getLangOpts().CPlusPlusModules || !Result.isModuleContextualKeyword()) + return false; - // Lex the next token. The header-name lexing rules are used at the start of - // a pp-import. - // - // For now, we only support header-name imports in C++20 mode. - // FIXME: Should we allow this in all language modes that support an import - // declaration as an extension? 
- if (NamedModuleImportPath.empty() && getLangOpts().CPlusPlusModules) { - if (LexHeaderName(Result)) - return true; - - if (Result.is(tok::colon) && ModuleDeclState.isNamedModule()) { - std::string Name = ModuleDeclState.getPrimaryName().str(); - Name += ":"; - NamedModuleImportPath.push_back( - {getIdentifierInfo(Name), Result.getLocation()}); - CurLexerCallback = CLK_LexAfterModuleImport; - return true; - } - } else { - Lex(Result); + if (Result.is(tok::kw_export)) { + LastTokenWasExportKeyword = {Result, TokAtPhysicalStartOfLine}; + return false; } - // Allocate a holding buffer for a sequence of tokens and introduce it into - // the token stream. - auto EnterTokens = [this](ArrayRef<Token> Toks) { - auto ToksCopy = std::make_unique<Token[]>(Toks.size()); - std::copy(Toks.begin(), Toks.end(), ToksCopy.get()); - EnterTokenStream(std::move(ToksCopy), Toks.size(), - /*DisableMacroExpansion*/ true, /*IsReinject*/ false); - }; - - bool ImportingHeader = Result.is(tok::header_name); - // Check for a header-name. - SmallVector<Token, 32> Suffix; - if (ImportingHeader) { - // Enter the header-name token into the token stream; a Lex action cannot - // both return a token and cache tokens (doing so would corrupt the token - // cache if the call to Lex comes from CachingLex / PeekAhead). - Suffix.push_back(Result); - - // Consume the pp-import-suffix and expand any macros in it now. We'll add - // it back into the token stream later. - CollectPpImportSuffix(Suffix); - if (Suffix.back().isNot(tok::semi)) { - // This is not a pp-import after all. - EnterTokens(Suffix); + if (LastTokenWasExportKeyword) { + auto Export = *LastTokenWasExportKeyword; + LastTokenWasExportKeyword.reset(); + // The export keyword was not at the start of line, it's not a + // directive-introducing token. + if (!Export.TokAtPhysicalStartOfLine) return false; - } - - // C++2a [cpp.module]p1: - // The ';' preprocessing-token terminating a pp-import shall not have - // been produced by macro replacement. - SourceLocation SemiLoc = Suffix.back().getLocation(); - if (SemiLoc.isMacroID()) - Diag(SemiLoc, diag::err_header_import_semi_in_macro); - - // Reconstitute the import token. - Token ImportTok; - ImportTok.startToken(); - ImportTok.setKind(tok::kw_import); - ImportTok.setLocation(ModuleImportLoc); - ImportTok.setIdentifierInfo(getIdentifierInfo("import")); - ImportTok.setLength(6); - - auto Action = HandleHeaderIncludeOrImport( - /*HashLoc*/ SourceLocation(), ImportTok, Suffix.front(), SemiLoc); - switch (Action.Kind) { - case ImportAction::None: - break; - - case ImportAction::ModuleBegin: - // Let the parser know we're textually entering the module. - Suffix.emplace_back(); - Suffix.back().startToken(); - Suffix.back().setKind(tok::annot_module_begin); - Suffix.back().setLocation(SemiLoc); - Suffix.back().setAnnotationEndLoc(SemiLoc); - Suffix.back().setAnnotationValue(Action.ModuleForHeader); - [[fallthrough]]; - - case ImportAction::ModuleImport: - case ImportAction::HeaderUnitImport: - case ImportAction::SkippedModuleImport: - // We chose to import (or textually enter) the file. Convert the - // header-name token into a header unit annotation token. - Suffix[0].setKind(tok::annot_header_unit); - Suffix[0].setAnnotationEndLoc(Suffix[0].getLocation()); - Suffix[0].setAnnotationValue(Action.ModuleForHeader); - // FIXME: Call the moduleImport callback? 
- break; - case ImportAction::Failure: - assert(TheModuleLoader.HadFatalFailure && - "This should be an early exit only to a fatal error"); - Result.setKind(tok::eof); - CurLexer->cutOffLexing(); - EnterTokens(Suffix); - return true; - } - - EnterTokens(Suffix); + // [cpp.pre]/1.4 + // export // not a preprocessing directive + // import foo; // preprocessing directive (ill-formed at phase + // 7) + if (TokAtPhysicalStartOfLine) + return false; + } else if (!TokAtPhysicalStartOfLine) return false; - } - // The token sequence - // - // import identifier (. identifier)* - // - // indicates a module import directive. We already saw the 'import' - // contextual keyword, so now we're looking for the identifiers. - if (ModuleImportExpectsIdentifier && Result.getKind() == tok::identifier) { - // We expected to see an identifier here, and we did; continue handling - // identifiers. - NamedModuleImportPath.push_back( - std::make_pair(Result.getIdentifierInfo(), Result.getLocation())); - ModuleImportExpectsIdentifier = false; - CurLexerCallback = CLK_LexAfterModuleImport; - return true; - } - - // If we're expecting a '.' or a ';', and we got a '.', then wait until we - // see the next identifier. (We can also see a '[[' that begins an - // attribute-specifier-seq here under the Standard C++ Modules.) - if (!ModuleImportExpectsIdentifier && Result.getKind() == tok::period) { - ModuleImportExpectsIdentifier = true; - CurLexerCallback = CLK_LexAfterModuleImport; - return true; - } - - // If we didn't recognize a module name at all, this is not a (valid) import. - if (NamedModuleImportPath.empty() || Result.is(tok::eof)) + bool SavedParsingPreprocessorDirective = CurPPLexer->ParsingPreprocessorDirective; + CurPPLexer->ParsingPreprocessorDirective = true; + // Peek next token. + auto NextTok = peekNextPPToken().value_or(Token{}); + CurPPLexer->ParsingPreprocessorDirective = SavedParsingPreprocessorDirective; + if (Result.getIdentifierInfo()->isModulesImport() && + NextTok.isOneOf(tok::raw_identifier, tok::less, tok::string_literal, + tok::colon)) { + Result.setKind(tok::kw_import); + NamedModuleImportPath.clear(); + ModuleImportLoc = Result.getLocation(); + IsAtImport = false; return true; - - // Consume the pp-import-suffix and expand any macros in it now, if we're not - // at the semicolon already. - SourceLocation SemiLoc = Result.getLocation(); - if (Result.isNot(tok::semi)) { - Suffix.push_back(Result); - CollectPpImportSuffix(Suffix); - if (Suffix.back().isNot(tok::semi)) { - // This is not an import after all. - EnterTokens(Suffix); - return false; - } - SemiLoc = Suffix.back().getLocation(); } - - // Under the standard C++ Modules, the dot is just part of the module name, - // and not a real hierarchy separator. Flatten such module names now. - // - // FIXME: Is this the right level to be performing this transformation? - std::string FlatModuleName; - if (getLangOpts().CPlusPlusModules) { - for (auto &Piece : NamedModuleImportPath) { - // If the FlatModuleName ends with colon, it implies it is a partition. 
- if (!FlatModuleName.empty() && FlatModuleName.back() != ':') - FlatModuleName += "."; - FlatModuleName += Piece.first->getName(); - } - SourceLocation FirstPathLoc = NamedModuleImportPath[0].second; + if (Result.getIdentifierInfo()->isModulesDeclaration() && + NextTok.isOneOf(tok::raw_identifier, tok::colon, tok::semi)) { + Result.setKind(tok::kw_module); NamedModuleImportPath.clear(); - NamedModuleImportPath.push_back( - std::make_pair(getIdentifierInfo(FlatModuleName), FirstPathLoc)); - } - - Module *Imported = nullptr; - // We don't/shouldn't load the standard c++20 modules when preprocessing. - if (getLangOpts().Modules && !isInImportingCXXNamedModules()) { - Imported = TheModuleLoader.loadModule(ModuleImportLoc, - NamedModuleImportPath, - Module::Hidden, - /*IsInclusionDirective=*/false); - if (Imported) - makeModuleVisible(Imported, SemiLoc); + return true; } - if (Callbacks) - Callbacks->moduleImport(ModuleImportLoc, NamedModuleImportPath, Imported); + // Ok, it's an identifier. + return false; +} - if (!Suffix.empty()) { - EnterTokens(Suffix); - return false; - } - return true; +bool Preprocessor::LexAfterModuleImport(Token &Result) { + recomputeCurLexerKind(); + // Allocate a holding buffer for a sequence of tokens and introduce it into + // the token stream. + auto EnterTokens = [this](ArrayRef<Token> Toks) { + auto ToksCopy = std::make_unique<Token[]>(Toks.size()); + std::copy(Toks.begin(), Toks.end(), ToksCopy.get()); + EnterTokenStream(std::move(ToksCopy), Toks.size(), + /*DisableMacroExpansion*/ true, /*IsReinject*/ false); + }; + SmallVector<Token, 32> Suffix; + LexAfterModuleImport(Suffix, true); + EnterTokens(Suffix); + return false; } void Preprocessor::makeModuleVisible(Module *M, SourceLocation Loc) { diff --git a/clang/lib/Lex/TokenLexer.cpp b/clang/lib/Lex/TokenLexer.cpp index 0eca09ef93da92..8fc39a5c517b81 100644 --- a/clang/lib/Lex/TokenLexer.cpp +++ b/clang/lib/Lex/TokenLexer.cpp @@ -700,7 +700,8 @@ bool TokenLexer::Lex(Token &Tok) { HasLeadingSpace = false; // Handle recursive expansion! - if (!Tok.isAnnotation() && Tok.getIdentifierInfo() != nullptr) { + if (!Tok.isAnnotation() && !Tok.isModuleContextualKeyword() && + Tok.getIdentifierInfo() != nullptr) { // Change the kind of this identifier to the appropriate token kind, e.g. // turning "for" into a keyword. IdentifierInfo *II = Tok.getIdentifierInfo(); diff --git a/clang/lib/Parse/Parser.cpp b/clang/lib/Parse/Parser.cpp index 04c2f1d380bc48..6d3312684c1f90 100644 --- a/clang/lib/Parse/Parser.cpp +++ b/clang/lib/Parse/Parser.cpp @@ -514,8 +514,6 @@ void Parser::Initialize() { Ident_abstract = nullptr; Ident_override = nullptr; Ident_GNU_final = nullptr; - Ident_import = nullptr; - Ident_module = nullptr; Ident_super = &PP.getIdentifierTable().get("super"); @@ -571,11 +569,6 @@ void Parser::Initialize() { PP.SetPoisonReason(Ident_AbnormalTermination,diag::err_seh___finally_block); } - if (getLangOpts().CPlusPlusModules) { - Ident_import = PP.getIdentifierInfo("import"); - Ident_module = PP.getIdentifierInfo("module"); - } - Actions.Initialize(); // Prime the lexer look-ahead. @@ -639,30 +632,12 @@ bool Parser::ParseTopLevelDecl(DeclGroupPtrTy &Result, switch (NextToken().getKind()) { case tok::kw_module: goto module_decl; - - // Note: no need to handle kw_import here. We only form kw_import under - // the Standard C++ Modules, and in that case 'export import' is parsed as - // an export-declaration containing an import-declaration. 
- - // Recognize context-sensitive C++20 'export module' and 'export import' - // declarations. - case tok::identifier: { - IdentifierInfo *II = NextToken().getIdentifierInfo(); - if ((II == Ident_module || II == Ident_import) && - GetLookAheadToken(2).isNot(tok::coloncolon)) { - if (II == Ident_module) - goto module_decl; - else - goto import_decl; - } - break; - } - + case tok::kw_import: + goto import_decl; default: break; } break; - case tok::kw_module: module_decl: Result = ParseModuleDecl(ImportState); @@ -725,22 +700,6 @@ bool Parser::ParseTopLevelDecl(DeclGroupPtrTy &Result, Actions.ActOnEndOfTranslationUnit(); //else don't tell Sema that we ended parsing: more input might come. return true; - - case tok::identifier: - // C++2a [basic.link]p3: - // A token sequence beginning with 'export[opt] module' or - // 'export[opt] import' and not immediately followed by '::' - // is never interpreted as the declaration of a top-level-declaration. - if ((Tok.getIdentifierInfo() == Ident_module || - Tok.getIdentifierInfo() == Ident_import) && - NextToken().isNot(tok::coloncolon)) { - if (Tok.getIdentifierInfo() == Ident_module) - goto module_decl; - else - goto import_decl; - } - break; - default: break; } @@ -956,14 +915,6 @@ Parser::ParseExternalDeclaration(ParsedAttributes &Attrs, }; Actions.CodeCompletion().CodeCompleteOrdinaryName(getCurScope(), PCC); return nullptr; - case tok::kw_import: { - Sema::ModuleImportState IS = Sema::ModuleImportState::NotACXX20Module; - if (getLangOpts().CPlusPlusModules) { - llvm_unreachable("not expecting a c++20 import here"); - ProhibitAttributes(Attrs); - } - SingleDecl = ParseModuleImport(SourceLocation(), IS); - } break; case tok::kw_export: if (getLangOpts().CPlusPlusModules || getLangOpts().HLSL) { ProhibitAttributes(Attrs); @@ -1048,9 +999,10 @@ Parser::ParseExternalDeclaration(ParsedAttributes &Attrs, case tok::kw___if_not_exists: ParseMicrosoftIfExistsExternalDeclaration(); return nullptr; - + case tok::kw_import: case tok::kw_module: - Diag(Tok, diag::err_unexpected_module_decl); + Diag(Tok, diag::err_unexpected_module_or_import_decl) + << Tok.is(tok::kw_import); SkipUntil(tok::semi); return nullptr; @@ -2462,10 +2414,7 @@ Parser::ParseModuleDecl(Sema::ModuleImportState &ImportState) { ? Sema::ModuleDeclKind::Interface : Sema::ModuleDeclKind::Implementation; - assert( - (Tok.is(tok::kw_module) || - (Tok.is(tok::identifier) && Tok.getIdentifierInfo() == Ident_module)) && - "not a module declaration"); + assert(Tok.is(tok::kw_module) && "not a module declaration"); SourceLocation ModuleLoc = ConsumeToken(); // Attributes appear after the module name, not before. @@ -2557,7 +2506,7 @@ Decl *Parser::ParseModuleImport(SourceLocation AtLoc, SourceLocation ExportLoc; TryConsumeToken(tok::kw_export, ExportLoc); - assert((AtLoc.isInvalid() ? Tok.isOneOf(tok::kw_import, tok::identifier) + assert((AtLoc.isInvalid() ? 
Tok.is(tok::kw_import)
                             : Tok.isObjCAtKeyword(tok::objc_import)) &&
          "Improper start to module import");
   bool IsObjCAtImport = Tok.isObjCAtKeyword(tok::objc_import);
diff --git a/clang/unittests/Lex/DependencyDirectivesScannerTest.cpp b/clang/unittests/Lex/DependencyDirectivesScannerTest.cpp
index bdb5e23510118c..49c005b245d207 100644
--- a/clang/unittests/Lex/DependencyDirectivesScannerTest.cpp
+++ b/clang/unittests/Lex/DependencyDirectivesScannerTest.cpp
@@ -986,11 +986,11 @@ ort \
   ASSERT_FALSE(
       minimizeSourceToDependencyDirectives(Source, Out, Tokens, Directives));
   EXPECT_STREQ("#include \"textual-header.h\"\nexport module m;"
-               "exp\\\nort import:l[[rename]];"
-               "import<<=3;import a b d e d e f e;"
-               "import foo[[no_unique_address]];import foo();"
-               "import f(:sefse);import f(->a=3);"
-               "<TokBeforeEOF>\n",
+               "\nexp\\\nort import:l[[rename]];"
+               "\nimport<<=3;\nimport a b d e d e f e;"
+               "\nimport foo[[no_unique_address]];\nimport foo();"
+               "\nimport f(:sefse);\nimport f(->a=3);"
+               "\n<TokBeforeEOF>\n",
                Out.data());
   ASSERT_EQ(Directives.size(), 11u);
   EXPECT_EQ(Directives[0].Kind, pp_include);
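On the mechanical side, the first patch replaces the old 0/1/2 return convention of isNextPPTokenLParen() with std::optional<Token> peekNextPPToken(), and the '(' check becomes a thin wrapper over the peek. A self-contained toy of that shape (stand-in types, not Clang's real Token or Lexer classes):

  #include <cstddef>
  #include <iostream>
  #include <optional>
  #include <vector>

  enum class TokKind { LParen, Identifier, Eof };

  struct Tok {
    TokKind Kind = TokKind::Eof;
    bool is(TokKind K) const { return Kind == K; }
  };

  struct MiniLexer {
    std::vector<Tok> Toks;
    std::size_t Next = 0;

    // Peek without consuming; std::nullopt replaces the old magic value 2
    // ("no more tokens in the buffer controlled by this lexer").
    std::optional<Tok> peekNextPPToken() const {
      if (Next == Toks.size())
        return std::nullopt;
      return Toks[Next];
    }

    // The old query survives as a thin wrapper, mirroring the patch.
    bool isNextPPTokenLParen() const {
      return peekNextPPToken().value_or(Tok{}).is(TokKind::LParen);
    }
  };

  int main() {
    MiniLexer L{{Tok{TokKind::LParen}, Tok{TokKind::Identifier}}};
    std::cout << L.isNextPPTokenLParen() << '\n'; // prints 1: next unexpanded token is '('
  }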