================ @@ -1329,6 +1341,100 @@ bool Preprocessor::LexAfterModuleImport(Token &Result) { return true; } +/// Lex a token following the 'module' contextual keyword. +/// +/// [cpp.module]/p2: +/// The pp-tokens, if any, of a pp-module shall be of the form: +/// pp-module-name pp-module-partition[opt] pp-tokens[opt] +/// +/// where the pp-tokens (if any) shall not begin with a ( preprocessing token +/// and the grammar non-terminals are defined as: +/// pp-module-name: +/// pp-module-name-qualifierp[opt] identifier +/// pp-module-partition: +/// : pp-module-name-qualifier[opt] identifier +/// pp-module-name-qualifier: +/// identifier . +/// pp-module-name-qualifier identifier . +/// No identifier in the pp-module-name or pp-module-partition shall currently +/// be defined as an object-like macro. +/// +/// [cpp.module]/p3: +/// Any preprocessing tokens after the module preprocessing token in the module +/// directive are processed just as in normal text. +bool Preprocessor::LexAfterModuleDecl(Token &Result) { + // Figure out what kind of lexer we actually have. + recomputeCurLexerKind(); + LexUnexpandedToken(Result); + + auto EnterTokens = [this](ArrayRef<Token> Toks, bool DisableMacroExpansion) { + auto ToksCopy = std::make_unique<Token[]>(Toks.size()); + std::copy(Toks.begin(), Toks.end(), ToksCopy.get()); + EnterTokenStream(std::move(ToksCopy), Toks.size(), DisableMacroExpansion, + /*IsReinject=*/false); + }; + + // If we don't expect an identifier but got an identifier, it's not a part of + // module name. + if (!ModuleDeclExpectsIdentifier && Result.is(tok::identifier)) { + EnterTokens(Result, /*DisableMacroExpansion=*/false); + return false; + } + + // The token sequence + // + // export[opt] module identifier (. identifier)* + // + // indicates a module directive. We already saw the 'module' + // contextual keyword, so now we're looking for the identifiers. 
+ if (ModuleDeclExpectsIdentifier && Result.is(tok::identifier)) { + auto *MI = getMacroInfo(Result.getIdentifierInfo()); + if (MI && MI->isObjectLike()) { + Diag(Result, diag::err_module_decl_cannot_be_macros) + << Result.getLocation() << ModuleDeclLexingPartitionName + << Result.getIdentifierInfo(); + } + ModuleDeclExpectsIdentifier = false; + CurLexerCallback = CLK_LexAfterModuleDecl; + return true; + } + + // If we're expecting a '.', a ':' or a ';', and we got a '.', then wait until + // we see the next identifier. + if (!ModuleDeclExpectsIdentifier && Result.isOneOf(tok::period, tok::colon)) { + ModuleDeclExpectsIdentifier = true; + ModuleDeclLexingPartitionName = Result.is(tok::colon); + CurLexerCallback = CLK_LexAfterModuleDecl; + return true; + } + + // [cpp.module]/p2: where the pp-tokens (if any) shall not begin with a ( + // preprocessing token [...] + if (!ModuleDeclExpectsIdentifier && Result.is(tok::l_paren)) { + ModuleDeclExpectsIdentifier = false; + Diag(Result, diag::err_unxepected_paren_in_module_decl) + << ModuleDeclLexingPartitionName; + Token Tok; + // We already have a '('. + unsigned NumParens = 1; + while (true) { + LexUnexpandedToken(Tok); + if (Tok.isOneOf(tok::eod, tok::eof, tok::semi, tok::period, tok::colon)) { + EnterTokens(Tok, /*DisableMacroExpansion=*/true); + break; + } + if (Tok.is(tok::l_paren)) + NumParens++; + else if (Tok.is(tok::r_paren) && --NumParens == 0) + break; + } + CurLexerCallback = CLK_LexAfterModuleDecl; + return false; + } + + return true; +} + ---------------- yronglin wrote:
Sorry for the very late reply! I’ve tried to implement this approach, but I’ve run into trouble. ``` #ifndef VERSION_H #define VERSION_H #define VERSION libv5 #define A a #define B b #define C c #define FUNC_LIKE(X) function_like_##X #define ATTRS [[]] #define SEMICOLON ; #endif export module a.FUNC_LIKE:c ATTRS; // OK, FUNC_LIKE would not be treated as a macro name. ``` *The 1st approach*: We try to consume all of the module-name tokens and return a single token::cxx_module_name token that contains {‘a’, ‘.’, ‘FUNC_LIKE’, ‘:’, ‘c’}. In doing so we also consume ‘ATTRS’, which is where we stop; it is not part of the module name, so it needs to be put back into the token stream. Currently, a Lex action cannot both return a token and cache tokens (doing so would corrupt the token cache if the call to Lex comes from CachingLex / PeekAhead). *The 2nd approach*: We try to consume all of the module-name tokens, giving {‘a’, ‘.’, ‘FUNC_LIKE’, ‘:’, ‘c’}, and then put this token array back into the token stream with macro expansion disabled. We have also consumed ‘ATTRS’, which is where we stop; it is not part of the module name, so it needs to be put back into the token stream with macro expansion enabled. These two EnterTokenStream calls conflict with each other. I also tried to eliminate the complex state machine in LexAfterModuleImport. https://github.com/llvm/llvm-project/pull/90574 _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits