tbaeder created this revision.
tbaeder added reviewers: aaron.ballman, iains, urnathan, ChuanqiXu.
Herald added a project: All.
tbaeder requested review of this revision.
Herald added a project: clang.
Herald added a subscriber: cfe-commits.

As in: http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2019/p1703r1.html

Posting this mostly to get some feedback on the general path taken.

>From reading the paper, I understand that the preprocessor should gain a 
>"import-keyword" token, but that is already taken by the GNU `#import` 
>extension.

The implementation here starts converting newlines to `eod` tokens when seeing 
a `export` or `import` token. However, that means ignoring the `eod` tokens 
when parsing an export block, an exported declaration or a non-header-unit 
module import.

Let me know what you think.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D122885

Files:
  clang/include/clang/Basic/DiagnosticLexKinds.td
  clang/include/clang/Basic/DiagnosticParseKinds.td
  clang/lib/Lex/Preprocessor.cpp
  clang/lib/Parse/ParseDeclCXX.cpp
  clang/lib/Parse/Parser.cpp
  clang/test/CXX/cpp/cpp.module/p1.cpp
  clang/test/CXX/lex/lex.pptoken/p3-2a.cpp
  clang/test/Modules/P1703R1.cpp

Index: clang/test/Modules/P1703R1.cpp
===================================================================
--- /dev/null
+++ clang/test/Modules/P1703R1.cpp
@@ -0,0 +1,103 @@
+
+// RUN: rm -rf %t
+// RUN: mkdir -p %t
+// RUN: mkdir -p %t/system
+// RUN: split-file %s %t
+// RUN: cd %t
+
+// RUN: %clang_cc1 -std=c++20 -emit-header-unit \
+// RUN: -xc++-header header.h -o header.pcm
+
+// RUN: %clang_cc1 -std=c++20 -emit-header-unit -isystem system \
+// RUN: -xc++-system-header system-header.h -o system/system-header.pcm
+
+// RUN: %clang_cc1 -std=c++20 -emit-module-interface dummy.cpp -o dummy.pcm
+
+// RUN: %clang_cc1 -std=c++20 -fmodule-file=header.pcm \
+// RUN: -fmodule-file=system/system-header.pcm \
+// RUN: -fmodule-file=dummy.pcm \
+// RUN: -emit-module-interface -isystem system \
+// RUN: main.cpp -verify
+
+//--- header.h
+int foo(int);
+
+//--- system/system-header.h
+int foo2(int);
+
+//--- dummy.cpp
+export module dummy;
+
+//--- main.cpp
+export module main;
+#define IMPORT import
+#define EXPORT export
+#define HEADER "header.h"
+#define SYSTEM_HEADER <system-header.h>
+#define SEMI ;
+
+
+/// Fine.
+import "header.h";
+export import "header.h";
+
+
+import
+"header.h";// expected-error {{expected a module name after 'import'}}
+export import
+"header.h"; // expected-error {{expected a module name after 'import'}}
+export
+import "header.h"; // expected-error {{expected a module name after 'import'}}
+
+import "header.h"; import "header.h"; // expected-error {{import keyword must be at start of line}}
+                                      // expected-error@-1 {{extra tokens after module import}}
+
+import "header.h" SEMI // expected-error {{semicolon terminating header import declaration cannot be produced by a macro}}
+IMPORT "header.h"; // expected-error {{import keyword cannot be produced by a macro}}
+EXPORT import "header.h"; // expected-error {{export keyword cannot be produced by a macro}}
+import HEADER; /// Fine.
+
+
+import <system-header.h>;
+export import <system-header.h>;
+
+import
+<system-header.h>; // expected-error {{expected a module name after 'import'}}
+IMPORT <system-header.h>;// expected-error {{import keyword cannot be produced by a macro}}
+EXPORT import <system-header.h>;// expected-error {{export keyword cannot be produced by a macro}}
+import SYSTEM_HEADER;
+
+
+
+/// Normal module imports are unaffected
+import dummy;
+import
+dummy;
+export
+import
+dummy;
+export import
+dummy;
+export
+import dummy;
+import dummy; import dummy;
+
+
+
+// TODO: The diagnostics here could be much better.
+export
+import "header.h"; // expected-error {{expected a module name after 'import'}}
+
+export import
+"header.h" // expected-error {{expected a module name after 'import'}}
+
+
+export void foo() {}
+export void
+baz(){}
+export
+void bar(){}
+
+export {
+  void a(){}
+}
Index: clang/test/CXX/lex/lex.pptoken/p3-2a.cpp
===================================================================
--- clang/test/CXX/lex/lex.pptoken/p3-2a.cpp
+++ clang/test/CXX/lex/lex.pptoken/p3-2a.cpp
@@ -56,26 +56,3 @@
 #define HEADER <foo  bar>
 // CHECK: import <foo bar>;
 import HEADER;
-
-// CHECK: import <foo bar>;
-import <
-foo
-  bar
->;
-
-// CHECK: import{{$}}
-// CHECK: {{^}}<foo bar>;
-import
-<
-foo
-  bar
->;
-
-// CHECK: import{{$}}
-// CHECK: {{^}}<foo  bar>;
-import
-<foo  bar>;
-
-#define IMPORT import <foo  bar>
-// CHECK: import <foo bar>;
-IMPORT;
Index: clang/test/CXX/cpp/cpp.module/p1.cpp
===================================================================
--- clang/test/CXX/cpp/cpp.module/p1.cpp
+++ clang/test/CXX/cpp/cpp.module/p1.cpp
@@ -5,11 +5,11 @@
 // expected-error@+1 {{semicolon terminating header import declaration cannot be produced by a macro}}
 import "empty.h" SEMI // CHECK: import attrs.{{.*}};
 
-#define IMPORT import "empty.h"
-IMPORT; // CHECK: import attrs.{{.*}};
+#define IMPORT "empty.h"
+import IMPORT; // CHECK: import attrs.{{.*}};
 
-#define IMPORT_ANGLED import <empty.h>
-IMPORT_ANGLED; // CHECK: import attrs.{{.*}};
+#define IMPORT_ANGLED <empty.h>
+import IMPORT_ANGLED; // CHECK: import attrs.{{.*}};
 
 // Ensure that macros only become visible at the semicolon.
 // CHECK: import attrs.{{.*}} ATTRS ;
Index: clang/lib/Parse/Parser.cpp
===================================================================
--- clang/lib/Parse/Parser.cpp
+++ clang/lib/Parse/Parser.cpp
@@ -624,30 +624,34 @@
     return false;
 
   case tok::kw_export:
-    switch (NextToken().getKind()) {
-    case tok::kw_module:
+    if (NextToken().is(tok::kw_module))
       goto module_decl;
+    else if (NextToken().is(tok::identifier) ||
+             (NextToken().is(tok::eod) &&
+              GetLookAheadToken(2).is(tok::identifier))) {
+      // Note: no need to handle kw_import here. We only form kw_import under
+      // the Modules TS, and in that case 'export imort' is parsed as an
+      // export-declaration containing an import-declaration
+
+      // Recognize context-sensitive C++20 'export module' and 'export import'
+      // declarations.
+      IdentifierInfo *II;
+      int ColonColonToken;
+      if (NextToken().is(tok::eod)) {
+        II = GetLookAheadToken(2).getIdentifierInfo();
+        ColonColonToken = 3;
+      } else {
+        II = NextToken().getIdentifierInfo();
+        ColonColonToken = 2;
+      }
 
-    // Note: no need to handle kw_import here. We only form kw_import under
-    // the Modules TS, and in that case 'export import' is parsed as an
-    // export-declaration containing an import-declaration.
-
-    // Recognize context-sensitive C++20 'export module' and 'export import'
-    // declarations.
-    case tok::identifier: {
-      IdentifierInfo *II = NextToken().getIdentifierInfo();
       if ((II == Ident_module || II == Ident_import) &&
-          GetLookAheadToken(2).isNot(tok::coloncolon)) {
+          GetLookAheadToken(ColonColonToken).isNot(tok::coloncolon)) {
         if (II == Ident_module)
           goto module_decl;
         else
           goto import_decl;
       }
-      break;
-    }
-
-    default:
-      break;
     }
     break;
 
@@ -2317,6 +2321,8 @@
                                  ? Sema::ModuleDeclKind::Interface
                                  : Sema::ModuleDeclKind::Implementation;
 
+  TryConsumeToken(tok::eod);
+
   assert(
       (Tok.is(tok::kw_module) ||
        (Tok.is(tok::identifier) && Tok.getIdentifierInfo() == Ident_module)) &&
@@ -2403,9 +2409,11 @@
 Decl *Parser::ParseModuleImport(SourceLocation AtLoc,
                                 Sema::ModuleImportState &ImportState) {
   SourceLocation StartLoc = AtLoc.isInvalid() ? Tok.getLocation() : AtLoc;
+  bool ImportTokAtStartOfLine = Tok.isAtStartOfLine();
 
   SourceLocation ExportLoc;
   TryConsumeToken(tok::kw_export, ExportLoc);
+  TryConsumeToken(tok::eod);
 
   assert((AtLoc.isInvalid() ? Tok.isOneOf(tok::kw_import, tok::identifier)
                             : Tok.isObjCAtKeyword(tok::objc_import)) &&
@@ -2437,10 +2445,16 @@
     else
       IsPartition = true;
   } else {
+    TryConsumeToken(tok::eod);
     if (ParseModuleName(ImportLoc, Path, /*IsImport*/ true))
       return nullptr;
   }
 
+  // Diagnose C++20 import keyword not being at the start of the line
+  // now, since this should only be emitted for header unit imports.
+  if (getLangOpts().CPlusPlusModules && HeaderUnit && !ImportTokAtStartOfLine)
+    Diag(StartLoc, diag::err_import_keyword_not_at_start_of_line);
+
   ParsedAttributes Attrs(AttrFactory);
   MaybeParseCXX11Attributes(Attrs);
   // We don't support any module import attributes yet.
@@ -2498,6 +2512,11 @@
     Import = Actions.ActOnModuleImport(StartLoc, ExportLoc, ImportLoc, Path,
                                        IsPartition);
   ExpectAndConsumeSemi(diag::err_module_expected_semi);
+
+  if (getLangOpts().CPlusPlusModules && HeaderUnit &&
+      !Tok.isOneOf(tok::eof, tok::eod) && !Tok.isAtStartOfLine())
+    Diag(Tok, diag::err_extra_tokens_after_module_import);
+
   if (Import.isInvalid())
     return nullptr;
 
Index: clang/lib/Parse/ParseDeclCXX.cpp
===================================================================
--- clang/lib/Parse/ParseDeclCXX.cpp
+++ clang/lib/Parse/ParseDeclCXX.cpp
@@ -429,6 +429,8 @@
   assert(Tok.is(tok::kw_export));
   SourceLocation ExportLoc = ConsumeToken();
 
+  TryConsumeToken(tok::eod);
+
   ParseScope ExportScope(this, Scope::DeclScope);
   Decl *ExportDecl = Actions.ActOnStartExportDecl(
       getCurScope(), ExportLoc,
Index: clang/lib/Lex/Preprocessor.cpp
===================================================================
--- clang/lib/Lex/Preprocessor.cpp
+++ clang/lib/Lex/Preprocessor.cpp
@@ -928,6 +928,9 @@
   // if this token is being produced as a result of phase 4 of translation.
   if (getLangOpts().CPlusPlusModules && LexLevel == 1 &&
       !Result.getFlag(Token::IsReinjected)) {
+    if (CurPPLexer)
+      CurPPLexer->ParsingPreprocessorDirective = false;
+
     switch (Result.getKind()) {
     case tok::l_paren: case tok::l_square: case tok::l_brace:
       ImportSeqState.handleOpenBracket();
@@ -946,10 +949,17 @@
       ImportSeqState.handleHeaderName();
       break;
     case tok::kw_export:
+      if (getLangOpts().CPlusPlusModules && Result.getLocation().isMacroID())
+        Diag(Result, diag::err_header_export_in_macro);
+      if (CurPPLexer && getLangOpts().CPlusPlusModules)
+        CurPPLexer->ParsingPreprocessorDirective = true;
       ImportSeqState.handleExport();
       break;
     case tok::identifier:
       if (Result.getIdentifierInfo()->isModulesImport()) {
+        if (getLangOpts().CPlusPlusModules && Result.getLocation().isMacroID())
+          Diag(Result, diag::err_header_import_in_macro);
+
         ImportSeqState.handleImport();
         if (ImportSeqState.afterImportSeq()) {
           ModuleImportLoc = Result.getLocation();
@@ -961,6 +971,8 @@
       }
       LLVM_FALLTHROUGH;
     default:
+      if (CurPPLexer && getLangOpts().CPlusPlusModules)
+        CurPPLexer->ParsingPreprocessorDirective = false;
       ImportSeqState.handleMisc();
       break;
     }
@@ -1135,7 +1147,11 @@
 bool Preprocessor::LexAfterModuleImport(Token &Result) {
   // Figure out what kind of lexer we actually have.
   recomputeCurLexerKind();
+  // C++20 modules require the import statement to be on a single line
+  bool ConvertNewlinesToEod = getLangOpts().CPlusPlusModules;
 
+  if (ConvertNewlinesToEod && CurPPLexer)
+    CurPPLexer->ParsingPreprocessorDirective = true;
   // Lex the next token. The header-name lexing rules are used at the start of
   // a pp-import.
   //
@@ -1169,6 +1185,10 @@
     // Consume the pp-import-suffix and expand any macros in it now. We'll add
     // it back into the token stream later.
     CollectPpImportSuffix(Suffix);
+
+    if (ConvertNewlinesToEod && CurPPLexer)
+      CurPPLexer->ParsingPreprocessorDirective = false;
+
     if (Suffix.back().isNot(tok::semi)) {
       // This is not a pp-import after all.
       EnterTokens(Suffix);
@@ -1228,6 +1248,9 @@
     return false;
   }
 
+  if (ConvertNewlinesToEod && CurPPLexer)
+    CurPPLexer->ParsingPreprocessorDirective = false;
+
   // The token sequence
   //
   //   import identifier (. identifier)*
Index: clang/include/clang/Basic/DiagnosticParseKinds.td
===================================================================
--- clang/include/clang/Basic/DiagnosticParseKinds.td
+++ clang/include/clang/Basic/DiagnosticParseKinds.td
@@ -1547,6 +1547,11 @@
   "module partition imports must be within a module purview">;
 def err_import_in_wrong_fragment : Error<
   "module%select{| partition}0 imports cannot be in the %select{global|private}1 module fragment">;
+def err_import_keyword_not_at_start_of_line : Error<
+  "import keyword must be at start of line">;
+def err_extra_tokens_after_module_import : Error<
+  "extra tokens after module import">;
+
 
 def err_export_empty : Error<"export declaration cannot be empty">;
 }
Index: clang/include/clang/Basic/DiagnosticLexKinds.td
===================================================================
--- clang/include/clang/Basic/DiagnosticLexKinds.td
+++ clang/include/clang/Basic/DiagnosticLexKinds.td
@@ -850,6 +850,10 @@
 def err_header_import_semi_in_macro : Error<
   "semicolon terminating header import declaration cannot be produced "
   "by a macro">;
+def err_header_import_in_macro : Error<
+  "import keyword cannot be produced by a macro">;
+def err_header_export_in_macro : Error<
+  "export keyword cannot be produced by a macro">;
 def err_header_import_not_header_unit : Error<
   "header file %0 (aka '%1') cannot be imported because "
   "it is not known to be a header unit">;
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to