https://github.com/cyndyishida created 
https://github.com/llvm/llvm-project/pull/142452

Sometimes, when a user writes invalid code, the minimization used for scanning 
can create a stream of tokens that is invalid at lex time. This patch protects 
against the case where there are valid (non-C++20) import directives discovered 
in the middle of an invalid `import` declaration.

resolves: rdar://152335844

From 3b2f3d98af5acd5923e795cece7defe835328181 Mon Sep 17 00:00:00 2001
From: Cyndy Ishida <cyndy_ish...@apple.com>
Date: Mon, 2 Jun 2025 11:09:30 -0700
Subject: [PATCH] [clang][dep-scan] Resolve lexer crash from a permutation of
 invalid tokens

Sometimes when a user writes invalid code, the minimization used for
scanning can create a stream of tokens that is invalid at lex time.
This patch protects against the case where there are valid import directives
discovered in the middle of an invalid `import` declaration.

resolves: rdar://152335844
---
 clang/include/clang/Basic/DiagnosticLexKinds.td   |  3 ++-
 clang/lib/Lex/DependencyDirectivesScanner.cpp     |  5 +++++
 ...urce_to_dependency_directives_invalid_import.m |  6 ++++++
 .../Lex/DependencyDirectivesScannerTest.cpp       | 15 ++++++++++++---
 4 files changed, 25 insertions(+), 4 deletions(-)
 create mode 100644 clang/test/Lexer/minimize_source_to_dependency_directives_invalid_import.m

diff --git a/clang/include/clang/Basic/DiagnosticLexKinds.td 
b/clang/include/clang/Basic/DiagnosticLexKinds.td
index 723f5d48b4f5f..f9c3e1608a2b0 100644
--- a/clang/include/clang/Basic/DiagnosticLexKinds.td
+++ b/clang/include/clang/Basic/DiagnosticLexKinds.td
@@ -1029,7 +1029,8 @@ def err_dep_source_scanner_missing_semi_after_at_import : 
Error<
   "could not find ';' after @import">;
 def err_dep_source_scanner_unexpected_tokens_at_import : Error<
   "unexpected extra tokens at end of @import declaration">;
-
+def err_dep_source_scanner_unexpected_tokens_in_directive_body
+    : Error<"unexpected extra tokens inside declaration">;
 }
 
 def err_pp_double_begin_pragma_unsafe_buffer_usage :
diff --git a/clang/lib/Lex/DependencyDirectivesScanner.cpp 
b/clang/lib/Lex/DependencyDirectivesScanner.cpp
index 088d1cc96e3a2..1310077ede614 100644
--- a/clang/lib/Lex/DependencyDirectivesScanner.cpp
+++ b/clang/lib/Lex/DependencyDirectivesScanner.cpp
@@ -503,6 +503,10 @@ bool Scanner::lexModuleDirectiveBody(DirectiveKind Kind, 
const char *&First,
           diag::err_dep_source_scanner_missing_semi_after_at_import);
     if (Tok.is(tok::semi))
       break;
+    if (Tok.is(tok::hash) || Tok.is(tok::at))
+      return reportError(
+          First,
+          diag::err_dep_source_scanner_unexpected_tokens_in_directive_body);
   }
   pushDirective(Kind);
   skipWhitespace(First, End);
@@ -846,6 +850,7 @@ bool Scanner::lexPPLine(const char *&First, const char 
*const End) {
   if (*First == '@')
     return lexAt(First, End);
 
+  // Handle module directives for C++20 modules.
   if (*First == 'i' || *First == 'e' || *First == 'm')
     return lexModule(First, End);
 
diff --git 
a/clang/test/Lexer/minimize_source_to_dependency_directives_invalid_import.m 
b/clang/test/Lexer/minimize_source_to_dependency_directives_invalid_import.m
new file mode 100644
index 0000000000000..eb963301807b5
--- /dev/null
+++ b/clang/test/Lexer/minimize_source_to_dependency_directives_invalid_import.m
@@ -0,0 +1,6 @@
+// RUN: %clang_cc1 -verify -print-dependency-directives-minimized-source %s 
2>&1
+
+import <invalid.h>
+#import "invalid.h" // expected-error {{unexpected extra tokens inside 
declaration}}
+@;
+#pragma clang module import invalid
diff --git a/clang/unittests/Lex/DependencyDirectivesScannerTest.cpp 
b/clang/unittests/Lex/DependencyDirectivesScannerTest.cpp
index bdb5e23510118..90e37bba24ee7 100644
--- a/clang/unittests/Lex/DependencyDirectivesScannerTest.cpp
+++ b/clang/unittests/Lex/DependencyDirectivesScannerTest.cpp
@@ -33,7 +33,7 @@ static bool minimizeSourceToDependencyDirectives(
 
   return false;
 }
-
+// Returns false on successful minimization.
 static bool minimizeSourceToDependencyDirectives(StringRef Input,
                                                  SmallVectorImpl<char> &Out) {
   SmallVector<dependency_directives_scan::Token, 16> Tokens;
@@ -677,13 +677,22 @@ TEST(MinimizeSourceToDependencyDirectivesTest, 
EmptyIncludesAndImports) {
                Out.data());
 }
 
-TEST(MinimizeSourceToDependencyDirectivesTest, AtImportFailures) {
+TEST(MinimizeSourceToDependencyDirectivesTest, ImportFailures) {
   SmallVector<char, 128> Out;
 
-  ASSERT_TRUE(minimizeSourceToDependencyDirectives("@import A\n", Out));
+  // Minimization can recover, and let the real compilation fail.
   ASSERT_FALSE(
       minimizeSourceToDependencyDirectives("@import MACRO(A);\n", Out));
   ASSERT_FALSE(minimizeSourceToDependencyDirectives("@import \" \";\n", Out));
+
+  ASSERT_FALSE(minimizeSourceToDependencyDirectives("import\n", Out));
+  EXPECT_STREQ("<TokBeforeEOF>\n", Out.data());
+
+  // Minimization cannot recover.
+  ASSERT_TRUE(minimizeSourceToDependencyDirectives("@import A\n", Out));
+  ASSERT_TRUE(minimizeSourceToDependencyDirectives("import <Foo.h>\n"
+                                                   "#include \"Foo.h\"",
+                                                   Out));
 }
 
 TEST(MinimizeSourceToDependencyDirectivesTest, RawStringLiteral) {

_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to