Author: Cyndy Ishida
Date: 2025-06-06T13:45:16-07:00
New Revision: 897b0301d2e2ff28d3976fe95b64be5a85815908

URL: 
https://github.com/llvm/llvm-project/commit/897b0301d2e2ff28d3976fe95b64be5a85815908
DIFF: 
https://github.com/llvm/llvm-project/commit/897b0301d2e2ff28d3976fe95b64be5a85815908.diff

LOG: [clang][dep-scan] Resolve lexer crash from a permutation of invalid tokens 
(#142452)

Sometimes, when a user writes invalid code, the minimization used for
scanning can create a stream of tokens that is invalid at lex time. This
patch protects against the case where valid (non-C++20) import directives
are discovered in the middle of an invalid `import` declaration.

Mostly authored by: @akyrtzi 
resolves: rdar://152335844

Added: 
    

Modified: 
    clang/lib/Lex/DependencyDirectivesScanner.cpp
    clang/unittests/Lex/DependencyDirectivesScannerTest.cpp

Removed: 
    


################################################################################
diff  --git a/clang/lib/Lex/DependencyDirectivesScanner.cpp 
b/clang/lib/Lex/DependencyDirectivesScanner.cpp
index 088d1cc96e3a2..4606b85d42fe7 100644
--- a/clang/lib/Lex/DependencyDirectivesScanner.cpp
+++ b/clang/lib/Lex/DependencyDirectivesScanner.cpp
@@ -496,7 +496,15 @@ bool Scanner::lexModuleDirectiveBody(DirectiveKind Kind, 
const char *&First,
                                      const char *const End) {
   const char *DirectiveLoc = Input.data() + CurDirToks.front().Offset;
   for (;;) {
+    // Keep a copy of the First char in case it needs to be reset.
+    const char *Previous = First;
     const dependency_directives_scan::Token &Tok = lexToken(First, End);
+    if ((Tok.is(tok::hash) || Tok.is(tok::at)) &&
+        (Tok.Flags & clang::Token::StartOfLine)) {
+      CurDirToks.pop_back();
+      First = Previous;
+      return false;
+    }
     if (Tok.is(tok::eof))
       return reportError(
           DirectiveLoc,
@@ -846,6 +854,7 @@ bool Scanner::lexPPLine(const char *&First, const char 
*const End) {
   if (*First == '@')
     return lexAt(First, End);
 
+  // Handle module directives for C++20 modules.
   if (*First == 'i' || *First == 'e' || *First == 'm')
     return lexModule(First, End);
 

diff  --git a/clang/unittests/Lex/DependencyDirectivesScannerTest.cpp 
b/clang/unittests/Lex/DependencyDirectivesScannerTest.cpp
index bdb5e23510118..377c066f031d3 100644
--- a/clang/unittests/Lex/DependencyDirectivesScannerTest.cpp
+++ b/clang/unittests/Lex/DependencyDirectivesScannerTest.cpp
@@ -677,13 +677,28 @@ TEST(MinimizeSourceToDependencyDirectivesTest, 
EmptyIncludesAndImports) {
                Out.data());
 }
 
-TEST(MinimizeSourceToDependencyDirectivesTest, AtImportFailures) {
+TEST(MinimizeSourceToDependencyDirectivesTest, ImportFailures) {
   SmallVector<char, 128> Out;
 
   ASSERT_TRUE(minimizeSourceToDependencyDirectives("@import A\n", Out));
   ASSERT_FALSE(
       minimizeSourceToDependencyDirectives("@import MACRO(A);\n", Out));
   ASSERT_FALSE(minimizeSourceToDependencyDirectives("@import \" \";\n", Out));
+
+  ASSERT_FALSE(minimizeSourceToDependencyDirectives("import <Foo.h>\n"
+                                                    "@import Foo;",
+                                                    Out));
+  EXPECT_STREQ("@import Foo;\n", Out.data());
+
+  ASSERT_FALSE(
+      minimizeSourceToDependencyDirectives("import <Foo.h>\n"
+                                           "#import <Foo.h>\n"
+                                           "@;\n"
+                                           "#pragma clang module import Foo",
+                                           Out));
+  EXPECT_STREQ("#import <Foo.h>\n"
+               "#pragma clang module import Foo\n",
+               Out.data());
 }
 
 TEST(MinimizeSourceToDependencyDirectivesTest, RawStringLiteral) {


        
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to