https://github.com/cyndyishida created https://github.com/llvm/llvm-project/pull/142452
Sometimes, when a user writes invalid code, the minimization used for scanning can create a stream of tokens that is invalid at lex time. This patch protects against the case where there are valid (non-c++20) import directives discovered in the middle of an invalid `import` declaration. resolves: rdar://152335844 >From 3b2f3d98af5acd5923e795cece7defe835328181 Mon Sep 17 00:00:00 2001 From: Cyndy Ishida <cyndy_ish...@apple.com> Date: Mon, 2 Jun 2025 11:09:30 -0700 Subject: [PATCH] [clang][dep-scan] Resolve lexer crash from a permutation of invalid tokens Sometimes when a user writes invalid code, the minimization used for scanning can create a stream of tokens that is invalid at lex time. This patch protects against the case where there are valid import directives discovered in the middle of an invalid `import` declaration. resolves: rdar://152335844 --- clang/include/clang/Basic/DiagnosticLexKinds.td | 3 ++- clang/lib/Lex/DependencyDirectivesScanner.cpp | 5 +++++ ...urce_to_dependency_directives_invalid_import.m | 6 ++++++ .../Lex/DependencyDirectivesScannerTest.cpp | 15 ++++++++++++--- 4 files changed, 25 insertions(+), 4 deletions(-) create mode 100644 clang/test/Lexer/minimize_source_to_dependency_directives_invalid_import.m diff --git a/clang/include/clang/Basic/DiagnosticLexKinds.td b/clang/include/clang/Basic/DiagnosticLexKinds.td index 723f5d48b4f5f..f9c3e1608a2b0 100644 --- a/clang/include/clang/Basic/DiagnosticLexKinds.td +++ b/clang/include/clang/Basic/DiagnosticLexKinds.td @@ -1029,7 +1029,8 @@ def err_dep_source_scanner_missing_semi_after_at_import : Error< "could not find ';' after @import">; def err_dep_source_scanner_unexpected_tokens_at_import : Error< "unexpected extra tokens at end of @import declaration">; - +def err_dep_source_scanner_unexpected_tokens_in_directive_body + : Error<"unexpected extra tokens inside declaration">; } def err_pp_double_begin_pragma_unsafe_buffer_usage : diff --git a/clang/lib/Lex/DependencyDirectivesScanner.cpp 
b/clang/lib/Lex/DependencyDirectivesScanner.cpp index 088d1cc96e3a2..1310077ede614 100644 --- a/clang/lib/Lex/DependencyDirectivesScanner.cpp +++ b/clang/lib/Lex/DependencyDirectivesScanner.cpp @@ -503,6 +503,10 @@ bool Scanner::lexModuleDirectiveBody(DirectiveKind Kind, const char *&First, diag::err_dep_source_scanner_missing_semi_after_at_import); if (Tok.is(tok::semi)) break; + if (Tok.is(tok::hash) || Tok.is(tok::at)) + return reportError( + First, + diag::err_dep_source_scanner_unexpected_tokens_in_directive_body); } pushDirective(Kind); skipWhitespace(First, End); @@ -846,6 +850,7 @@ bool Scanner::lexPPLine(const char *&First, const char *const End) { if (*First == '@') return lexAt(First, End); + // Handle module directives for C++20 modules. if (*First == 'i' || *First == 'e' || *First == 'm') return lexModule(First, End); diff --git a/clang/test/Lexer/minimize_source_to_dependency_directives_invalid_import.m b/clang/test/Lexer/minimize_source_to_dependency_directives_invalid_import.m new file mode 100644 index 0000000000000..eb963301807b5 --- /dev/null +++ b/clang/test/Lexer/minimize_source_to_dependency_directives_invalid_import.m @@ -0,0 +1,6 @@ +// RUN: %clang_cc1 -verify -print-dependency-directives-minimized-source %s 2>&1 + +import <invalid.h> +#import "invalid.h" // expected-error {{unexpected extra tokens inside declaration}} +@; +#pragma clang module import invalid diff --git a/clang/unittests/Lex/DependencyDirectivesScannerTest.cpp b/clang/unittests/Lex/DependencyDirectivesScannerTest.cpp index bdb5e23510118..90e37bba24ee7 100644 --- a/clang/unittests/Lex/DependencyDirectivesScannerTest.cpp +++ b/clang/unittests/Lex/DependencyDirectivesScannerTest.cpp @@ -33,7 +33,7 @@ static bool minimizeSourceToDependencyDirectives( return false; } - +// Returns false on successful minimization. 
static bool minimizeSourceToDependencyDirectives(StringRef Input, SmallVectorImpl<char> &Out) { SmallVector<dependency_directives_scan::Token, 16> Tokens; @@ -677,13 +677,22 @@ TEST(MinimizeSourceToDependencyDirectivesTest, EmptyIncludesAndImports) { Out.data()); } -TEST(MinimizeSourceToDependencyDirectivesTest, AtImportFailures) { +TEST(MinimizeSourceToDependencyDirectivesTest, ImportFailures) { SmallVector<char, 128> Out; - ASSERT_TRUE(minimizeSourceToDependencyDirectives("@import A\n", Out)); + // Minimization can recover, and let the real compilation fail. ASSERT_FALSE( minimizeSourceToDependencyDirectives("@import MACRO(A);\n", Out)); ASSERT_FALSE(minimizeSourceToDependencyDirectives("@import \" \";\n", Out)); + + ASSERT_FALSE(minimizeSourceToDependencyDirectives("import\n", Out)); + EXPECT_STREQ("<TokBeforeEOF>\n", Out.data()); + + // Minimization cannot recover. + ASSERT_TRUE(minimizeSourceToDependencyDirectives("@import A\n", Out)); + ASSERT_TRUE(minimizeSourceToDependencyDirectives("import <Foo.h>\n" + "#include \"Foo.h\"", + Out)); } TEST(MinimizeSourceToDependencyDirectivesTest, RawStringLiteral) { _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits