Author: Samira Bazuzi Date: 2024-12-05T09:37:46-05:00 New Revision: f7e8be7c66b53a126c8cba9ac81b5b77d873aa1e
URL: https://github.com/llvm/llvm-project/commit/f7e8be7c66b53a126c8cba9ac81b5b77d873aa1e DIFF: https://github.com/llvm/llvm-project/commit/f7e8be7c66b53a126c8cba9ac81b5b77d873aa1e.diff LOG: Skip escaped newlines before checking for whitespace in Lexer::getRawToken. (#117548) The Lexer used in getRawToken is not told to keep whitespace, so when it skips over escaped newlines, it also ignores whitespace, regardless of getRawToken's IgnoreWhiteSpace parameter. Instead of letting this case fall through to lexing, check for whitespace after skipping over any escaped newlines. Added: Modified: clang/lib/Lex/Lexer.cpp clang/unittests/Lex/LexerTest.cpp Removed: ################################################################################ diff --git a/clang/lib/Lex/Lexer.cpp b/clang/lib/Lex/Lexer.cpp index e58c8bc72ae5b3..72364500a48f9f 100644 --- a/clang/lib/Lex/Lexer.cpp +++ b/clang/lib/Lex/Lexer.cpp @@ -527,7 +527,7 @@ bool Lexer::getRawToken(SourceLocation Loc, Token &Result, const char *StrData = Buffer.data()+LocInfo.second; - if (!IgnoreWhiteSpace && isWhitespace(StrData[0])) + if (!IgnoreWhiteSpace && isWhitespace(SkipEscapedNewLines(StrData)[0])) return true; // Create a lexer starting at the beginning of this token. diff --git a/clang/unittests/Lex/LexerTest.cpp b/clang/unittests/Lex/LexerTest.cpp index 47aa2c131a304d..aead7fb899d0a8 100644 --- a/clang/unittests/Lex/LexerTest.cpp +++ b/clang/unittests/Lex/LexerTest.cpp @@ -652,6 +652,38 @@ TEST_F(LexerTest, RawAndNormalLexSameForLineComments) { EXPECT_TRUE(ToksView.empty()); } +TEST_F(LexerTest, GetRawTokenOnEscapedNewLineChecksWhitespace) { + const llvm::StringLiteral Source = R"cc( + #define ONE \ + 1 + + int i = ONE; + )cc"; + std::vector<Token> Toks = + CheckLex(Source, {tok::kw_int, tok::identifier, tok::equal, + tok::numeric_constant, tok::semi}); + + // Set up by getting the raw token for the `1` in the macro definition. + const Token &OneExpanded = Toks[3]; + Token Tok; + ASSERT_FALSE( + Lexer::getRawToken(OneExpanded.getLocation(), Tok, SourceMgr, LangOpts)); + // The `ONE`. + ASSERT_EQ(Tok.getKind(), tok::raw_identifier); + ASSERT_FALSE( + Lexer::getRawToken(SourceMgr.getSpellingLoc(OneExpanded.getLocation()), + Tok, SourceMgr, LangOpts)); + // The `1` in the macro definition. + ASSERT_EQ(Tok.getKind(), tok::numeric_constant); + + // Go back 4 characters: two spaces, one newline, and the backslash. + SourceLocation EscapedNewLineLoc = Tok.getLocation().getLocWithOffset(-4); + // Expect true (=failure) because the whitespace immediately after the + // escaped newline is not ignored. + EXPECT_TRUE(Lexer::getRawToken(EscapedNewLineLoc, Tok, SourceMgr, LangOpts, + /*IgnoreWhiteSpace=*/false)); +} + TEST(LexerPreambleTest, PreambleBounds) { std::vector<std::string> Cases = { R"cc([[ _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits