vsapsai created this revision. vsapsai added reviewers: arphaman, kcc. Fix makes the loop in LexAngledStringLiteral more like the loops in LexStringLiteral, LexCharConstant. When we skip a character after backslash, we need to check if we reached the end of the file instead of reading the next character unconditionally.
Discovered by OSS-Fuzz: https://bugs.chromium.org/p/oss-fuzz/issues/detail?id=3832 rdar://problem/35572754 https://reviews.llvm.org/D41423 Files: clang/lib/Lex/Lexer.cpp clang/unittests/Lex/LexerTest.cpp Index: clang/unittests/Lex/LexerTest.cpp =================================================================== --- clang/unittests/Lex/LexerTest.cpp +++ clang/unittests/Lex/LexerTest.cpp @@ -474,8 +474,10 @@ } TEST_F(LexerTest, AvoidPastEndOfStringDereference) { - std::vector<Token> LexedTokens = Lex(" // \\\n"); - EXPECT_TRUE(LexedTokens.empty()); + EXPECT_TRUE(Lex(" // \\\n").empty()); + // rdar://problem/35572754 + EXPECT_TRUE(Lex("#include <\\\\").empty()); + EXPECT_TRUE(Lex("#include <\\\\\n").empty()); } TEST_F(LexerTest, StringizingRasString) { Index: clang/lib/Lex/Lexer.cpp =================================================================== --- clang/lib/Lex/Lexer.cpp +++ clang/lib/Lex/Lexer.cpp @@ -2009,18 +2009,21 @@ const char *AfterLessPos = CurPtr; char C = getAndAdvanceChar(CurPtr, Result); while (C != '>') { - // Skip escaped characters. - if (C == '\\' && CurPtr < BufferEnd) { - // Skip the escaped character. - getAndAdvanceChar(CurPtr, Result); - } else if (C == '\n' || C == '\r' || // Newline. - (C == 0 && (CurPtr-1 == BufferEnd || // End of file. - isCodeCompletionPoint(CurPtr-1)))) { + // Skip escaped characters. Escaped newlines will already be processed by + // getAndAdvanceChar. + if (C == '\\') + C = getAndAdvanceChar(CurPtr, Result); + + if (C == '\n' || C == '\r' || // Newline. + (C == 0 && (CurPtr-1 == BufferEnd || // End of file. + isCodeCompletionPoint(CurPtr-1)))) { // If the filename is unterminated, then it must just be a lone < // character. Return this as such. FormTokenWithChars(Result, AfterLessPos, tok::less); return true; - } else if (C == 0) { + } + + if (C == 0) { NulCharacter = CurPtr-1; } C = getAndAdvanceChar(CurPtr, Result);
Index: clang/unittests/Lex/LexerTest.cpp =================================================================== --- clang/unittests/Lex/LexerTest.cpp +++ clang/unittests/Lex/LexerTest.cpp @@ -474,8 +474,10 @@ } TEST_F(LexerTest, AvoidPastEndOfStringDereference) { - std::vector<Token> LexedTokens = Lex(" // \\\n"); - EXPECT_TRUE(LexedTokens.empty()); + EXPECT_TRUE(Lex(" // \\\n").empty()); + // rdar://problem/35572754 + EXPECT_TRUE(Lex("#include <\\\\").empty()); + EXPECT_TRUE(Lex("#include <\\\\\n").empty()); } TEST_F(LexerTest, StringizingRasString) { Index: clang/lib/Lex/Lexer.cpp =================================================================== --- clang/lib/Lex/Lexer.cpp +++ clang/lib/Lex/Lexer.cpp @@ -2009,18 +2009,21 @@ const char *AfterLessPos = CurPtr; char C = getAndAdvanceChar(CurPtr, Result); while (C != '>') { - // Skip escaped characters. - if (C == '\\' && CurPtr < BufferEnd) { - // Skip the escaped character. - getAndAdvanceChar(CurPtr, Result); - } else if (C == '\n' || C == '\r' || // Newline. - (C == 0 && (CurPtr-1 == BufferEnd || // End of file. - isCodeCompletionPoint(CurPtr-1)))) { + // Skip escaped characters. Escaped newlines will already be processed by + // getAndAdvanceChar. + if (C == '\\') + C = getAndAdvanceChar(CurPtr, Result); + + if (C == '\n' || C == '\r' || // Newline. + (C == 0 && (CurPtr-1 == BufferEnd || // End of file. + isCodeCompletionPoint(CurPtr-1)))) { // If the filename is unterminated, then it must just be a lone < // character. Return this as such. FormTokenWithChars(Result, AfterLessPos, tok::less); return true; - } else if (C == 0) { + } + + if (C == 0) { NulCharacter = CurPtr-1; } C = getAndAdvanceChar(CurPtr, Result);
_______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits