idlecode updated this revision to Diff 91466.
idlecode added a comment.
Addressed Alexander's comments
https://reviews.llvm.org/D30748
Files:
lib/Lex/Lexer.cpp
unittests/Lex/LexerTest.cpp
Index: unittests/Lex/LexerTest.cpp
===================================================================
--- unittests/Lex/LexerTest.cpp
+++ unittests/Lex/LexerTest.cpp
@@ -380,4 +380,34 @@
EXPECT_EQ(SourceMgr.getFileIDSize(SourceMgr.getFileID(helper1ArgLoc)), 8U);
}
+TEST_F(LexerTest, GetBeginningOfTokenWithEscapedNewLine) {
+ // Each line should have the same length for
+ // further offset calculation to be more straightforward.
+ const unsigned IdentifierLength = 8;
+ std::string TextToLex = "rabarbar\n"
+ "foo\\\nbar\n"
+ "foo\\\rbar\n"
+ "fo\\\r\nbar\n"
+ "foo\\\n\rba\n";
+ std::vector<tok::TokenKind> ExpectedTokens{5, tok::identifier};
+ std::vector<Token> LexedTokens = CheckLex(TextToLex, ExpectedTokens);
+
+ for (const Token &Tok : LexedTokens) {
+ std::pair<FileID, unsigned> OriginalLocation =
+ SourceMgr.getDecomposedLoc(Tok.getLocation());
+ for (unsigned Offset = 0; Offset < IdentifierLength; ++Offset) {
+ SourceLocation LookupLocation =
+ Tok.getLocation().getLocWithOffset(Offset);
+
+ std::pair<FileID, unsigned> FoundLocation =
+ SourceMgr.getDecomposedExpansionLoc(
+ Lexer::GetBeginningOfToken(LookupLocation, SourceMgr, LangOpts));
+
+ // Check that location returned by the GetBeginningOfToken
+ // is the same as original token location reported by Lexer.
+ EXPECT_EQ(FoundLocation.second, OriginalLocation.second);
+ }
+ }
+}
+
} // anonymous namespace
Index: lib/Lex/Lexer.cpp
===================================================================
--- lib/Lex/Lexer.cpp
+++ lib/Lex/Lexer.cpp
@@ -452,39 +452,56 @@
return false;
}
+/// \brief Check if new line pointed by Str is escaped.
+static bool isNewLineEscaped(const char *BufferStart, const char *Str) {
+ assert(isVerticalWhitespace(Str[0]));
+ if (Str - 1 < BufferStart)
+ return false;
+ if (Str[-1] == '\\')
+ return true;
+ if (!isVerticalWhitespace(Str[-1]))
+ return false;
+ if (Str - 2 < BufferStart)
+ return false;
+ if (Str[-2] == '\\')
+ return true;
+ return false;
+}
+
static SourceLocation getBeginningOfFileToken(SourceLocation Loc,
const SourceManager &SM,
const LangOptions &LangOpts) {
assert(Loc.isFileID());
std::pair<FileID, unsigned> LocInfo = SM.getDecomposedLoc(Loc);
if (LocInfo.first.isInvalid())
return Loc;
-
+
bool Invalid = false;
StringRef Buffer = SM.getBufferData(LocInfo.first, &Invalid);
if (Invalid)
return Loc;
// Back up from the current location until we hit the beginning of a line
// (or the buffer). We'll relex from that point.
- const char *BufStart = Buffer.data();
if (LocInfo.second >= Buffer.size())
return Loc;
-
- const char *StrData = BufStart+LocInfo.second;
- if (StrData[0] == '\n' || StrData[0] == '\r')
- return Loc;
+
+ const char *BufStart = Buffer.data();
+ const char *StrData = BufStart + LocInfo.second;
const char *LexStart = StrData;
- while (LexStart != BufStart) {
- if (LexStart[0] == '\n' || LexStart[0] == '\r') {
- ++LexStart;
- break;
- }
+ for (; LexStart != BufStart; --LexStart) {
+ if (!isVerticalWhitespace(LexStart[0]))
+ continue;
- --LexStart;
+ if (isNewLineEscaped(BufStart, LexStart))
+ continue;
+
+ // LexStart should point at first character of logical line.
+ ++LexStart;
+ break;
}
-
+
// Create a lexer starting at the beginning of this token.
SourceLocation LexerStartLoc = Loc.getLocWithOffset(-LocInfo.second);
Lexer TheLexer(LexerStartLoc, LangOpts, BufStart, LexStart, Buffer.end());
_______________________________________________
cfe-commits mailing list
[email protected]
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits