idlecode updated this revision to Diff 91466. idlecode added a comment. Addressed Alexander's comments.
https://reviews.llvm.org/D30748 Files: lib/Lex/Lexer.cpp unittests/Lex/LexerTest.cpp
Index: unittests/Lex/LexerTest.cpp =================================================================== --- unittests/Lex/LexerTest.cpp +++ unittests/Lex/LexerTest.cpp @@ -380,4 +380,34 @@ EXPECT_EQ(SourceMgr.getFileIDSize(SourceMgr.getFileID(helper1ArgLoc)), 8U); } +TEST_F(LexerTest, GetBeginningOfTokenWithEscapedNewLine) { + // Each line should have the same length for + // further offset calculation to be more straightforward. + const unsigned IdentifierLength = 8; + std::string TextToLex = "rabarbar\n" + "foo\\\nbar\n" + "foo\\\rbar\n" + "fo\\\r\nbar\n" + "foo\\\n\rba\n"; + std::vector<tok::TokenKind> ExpectedTokens{5, tok::identifier}; + std::vector<Token> LexedTokens = CheckLex(TextToLex, ExpectedTokens); + + for (const Token &Tok : LexedTokens) { + std::pair<FileID, unsigned> OriginalLocation = + SourceMgr.getDecomposedLoc(Tok.getLocation()); + for (unsigned Offset = 0; Offset < IdentifierLength; ++Offset) { + SourceLocation LookupLocation = + Tok.getLocation().getLocWithOffset(Offset); + + std::pair<FileID, unsigned> FoundLocation = + SourceMgr.getDecomposedExpansionLoc( + Lexer::GetBeginningOfToken(LookupLocation, SourceMgr, LangOpts)); + + // Check that location returned by the GetBeginningOfToken + // is the same as original token location reported by Lexer. + EXPECT_EQ(FoundLocation.second, OriginalLocation.second); + } + } +} + } // anonymous namespace Index: lib/Lex/Lexer.cpp =================================================================== --- lib/Lex/Lexer.cpp +++ lib/Lex/Lexer.cpp @@ -452,39 +452,56 @@ return false; } +/// \brief Check if new line pointed by Str is escaped. 
+static bool isNewLineEscaped(const char *BufferStart, const char *Str) { + assert(isVerticalWhitespace(Str[0])); + if (Str - 1 < BufferStart) + return false; + if (Str[-1] == '\\') + return true; + if (!isVerticalWhitespace(Str[-1])) + return false; + if (Str - 2 < BufferStart) + return false; + if (Str[-2] == '\\') + return true; + return false; +} + static SourceLocation getBeginningOfFileToken(SourceLocation Loc, const SourceManager &SM, const LangOptions &LangOpts) { assert(Loc.isFileID()); std::pair<FileID, unsigned> LocInfo = SM.getDecomposedLoc(Loc); if (LocInfo.first.isInvalid()) return Loc; - + bool Invalid = false; StringRef Buffer = SM.getBufferData(LocInfo.first, &Invalid); if (Invalid) return Loc; // Back up from the current location until we hit the beginning of a line // (or the buffer). We'll relex from that point. - const char *BufStart = Buffer.data(); if (LocInfo.second >= Buffer.size()) return Loc; - - const char *StrData = BufStart+LocInfo.second; - if (StrData[0] == '\n' || StrData[0] == '\r') - return Loc; + + const char *BufStart = Buffer.data(); + const char *StrData = BufStart + LocInfo.second; const char *LexStart = StrData; - while (LexStart != BufStart) { - if (LexStart[0] == '\n' || LexStart[0] == '\r') { - ++LexStart; - break; - } + for (; LexStart != BufStart; --LexStart) { + if (!isVerticalWhitespace(LexStart[0])) + continue; - --LexStart; + if (isNewLineEscaped(BufStart, LexStart)) + continue; + + // LexStart should point at first character of logical line. + ++LexStart; + break; } - + // Create a lexer starting at the beginning of this token. SourceLocation LexerStartLoc = Loc.getLocWithOffset(-LocInfo.second); Lexer TheLexer(LexerStartLoc, LangOpts, BufStart, LexStart, Buffer.end());
_______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits