zequanwu created this revision.
zequanwu added a reviewer: vsk.
Herald added subscribers: llvm-commits, cfe-commits, hiraditya.
Herald added projects: clang, LLVM.
zequanwu requested review of this revision.
Add a hook to track empty line regions when lexing.
However, this slows down a stage 2 build of clang by around 17%-18% across
various metrics.
Before:
Performance counter stats for 'ninja':
311,209,849.81 msec task-clock:u # 67.409 CPUs utilized
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
281,387,702 page-faults:u # 0.904 K/sec
1,017,711,372,555,936 cycles:u # 3.270 GHz
946,375,750,355,261 instructions:u # 0.93 insn per cycle
177,175,273,188,835 branches:u # 569.311 M/sec
669,996,596,816 branch-misses:u # 0.38% of all branches
4616.756864564 seconds time elapsed
309134.522799000 seconds user
2085.393017000 seconds sys
After:
Performance counter stats for 'ninja':
366,995,591.49 msec task-clock:u # 66.122 CPUs utilized
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
277,237,414 page-faults:u # 0.755 K/sec
1,195,848,140,356,295 cycles:u # 3.258 GHz
1,196,705,529,208,597 instructions:u # 1.00 insn per cycle
223,288,886,752,605 branches:u # 608.424 M/sec
701,642,510,605 branch-misses:u # 0.31% of all branches
5550.251663887 seconds time elapsed
364880.132207000 seconds user
2127.891642000 seconds sys
Might need a more efficient approach.
Repository:
rG LLVM Github Monorepo
https://reviews.llvm.org/D84988
Files:
clang/include/clang/Lex/Lexer.h
clang/include/clang/Lex/Preprocessor.h
clang/lib/CodeGen/CoverageMappingGen.cpp
clang/lib/CodeGen/CoverageMappingGen.h
clang/lib/Lex/Lexer.cpp
clang/lib/Lex/Preprocessor.cpp
llvm/lib/ProfileData/Coverage/CoverageMapping.cpp
Index: llvm/lib/ProfileData/Coverage/CoverageMapping.cpp
===================================================================
--- llvm/lib/ProfileData/Coverage/CoverageMapping.cpp
+++ llvm/lib/ProfileData/Coverage/CoverageMapping.cpp
@@ -481,15 +481,6 @@
bool GapRegion = CR.value().Kind == CounterMappingRegion::GapRegion;
- // Try to emit a segment for the current region.
- if (CurStartLoc == CR.value().endLoc()) {
- // Avoid making zero-length regions active. If it's the last region,
- // emit a skipped segment. Otherwise use its predecessor's count.
- const bool Skipped = (CR.index() + 1) == Regions.size();
- startSegment(ActiveRegions.empty() ? CR.value() : *ActiveRegions.back(),
- CurStartLoc, !GapRegion, Skipped);
- continue;
- }
if (CR.index() + 1 == Regions.size() ||
CurStartLoc != Regions[CR.index() + 1].startLoc()) {
// Emit a segment if the next region doesn't start at the same location
@@ -586,7 +577,7 @@
for (unsigned I = 1, E = Segments.size(); I < E; ++I) {
const auto &L = Segments[I - 1];
const auto &R = Segments[I];
- if (!(L.Line < R.Line) && !(L.Line == R.Line && L.Col < R.Col)) {
+ if (!(L.Line <= R.Line) && !(L.Line == R.Line && L.Col <= R.Col)) {
LLVM_DEBUG(dbgs() << " ! Segment " << L.Line << ":" << L.Col
<< " followed by " << R.Line << ":" << R.Col << "\n");
assert(false && "Coverage segments not unique or sorted");
Index: clang/lib/Lex/Preprocessor.cpp
===================================================================
--- clang/lib/Lex/Preprocessor.cpp
+++ clang/lib/Lex/Preprocessor.cpp
@@ -1417,6 +1417,8 @@
CommentHandler::~CommentHandler() = default;
+EmptylineHandler::~EmptylineHandler() = default;
+
CodeCompletionHandler::~CodeCompletionHandler() = default;
void Preprocessor::createPreprocessingRecord() {
Index: clang/lib/Lex/Lexer.cpp
===================================================================
--- clang/lib/Lex/Lexer.cpp
+++ clang/lib/Lex/Lexer.cpp
@@ -125,6 +125,8 @@
// Default to not keeping comments.
ExtendedTokenMode = 0;
+
+ NewLinePtr = nullptr;
}
/// Lexer constructor - Create a new lexer object for the specified buffer
@@ -2197,6 +2199,13 @@
unsigned char Char = *CurPtr;
+ const char *lastNewLine = nullptr;
+ if (SawNewline) {
+ lastNewLine = CurPtr - 1;
+ if (!NewLinePtr)
+ NewLinePtr = CurPtr - 1;
+ }
+
// Skip consecutive spaces efficiently.
while (true) {
// Skip horizontal whitespace very aggressively.
@@ -2214,6 +2223,9 @@
}
// OK, but handle newline.
+ lastNewLine = CurPtr;
+ if (!NewLinePtr)
+ NewLinePtr = CurPtr;
SawNewline = true;
Char = *++CurPtr;
}
@@ -2237,6 +2249,13 @@
if (SawNewline) {
Result.setFlag(Token::StartOfLine);
TokAtPhysicalStartOfLine = true;
+
+ if (NewLinePtr && lastNewLine && NewLinePtr != lastNewLine) {
+ if (auto *Handler = PP->getEmptylineHandler())
+ Handler->HandleEmptyline(SourceRange(getSourceLocation(NewLinePtr + 1),
+ getSourceLocation(lastNewLine)),
+ getSourceLocation(CurPtr));
+ }
}
BufferPtr = CurPtr;
@@ -2377,7 +2396,7 @@
// contribute to another token), it isn't needed for correctness. Note that
// this is ok even in KeepWhitespaceMode, because we would have returned the
/// comment above in that mode.
- ++CurPtr;
+ NewLinePtr = CurPtr++;
// The next returned token is at the start of the line.
Result.setFlag(Token::StartOfLine);
@@ -3211,6 +3230,9 @@
char Char = getAndAdvanceChar(CurPtr, Result);
tok::TokenKind Kind;
+ if (Char != '\n')
+ NewLinePtr = nullptr;
+
switch (Char) {
case 0: // Null.
// Found end of file?
@@ -3265,6 +3287,7 @@
// Since we consumed a newline, we are back at the start of a line.
IsAtStartOfLine = true;
IsAtPhysicalStartOfLine = true;
+ NewLinePtr = CurPtr - 1;
Kind = tok::eod;
break;
Index: clang/lib/CodeGen/CoverageMappingGen.h
===================================================================
--- clang/lib/CodeGen/CoverageMappingGen.h
+++ clang/lib/CodeGen/CoverageMappingGen.h
@@ -45,7 +45,9 @@
/// Stores additional source code information like skipped ranges which
/// is required by the coverage mapping generator and is obtained from
/// the preprocessor.
-class CoverageSourceInfo : public PPCallbacks, public CommentHandler {
+class CoverageSourceInfo : public PPCallbacks,
+ public CommentHandler,
+ public EmptylineHandler {
// A vector of skipped source ranges and PrevTokLoc with NextTokLoc.
std::vector<SkippedRange> SkippedRanges;
bool AfterComment = false;
@@ -61,6 +63,8 @@
void SourceRangeSkipped(SourceRange Range, SourceLocation EndifLoc) override;
+ void HandleEmptyline(SourceRange Range, SourceLocation NextTokLoc) override;
+
bool HandleComment(Preprocessor &PP, SourceRange Range) override;
void updateNextTokLoc(SourceLocation Loc);
Index: clang/lib/CodeGen/CoverageMappingGen.cpp
===================================================================
--- clang/lib/CodeGen/CoverageMappingGen.cpp
+++ clang/lib/CodeGen/CoverageMappingGen.cpp
@@ -40,6 +40,7 @@
CoverageSourceInfo *CoverageInfo = new CoverageSourceInfo();
PP.addPPCallbacks(std::unique_ptr<PPCallbacks>(CoverageInfo));
PP.addCommentHandler(CoverageInfo);
+ PP.setEmptylineHandler(CoverageInfo);
PP.setPreprocessToken(true);
PP.setTokenWatcher([CoverageInfo](clang::Token Tok) {
// Update previous token location.
@@ -53,6 +54,11 @@
SkippedRanges.push_back({Range});
}
+void CoverageSourceInfo::HandleEmptyline(SourceRange Range,
+ SourceLocation NextTokLoc) {
+ SkippedRanges.push_back({Range, SourceLocation(), NextTokLoc});
+}
+
bool CoverageSourceInfo::HandleComment(Preprocessor &PP, SourceRange Range) {
SkippedRanges.push_back({Range, PrevTokLoc});
AfterComment = true;
@@ -308,19 +314,20 @@
SpellingRegion SR,
SourceLocation PrevTokLoc,
SourceLocation NextTokLoc) {
- // If Range begin location is invalid, it's not a comment region.
- if (PrevTokLoc.isInvalid())
- return SR;
- unsigned PrevTokLine = SM.getSpellingLineNumber(PrevTokLoc);
- unsigned NextTokLine = SM.getSpellingLineNumber(NextTokLoc);
SpellingRegion newSR(SR);
- if (SR.LineStart == PrevTokLine) {
- newSR.LineStart = SR.LineStart + 1;
- newSR.ColumnStart = 1;
+ if (PrevTokLoc.isValid()) {
+ unsigned PrevTokLine = SM.getSpellingLineNumber(PrevTokLoc);
+ if (SR.LineStart == PrevTokLine) {
+ newSR.LineStart = SR.LineStart + 1;
+ newSR.ColumnStart = 1;
+ }
}
- if (SR.LineEnd == NextTokLine) {
- newSR.LineEnd = SR.LineEnd - 1;
- newSR.ColumnEnd = SR.ColumnStart + 1;
+ if (NextTokLoc.isValid()) {
+ unsigned NextTokLine = SM.getSpellingLineNumber(NextTokLoc);
+ if (SR.LineEnd == NextTokLine) {
+ newSR.LineEnd = SR.LineEnd - 1;
+ newSR.ColumnEnd = newSR.ColumnStart + 1;
+ }
}
if (newSR.isInSourceOrder())
return newSR;
Index: clang/include/clang/Lex/Preprocessor.h
===================================================================
--- clang/include/clang/Lex/Preprocessor.h
+++ clang/include/clang/Lex/Preprocessor.h
@@ -67,6 +67,7 @@
class CommentHandler;
class DirectoryEntry;
class DirectoryLookup;
+class EmptylineHandler;
class ExternalPreprocessorSource;
class FileEntry;
class FileManager;
@@ -256,6 +257,9 @@
/// with this preprocessor.
std::vector<CommentHandler *> CommentHandlers;
+ /// Empty line handler.
+ EmptylineHandler *Emptyline = nullptr;
+
/// True if we want to ignore EOF token and continue later on (thus
/// avoid tearing the Lexer and etc. down).
bool IncrementalProcessing = false;
@@ -1219,6 +1223,11 @@
/// Install empty handlers for all pragmas (making them ignored).
void IgnorePragmas();
+ /// Set empty line handler.
+ void setEmptylineHandler(EmptylineHandler *Handler) { Emptyline = Handler; }
+
+ EmptylineHandler *getEmptylineHandler() const { return Emptyline; }
+
/// Add the specified comment handler to the preprocessor.
void addCommentHandler(CommentHandler *Handler);
@@ -2390,6 +2399,17 @@
virtual bool HandleComment(Preprocessor &PP, SourceRange Comment) = 0;
};
+/// Abstract base class that describes a handler that will receive
+/// source ranges for empty lines encountered in the source file.
+class EmptylineHandler {
+public:
+ virtual ~EmptylineHandler();
+
+ // The handler handles empty lines.
+ virtual void HandleEmptyline(SourceRange Range,
+ SourceLocation NextTokLoc) = 0;
+};
+
/// Registry of pragma handlers added by plugins
using PragmaHandlerRegistry = llvm::Registry<PragmaHandler>;
Index: clang/include/clang/Lex/Lexer.h
===================================================================
--- clang/include/clang/Lex/Lexer.h
+++ clang/include/clang/Lex/Lexer.h
@@ -128,6 +128,8 @@
bool HasLeadingEmptyMacro;
+ const char *NewLinePtr;
+
// CurrentConflictMarkerState - The kind of conflict marker we are handling.
ConflictMarkerKind CurrentConflictMarkerState;
_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits