Author: Haojian Wu Date: 2022-10-07T09:16:08+02:00 New Revision: 74e4f778cf16cbf7163b5c6de6027a43f5e9169f
URL: https://github.com/llvm/llvm-project/commit/74e4f778cf16cbf7163b5c6de6027a43f5e9169f DIFF: https://github.com/llvm/llvm-project/commit/74e4f778cf16cbf7163b5c6de6027a43f5e9169f.diff LOG: [Lex] Simplify and cleanup the updateConsecutiveMacroArgTokens implementation. The code falls back to the pre-2011 partition-file-id solution (see for [details](https://reviews.llvm.org/D20401#3823476)). This patch simplifies/rewrites the code based on the partition-based-on-file-id idea. The new implementation is optimized by reducing the number of calling getFileID (~40% drop). Despite the huge drop of getFileID, this is a marignal improvment on speed (becase the number of calling non-cached getFileID is roughly the same). It removes the evaluation-order performance gap between gcc-built-clang and clang-built-clang. SemaExpr.cpp: - before: 315063 SLocEntries, FileID scans: 388230 linear, 1393437 binary. 458893 cache hits, 672299 getFileID calls - after: 313494 SLocEntries, FileID scans: 397525 linear, 1451890 binary, 176714 cache hits, 397144 getFileID calls FindTarget.cpp: - before: 279984 SLocEntries, FileID scans: 361926 linear, 1275930 binary, 436072 cache hits, 632150 getFileID calls - after: 278426 SLocEntries, FileID scans: 371279 linear, 1333963 binary, 153705 cache hits, 356814 getFileID calls Differential Revision: https://reviews.llvm.org/D134942 Added: Modified: clang/lib/Lex/TokenLexer.cpp Removed: ################################################################################ diff --git a/clang/lib/Lex/TokenLexer.cpp b/clang/lib/Lex/TokenLexer.cpp index efda6d0046fa1..ecbc764a61ac2 100644 --- a/clang/lib/Lex/TokenLexer.cpp +++ b/clang/lib/Lex/TokenLexer.cpp @@ -25,6 +25,7 @@ #include "clang/Lex/Token.h" #include "clang/Lex/VariadicMacroSupport.h" #include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/iterator_range.h" @@ -987,62 +988,57 @@ static void updateConsecutiveMacroArgTokens(SourceManager &SM, SourceLocation InstLoc, Token *&begin_tokens, Token * end_tokens) { - assert(begin_tokens < end_tokens); - - SourceLocation FirstLoc = begin_tokens->getLocation(); - SourceLocation CurLoc = FirstLoc; - - // Compare the source location offset of tokens and group together tokens that - // are close, even if their locations point to diff erent FileIDs. e.g. - // - // |bar | foo | cake | (3 tokens from 3 consecutive FileIDs) - // ^ ^ - // |bar foo cake| (one SLocEntry chunk for all tokens) - // - // we can perform this "merge" since the token's spelling location depends - // on the relative offset. - - Token *NextTok = begin_tokens + 1; - for (; NextTok < end_tokens; ++NextTok) { - SourceLocation NextLoc = NextTok->getLocation(); - if (CurLoc.isFileID() != NextLoc.isFileID()) - break; // Token from diff erent kind of FileID. - - SourceLocation::IntTy RelOffs; - if (!SM.isInSameSLocAddrSpace(CurLoc, NextLoc, &RelOffs)) - break; // Token from diff erent local/loaded location. - // Check that token is not before the previous token or more than 50 - // "characters" away. - if (RelOffs < 0 || RelOffs > 50) - break; - - if (CurLoc.isMacroID() && !SM.isWrittenInSameFile(CurLoc, NextLoc)) - break; // Token from a diff erent macro. - - CurLoc = NextLoc; + assert(begin_tokens + 1 < end_tokens); + SourceLocation BeginLoc = begin_tokens->getLocation(); + llvm::MutableArrayRef<Token> All(begin_tokens, end_tokens); + llvm::MutableArrayRef<Token> Partition; + + // Partition the tokens by their FileID. + // This is a hot function, and calling getFileID can be expensive, the + // implementation is optimized by reducing the number of getFileID. + if (BeginLoc.isFileID()) { + // Consecutive tokens not written in macros must be from the same file. + // (Neither #include nor eof can occur inside a macro argument.) + Partition = All.take_while([&](const Token &T) { + return T.getLocation().isFileID(); + }); + } else { + // Call getFileID once to calculate the bounds, and use the cheaper + // sourcelocation-against-bounds comparison. + FileID BeginFID = SM.getFileID(BeginLoc); + SourceLocation Limit = + SM.getComposedLoc(BeginFID, SM.getFileIDSize(BeginFID)); + Partition = All.take_while([&](const Token &T) { + return T.getLocation() >= BeginLoc && T.getLocation() < Limit; + }); } + assert(!Partition.empty()); // For the consecutive tokens, find the length of the SLocEntry to contain // all of them. - Token &LastConsecutiveTok = *(NextTok-1); - SourceLocation::IntTy LastRelOffs = 0; - SM.isInSameSLocAddrSpace(FirstLoc, LastConsecutiveTok.getLocation(), - &LastRelOffs); SourceLocation::UIntTy FullLength = - LastRelOffs + LastConsecutiveTok.getLength(); - + Partition.back().getEndLoc().getRawEncoding() - + Partition.front().getLocation().getRawEncoding(); // Create a macro expansion SLocEntry that will "contain" all of the tokens. SourceLocation Expansion = - SM.createMacroArgExpansionLoc(FirstLoc, InstLoc,FullLength); - + SM.createMacroArgExpansionLoc(BeginLoc, InstLoc, FullLength); + +#ifdef EXPENSIVE_CHECKS + assert(llvm::all_of(Partition.drop_front(), + [&SM, ID = SM.getFileID(Partition.front().getLocation())]( + const Token &T) { + return ID == SM.getFileID(T.getLocation()); + }) && + "Must have the same FIleID!"); +#endif // Change the location of the tokens from the spelling location to the new // expanded location. - for (; begin_tokens < NextTok; ++begin_tokens) { - Token &Tok = *begin_tokens; - SourceLocation::IntTy RelOffs = 0; - SM.isInSameSLocAddrSpace(FirstLoc, Tok.getLocation(), &RelOffs); - Tok.setLocation(Expansion.getLocWithOffset(RelOffs)); + for (Token& T : Partition) { + SourceLocation::IntTy RelativeOffset = + T.getLocation().getRawEncoding() - BeginLoc.getRawEncoding(); + T.setLocation(Expansion.getLocWithOffset(RelativeOffset)); } + begin_tokens = &Partition.back() + 1; } /// Creates SLocEntries and updates the locations of macro argument _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits