hokein created this revision.
hokein added a reviewer: sammccall.
Herald added a project: All.
hokein requested review of this revision.
Herald added a project: clang.

The code falls back to the pre-2011 partition-file-id solution (see for
details <https://reviews.llvm.org/D20401#3823476>).

This patch simplifies/rewrites the code based on the partition-based-on-file-id
idea. The new implementation is optimized by reducing the number of
calling getFileID (~40% drop).

Despite the huge drop of getFileID, this is a marignal improvment on
speed (becase the number of calling non-cached getFileID is roughly
the same). It removes the evaluation-order performance gap between 
gcc-built-clang
and clang-built-clang.

SemaExpr.cpp:

- before: 315063 SLocEntries, FileID scans: 388230 linear, 1393437 binary. 
458893 cache hits, 672299 getFileID calls
- after:  313494 SLocEntries, FileID scans: 397525 linear, 1451890 binary, 
176714 cache hits, 397144 getFileID calls

FindTarget.cpp:

- before: 279984 SLocEntries, FileID scans: 361926 linear, 1275930 binary, 
436072 cache hits, 632150 getFileID calls
- after:  278426 SLocEntries, FileID scans: 371279 linear, 1333963 binary, 
153705 cache hits, 356814 getFileID calls


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D134942

Files:
  clang/lib/Lex/TokenLexer.cpp

Index: clang/lib/Lex/TokenLexer.cpp
===================================================================
--- clang/lib/Lex/TokenLexer.cpp
+++ clang/lib/Lex/TokenLexer.cpp
@@ -25,6 +25,7 @@
 #include "clang/Lex/Token.h"
 #include "clang/Lex/VariadicMacroSupport.h"
 #include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SmallString.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/iterator_range.h"
@@ -989,60 +990,55 @@
                                             Token * end_tokens) {
   assert(begin_tokens < end_tokens);
 
-  SourceLocation FirstLoc = begin_tokens->getLocation();
-  SourceLocation CurLoc = FirstLoc;
-
-  // Compare the source location offset of tokens and group together tokens that
-  // are close, even if their locations point to different FileIDs. e.g.
-  //
-  //  |bar    |  foo | cake   |  (3 tokens from 3 consecutive FileIDs)
-  //  ^                    ^
-  //  |bar       foo   cake|     (one SLocEntry chunk for all tokens)
-  //
-  // we can perform this "merge" since the token's spelling location depends
-  // on the relative offset.
-
-  Token *NextTok = begin_tokens + 1;
-  for (; NextTok < end_tokens; ++NextTok) {
-    SourceLocation NextLoc = NextTok->getLocation();
-    if (CurLoc.isFileID() != NextLoc.isFileID())
-      break; // Token from different kind of FileID.
-
-    SourceLocation::IntTy RelOffs;
-    if (!SM.isInSameSLocAddrSpace(CurLoc, NextLoc, &RelOffs))
-      break; // Token from different local/loaded location.
-    // Check that token is not before the previous token or more than 50
-    // "characters" away.
-    if (RelOffs < 0 || RelOffs > 50)
-      break;
-
-    if (CurLoc.isMacroID() && !SM.isWrittenInSameFile(CurLoc, NextLoc))
-      break; // Token from a different macro.
-
-    CurLoc = NextLoc;
+  SourceLocation BeginLoc = begin_tokens->getLocation();
+  llvm::MutableArrayRef<Token> All(begin_tokens, end_tokens);
+  llvm::MutableArrayRef<Token> Partition;
+
+  // Partition the tokens by their FileID.
+  // This is a hot function, and calling getFileID can be expensive, the
+  // implementation is optimized by reducing the number of getFileID.
+  if (BeginLoc.isFileID()) {
+    // We can avoid calling getFileID at all for *file* consecutive tokens --
+    // for this case it is impossible to switch between files in marco arguments
+    // (e.g. we can't use #include in marco arguments, and clang rejects it),
+    // therefore these token must point to the same file.
+    Partition = All.take_while([&](const Token &T) {
+      return T.getLocation().isFileID();
+    });
+    assert(!Partition.empty() &&
+           llvm::all_of(Partition.drop_front(),
+                        [&SM](const Token &T) {
+                          return SM.getFileID((&T - 1)->getLocation()) ==
+                                 SM.getFileID(T.getLocation());
+                        }) &&
+           "Must have the same FIleID!");
+  } else {
+    // Call getFileID once to calculate the bounds, and use the cheaper
+    // sourcelocation-against-bounds comparison.
+    FileID BeginFID = SM.getFileID(BeginLoc);
+    SourceLocation Limit =
+        SM.getComposedLoc(BeginFID, SM.getFileIDSize(BeginFID));
+    Partition = All.take_while([&](const Token &T) {
+      return T.getLocation() >= BeginLoc && T.getLocation() < Limit;
+    });
   }
-
   // For the consecutive tokens, find the length of the SLocEntry to contain
   // all of them.
-  Token &LastConsecutiveTok = *(NextTok-1);
-  SourceLocation::IntTy LastRelOffs = 0;
-  SM.isInSameSLocAddrSpace(FirstLoc, LastConsecutiveTok.getLocation(),
-                           &LastRelOffs);
   SourceLocation::UIntTy FullLength =
-      LastRelOffs + LastConsecutiveTok.getLength();
-
+      Partition.back().getEndLoc().getRawEncoding() -
+      Partition.begin()->getLocation().getRawEncoding();
   // Create a macro expansion SLocEntry that will "contain" all of the tokens.
   SourceLocation Expansion =
-      SM.createMacroArgExpansionLoc(FirstLoc, InstLoc,FullLength);
+      SM.createMacroArgExpansionLoc(BeginLoc, InstLoc, FullLength);
 
   // Change the location of the tokens from the spelling location to the new
   // expanded location.
-  for (; begin_tokens < NextTok; ++begin_tokens) {
-    Token &Tok = *begin_tokens;
-    SourceLocation::IntTy RelOffs = 0;
-    SM.isInSameSLocAddrSpace(FirstLoc, Tok.getLocation(), &RelOffs);
-    Tok.setLocation(Expansion.getLocWithOffset(RelOffs));
+  for (Token& T : Partition) {
+    SourceLocation::IntTy RelativeOffset = T.getLocation().getRawEncoding() -
+       BeginLoc.getRawEncoding();
+    T.setLocation(Expansion.getLocWithOffset(RelativeOffset));
   }
+  begin_tokens = &Partition.back() + 1;
 }
 
 /// Creates SLocEntries and updates the locations of macro argument
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to