hokein created this revision.
hokein added reviewers: sammccall, nickdesaulniers.
Herald added a subscriber: kadircet.
Herald added a project: All.
hokein requested review of this revision.
Herald added a subscriber: ilya-biryukov.
Herald added a project: clang.

This patch introduces a separate Offset array in SourceManager, it is redundant
and merely used for searching. This "AOS vs. SOA" optimization give us a large
memory locality win, see the data below.

This is a low-hanging fruit optimization (without significantly changing the
SlocEntry and its underlying storge in SourceManager). The sad bit is thatit
increases SourceManager memory usage, however it is a small fraction (< 10%) of
clang AST memory usage, which I think it is mostly negligible.

getFileIDLoaded is excluded in this patch, it is trickier due to the lazy load 
of
SLocEntry, unclear we will gain as much speedup as the local one.

Speedup:

  Linux kernel: getFileIDLocal overhead is reduced to 1.66% (~30% saving)

Memory usage in bytes (AST built in clangd):

  SemaExpr.cpp:  149529494 -> 149502874
  FindTarget.cpp  83177157 ->  83177569


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D135440

Files:
  clang/include/clang/Basic/SourceManager.h
  clang/lib/Basic/SourceManager.cpp


Index: clang/lib/Basic/SourceManager.cpp
===================================================================
--- clang/lib/Basic/SourceManager.cpp
+++ clang/lib/Basic/SourceManager.cpp
@@ -336,6 +336,7 @@
 void SourceManager::clearIDTables() {
   MainFileID = FileID();
   LocalSLocEntryTable.clear();
+  LocalLocOffsetTable.clear();
   LoadedSLocEntryTable.clear();
   SLocEntryLoaded.clear();
   LastLineNoFileIDQuery = FileID();
@@ -618,6 +619,7 @@
   LocalSLocEntryTable.push_back(
       SLocEntry::get(NextLocalOffset,
                      FileInfo::get(IncludePos, File, FileCharacter, 
Filename)));
+  LocalLocOffsetTable.push_back(NextLocalOffset);
   // We do a +1 here because we want a SourceLocation that means "the end of 
the
   // file", e.g. for the "no newline at the end of the file" diagnostic.
   NextLocalOffset += FileSize + 1;
@@ -669,6 +671,7 @@
     return SourceLocation::getMacroLoc(LoadedOffset);
   }
   LocalSLocEntryTable.push_back(SLocEntry::get(NextLocalOffset, Info));
+  LocalLocOffsetTable.push_back(NextLocalOffset);
   assert(NextLocalOffset + Length + 1 > NextLocalOffset &&
          NextLocalOffset + Length + 1 <= CurrentLoadedOffset &&
          "Ran out of source locations!");
@@ -799,10 +802,10 @@
   // SLocOffset.
   unsigned LessIndex = 0;
   // upper bound of the search range.
-  unsigned GreaterIndex = LocalSLocEntryTable.size();
+  unsigned GreaterIndex = LocalLocOffsetTable.size();
   if (LastFileIDLookup.ID >= 0) {
     // Use the LastFileIDLookup to prune the search space.
-    if (LocalSLocEntryTable[LastFileIDLookup.ID].getOffset() < SLocOffset)
+    if (LocalLocOffsetTable[LastFileIDLookup.ID] < SLocOffset)
       LessIndex = LastFileIDLookup.ID;
     else
       GreaterIndex = LastFileIDLookup.ID;
@@ -812,8 +815,8 @@
   unsigned NumProbes = 0;
   while (true) {
     --GreaterIndex;
-    assert(GreaterIndex < LocalSLocEntryTable.size());
-    if (LocalSLocEntryTable[GreaterIndex].getOffset() <= SLocOffset) {
+    assert(GreaterIndex < LocalLocOffsetTable.size());
+    if (LocalLocOffsetTable[GreaterIndex] <= SLocOffset) {
       FileID Res = FileID::get(int(GreaterIndex));
       // Remember it.  We have good locality across FileID lookups.
       LastFileIDLookup = Res;
@@ -827,8 +830,7 @@
   NumProbes = 0;
   while (true) {
     unsigned MiddleIndex = (GreaterIndex-LessIndex)/2+LessIndex;
-    SourceLocation::UIntTy MidOffset =
-        getLocalSLocEntry(MiddleIndex).getOffset();
+    SourceLocation::UIntTy MidOffset = LocalLocOffsetTable[MiddleIndex];
 
     ++NumProbes;
 
@@ -840,8 +842,8 @@
     }
 
     // If the middle index contains the value, succeed and return.
-    if (MiddleIndex + 1 == LocalSLocEntryTable.size() ||
-        SLocOffset < getLocalSLocEntry(MiddleIndex + 1).getOffset()) {
+    if (MiddleIndex + 1 == LocalLocOffsetTable.size() ||
+        SLocOffset < LocalLocOffsetTable[MiddleIndex + 1]) {
       FileID Res = FileID::get(MiddleIndex);
 
       // Remember it.  We have good locality across FileID lookups.
@@ -2254,6 +2256,7 @@
 size_t SourceManager::getDataStructureSizes() const {
   size_t size = llvm::capacity_in_bytes(MemBufferInfos)
     + llvm::capacity_in_bytes(LocalSLocEntryTable)
+    + llvm::capacity_in_bytes(LocalLocOffsetTable)
     + llvm::capacity_in_bytes(LoadedSLocEntryTable)
     + llvm::capacity_in_bytes(SLocEntryLoaded)
     + llvm::capacity_in_bytes(FileInfos);
Index: clang/include/clang/Basic/SourceManager.h
===================================================================
--- clang/include/clang/Basic/SourceManager.h
+++ clang/include/clang/Basic/SourceManager.h
@@ -692,6 +692,9 @@
   /// Positive FileIDs are indexes into this table. Entry 0 indicates an 
invalid
   /// expansion.
   SmallVector<SrcMgr::SLocEntry, 0> LocalSLocEntryTable;
+  /// An in-parallel SlocEntry offset table, merely used for speeding up the
+  /// FileID lookup.
+  SmallVector<SourceLocation::UIntTy> LocalLocOffsetTable;
 
   /// The table of SLocEntries that are loaded from other modules.
   ///


Index: clang/lib/Basic/SourceManager.cpp
===================================================================
--- clang/lib/Basic/SourceManager.cpp
+++ clang/lib/Basic/SourceManager.cpp
@@ -336,6 +336,7 @@
 void SourceManager::clearIDTables() {
   MainFileID = FileID();
   LocalSLocEntryTable.clear();
+  LocalLocOffsetTable.clear();
   LoadedSLocEntryTable.clear();
   SLocEntryLoaded.clear();
   LastLineNoFileIDQuery = FileID();
@@ -618,6 +619,7 @@
   LocalSLocEntryTable.push_back(
       SLocEntry::get(NextLocalOffset,
                      FileInfo::get(IncludePos, File, FileCharacter, Filename)));
+  LocalLocOffsetTable.push_back(NextLocalOffset);
   // We do a +1 here because we want a SourceLocation that means "the end of the
   // file", e.g. for the "no newline at the end of the file" diagnostic.
   NextLocalOffset += FileSize + 1;
@@ -669,6 +671,7 @@
     return SourceLocation::getMacroLoc(LoadedOffset);
   }
   LocalSLocEntryTable.push_back(SLocEntry::get(NextLocalOffset, Info));
+  LocalLocOffsetTable.push_back(NextLocalOffset);
   assert(NextLocalOffset + Length + 1 > NextLocalOffset &&
          NextLocalOffset + Length + 1 <= CurrentLoadedOffset &&
          "Ran out of source locations!");
@@ -799,10 +802,10 @@
   // SLocOffset.
   unsigned LessIndex = 0;
   // upper bound of the search range.
-  unsigned GreaterIndex = LocalSLocEntryTable.size();
+  unsigned GreaterIndex = LocalLocOffsetTable.size();
   if (LastFileIDLookup.ID >= 0) {
     // Use the LastFileIDLookup to prune the search space.
-    if (LocalSLocEntryTable[LastFileIDLookup.ID].getOffset() < SLocOffset)
+    if (LocalLocOffsetTable[LastFileIDLookup.ID] < SLocOffset)
       LessIndex = LastFileIDLookup.ID;
     else
       GreaterIndex = LastFileIDLookup.ID;
@@ -812,8 +815,8 @@
   unsigned NumProbes = 0;
   while (true) {
     --GreaterIndex;
-    assert(GreaterIndex < LocalSLocEntryTable.size());
-    if (LocalSLocEntryTable[GreaterIndex].getOffset() <= SLocOffset) {
+    assert(GreaterIndex < LocalLocOffsetTable.size());
+    if (LocalLocOffsetTable[GreaterIndex] <= SLocOffset) {
       FileID Res = FileID::get(int(GreaterIndex));
       // Remember it.  We have good locality across FileID lookups.
       LastFileIDLookup = Res;
@@ -827,8 +830,7 @@
   NumProbes = 0;
   while (true) {
     unsigned MiddleIndex = (GreaterIndex-LessIndex)/2+LessIndex;
-    SourceLocation::UIntTy MidOffset =
-        getLocalSLocEntry(MiddleIndex).getOffset();
+    SourceLocation::UIntTy MidOffset = LocalLocOffsetTable[MiddleIndex];
 
     ++NumProbes;
 
@@ -840,8 +842,8 @@
     }
 
     // If the middle index contains the value, succeed and return.
-    if (MiddleIndex + 1 == LocalSLocEntryTable.size() ||
-        SLocOffset < getLocalSLocEntry(MiddleIndex + 1).getOffset()) {
+    if (MiddleIndex + 1 == LocalLocOffsetTable.size() ||
+        SLocOffset < LocalLocOffsetTable[MiddleIndex + 1]) {
       FileID Res = FileID::get(MiddleIndex);
 
       // Remember it.  We have good locality across FileID lookups.
@@ -2254,6 +2256,7 @@
 size_t SourceManager::getDataStructureSizes() const {
   size_t size = llvm::capacity_in_bytes(MemBufferInfos)
     + llvm::capacity_in_bytes(LocalSLocEntryTable)
+    + llvm::capacity_in_bytes(LocalLocOffsetTable)
     + llvm::capacity_in_bytes(LoadedSLocEntryTable)
     + llvm::capacity_in_bytes(SLocEntryLoaded)
     + llvm::capacity_in_bytes(FileInfos);
Index: clang/include/clang/Basic/SourceManager.h
===================================================================
--- clang/include/clang/Basic/SourceManager.h
+++ clang/include/clang/Basic/SourceManager.h
@@ -692,6 +692,9 @@
   /// Positive FileIDs are indexes into this table. Entry 0 indicates an invalid
   /// expansion.
   SmallVector<SrcMgr::SLocEntry, 0> LocalSLocEntryTable;
+  /// An in-parallel SlocEntry offset table, merely used for speeding up the
+  /// FileID lookup.
+  SmallVector<SourceLocation::UIntTy> LocalLocOffsetTable;
 
   /// The table of SLocEntries that are loaded from other modules.
   ///
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to