troyj created this revision.
troyj added a reviewer: bruno.
Herald added a project: All.
troyj requested review of this revision.
Herald added a project: clang.
Herald added a subscriber: cfe-commits.

HeaderSearch already uses a caching system to avoid duplicate searches, but the 
initial cold miss can take a long time if a build system has supplied thousands 
of HeaderMaps. For this case, the SearchDirs vector begins with those 
HeaderMaps, so a cache miss requires testing if the sought filename is present 
in each of those maps. Instead, we can consolidate the keys of those HeaderMaps 
into one StringMap and then each cache miss can skip directly to the correct 
HeaderMap or continue its search beyond the initial sequence of HeaderMaps. In 
testing on TUs with ~15000 SearchDirs where the initial 99% are HeaderMaps, 
time spent in Clang was reduced by 15%. This patch is expected to be neutral 
for SearchDir vectors that do not begin with HeaderMaps.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D135801

Files:
  clang/include/clang/Lex/HeaderMap.h
  clang/include/clang/Lex/HeaderSearch.h
  clang/lib/Lex/HeaderSearch.cpp

Index: clang/lib/Lex/HeaderSearch.cpp
===================================================================
--- clang/lib/Lex/HeaderSearch.cpp
+++ clang/lib/Lex/HeaderSearch.cpp
@@ -116,6 +116,7 @@
   NoCurDirSearch = noCurDirSearch;
   SearchDirToHSEntry = std::move(searchDirToHSEntry);
   //LookupFileCache.clear();
+  indexInitialHeaderMaps();
 }
 
 void HeaderSearch::AddSearchPath(const DirectoryLookup &dir, bool isAngled) {
@@ -372,6 +373,29 @@
   return Module;
 }
 
+void HeaderSearch::indexInitialHeaderMaps() {
+  llvm::StringMap<unsigned, llvm::BumpPtrAllocator> Index(SearchDirs.size());
+
+  // Iterate over all filename keys and associate them with the index i.
+  unsigned i = 0;
+  for (; i != SearchDirs.size(); ++i) {
+    auto &Dir = SearchDirs[i];
+
+    // We're concerned with only the initial contiguous run of header
+    // maps within SearchDirs, which can be 99% of SearchDirs when
+    // SearchDirs.size() is ~10000.
+    if (!Dir.isHeaderMap())
+      break;
+
+    auto Callback = [&](StringRef Filename) { Index.try_emplace(Filename, i); };
+
+    Dir.getHeaderMap()->forEachKey(Callback);
+  }
+
+  SearchDirHeaderMapIndex = std::move(Index);
+  FirstNonHeaderMapSearchDirIdx = i;
+}
+
 //===----------------------------------------------------------------------===//
 // File lookup within a DirectoryLookup scope
 //===----------------------------------------------------------------------===//
@@ -977,22 +1001,37 @@
 
   ConstSearchDirIterator NextIt = std::next(It);
 
-  // If the entry has been previously looked up, the first value will be
-  // non-zero.  If the value is equal to i (the start point of our search), then
-  // this is a matching hit.
-  if (!SkipCache && CacheLookup.StartIt == NextIt) {
-    // Skip querying potentially lots of directories for this lookup.
-    if (CacheLookup.HitIt)
-      It = CacheLookup.HitIt;
-    if (CacheLookup.MappedName) {
-      Filename = CacheLookup.MappedName;
-      if (IsMapped)
-        *IsMapped = true;
+  if (!SkipCache) {
+    if (CacheLookup.StartIt == NextIt) {
+      // HIT: Skip querying potentially lots of directories for this lookup.
+      if (CacheLookup.HitIt)
+        It = CacheLookup.HitIt;
+      if (CacheLookup.MappedName) {
+        Filename = CacheLookup.MappedName;
+        if (IsMapped)
+          *IsMapped = true;
+      }
+    } else {
+      // MISS: This is the first query, or the previous query didn't match
+      // our search start.  We will fill in our found location below, so prime
+      // the start point value.
+      CacheLookup.reset(/*NewStartIt=*/NextIt);
+
+      if (It == search_dir_begin() && FirstNonHeaderMapSearchDirIdx > 0) {
+        // Handle cold misses of user includes in the presence of many header
+        // maps.  We avoid searching perhaps thousands of header maps by
+        // jumping directly to the correct one or jumping beyond all of them.
+        auto Iter = SearchDirHeaderMapIndex.find(Filename);
+        if (Iter == SearchDirHeaderMapIndex.end()) {
+          // Not in index => Skip to first SearchDir after initial header maps
+          It = search_dir_nth(FirstNonHeaderMapSearchDirIdx);
+        } else {
+          // In index => Start with a specific header map
+          It = search_dir_nth(Iter->second);
+        }
+      }
     }
   } else {
-    // Otherwise, this is the first query, or the previous query didn't match
-    // our search start.  We will fill in our found location below, so prime the
-    // start point value.
     CacheLookup.reset(/*NewStartIt=*/NextIt);
   }
 
Index: clang/include/clang/Lex/HeaderSearch.h
===================================================================
--- clang/include/clang/Lex/HeaderSearch.h
+++ clang/include/clang/Lex/HeaderSearch.h
@@ -249,6 +249,14 @@
   unsigned SystemDirIdx = 0;
   bool NoCurDirSearch = false;
 
+  /// Maps HeaderMap keys to SearchDir indices. When HeaderMaps are used
+  /// heavily, SearchDirs can start with thousands of HeaderMaps, so this Index
+  /// lets us avoid scanning them all to find a match.
+  llvm::StringMap<unsigned, llvm::BumpPtrAllocator> SearchDirHeaderMapIndex;
+
+  /// The index of the first SearchDir that isn't a header map.
+  unsigned FirstNonHeaderMapSearchDirIdx = 0;
+
   /// \#include prefixes for which the 'system header' property is
   /// overridden.
   ///
@@ -330,6 +338,10 @@
   /// Entity used to look up stored header file information.
   ExternalHeaderFileInfoSource *ExternalSource = nullptr;
 
+  /// Scan all of the header maps at the beginning of SearchDirs and
+  /// map their keys to the SearchDir index of their header map.
+  void indexInitialHeaderMaps();
+
 public:
   HeaderSearch(std::shared_ptr<HeaderSearchOptions> HSOpts,
                SourceManager &SourceMgr, DiagnosticsEngine &Diags,
@@ -801,6 +813,10 @@
   }
 
   ConstSearchDirIterator search_dir_begin() const { return quoted_dir_begin(); }
+  ConstSearchDirIterator search_dir_nth(size_t n) const {
+    assert(n < SearchDirs.size());
+    return {*this, n};
+  }
   ConstSearchDirIterator search_dir_end() const { return system_dir_end(); }
   ConstSearchDirRange search_dir_range() const {
     return {search_dir_begin(), search_dir_end()};
Index: clang/include/clang/Lex/HeaderMap.h
===================================================================
--- clang/include/clang/Lex/HeaderMap.h
+++ clang/include/clang/Lex/HeaderMap.h
@@ -15,6 +15,7 @@
 
 #include "clang/Basic/FileManager.h"
 #include "clang/Basic/LLVM.h"
+#include "clang/Lex/HeaderMapTypes.h"
 #include "llvm/ADT/Optional.h"
 #include "llvm/ADT/StringMap.h"
 #include "llvm/Support/Compiler.h"
@@ -39,6 +40,22 @@
   // Check for a valid header and extract the byte swap.
   static bool checkHeader(const llvm::MemoryBuffer &File, bool &NeedsByteSwap);
 
+  // Make a call for every Key in the map.
+  template <typename Func> void forEachKey(Func Callback) const {
+    const HMapHeader &Hdr = getHeader();
+    unsigned NumBuckets = getEndianAdjustedWord(Hdr.NumBuckets);
+
+    for (unsigned Bucket = 0; Bucket < NumBuckets; ++Bucket) {
+      HMapBucket B = getBucket(Bucket);
+      if (B.Key != HMAP_EmptyBucketKey) {
+        Optional<StringRef> Key = getString(B.Key);
+        if (Key) {
+          Callback(Key.value());
+        }
+      }
+    }
+  }
+
   /// If the specified relative filename is located in this HeaderMap return
   /// the filename it is mapped to, otherwise return an empty StringRef.
   StringRef lookupFilename(StringRef Filename,
@@ -78,6 +95,7 @@
                                            FileManager &FM);
 
   using HeaderMapImpl::dump;
+  using HeaderMapImpl::forEachKey;
   using HeaderMapImpl::getFileName;
   using HeaderMapImpl::lookupFilename;
   using HeaderMapImpl::reverseLookupFilename;
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to