Author: Jan Svoboda
Date: 2021-07-20T12:08:46+02:00
New Revision: bc1a2979fc70d954ae97122205c71c8404a1b17e

URL: 
https://github.com/llvm/llvm-project/commit/bc1a2979fc70d954ae97122205c71c8404a1b17e
DIFF: 
https://github.com/llvm/llvm-project/commit/bc1a2979fc70d954ae97122205c71c8404a1b17e.diff

LOG: [clang][deps] Separate filesystem caches for minimized and original files

This patch separates the local and global caches of 
`DependencyScanningFilesystem` into two buckets: minimized files and original 
files. This is necessary to deal with precompiled modules/headers.

Consider a single worker with its instance of filesystem:
1. Build system uses the worker to scan dependencies of module A => filesystem 
cache gets populated with minimized input files.
2. Build system uses the results to explicitly build module A => explicitly 
built module captures the state of the real filesystem (containing 
non-minimized input files).
3. Build system uses the prebuilt module A as an explicit precompiled 
dependency for another compile job B.
4. Build system uses the same worker to scan dependencies for job B => worker 
uses implicit modular build to discover dependencies, which validates the 
filesystem state embedded in the prebuilt module (non-minimized files) to the 
current view of the filesystem (minimized files), resulting in validation 
failures.

This problem can be avoided in step 4 by collecting input files from the 
precompiled module and marking them as "ignored" in the minimizing filesystem. 
This way, the validation should succeed, since we should be always dealing with 
the original (non-minized) input files. However, the filesystem already 
minimized the input files in step 1 and put it in the cache, which gets used in 
step 4 as well even though it's marked ignored (do not minimize). This patch 
essentially fixes this oversight by making the `"file is minimized"` part of 
the cache key (from high level).

Depends on D106064.

Reviewed By: dexonsmith

Differential Revision: https://reviews.llvm.org/D106146

Added: 
    

Modified: 
    
clang/include/clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h
    clang/lib/Tooling/DependencyScanning/DependencyScanningFilesystem.cpp
    clang/unittests/Tooling/CMakeLists.txt
    clang/unittests/Tooling/DependencyScannerTest.cpp

Removed: 
    


################################################################################
diff  --git 
a/clang/include/clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h 
b/clang/include/clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h
index b213f388dadc0..c52da3305f7c1 100644
--- 
a/clang/include/clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h
+++ 
b/clang/include/clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h
@@ -103,7 +103,8 @@ class CachedFileSystemEntry {
 };
 
 /// This class is a shared cache, that caches the 'stat' and 'open' calls to 
the
-/// underlying real file system.
+/// underlying real file system. It distinguishes between minimized and 
original
+/// files.
 ///
 /// It is sharded based on the hash of the key to reduce the lock contention 
for
 /// the worker threads.
@@ -114,21 +115,62 @@ class DependencyScanningFilesystemSharedCache {
     CachedFileSystemEntry Value;
   };
 
-  DependencyScanningFilesystemSharedCache();
-
   /// Returns a cache entry for the corresponding key.
   ///
   /// A new cache entry is created if the key is not in the cache. This is a
   /// thread safe call.
-  SharedFileSystemEntry &get(StringRef Key);
+  SharedFileSystemEntry &get(StringRef Key, bool Minimized);
 
 private:
-  struct CacheShard {
-    std::mutex CacheLock;
-    llvm::StringMap<SharedFileSystemEntry, llvm::BumpPtrAllocator> Cache;
+  class SingleCache {
+  public:
+    SingleCache();
+
+    SharedFileSystemEntry &get(StringRef Key);
+
+  private:
+    struct CacheShard {
+      std::mutex CacheLock;
+      llvm::StringMap<SharedFileSystemEntry, llvm::BumpPtrAllocator> Cache;
+    };
+    std::unique_ptr<CacheShard[]> CacheShards;
+    unsigned NumShards;
   };
-  std::unique_ptr<CacheShard[]> CacheShards;
-  unsigned NumShards;
+
+  SingleCache CacheMinimized;
+  SingleCache CacheOriginal;
+};
+
+/// This class is a local cache, that caches the 'stat' and 'open' calls to the
+/// underlying real file system. It distinguishes between minimized and 
original
+/// files.
+class DependencyScanningFilesystemLocalCache {
+private:
+  using SingleCache =
+      llvm::StringMap<const CachedFileSystemEntry *, llvm::BumpPtrAllocator>;
+
+  SingleCache CacheMinimized;
+  SingleCache CacheOriginal;
+
+  SingleCache &selectCache(bool Minimized) {
+    return Minimized ? CacheMinimized : CacheOriginal;
+  }
+
+public:
+  void setCachedEntry(StringRef Filename, bool Minimized,
+                      const CachedFileSystemEntry *Entry) {
+    SingleCache &Cache = selectCache(Minimized);
+    bool IsInserted = Cache.try_emplace(Filename, Entry).second;
+    (void)IsInserted;
+    assert(IsInserted && "local cache is updated more than once");
+  }
+
+  const CachedFileSystemEntry *getCachedEntry(StringRef Filename,
+                                              bool Minimized) {
+    SingleCache &Cache = selectCache(Minimized);
+    auto It = Cache.find(Filename);
+    return It == Cache.end() ? nullptr : It->getValue();
+  }
 };
 
 /// A virtual file system optimized for the dependency discovery.
@@ -159,24 +201,14 @@ class DependencyScanningWorkerFilesystem : public 
llvm::vfs::ProxyFileSystem {
 private:
   bool shouldIgnoreFile(StringRef Filename);
 
-  void setCachedEntry(StringRef Filename, const CachedFileSystemEntry *Entry) {
-    bool IsInserted = Cache.try_emplace(Filename, Entry).second;
-    (void)IsInserted;
-    assert(IsInserted && "local cache is updated more than once");
-  }
-
-  const CachedFileSystemEntry *getCachedEntry(StringRef Filename) {
-    auto It = Cache.find(Filename);
-    return It == Cache.end() ? nullptr : It->getValue();
-  }
-
   llvm::ErrorOr<const CachedFileSystemEntry *>
   getOrCreateFileSystemEntry(const StringRef Filename);
 
+  /// The global cache shared between worker threads.
   DependencyScanningFilesystemSharedCache &SharedCache;
   /// The local cache is used by the worker thread to cache file system queries
   /// locally instead of querying the global cache every time.
-  llvm::StringMap<const CachedFileSystemEntry *, llvm::BumpPtrAllocator> Cache;
+  DependencyScanningFilesystemLocalCache Cache;
   /// The optional mapping structure which records information about the
   /// excluded conditional directive skip mappings that are used by the
   /// currently active preprocessor.

diff  --git 
a/clang/lib/Tooling/DependencyScanning/DependencyScanningFilesystem.cpp 
b/clang/lib/Tooling/DependencyScanning/DependencyScanningFilesystem.cpp
index 88b835e159f3c..166d4a25363cf 100644
--- a/clang/lib/Tooling/DependencyScanning/DependencyScanningFilesystem.cpp
+++ b/clang/lib/Tooling/DependencyScanning/DependencyScanningFilesystem.cpp
@@ -99,8 +99,7 @@ CachedFileSystemEntry::createDirectoryEntry(llvm::vfs::Status 
&&Stat) {
   return Result;
 }
 
-DependencyScanningFilesystemSharedCache::
-    DependencyScanningFilesystemSharedCache() {
+DependencyScanningFilesystemSharedCache::SingleCache::SingleCache() {
   // This heuristic was chosen using a empirical testing on a
   // reasonably high core machine (iMacPro 18 cores / 36 threads). The cache
   // sharding gives a performance edge by reducing the lock contention.
@@ -111,18 +110,20 @@ DependencyScanningFilesystemSharedCache::
   CacheShards = std::make_unique<CacheShard[]>(NumShards);
 }
 
-/// Returns a cache entry for the corresponding key.
-///
-/// A new cache entry is created if the key is not in the cache. This is a
-/// thread safe call.
 DependencyScanningFilesystemSharedCache::SharedFileSystemEntry &
-DependencyScanningFilesystemSharedCache::get(StringRef Key) {
+DependencyScanningFilesystemSharedCache::SingleCache::get(StringRef Key) {
   CacheShard &Shard = CacheShards[llvm::hash_value(Key) % NumShards];
   std::unique_lock<std::mutex> LockGuard(Shard.CacheLock);
   auto It = Shard.Cache.try_emplace(Key);
   return It.first->getValue();
 }
 
+DependencyScanningFilesystemSharedCache::SharedFileSystemEntry &
+DependencyScanningFilesystemSharedCache::get(StringRef Key, bool Minimized) {
+  SingleCache &Cache = Minimized ? CacheMinimized : CacheOriginal;
+  return Cache.get(Key);
+}
+
 /// Whitelist file extensions that should be minimized, treating no extension 
as
 /// a source file that should be minimized.
 ///
@@ -165,17 +166,17 @@ bool DependencyScanningWorkerFilesystem::shouldIgnoreFile(
 llvm::ErrorOr<const CachedFileSystemEntry *>
 DependencyScanningWorkerFilesystem::getOrCreateFileSystemEntry(
     const StringRef Filename) {
-  if (const CachedFileSystemEntry *Entry = getCachedEntry(Filename)) {
+  bool ShouldMinimize =
+      !IgnoredFiles.count(Filename) && shouldMinimize(Filename);
+
+  if (const auto *Entry = Cache.getCachedEntry(Filename, ShouldMinimize))
     return Entry;
-  }
 
   // FIXME: Handle PCM/PCH files.
   // FIXME: Handle module map files.
 
-  bool KeepOriginalSource = shouldIgnoreFile(Filename) ||
-                            !shouldMinimize(Filename);
   DependencyScanningFilesystemSharedCache::SharedFileSystemEntry
-      &SharedCacheEntry = SharedCache.get(Filename);
+      &SharedCacheEntry = SharedCache.get(Filename, ShouldMinimize);
   const CachedFileSystemEntry *Result;
   {
     std::unique_lock<std::mutex> LockGuard(SharedCacheEntry.ValueLock);
@@ -197,15 +198,15 @@ 
DependencyScanningWorkerFilesystem::getOrCreateFileSystemEntry(
         CacheEntry = CachedFileSystemEntry::createDirectoryEntry(
             std::move(*MaybeStatus));
       else
-        CacheEntry = CachedFileSystemEntry::createFileEntry(
-            Filename, FS, !KeepOriginalSource);
+        CacheEntry = CachedFileSystemEntry::createFileEntry(Filename, FS,
+                                                            ShouldMinimize);
     }
 
     Result = &CacheEntry;
   }
 
   // Store the result in the local cache.
-  setCachedEntry(Filename, Result);
+  Cache.setCachedEntry(Filename, ShouldMinimize, Result);
   return Result;
 }
 

diff  --git a/clang/unittests/Tooling/CMakeLists.txt 
b/clang/unittests/Tooling/CMakeLists.txt
index 3ed6992d81f50..01eb8d67dadac 100644
--- a/clang/unittests/Tooling/CMakeLists.txt
+++ b/clang/unittests/Tooling/CMakeLists.txt
@@ -68,6 +68,7 @@ clang_target_link_libraries(ToolingTests
   clangAST
   clangASTMatchers
   clangBasic
+  clangDependencyScanning
   clangFormat
   clangFrontend
   clangLex

diff  --git a/clang/unittests/Tooling/DependencyScannerTest.cpp 
b/clang/unittests/Tooling/DependencyScannerTest.cpp
index 5a9c514c07489..538b36b4b2edc 100644
--- a/clang/unittests/Tooling/DependencyScannerTest.cpp
+++ b/clang/unittests/Tooling/DependencyScannerTest.cpp
@@ -15,6 +15,7 @@
 #include "clang/Frontend/FrontendActions.h"
 #include "clang/Tooling/CompilationDatabase.h"
 #include "clang/Tooling/Tooling.h"
+#include "clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/Support/FormatVariadic.h"
 #include "llvm/Support/Path.h"
@@ -203,5 +204,35 @@ TEST(DependencyScanner, 
ScanDepsReuseFilemanagerHasInclude) {
   EXPECT_EQ(convert_to_slash(Deps[5]), "/root/symlink.h");
 }
 
+namespace dependencies {
+TEST(DependencyScanningFilesystem, IgnoredFilesHaveSeparateCache) {
+  auto VFS = llvm::makeIntrusiveRefCnt<llvm::vfs::InMemoryFileSystem>();
+  VFS->addFile("/mod.h", 0, llvm::MemoryBuffer::getMemBuffer("// hi 
there!\n"));
+
+  DependencyScanningFilesystemSharedCache SharedCache;
+  auto Mappings = std::make_unique<ExcludedPreprocessorDirectiveSkipMapping>();
+  DependencyScanningWorkerFilesystem DepFS(SharedCache, VFS, Mappings.get());
+
+  auto StatusMinimized0 = DepFS.status("/mod.h");
+  DepFS.ignoreFile("/mod.h");
+  auto StatusFull1 = DepFS.status("/mod.h");
+  DepFS.clearIgnoredFiles();
+
+  auto StatusMinimized2 = DepFS.status("/mod.h");
+  DepFS.ignoreFile("/mod.h");
+  auto StatusFull3 = DepFS.status("/mod.h");
+
+  EXPECT_TRUE(StatusMinimized0);
+  EXPECT_EQ(StatusMinimized0->getSize(), 0u);
+  EXPECT_TRUE(StatusFull1);
+  EXPECT_EQ(StatusFull1->getSize(), 13u);
+
+  EXPECT_TRUE(StatusMinimized2);
+  EXPECT_EQ(StatusMinimized2->getSize(), 0u);
+  EXPECT_TRUE(StatusFull3);
+  EXPECT_EQ(StatusFull3->getSize(), 13u);
+}
+
+} // end namespace dependencies
 } // end namespace tooling
 } // end namespace clang


        
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to