jansvoboda11 updated this revision to Diff 395684.
jansvoboda11 added a comment.

Erase filenames in temporary `Stat` objects and use `UniqueID` as the shard key
where appropriate.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D114966/new/

https://reviews.llvm.org/D114966

Files:
  clang/include/clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h
  clang/lib/Tooling/DependencyScanning/DependencyScanningFilesystem.cpp
  clang/unittests/Tooling/DependencyScannerTest.cpp

Index: clang/unittests/Tooling/DependencyScannerTest.cpp
===================================================================
--- clang/unittests/Tooling/DependencyScannerTest.cpp
+++ clang/unittests/Tooling/DependencyScannerTest.cpp
@@ -224,6 +224,8 @@
   EXPECT_TRUE(StatusFull1);
   EXPECT_EQ(StatusMinimized0->getSize(), 17u);
   EXPECT_EQ(StatusFull1->getSize(), 30u);
+  EXPECT_EQ(StatusMinimized0->getName(), StringRef("/mod.h"));
+  EXPECT_EQ(StatusFull1->getName(), StringRef("/mod.h"));
 }
 
 TEST(DependencyScanningFilesystem, IgnoredFilesAreCachedSeparately2) {
@@ -245,6 +247,8 @@
   EXPECT_TRUE(StatusMinimized1);
   EXPECT_EQ(StatusFull0->getSize(), 30u);
   EXPECT_EQ(StatusMinimized1->getSize(), 17u);
+  EXPECT_EQ(StatusFull0->getName(), StringRef("/mod.h"));
+  EXPECT_EQ(StatusMinimized1->getName(), StringRef("/mod.h"));
 }
 
 } // end namespace dependencies
Index: clang/lib/Tooling/DependencyScanning/DependencyScanningFilesystem.cpp
===================================================================
--- clang/lib/Tooling/DependencyScanning/DependencyScanningFilesystem.cpp
+++ clang/lib/Tooling/DependencyScanning/DependencyScanningFilesystem.cpp
@@ -16,10 +16,10 @@
 using namespace tooling;
 using namespace dependencies;
 
-llvm::ErrorOr<llvm::vfs::Status>
-CachedFileSystemEntry::initFile(StringRef Filename, llvm::vfs::FileSystem &FS) {
+llvm::ErrorOr<DependencyScanningWorkerFilesystem::TentativeEntry>
+DependencyScanningWorkerFilesystem::readFile(StringRef Filename) {
   // Load the file and its content from the file system.
-  auto MaybeFile = FS.openFileForRead(Filename);
+  auto MaybeFile = getUnderlyingFS().openFileForRead(Filename);
   if (!MaybeFile)
     return MaybeFile.getError();
   auto File = std::move(*MaybeFile);
@@ -34,24 +34,42 @@
     return MaybeBuffer.getError();
   auto Buffer = std::move(*MaybeBuffer);
 
-  OriginalContents = std::move(Buffer);
-  return Stat;
+  // If the file size changed between read and stat, pretend it didn't.
+  if (Stat.getSize() != Buffer->getBufferSize())
+    Stat = llvm::vfs::Status::copyWithNewSize(Stat, Buffer->getBufferSize());
+
+  return TentativeEntry(Stat, std::move(Buffer));
 }
 
-void CachedFileSystemEntry::minimizeFile() {
-  assert(OriginalContents && "minimizing missing contents");
+EntryRef DependencyScanningWorkerFilesystem::minimizeIfNecessary(
+    const CachedFileSystemEntry &Entry, StringRef Filename) {
+  if (Entry.isError() || Entry.isDirectory() || !shouldMinimize(Filename))
+    return EntryRef(/*Minimized=*/false, Filename, Entry);
+
+  CachedFileContents *Contents = Entry.getContents();
+  assert(Contents && "contents not allocated");
+
+  // Double-checked locking.
+  if (Contents->MinimizedAccess.load())
+    return EntryRef(/*Minimized=*/true, Filename, Entry);
+
+  std::lock_guard<std::mutex> GuardLock(Contents->ValueLock);
+
+  // Double-checked locking.
+  if (Contents->MinimizedAccess.load())
+    return EntryRef(/*Minimized=*/true, Filename, Entry);
 
   llvm::SmallString<1024> MinimizedFileContents;
   // Minimize the file down to directives that might affect the dependencies.
   SmallVector<minimize_source_to_dependency_directives::Token, 64> Tokens;
-  if (minimizeSourceToDependencyDirectives(OriginalContents->getBuffer(),
+  if (minimizeSourceToDependencyDirectives(Contents->Original->getBuffer(),
                                            MinimizedFileContents, Tokens)) {
     // FIXME: Propagate the diagnostic if desired by the client.
     // Use the original file if the minimization failed.
-    MinimizedContentsStorage =
-        llvm::MemoryBuffer::getMemBuffer(*OriginalContents);
-    MinimizedContentsAccess.store(MinimizedContentsStorage.get());
-    return;
+    Contents->MinimizedStorage =
+        llvm::MemoryBuffer::getMemBuffer(*Contents->Original);
+    Contents->MinimizedAccess.store(Contents->MinimizedStorage.get());
+    return EntryRef(/*Minimized=*/true, Filename, Entry);
   }
 
   // The contents produced by the minimizer must be null terminated.
@@ -74,16 +92,17 @@
     }
     Mapping[Range.Offset] = Range.Length;
   }
-  PPSkippedRangeMapping = std::move(Mapping);
+  Contents->PPSkippedRangeMapping = std::move(Mapping);
 
-  MinimizedContentsStorage = std::make_unique<llvm::SmallVectorMemoryBuffer>(
+  Contents->MinimizedStorage = std::make_unique<llvm::SmallVectorMemoryBuffer>(
       std::move(MinimizedFileContents));
-  // The algorithm in `getOrCreateFileSystemEntry` uses the presence of
-  // minimized contents to decide whether an entry is up-to-date or not.
-  // If it is up-to-date, the skipped range mappings must be already computed.
-  // This is why we need to store the minimized contents **after** storing the
-  // skipped range mappings. Failing to do so would lead to a data race.
-  MinimizedContentsAccess.store(MinimizedContentsStorage.get());
+  // This function performed double-checked locking using `MinimizedAccess`.
+  // Assigning it must be the last thing this function does. If we were to
+  // assign it before `PPSkippedRangeMapping`, other threads may skip the
+  // critical section (`MinimizedAccess != nullptr`) and access the mappings
+  // that are about to be initialized, leading to a data race.
+  Contents->MinimizedAccess.store(Contents->MinimizedStorage.get());
+  return EntryRef(/*Minimized=*/true, Filename, Entry);
 }
 
 DependencyScanningFilesystemSharedCache::
@@ -98,12 +117,77 @@
   CacheShards = std::make_unique<CacheShard[]>(NumShards);
 }
 
-DependencyScanningFilesystemSharedCache::SharedFileSystemEntry &
-DependencyScanningFilesystemSharedCache::get(StringRef Key) {
-  CacheShard &Shard = CacheShards[llvm::hash_value(Key) % NumShards];
-  std::lock_guard<std::mutex> LockGuard(Shard.CacheLock);
-  auto It = Shard.Cache.try_emplace(Key);
-  return It.first->getValue();
+DependencyScanningFilesystemSharedCache::CacheShard &
+DependencyScanningFilesystemSharedCache::getShardForFilename(
+    StringRef Filename) const {
+  return CacheShards[llvm::hash_value(Filename) % NumShards];
+}
+
+DependencyScanningFilesystemSharedCache::CacheShard &
+DependencyScanningFilesystemSharedCache::getShardForUID(
+    llvm::sys::fs::UniqueID UID) const {
+  auto Hash = llvm::hash_combine(UID.getDevice(), UID.getFile());
+  return CacheShards[Hash % NumShards];
+}
+
+const CachedFileSystemEntry *
+DependencyScanningFilesystemSharedCache::CacheShard::findEntryByFilename(
+    StringRef Filename) const {
+  std::lock_guard<std::mutex> LockGuard(CacheLock);
+  auto It = EntriesByFilename.find(Filename);
+  return It == EntriesByFilename.end() ? nullptr : It->getValue();
+}
+
+const CachedFileSystemEntry *
+DependencyScanningFilesystemSharedCache::CacheShard::findEntryByUID(
+    llvm::sys::fs::UniqueID UID) const {
+  std::lock_guard<std::mutex> LockGuard(CacheLock);
+  auto It = EntriesByUID.find(UID);
+  return It == EntriesByUID.end() ? nullptr : It->getSecond();
+}
+
+const CachedFileSystemEntry &
+DependencyScanningFilesystemSharedCache::CacheShard::
+    getOrEmplaceEntryForFilename(StringRef Filename,
+                                 llvm::ErrorOr<llvm::vfs::Status> Stat) {
+  std::lock_guard<std::mutex> LockGuard(CacheLock);
+  auto Insertion = EntriesByFilename.insert({Filename, nullptr});
+  if (Insertion.second)
+    Insertion.first->second =
+        new (EntryStorage.Allocate()) CachedFileSystemEntry(std::move(Stat));
+  return *Insertion.first->second;
+}
+
+const CachedFileSystemEntry &
+DependencyScanningFilesystemSharedCache::CacheShard::getOrEmplaceEntryForUID(
+    llvm::sys::fs::UniqueID UID, llvm::vfs::Status Stat,
+    CachedFileContents *Contents) {
+  std::lock_guard<std::mutex> LockGuard(CacheLock);
+  auto Insertion = EntriesByUID.insert({UID, nullptr});
+  if (Insertion.second)
+    Insertion.first->second = new (EntryStorage.Allocate())
+        CachedFileSystemEntry(std::move(Stat), Contents);
+  return *Insertion.first->second;
+}
+
+const CachedFileSystemEntry &
+DependencyScanningFilesystemSharedCache::CacheShard::
+    getOrInsertEntryForFilename(StringRef Filename,
+                                const CachedFileSystemEntry &Entry) {
+  std::lock_guard<std::mutex> LockGuard(CacheLock);
+  return *EntriesByFilename.insert({Filename, &Entry}).first->getValue();
+}
+
+CachedFileContents &
+DependencyScanningFilesystemSharedCache::CacheShard::getOrEmplaceContentsForUID(
+    llvm::sys::fs::UniqueID UID,
+    std::unique_ptr<llvm::MemoryBuffer> OriginalContents) {
+  std::lock_guard<std::mutex> LockGuard(CacheLock);
+  auto Insertion = ContentsCache.insert({UID, nullptr});
+  if (Insertion.second)
+    Insertion.first->second = new (ContentsStorage.Allocate())
+        CachedFileContents(std::move(OriginalContents));
+  return *Insertion.first->second;
 }
 
 /// Whitelist file extensions that should be minimized, treating no extension as
@@ -148,53 +232,66 @@
   return !NotToBeMinimized.contains(Filename);
 }
 
-void CachedFileSystemEntry::init(llvm::ErrorOr<llvm::vfs::Status> &&MaybeStatus,
-                                 StringRef Filename,
-                                 llvm::vfs::FileSystem &FS) {
-  if (!MaybeStatus || MaybeStatus->isDirectory())
-    MaybeStat = std::move(MaybeStatus);
-  else
-    MaybeStat = initFile(Filename, FS);
+const CachedFileSystemEntry &
+DependencyScanningWorkerFilesystem::getOrEmplaceSharedEntryForUID(
+    TentativeEntry TEntry) {
+  auto &Shard = SharedCache.getShardForUID(TEntry.Status.getUniqueID());
+  CachedFileContents *Contents = nullptr;
+  if (TEntry.Contents)
+    Contents = &Shard.getOrEmplaceContentsForUID(TEntry.Status.getUniqueID(),
+                                                 std::move(TEntry.Contents));
+  return Shard.getOrEmplaceEntryForUID(TEntry.Status.getUniqueID(),
+                                       std::move(TEntry.Status), Contents);
 }
 
-llvm::ErrorOr<EntryRef>
-DependencyScanningWorkerFilesystem::getOrCreateFileSystemEntry(
+const CachedFileSystemEntry *
+DependencyScanningWorkerFilesystem::findEntryByFilenameWithWriteThrough(
     StringRef Filename) {
-  bool ShouldBeMinimized = shouldMinimize(Filename);
-
-  const auto *Entry = LocalCache.getCachedEntry(Filename);
-  if (Entry && !Entry->needsUpdate(ShouldBeMinimized))
-    return EntryRef(ShouldBeMinimized, *Entry).unwrapError();
-
-  // FIXME: Handle PCM/PCH files.
-  // FIXME: Handle module map files.
-
-  auto &SharedCacheEntry = SharedCache.get(Filename);
-  {
-    std::lock_guard<std::mutex> LockGuard(SharedCacheEntry.ValueLock);
-    CachedFileSystemEntry &CacheEntry = SharedCacheEntry.Value;
-
-    if (!CacheEntry.isInitialized()) {
-      auto MaybeStatus = getUnderlyingFS().status(Filename);
-      if (!MaybeStatus && !shouldCacheStatFailures(Filename))
-        // HACK: We need to always restat non source files if the stat fails.
-        //   This is because Clang first looks up the module cache and module
-        //   files before building them, and then looks for them again. If we
-        //   cache the stat failure, it won't see them the second time.
-        return MaybeStatus.getError();
-      CacheEntry.init(std::move(MaybeStatus), Filename, getUnderlyingFS());
-    }
+  if (const auto *Entry = LocalCache.findEntryByFilename(Filename))
+    return Entry;
+  auto &Shard = SharedCache.getShardForFilename(Filename);
+  if (const auto *Entry = Shard.findEntryByFilename(Filename))
+    return &LocalCache.insertEntryForFilename(Filename, *Entry);
+  return nullptr;
+}
 
-    // Checking `needsUpdate` verifies the entry represents an opened file.
-    // Only checking `needsMinimization` could lead to minimization of files
-    // that we failed to load (such files don't have `OriginalContents`).
-    if (CacheEntry.needsUpdate(ShouldBeMinimized))
-      CacheEntry.minimizeFile();
+llvm::ErrorOr<const CachedFileSystemEntry &>
+DependencyScanningWorkerFilesystem::computeAndStoreResult(StringRef Filename) {
+  llvm::ErrorOr<llvm::vfs::Status> Stat = getUnderlyingFS().status(Filename);
+  if (!Stat) {
+    if (!shouldCacheStatFailures(Filename))
+      return Stat.getError();
+    const auto &Entry =
+        getOrEmplaceSharedEntryForFilename(Filename, Stat.getError());
+    return insertLocalEntryForFilename(Filename, Entry);
   }
 
-  // Store the result in the local cache.
-  Entry = &SharedCacheEntry.Value;
-  return EntryRef(ShouldBeMinimized, *Entry).unwrapError();
+  if (const auto *Entry = findSharedEntryByUID(*Stat))
+    return insertLocalEntryForFilename(Filename, *Entry);
+
+  auto TEntry =
+      Stat->isDirectory() ? TentativeEntry(*Stat) : readFile(Filename);
+
+  const CachedFileSystemEntry *SharedEntry = [&]() {
+    if (TEntry) {
+      const auto &UIDEntry = getOrEmplaceSharedEntryForUID(std::move(*TEntry));
+      return &getOrInsertSharedEntryForFilename(Filename, UIDEntry);
+    }
+    return &getOrEmplaceSharedEntryForFilename(Filename, TEntry.getError());
+  }();
+
+  return insertLocalEntryForFilename(Filename, *SharedEntry);
+}
+
+llvm::ErrorOr<EntryRef>
+DependencyScanningWorkerFilesystem::getOrCreateFileSystemEntry(
+    StringRef Filename) {
+  if (const auto *Entry = findEntryByFilenameWithWriteThrough(Filename))
+    return minimizeIfNecessary(*Entry, Filename).unwrapError();
+  auto MaybeEntry = computeAndStoreResult(Filename);
+  if (!MaybeEntry)
+    return MaybeEntry.getError();
+  return minimizeIfNecessary(*MaybeEntry, Filename).unwrapError();
 }
 
 llvm::ErrorOr<llvm::vfs::Status>
@@ -247,7 +344,8 @@
     return std::make_error_code(std::errc::is_a_directory);
 
   auto Result = std::make_unique<MinimizedVFSFile>(
-      llvm::MemoryBuffer::getMemBuffer(Entry.getContents(), Entry.getName(),
+      llvm::MemoryBuffer::getMemBuffer(Entry.getContents(),
+                                       Entry.getStatus().getName(),
                                        /*RequiresNullTerminator=*/false),
       Entry.getStatus());
 
Index: clang/include/clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h
===================================================================
--- clang/include/clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h
+++ clang/include/clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h
@@ -22,6 +22,26 @@
 namespace tooling {
 namespace dependencies {
 
+/// Original and minimized contents of a cached file entry. Single instance can
+/// be shared between multiple entries.
+struct CachedFileContents {
+  CachedFileContents(std::unique_ptr<llvm::MemoryBuffer> Original)
+      : Original(std::move(Original)), MinimizedAccess(nullptr) {}
+
+  /// Owning storage for the original contents.
+  std::unique_ptr<llvm::MemoryBuffer> Original;
+
+  /// The mutex that must be locked before mutating minimized contents.
+  std::mutex ValueLock;
+  /// Owning storage for the minimized contents.
+  std::unique_ptr<llvm::MemoryBuffer> MinimizedStorage;
+  /// Accessor to the minimized contents that's atomic to avoid data races.
+  std::atomic<llvm::MemoryBuffer *> MinimizedAccess;
+  /// Skipped range mapping of the minimized contents.
+  /// This is initialized iff `MinimizedAccess != nullptr`.
+  PreprocessorSkippedRangeMapping PPSkippedRangeMapping;
+};
+
 /// An in-memory representation of a file system entity that is of interest to
 /// the dependency scanning filesystem.
 ///
@@ -29,111 +49,89 @@
 /// - opened file with original contents and a stat value,
 /// - opened file with original contents, minimized contents and a stat value,
 /// - directory entry with its stat value,
-/// - filesystem error,
-/// - uninitialized entry with unknown status.
+/// - filesystem error.
 class CachedFileSystemEntry {
 public:
-  /// Creates an uninitialized entry.
-  CachedFileSystemEntry()
-      : MaybeStat(llvm::vfs::Status()), MinimizedContentsAccess(nullptr) {}
+  /// Creates an entry without contents: either a filesystem error or
+  /// a directory with stat value.
+  CachedFileSystemEntry(llvm::ErrorOr<llvm::vfs::Status> Stat)
+      : MaybeStat(std::move(Stat)), ContentsAccess(nullptr) {}
 
-  /// Initialize the cached file system entry.
-  void init(llvm::ErrorOr<llvm::vfs::Status> &&MaybeStatus, StringRef Filename,
-            llvm::vfs::FileSystem &FS);
-
-  /// Initialize the entry as file with minimized or original contents.
-  ///
-  /// The filesystem opens the file even for `stat` calls open to avoid the
-  /// issues with stat + open of minimized files that might lead to a
-  /// mismatching size of the file.
-  llvm::ErrorOr<llvm::vfs::Status> initFile(StringRef Filename,
-                                            llvm::vfs::FileSystem &FS);
-
-  /// Minimize contents of the file.
-  void minimizeFile();
-
-  /// \returns True if the entry is initialized.
-  bool isInitialized() const {
-    return !MaybeStat || MaybeStat->isStatusKnown();
-  }
+  /// Creates an entry representing a file with contents.
+  CachedFileSystemEntry(llvm::ErrorOr<llvm::vfs::Status> Stat,
+                        CachedFileContents *Contents)
+      : MaybeStat(std::move(Stat)), ContentsAccess(Contents) {}
 
   /// \returns True if the entry is a filesystem error.
   bool isError() const { return !MaybeStat; }
 
-  /// \returns True if the current entry points to a directory.
+  /// \returns True if the current entry represents a directory.
   bool isDirectory() const { return !isError() && MaybeStat->isDirectory(); }
 
   /// \returns Original contents of the file.
   StringRef getOriginalContents() const {
-    assert(isInitialized() && "not initialized");
     assert(!isError() && "error");
     assert(!MaybeStat->isDirectory() && "not a file");
-    assert(OriginalContents && "not read");
-    return OriginalContents->getBuffer();
+    CachedFileContents *Contents = getContents();
+    assert(Contents && "contents member not initialized");
+    return Contents->Original->getBuffer();
   }
 
   /// \returns Minimized contents of the file.
   StringRef getMinimizedContents() const {
-    assert(isInitialized() && "not initialized");
     assert(!isError() && "error");
-    assert(!isDirectory() && "not a file");
-    llvm::MemoryBuffer *Buffer = MinimizedContentsAccess.load();
+    assert(!MaybeStat->isDirectory() && "not a file");
+    CachedFileContents *Contents = getContents();
+    assert(Contents && "contents member not initialized");
+    llvm::MemoryBuffer *Buffer = Contents->MinimizedAccess.load();
     assert(Buffer && "not minimized");
     return Buffer->getBuffer();
   }
 
-  /// \returns True if this entry represents a file that can be read.
-  bool isReadable() const { return MaybeStat && !MaybeStat->isDirectory(); }
-
-  /// \returns True if this cached entry needs to be updated.
-  bool needsUpdate(bool ShouldBeMinimized) const {
-    return isReadable() && needsMinimization(ShouldBeMinimized);
-  }
-
-  /// \returns True if the contents of this entry need to be minimized.
-  bool needsMinimization(bool ShouldBeMinimized) const {
-    return ShouldBeMinimized && !MinimizedContentsAccess.load();
-  }
-
   /// \returns The error.
-  std::error_code getError() const {
-    assert(isInitialized() && "not initialized");
-    return MaybeStat.getError();
-  }
+  std::error_code getError() const { return MaybeStat.getError(); }
 
   /// \returns The entry status.
   llvm::vfs::Status getStatus() const {
-    assert(isInitialized() && "not initialized");
     assert(!isError() && "error");
     return *MaybeStat;
   }
 
   /// \returns the name of the file.
   StringRef getName() const {
-    assert(isInitialized() && "not initialized");
     assert(!isError() && "error");
     return MaybeStat->getName();
   }
 
-  /// Return the mapping between location -> distance that is used to speed up
+  /// \returns The unique ID of the entry.
+  llvm::sys::fs::UniqueID getUniqueID() const {
+    assert(!isError() && "error");
+    return MaybeStat->getUniqueID();
+  }
+
+  /// \returns The mapping between location -> distance that is used to speed up
   /// the block skipping in the preprocessor.
   const PreprocessorSkippedRangeMapping &getPPSkippedRangeMapping() const {
     assert(!isError() && "error");
     assert(!isDirectory() && "not a file");
-    return PPSkippedRangeMapping;
+    CachedFileContents *Contents = getContents();
+    assert(Contents && "not minimized");
+    return Contents->PPSkippedRangeMapping;
+  }
+
+  /// \returns The data structure holding both original and minimized contents.
+  CachedFileContents *getContents() const {
+    assert(!isError() && "error");
+    assert(!isDirectory() && "not a file");
+    return ContentsAccess.load();
   }
 
 private:
   llvm::ErrorOr<llvm::vfs::Status> MaybeStat;
-  std::unique_ptr<llvm::MemoryBuffer> OriginalContents;
-
-  /// Owning storage for the minimized file contents.
-  std::unique_ptr<llvm::MemoryBuffer> MinimizedContentsStorage;
-  /// Atomic view of the minimized file contents.
-  /// This prevents data races when multiple threads call `needsMinimization`.
-  std::atomic<llvm::MemoryBuffer *> MinimizedContentsAccess;
 
-  PreprocessorSkippedRangeMapping PPSkippedRangeMapping;
+  /// Non-owning pointer to the file contents. This can be shared between
+  /// multiple entries (e.g. between a symlink and its target).
+  std::atomic<CachedFileContents *> ContentsAccess;
 };
 
 /// This class is a shared cache, that caches the 'stat' and 'open' calls to the
@@ -144,24 +142,69 @@
 /// the worker threads.
 class DependencyScanningFilesystemSharedCache {
 public:
-  struct SharedFileSystemEntry {
-    std::mutex ValueLock;
-    CachedFileSystemEntry Value;
+  struct CacheShard {
+    /// The mutex that needs to be locked before mutation of any member.
+    mutable std::mutex CacheLock;
+
+    /// Map from filenames to cached entries.
+    llvm::StringMap<const CachedFileSystemEntry *, llvm::BumpPtrAllocator>
+        EntriesByFilename;
+
+    /// Map from unique IDs to cached entries.
+    llvm::DenseMap<llvm::sys::fs::UniqueID, const CachedFileSystemEntry *>
+        EntriesByUID;
+
+    /// The backing storage for cached entries.
+    llvm::SpecificBumpPtrAllocator<CachedFileSystemEntry> EntryStorage;
+
+    /// Map from unique IDs to cached contents.
+    llvm::DenseMap<llvm::sys::fs::UniqueID, CachedFileContents *> ContentsCache;
+
+    /// The backing storage for cached contents.
+    llvm::SpecificBumpPtrAllocator<CachedFileContents> ContentsStorage;
+
+    /// Returns entry associated with the filename or nullptr if none is found.
+    const CachedFileSystemEntry *findEntryByFilename(StringRef Filename) const;
+
+    /// Returns entry associated with the unique ID or nullptr if none is found.
+    const CachedFileSystemEntry *
+    findEntryByUID(llvm::sys::fs::UniqueID UID) const;
+
+    /// Returns entry associated with the filename if there is some. Otherwise,
+    /// constructs new one with the given status, associates it with the
+    /// filename and returns the result.
+    const CachedFileSystemEntry &
+    getOrEmplaceEntryForFilename(StringRef Filename,
+                                 llvm::ErrorOr<llvm::vfs::Status> Stat);
+
+    /// Returns entry associated with the unique ID if there is some. Otherwise,
+    /// constructs new one with the given status and contents, associates it
+    /// with the unique ID and returns the result.
+    const CachedFileSystemEntry &
+    getOrEmplaceEntryForUID(llvm::sys::fs::UniqueID UID, llvm::vfs::Status Stat,
+                            CachedFileContents *Contents = nullptr);
+
+    /// Returns entry associated with the filename if there is some. Otherwise,
+    /// associates the given entry with the filename and returns it.
+    const CachedFileSystemEntry &
+    getOrInsertEntryForFilename(StringRef Filename,
+                                const CachedFileSystemEntry &Entry);
+
+    /// Returns contents associated with the unique ID if there is some.
+    /// Otherwise, constructs new one with the given buffer, associates it with
+    /// the unique ID and returns the result.
+    CachedFileContents &getOrEmplaceContentsForUID(
+        llvm::sys::fs::UniqueID UID,
+        std::unique_ptr<llvm::MemoryBuffer> OriginalContents);
   };
 
   DependencyScanningFilesystemSharedCache();
 
-  /// Returns a cache entry for the corresponding key.
-  ///
-  /// A new cache entry is created if the key is not in the cache. This is a
-  /// thread safe call.
-  SharedFileSystemEntry &get(StringRef Key);
+  /// Returns shard for the given key.
+  CacheShard &getShardForFilename(StringRef Filename) const;
+  CacheShard &getShardForUID(llvm::sys::fs::UniqueID UID) const;
 
 private:
-  struct CacheShard {
-    std::mutex CacheLock;
-    llvm::StringMap<SharedFileSystemEntry, llvm::BumpPtrAllocator> Cache;
-  };
   std::unique_ptr<CacheShard[]> CacheShards;
   unsigned NumShards;
 };
@@ -173,8 +216,20 @@
   llvm::StringMap<const CachedFileSystemEntry *, llvm::BumpPtrAllocator> Cache;
 
 public:
-  const CachedFileSystemEntry *getCachedEntry(StringRef Filename) {
-    return Cache[Filename];
+  /// Returns entry associated with the filename or nullptr if none is found.
+  const CachedFileSystemEntry *findEntryByFilename(StringRef Filename) const {
+    auto It = Cache.find(Filename);
+    return It == Cache.end() ? nullptr : It->getValue();
+  }
+
+  /// Associates the given entry with the filename and returns a reference to
+  /// that entry (for convenience).
+  const CachedFileSystemEntry &
+  insertEntryForFilename(StringRef Filename,
+                         const CachedFileSystemEntry &Entry) {
+    const auto *InsertedEntry = Cache.insert({Filename, &Entry}).first->second;
+    assert(InsertedEntry == &Entry && "entry already present");
+    return *InsertedEntry;
   }
 };
 
@@ -187,15 +242,18 @@
   /// are minimized.
   bool Minimized;
 
+  /// The filename used to access this entry.
+  std::string Filename;
+
   /// The underlying cached entry.
   const CachedFileSystemEntry &Entry;
 
 public:
-  EntryRef(bool Minimized, const CachedFileSystemEntry &Entry)
-      : Minimized(Minimized), Entry(Entry) {}
+  EntryRef(bool Minimized, StringRef Name, const CachedFileSystemEntry &Entry)
+      : Minimized(Minimized), Filename(Name), Entry(Entry) {}
 
   llvm::vfs::Status getStatus() const {
-    llvm::vfs::Status Stat = Entry.getStatus();
+    auto Stat = llvm::vfs::Status::copyWithNewName(Entry.getStatus(), Filename);
     if (Stat.isDirectory())
       return Stat;
     return llvm::vfs::Status::copyWithNewSize(Stat, getContents().size());
@@ -203,7 +261,6 @@
 
   bool isError() const { return Entry.isError(); }
   bool isDirectory() const { return Entry.isDirectory(); }
-  StringRef getName() const { return Entry.getName(); }
 
   /// If the cached entry represents an error, promotes it into `ErrorOr`.
   llvm::ErrorOr<EntryRef> unwrapError() const {
@@ -253,8 +310,88 @@
   /// Check whether the file should be minimized.
   bool shouldMinimize(StringRef Filename);
 
+  /// Returns entry for the given filename.
+  ///
+  /// Attempts to use the local and shared caches first, then falls back to
+  /// using the underlying filesystem.
   llvm::ErrorOr<EntryRef> getOrCreateFileSystemEntry(StringRef Filename);
 
+  /// For a filename that's not yet associated with any entry in the caches,
+  /// uses the underlying filesystem to either look up the entry in the shared
+  /// cache indexed by unique ID, or create a new entry from scratch.
+  llvm::ErrorOr<const CachedFileSystemEntry &>
+  computeAndStoreResult(StringRef Filename);
+
+  /// Minimizes the given entry if necessary and returns a wrapper object with
+  /// reference semantics.
+  EntryRef minimizeIfNecessary(const CachedFileSystemEntry &Entry,
+                               StringRef Filename);
+
+  /// Represents a filesystem entry that has been stat-ed (and potentially read)
+  /// and that's about to be inserted into the cache as `CachedFileSystemEntry`.
+  struct TentativeEntry {
+    llvm::vfs::Status Status;
+    std::unique_ptr<llvm::MemoryBuffer> Contents;
+
+    TentativeEntry(const llvm::vfs::Status &Status,
+                   std::unique_ptr<llvm::MemoryBuffer> Contents = nullptr)
+        : Status(llvm::vfs::Status::copyWithNewName(Status, "")),
+          Contents(std::move(Contents)) {}
+  };
+
+  /// Reads file at the given path. Enforces consistency between the file size
+  /// in status and size of read contents.
+  llvm::ErrorOr<TentativeEntry> readFile(StringRef Filename);
+
+  /// Returns entry associated with the unique ID of the given tentative entry
+  /// if there is some in the shared cache. Otherwise, constructs new one,
+  /// associates it with the unique ID and returns the result.
+  const CachedFileSystemEntry &
+  getOrEmplaceSharedEntryForUID(TentativeEntry TEntry);
+
+  /// Returns entry associated with the filename or nullptr if none is found.
+  ///
+  /// Returns entry from local cache if there is some. Otherwise, if the entry
+  /// is found in the shared cache, writes it through the local cache and
+  /// returns it. Otherwise returns nullptr.
+  const CachedFileSystemEntry *
+  findEntryByFilenameWithWriteThrough(StringRef Filename);
+
+  /// Returns entry associated with the unique ID in the shared cache or nullptr
+  /// if none is found.
+  const CachedFileSystemEntry *
+  findSharedEntryByUID(llvm::vfs::Status Stat) const {
+    return SharedCache.getShardForUID(Stat.getUniqueID())
+        .findEntryByUID(Stat.getUniqueID());
+  }
+
+  /// Associates the given entry with the filename in the local cache and
+  /// returns it.
+  const CachedFileSystemEntry &
+  insertLocalEntryForFilename(StringRef Filename,
+                              const CachedFileSystemEntry &Entry) {
+    return LocalCache.insertEntryForFilename(Filename, Entry);
+  }
+
+  /// Returns entry associated with the filename in the shared cache if there is
+  /// some. Otherwise, constructs new one with the given error code, associates
+  /// it with the filename and returns the result.
+  const CachedFileSystemEntry &
+  getOrEmplaceSharedEntryForFilename(StringRef Filename, std::error_code EC) {
+    return SharedCache.getShardForFilename(Filename)
+        .getOrEmplaceEntryForFilename(Filename, EC);
+  }
+
+  /// Returns entry associated with the filename in the shared cache if there is
+  /// some. Otherwise, associates the given entry with the filename and returns
+  /// it.
+  const CachedFileSystemEntry &
+  getOrInsertSharedEntryForFilename(StringRef Filename,
+                                    const CachedFileSystemEntry &Entry) {
+    return SharedCache.getShardForFilename(Filename)
+        .getOrInsertEntryForFilename(Filename, Entry);
+  }
+
   /// The global cache shared between worker threads.
   DependencyScanningFilesystemSharedCache &SharedCache;
   /// The local cache is used by the worker thread to cache file system queries
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to