dexonsmith created this revision.
dexonsmith added reviewers: dblaikie, JDevlieghere.
Herald added subscribers: ributzka, hiraditya.
dexonsmith requested review of this revision.
Herald added projects: clang, LLVM.

Refactor the duplicated canonicalize-path logic in `FileCollector` and
`ModulesDependencyCollector` into a new utility called
`PathCanonicalizer` that's shared. This popped up when tracking down a
bug common to both in https://reviews.llvm.org/D95202.

As drive-bys, update a few names and comments to better reflect the
effect of the code, delay removal of `..`s to avoid an unnecessary extra
string copy, and leave behind a couple of FIXMEs for future
consideration.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D95279

Files:
  clang/include/clang/Frontend/Utils.h
  clang/lib/Frontend/ModuleDependencyCollector.cpp
  llvm/include/llvm/Support/FileCollector.h
  llvm/lib/Support/FileCollector.cpp

Index: llvm/lib/Support/FileCollector.cpp
===================================================================
--- llvm/lib/Support/FileCollector.cpp
+++ llvm/lib/Support/FileCollector.cpp
@@ -53,62 +53,81 @@
     : Root(std::move(Root)), OverlayRoot(std::move(OverlayRoot)) {
 }
 
-bool FileCollector::getRealPath(StringRef SrcPath,
-                                SmallVectorImpl<char> &Result) {
-  SmallString<256> RealPath;
+void FileCollector::PathCanonicalizer::updateWithRealPath(
+    SmallVectorImpl<char> &Path) {
+  StringRef SrcPath(Path.begin(), Path.size());
   StringRef FileName = sys::path::filename(SrcPath);
-  std::string Directory = sys::path::parent_path(SrcPath).str();
-  auto DirWithSymlink = SymlinkMap.find(Directory);
+  StringRef Directory = sys::path::parent_path(SrcPath);
 
   // Use real_path to fix any symbolic link component present in a path.
   // Computing the real path is expensive, cache the search through the parent
   // path Directory.
-  if (DirWithSymlink == SymlinkMap.end()) {
-    auto EC = sys::fs::real_path(Directory, RealPath);
-    if (EC)
-      return false;
-    SymlinkMap[Directory] = std::string(RealPath.str());
+  SmallString<256> RealDirectory;
+  auto DirWithSymlink = CachedDirs.find(Directory);
+  if (DirWithSymlink == CachedDirs.end()) {
+    // FIXME: Should this be a call to FileSystem::getRealpath(), in some
+    // cases? What if there is nothing on disk?
+    if (sys::fs::real_path(Directory, RealDirectory))
+      return;
+    CachedDirs[Directory] = std::string(RealDirectory.str());
   } else {
-    RealPath = DirWithSymlink->second;
+    RealDirectory = DirWithSymlink->second;
   }
 
-  sys::path::append(RealPath, FileName);
-  Result.swap(RealPath);
-  return true;
+  // Use the symlink-resolved directory.
+  Path.swap(RealDirectory);
+
+  // Symlinks in the filename don't matter.
+  //
+  // FIXME: If we can cope with this, maybe we can cope without calling
+  // getRealPath() at all when there's no ".." component.
+  sys::path::append(Path, FileName);
 }
 
-void FileCollector::addFileImpl(StringRef SrcPath) {
+/// Make Path absolute.
+static void makeAbsolute(SmallVectorImpl<char> &Path) {
   // We need an absolute src path to append to the root.
-  SmallString<256> AbsoluteSrc = SrcPath;
-  sys::fs::make_absolute(AbsoluteSrc);
+  sys::fs::make_absolute(Path);
 
   // Canonicalize src to a native path to avoid mixed separator styles.
-  sys::path::native(AbsoluteSrc);
+  sys::path::native(Path);
 
   // Remove redundant leading "./" pieces and consecutive separators.
-  StringRef TrimmedAbsoluteSrc =
-      sys::path::remove_leading_dotslash(AbsoluteSrc);
+  Path.erase(Path.begin(), sys::path::remove_leading_dotslash(
+                               StringRef(Path.begin(), Path.size()))
+                               .begin());
+}
 
-  // Canonicalize the source path by removing "..", "." components.
-  SmallString<256> VirtualPath = TrimmedAbsoluteSrc;
-  sys::path::remove_dots(VirtualPath, /*remove_dot_dot=*/true);
+FileCollector::PathCanonicalizer::PathStorage
+FileCollector::PathCanonicalizer::canonicalize(StringRef SrcPath) {
+  PathStorage Paths;
+  Paths.VirtualPath = SrcPath;
+  makeAbsolute(Paths.VirtualPath);
 
   // If a ".." component is present after a symlink component, remove_dots may
   // lead to the wrong real destination path. Let the source be canonicalized
   // like that but make sure we always use the real path for the destination.
-  SmallString<256> CopyFrom;
-  if (!getRealPath(TrimmedAbsoluteSrc, CopyFrom))
-    CopyFrom = VirtualPath;
+  Paths.CopyFrom = Paths.VirtualPath;
+  updateWithRealPath(Paths.CopyFrom);
+
+  // Canonicalize the virtual path by removing "..", "." components.
+  sys::path::remove_dots(Paths.VirtualPath, /*remove_dot_dot=*/true);
+
+  return Paths;
+}
+
+void FileCollector::addFileImpl(StringRef SrcPath) {
+  PathCanonicalizer::PathStorage Paths = Canonicalizer.canonicalize(SrcPath);
 
   SmallString<256> DstPath = StringRef(Root);
-  sys::path::append(DstPath, sys::path::relative_path(CopyFrom));
+  sys::path::append(DstPath, sys::path::relative_path(Paths.CopyFrom));
 
   // Always map a canonical src path to its real path into the YAML, by doing
   // this we map different virtual src paths to the same entry in the VFS
   // overlay, which is a way to emulate symlink inside the VFS; this is also
   // needed for correctness, not doing that can lead to module redefinition
   // errors.
-  addFileToMapping(VirtualPath, DstPath);
+  addFileToMapping(Paths.VirtualPath, DstPath);
 }
 
 llvm::vfs::directory_iterator
Index: llvm/include/llvm/Support/FileCollector.h
===================================================================
--- llvm/include/llvm/Support/FileCollector.h
+++ llvm/include/llvm/Support/FileCollector.h
@@ -69,6 +69,27 @@
 /// as relative paths inside of the Root.
 class FileCollector : public FileCollectorBase {
 public:
+  /// Helper utility that encapsulates the logic for canonicalizing a virtual
+  /// path and a path to copy from.
+  class PathCanonicalizer {
+  public:
+    struct PathStorage {
+      SmallString<256> CopyFrom;
+      SmallString<256> VirtualPath;
+    };
+
+    /// Canonicalize a pair of virtual and real paths.
+    PathStorage canonicalize(StringRef SrcPath);
+
+  private:
+    /// Replace with a (mostly) real path, or don't modify. Resolves symlinks
+    /// in the directory, using \a CachedDirs to avoid redundant lookups, but
+    /// leaves the filename as a possible symlink.
+    void updateWithRealPath(SmallVectorImpl<char> &Path);
+
+    StringMap<std::string> CachedDirs;
+  };
+
   /// \p Root is the directory where collected files are will be stored.
   /// \p OverlayRoot is VFS mapping root.
   /// \p Root directory gets created in copyFiles unless it already exists.
@@ -93,8 +114,6 @@
 private:
   friend FileCollectorFileSystem;
 
-  bool getRealPath(StringRef SrcPath, SmallVectorImpl<char> &Result);
-
   void addFileToMapping(StringRef VirtualPath, StringRef RealPath) {
     if (sys::fs::is_directory(VirtualPath))
       VFSWriter.addDirectoryMapping(VirtualPath, RealPath);
@@ -119,8 +138,8 @@
   /// The yaml mapping writer.
   vfs::YAMLVFSWriter VFSWriter;
 
-  /// Caches RealPath calls when resolving symlinks.
-  StringMap<std::string> SymlinkMap;
+  /// Helper utility for canonicalizing paths.
+  PathCanonicalizer Canonicalizer;
 };
 
 } // end namespace llvm
Index: clang/lib/Frontend/ModuleDependencyCollector.cpp
===================================================================
--- clang/lib/Frontend/ModuleDependencyCollector.cpp
+++ clang/lib/Frontend/ModuleDependencyCollector.cpp
@@ -156,72 +156,32 @@
   VFSWriter.write(OS);
 }
 
-bool ModuleDependencyCollector::getRealPath(StringRef SrcPath,
-                                            SmallVectorImpl<char> &Result) {
-  using namespace llvm::sys;
-  SmallString<256> RealPath;
-  StringRef FileName = path::filename(SrcPath);
-  std::string Dir = path::parent_path(SrcPath).str();
-  auto DirWithSymLink = SymLinkMap.find(Dir);
-
-  // Use real_path to fix any symbolic link component present in a path.
-  // Computing the real path is expensive, cache the search through the
-  // parent path directory.
-  if (DirWithSymLink == SymLinkMap.end()) {
-    if (llvm::sys::fs::real_path(Dir, RealPath))
-      return false;
-    SymLinkMap[Dir] = std::string(RealPath.str());
-  } else {
-    RealPath = DirWithSymLink->second;
-  }
-
-  path::append(RealPath, FileName);
-  Result.swap(RealPath);
-  return true;
-}
-
 std::error_code ModuleDependencyCollector::copyToRoot(StringRef Src,
                                                       StringRef Dst) {
   using namespace llvm::sys;
+  llvm::FileCollector::PathCanonicalizer::PathStorage Paths =
+      Canonicalizer.canonicalize(SrcPath);
 
-  // We need an absolute src path to append to the root.
-  SmallString<256> AbsoluteSrc = Src;
-  fs::make_absolute(AbsoluteSrc);
-  // Canonicalize src to a native path to avoid mixed separator styles.
-  path::native(AbsoluteSrc);
-  // Remove redundant leading "./" pieces and consecutive separators.
-  StringRef TrimmedAbsoluteSrc = path::remove_leading_dotslash(AbsoluteSrc);
-
-  // Canonicalize the source path by removing "..", "." components.
-  SmallString<256> VirtualPath = TrimmedAbsoluteSrc;
-  path::remove_dots(VirtualPath, /*remove_dot_dot=*/true);
-
-  // If a ".." component is present after a symlink component, remove_dots may
-  // lead to the wrong real destination path. Let the source be canonicalized
-  // like that but make sure we always use the real path for the destination.
-  SmallString<256> CopyFrom;
-  if (!getRealPath(TrimmedAbsoluteSrc, CopyFrom))
-    CopyFrom = VirtualPath;
   SmallString<256> CacheDst = getDest();
 
   if (Dst.empty()) {
     // The common case is to map the virtual path to the same path inside the
     // cache.
-    path::append(CacheDst, path::relative_path(CopyFrom));
+    path::append(CacheDst, path::relative_path(Paths.CopyFrom));
   } else {
     // When collecting entries from input vfsoverlays, copy the external
     // contents into the cache but still map from the source.
     if (!fs::exists(Dst))
       return std::error_code();
     path::append(CacheDst, Dst);
-    CopyFrom = Dst;
+    Paths.CopyFrom = Dst;
   }
 
   // Copy the file into place.
   if (std::error_code EC = fs::create_directories(path::parent_path(CacheDst),
                                                   /*IgnoreExisting=*/true))
     return EC;
-  if (std::error_code EC = fs::copy_file(CopyFrom, CacheDst))
+  if (std::error_code EC = fs::copy_file(Paths.CopyFrom, CacheDst))
     return EC;
 
   // Always map a canonical src path to its real path into the YAML, by doing
@@ -229,7 +189,7 @@
   // overlay, which is a way to emulate symlink inside the VFS; this is also
   // needed for correctness, not doing that can lead to module redefinition
   // errors.
-  addFileMapping(VirtualPath, CacheDst);
+  addFileMapping(Paths.VirtualPath, CacheDst);
   return std::error_code();
 }
 
Index: clang/include/clang/Frontend/Utils.h
===================================================================
--- clang/include/clang/Frontend/Utils.h
+++ clang/include/clang/Frontend/Utils.h
@@ -23,6 +23,7 @@
 #include "llvm/ADT/StringRef.h"
 #include "llvm/ADT/StringSet.h"
 #include "llvm/Option/OptSpecifier.h"
+#include "llvm/Support/FileCollector.h"
 #include "llvm/Support/VirtualFileSystem.h"
 #include <cstdint>
 #include <memory>
@@ -151,9 +152,8 @@
   bool HasErrors = false;
   llvm::StringSet<> Seen;
   llvm::vfs::YAMLVFSWriter VFSWriter;
-  llvm::StringMap<std::string> SymLinkMap;
+  llvm::FileCollector::PathCanonicalizer Canonicalizer;
 
-  bool getRealPath(StringRef SrcPath, SmallVectorImpl<char> &Result);
   std::error_code copyToRoot(StringRef Src, StringRef Dst = {});
 
 public:
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
  • [PATCH] D95279: Su... Duncan P. N. Exon Smith via Phabricator via cfe-commits

Reply via email to