benlangmuir created this revision. benlangmuir added reviewers: jansvoboda11, Bigcheese. Herald added a project: All. benlangmuir requested review of this revision. Herald added a project: clang. Herald added a subscriber: cfe-commits.
When dep-scanning, canonicalize the module map path as much as we can. This avoids unnecessarily needing to build multiple versions of a module due to symlinks or case-insensitive file paths. Despite the name `tryGetRealPathName`, the previous implementation did not actually return the realpath most of the time, and indeed it would be incorrect to do so since the realpath could be outside the module directory, which would have broken finding headers relative to the module. Instead, use a canonicalization that is specific to the needs of modulemap files (canonicalize the directory separately from the filename). Repository: rG LLVM Github Monorepo https://reviews.llvm.org/D134923 Files: clang/include/clang/Lex/ModuleMap.h clang/lib/Lex/HeaderSearch.cpp clang/lib/Lex/ModuleMap.cpp clang/lib/Tooling/DependencyScanning/ModuleDepCollector.cpp clang/test/ClangScanDeps/modules-symlink-dir.c
Index: clang/test/ClangScanDeps/modules-symlink-dir.c =================================================================== --- /dev/null +++ clang/test/ClangScanDeps/modules-symlink-dir.c @@ -0,0 +1,119 @@ +// Check that we canonicalize the module map path without changing the module +// directory, which would break header lookup. + +// REQUIRES: shell + +// RUN: rm -rf %t +// RUN: split-file %s %t +// RUN: sed -e "s|DIR|%/t|g" %t/cdb.json.in > %t/cdb.json +// RUN: ln -s module %t/symlink-to-module +// RUN: ln -s ../actual.modulemap %t/module/module.modulemap +// RUN: ln -s A %t/module/F.framework/Versions/Current +// RUN: ln -s Versions/Current/Modules %t/module/F.framework/Modules +// RUN: ln -s Versions/Current/Headers %t/module/F.framework/Headers + +// RUN: clang-scan-deps -compilation-database %t/cdb.json -j 1 -reuse-filemanager=0 \ +// RUN: -format experimental-full -mode=preprocess-dependency-directives \ +// RUN: -optimize-args -module-files-dir %t/build > %t/deps.json + +// RUN: cat %t/deps.json | FileCheck -DPREFIX=%/t %s + +// Check the module commands actually build. +// RUN: %deps-to-rsp %t/deps.json --module-name=Mod > %t/Mod.rsp +// RUN: %deps-to-rsp %t/deps.json --module-name=F > %t/F.rsp +// RUN: %clang @%t/Mod.rsp +// RUN: %clang @%t/F.rsp + +// CHECK: "modules": [ +// CHECK: { +// CHECK: "clang-module-deps": [], +// CHECK: "clang-modulemap-file": "[[PREFIX]]{{.}}module{{.}}F.framework{{.}}Modules{{.}}module.modulemap" +// CHECK: "command-line": [ +// CHECK-NOT: symlink-to-module +// CHECK: "[[PREFIX]]{{.}}module{{.}}F.framework{{.}}Modules{{.}}module.modulemap" +// CHECK-NOT: symlink-to-module +// CHECK: ] +// CHECK: "context-hash": "[[F_CONTEXT_HASH:[A-Z0-9]+]]" +// CHECK: "name": "F" +// CHECK-NEXT: } +// CHECK-NEXT: { +// CHECK: "clang-modulemap-file": "[[PREFIX]]{{.}}module{{.}}module.modulemap" +// CHECK: "command-line": [ +// CHECK-NOT: symlink-to-module +// CHECK: "[[PREFIX]]{{.}}module{{.}}module.modulemap" +// CHECK-NOT: symlink-to-module +// CHECK: ] +// CHECK: "context-hash": "[[CONTEXT_HASH:[A-Z0-9]+]]" +// CHECK: "name": "Mod" +// CHECK-NEXT: } +// CHECK-NEXT: ] +// CHECK: "translation-units": [ +// CHECK: "clang-module-deps": [ +// CHECK: { +// CHECK: "context-hash": "[[CONTEXT_HASH]]" +// CHECK: "module-name": "Mod" +// CHECK: } +// CHECK-NEXT: ], +// CHECK: "clang-module-deps": [ +// CHECK: { +// CHECK: "context-hash": "[[CONTEXT_HASH]]" +// CHECK: "module-name": "Mod" +// CHECK: } +// CHECK-NEXT: ] +// CHECK: "clang-module-deps": [ +// CHECK: { +// CHECK: "context-hash": "[[F_CONTEXT_HASH]]" +// CHECK: "module-name": "F" +// CHECK: } +// CHECK-NEXT: ] +// CHECK: "clang-module-deps": [ +// CHECK: { +// CHECK: "context-hash": "[[F_CONTEXT_HASH]]" +// CHECK: "module-name": "F" +// CHECK: } +// CHECK-NEXT: ] + +//--- cdb.json.in +[ + { + "directory": "DIR", + "command": "clang -fsyntax-only DIR/tu1.c -fmodules -fimplicit-module-maps -fmodules-cache-path=DIR/cache", + "file": "DIR/tu1.c" + }, + { + "directory": "DIR", + "command": "clang -fsyntax-only DIR/tu2.c -fmodules -fimplicit-module-maps -fmodules-cache-path=DIR/cache", + "file": "DIR/tu2.c" + }, + { + "directory": "DIR", + "command": "clang -fsyntax-only -F DIR/symlink-to-module DIR/tu3.c -fmodules -fimplicit-module-maps -fmodules-cache-path=DIR/cache", + "file": "DIR/tu3.c" + }, + { + "directory": "DIR", + "command": "clang -fsyntax-only -F DIR/module DIR/tu3.c -fmodules -fimplicit-module-maps -fmodules-cache-path=DIR/cache", + "file": "DIR/tu3.c" + }, +] + +//--- actual.modulemap +module Mod { header "header.h" } + +//--- module/header.h + +//--- tu1.c +#include "symlink-to-module/header.h" + +//--- tu2.c +#include "module/header.h" + +//--- module/F.framework/Versions/A/Modules/module.modulemap +framework module F { + umbrella header "F.h" +} + +//--- module/F.framework/Versions/A/Headers/F.h + +//--- tu3.c +#include "F/F.h" Index: clang/lib/Tooling/DependencyScanning/ModuleDepCollector.cpp =================================================================== --- clang/lib/Tooling/DependencyScanning/ModuleDepCollector.cpp +++ clang/lib/Tooling/DependencyScanning/ModuleDepCollector.cpp @@ -406,15 +406,16 @@ MD.ImplicitModulePCMPath = std::string(M->getASTFile()->getName()); MD.IsSystem = M->IsSystem; - const FileEntry *ModuleMap = MDC.ScanInstance.getPreprocessor() - .getHeaderSearchInfo() - .getModuleMap() - .getModuleMapFileForUniquing(M); + ModuleMap &ModMapInfo = + MDC.ScanInstance.getPreprocessor().getHeaderSearchInfo().getModuleMap(); + + const FileEntry *ModuleMap = ModMapInfo.getModuleMapFileForUniquing(M); if (ModuleMap) { - StringRef Path = ModuleMap->tryGetRealPathName(); - if (Path.empty()) - Path = ModuleMap->getName(); + // FIXME: Update header search to keep FileEntryRef rather than rely on + // getLastRef(). + SmallString<128> Path = ModuleMap->getLastRef().getNameAsRequested(); + ModMapInfo.canonicalizeModuleMapPath(Path); MD.ClangModuleMapFile = std::string(Path); } Index: clang/lib/Lex/ModuleMap.cpp =================================================================== --- clang/lib/Lex/ModuleMap.cpp +++ clang/lib/Lex/ModuleMap.cpp @@ -1299,6 +1299,38 @@ InferredModuleAllowedBy[M] = ModMap; } +std::error_code +ModuleMap::canonicalizeModuleMapPath(SmallVectorImpl<char> &Path) { + StringRef Dir = llvm::sys::path::parent_path({Path.data(), Path.size()}); + + // Do not canonicalize within the framework; the module map parser expects + // Modules/ not Versions/A/Modules. + if (llvm::sys::path::filename(Dir) == "Modules") { + StringRef Parent = llvm::sys::path::parent_path(Dir); + if (Parent.endswith(".framework")) + Dir = Parent; + } + + FileManager &FM = SourceMgr.getFileManager(); + auto DirEntry = FM.getDirectory(Dir.empty() ? "." : Dir); + if (!DirEntry) + return DirEntry.getError(); + + // Canonicalize the directory. + StringRef CanonicalDir = FM.getCanonicalName(*DirEntry); + if (CanonicalDir != Dir) { + bool Done = llvm::sys::path::replace_path_prefix(Path, Dir, CanonicalDir); + (void)Done; + assert(Done && "Path should always start with Dir"); + } + + // In theory, the filename component should also be canonicalized if it + // on a case-insensitive filesystem. However, the extra canonicalization is + // expensive and if clang looked up the filename it will always be lowercase. + + return std::error_code(); +} + void ModuleMap::addAdditionalModuleMapFile(const Module *M, const FileEntry *ModuleMap) { AdditionalModMaps[M].insert(ModuleMap); Index: clang/lib/Lex/HeaderSearch.cpp =================================================================== --- clang/lib/Lex/HeaderSearch.cpp +++ clang/lib/Lex/HeaderSearch.cpp @@ -255,18 +255,11 @@ // // To avoid false-negatives, we form as canonical a path as we can, and map // to lower-case in case we're on a case-insensitive file system. - std::string Parent = - std::string(llvm::sys::path::parent_path(ModuleMapPath)); - if (Parent.empty()) - Parent = "."; - auto Dir = FileMgr.getDirectory(Parent); - if (!Dir) + SmallString<128> CanonicalPath(ModuleMapPath); + if (getModuleMap().canonicalizeModuleMapPath(CanonicalPath)) return {}; - auto DirName = FileMgr.getCanonicalName(*Dir); - auto FileName = llvm::sys::path::filename(ModuleMapPath); - llvm::hash_code Hash = - llvm::hash_combine(DirName.lower(), FileName.lower()); + llvm::hash_code Hash = llvm::hash_combine(CanonicalPath.str().lower()); SmallString<128> HashStr; llvm::APInt(64, size_t(Hash)).toStringUnsigned(HashStr, /*Radix*/36); Index: clang/include/clang/Lex/ModuleMap.h =================================================================== --- clang/include/clang/Lex/ModuleMap.h +++ clang/include/clang/Lex/ModuleMap.h @@ -622,6 +622,15 @@ void setInferredModuleAllowedBy(Module *M, const FileEntry *ModMap); + /// Canonicalize \p Path in a manner suitable for a module map file. In + /// particular, this canonicalizes the parent directory separately from the + /// filename so that it does not affect header resolution relative to the + /// modulemap. + /// + /// \returns an error code if any filesystem operations failed. In this case + /// \p Path is not modified. + std::error_code canonicalizeModuleMapPath(SmallVectorImpl<char> &Path); + /// Get any module map files other than getModuleMapFileForUniquing(M) /// that define submodules of a top-level module \p M. This is cheaper than /// getting the module map file for each submodule individually, since the
_______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits