https://github.com/qiongsiwu updated https://github.com/llvm/llvm-project/pull/124786
>From 7060564de1bb6062639f4b4839fa17958f212755 Mon Sep 17 00:00:00 2001 From: Qiongsi Wu <qiongsi...@apple.com> Date: Mon, 27 Jan 2025 16:44:30 -0800 Subject: [PATCH 1/2] Initial implementation of clang modules current working directory pruning. --- .../DependencyScanningService.h | 5 +- .../DependencyScanning/ModuleDepCollector.cpp | 92 ++++++++++++- .../ClangScanDeps/modules-context-hash-cwd.c | 123 ++++++++++++++++++ clang/test/ClangScanDeps/working-dir.m | 2 +- clang/tools/clang-scan-deps/ClangScanDeps.cpp | 2 + 5 files changed, 219 insertions(+), 5 deletions(-) create mode 100644 clang/test/ClangScanDeps/modules-context-hash-cwd.c diff --git a/clang/include/clang/Tooling/DependencyScanning/DependencyScanningService.h b/clang/include/clang/Tooling/DependencyScanning/DependencyScanningService.h index 4a343f2872d8d9..9ad8e68c33eb10 100644 --- a/clang/include/clang/Tooling/DependencyScanning/DependencyScanningService.h +++ b/clang/include/clang/Tooling/DependencyScanning/DependencyScanningService.h @@ -63,7 +63,10 @@ enum class ScanningOptimizations { /// Canonicalize -D and -U options. Macros = 8, - DSS_LAST_BITMASK_ENUM(Macros), + /// Ignore the compiler's working directory if it is safe. + IgnoreCWD = 0x10, + + DSS_LAST_BITMASK_ENUM(IgnoreCWD), Default = All }; diff --git a/clang/lib/Tooling/DependencyScanning/ModuleDepCollector.cpp b/clang/lib/Tooling/DependencyScanning/ModuleDepCollector.cpp index 2e97cac0796cee..714efb86fa3796 100644 --- a/clang/lib/Tooling/DependencyScanning/ModuleDepCollector.cpp +++ b/clang/lib/Tooling/DependencyScanning/ModuleDepCollector.cpp @@ -397,9 +397,92 @@ void ModuleDepCollector::applyDiscoveredDependencies(CompilerInvocation &CI) { } } +static bool isSafeToIgnoreCWD(const CowCompilerInvocation &CI) { + // Check if the command line input uses relative paths. + // It is not safe to ignore the current working directory if any of the + // command line inputs use relative paths. +#define IF_RELATIVE_RETURN_FALSE(PATH) \ + do { \ + if (!PATH.empty() && !llvm::sys::path::is_absolute(PATH)) \ + return false; \ + } while (0) + +#define IF_ANY_RELATIVE_RETURN_FALSE(PATHS) \ + do { \ + if (std::any_of(PATHS.begin(), PATHS.end(), [](const auto &P) { \ + return !P.empty() && !llvm::sys::path::is_absolute(P); \ + })) \ + return false; \ + } while (0) + + // Header search paths. + const auto &HeaderSearchOpts = CI.getHeaderSearchOpts(); + IF_RELATIVE_RETURN_FALSE(HeaderSearchOpts.Sysroot); + for (auto &Entry : HeaderSearchOpts.UserEntries) + if (Entry.IgnoreSysRoot) + IF_RELATIVE_RETURN_FALSE(Entry.Path); + IF_RELATIVE_RETURN_FALSE(HeaderSearchOpts.ResourceDir); + IF_RELATIVE_RETURN_FALSE(HeaderSearchOpts.ModuleCachePath); + IF_RELATIVE_RETURN_FALSE(HeaderSearchOpts.ModuleUserBuildPath); + for (auto I = HeaderSearchOpts.PrebuiltModuleFiles.begin(), + E = HeaderSearchOpts.PrebuiltModuleFiles.end(); + I != E;) { + auto Current = I++; + IF_RELATIVE_RETURN_FALSE(Current->second); + } + IF_ANY_RELATIVE_RETURN_FALSE(HeaderSearchOpts.PrebuiltModulePaths); + IF_ANY_RELATIVE_RETURN_FALSE(HeaderSearchOpts.VFSOverlayFiles); + + // Preprocessor options. + const auto &PPOpts = CI.getPreprocessorOpts(); + IF_ANY_RELATIVE_RETURN_FALSE(PPOpts.MacroIncludes); + IF_ANY_RELATIVE_RETURN_FALSE(PPOpts.Includes); + IF_RELATIVE_RETURN_FALSE(PPOpts.ImplicitPCHInclude); + + // Frontend options. + const auto &FrontendOpts = CI.getFrontendOpts(); + for (const FrontendInputFile &Input : FrontendOpts.Inputs) { + if (Input.isBuffer()) + continue; // FIXME: Can this happen when parsing command-line? + + IF_RELATIVE_RETURN_FALSE(Input.getFile()); + } + IF_RELATIVE_RETURN_FALSE(FrontendOpts.CodeCompletionAt.FileName); + IF_ANY_RELATIVE_RETURN_FALSE(FrontendOpts.ModuleMapFiles); + IF_ANY_RELATIVE_RETURN_FALSE(FrontendOpts.ModuleFiles); + IF_ANY_RELATIVE_RETURN_FALSE(FrontendOpts.ModulesEmbedFiles); + IF_ANY_RELATIVE_RETURN_FALSE(FrontendOpts.ASTMergeFiles); + IF_RELATIVE_RETURN_FALSE(FrontendOpts.OverrideRecordLayoutsFile); + IF_RELATIVE_RETURN_FALSE(FrontendOpts.StatsFile); + + // Filesystem options. + const auto &FileSystemOpts = CI.getFileSystemOpts(); + IF_RELATIVE_RETURN_FALSE(FileSystemOpts.WorkingDir); + + // Codegen options. + const auto &CodeGenOpts = CI.getCodeGenOpts(); + IF_RELATIVE_RETURN_FALSE(CodeGenOpts.DebugCompilationDir); + IF_RELATIVE_RETURN_FALSE(CodeGenOpts.CoverageCompilationDir); + + // Sanitizer options. + IF_ANY_RELATIVE_RETURN_FALSE(CI.getLangOpts().NoSanitizeFiles); + + // Coverage mappings. + IF_RELATIVE_RETURN_FALSE(CodeGenOpts.ProfileInstrumentUsePath); + IF_RELATIVE_RETURN_FALSE(CodeGenOpts.SampleProfileFile); + IF_RELATIVE_RETURN_FALSE(CodeGenOpts.ProfileRemappingFile); + + // Dependency output options. + for (auto &ExtraDep : CI.getDependencyOutputOpts().ExtraDeps) + IF_RELATIVE_RETURN_FALSE(ExtraDep.first); + + return true; +} + static std::string getModuleContextHash(const ModuleDeps &MD, const CowCompilerInvocation &CI, bool EagerLoadModules, + bool IgnoreCWD, llvm::vfs::FileSystem &VFS) { llvm::HashBuilder<llvm::TruncatedBLAKE3<16>, llvm::endianness::native> HashBuilder; @@ -410,7 +493,7 @@ static std::string getModuleContextHash(const ModuleDeps &MD, HashBuilder.add(getClangFullRepositoryVersion()); HashBuilder.add(serialization::VERSION_MAJOR, serialization::VERSION_MINOR); llvm::ErrorOr<std::string> CWD = VFS.getCurrentWorkingDirectory(); - if (CWD) + if (CWD && !IgnoreCWD) HashBuilder.add(*CWD); // Hash the BuildInvocation without any input files. @@ -443,8 +526,11 @@ static std::string getModuleContextHash(const ModuleDeps &MD, void ModuleDepCollector::associateWithContextHash( const CowCompilerInvocation &CI, ModuleDeps &Deps) { - Deps.ID.ContextHash = getModuleContextHash( - Deps, CI, EagerLoadModules, ScanInstance.getVirtualFileSystem()); + bool IgnoreCWD = any(OptimizeArgs & ScanningOptimizations::IgnoreCWD) && + isSafeToIgnoreCWD(CI); + Deps.ID.ContextHash = + getModuleContextHash(Deps, CI, EagerLoadModules, IgnoreCWD, + ScanInstance.getVirtualFileSystem()); bool Inserted = ModuleDepsByID.insert({Deps.ID, &Deps}).second; (void)Inserted; assert(Inserted && "duplicate module mapping"); diff --git a/clang/test/ClangScanDeps/modules-context-hash-cwd.c b/clang/test/ClangScanDeps/modules-context-hash-cwd.c new file mode 100644 index 00000000000000..45be72301c635d --- /dev/null +++ b/clang/test/ClangScanDeps/modules-context-hash-cwd.c @@ -0,0 +1,123 @@ +// Test current directory pruning when computing the context hash. + +// REQUIRES: shell + +// RUN: rm -rf %t +// RUN: split-file %s %t +// RUN: sed -e "s|DIR|%/t|g" %t/cdb0.json.in > %t/cdb0.json +// RUN: sed -e "s|DIR|%/t|g" %t/cdb1.json.in > %t/cdb1.json +// RUN: sed -e "s|DIR|%/t|g" %t/cdb2.json.in > %t/cdb2.json +// RUN: clang-scan-deps -compilation-database %t/cdb0.json -format experimental-full > %t/result0.json +// RUN: clang-scan-deps -compilation-database %t/cdb1.json -format experimental-full > %t/result1.json +// RUN: clang-scan-deps -compilation-database %t/cdb2.json -format experimental-full -optimize-args=header-search,system-warnings,vfs,canonicalize-macros > %t/result2.json +// RUN: cat %t/result0.json %t/result1.json | FileCheck %s +// RUN: cat %t/result0.json %t/result2.json | FileCheck %s -check-prefix=SKIPOPT + +//--- cdb0.json.in +[{ + "directory": "DIR", + "command": "clang -c DIR/tu.c -fmodules -fmodules-cache-path=DIR/cache -IDIR/include/ -o DIR/tu.o", + "file": "DIR/tu.c" +}] + +//--- cdb1.json.in +[{ + "directory": "DIR/a", + "command": "clang -c DIR/tu.c -fmodules -fmodules-cache-path=DIR/cache -IDIR/include/ -o DIR/tu.o", + "file": "DIR/tu.c" +}] + +//--- cdb2.json.in +[{ + "directory": "DIR/a/", + "command": "clang -c DIR/tu.c -fmodules -fmodules-cache-path=DIR/cache -IDIR/include/ -o DIR/tu.o", + "file": "DIR/tu.c" +}] + +//--- include/module.modulemap +module mod { + header "mod.h" +} + +//--- include/mod.h + +//--- tu.c +#include "mod.h" + +// Check that result0 and result1 compute the same hash with optimization +// on. The only difference between result0 and result1 is the compiler's +// working directory. +// CHECK: { +// CHECK-NEXT: "modules": [ +// CHECK-NEXT: { +// CHECK-NEXT: "clang-module-deps": [], +// CHECK: "context-hash": "[[HASH:.*]]", +// CHECK: } +// CHECK: "translation-units": [ +// CHECK: { +// CHECK: "commands": [ +// CHECK: { +// CHECK-NEXT: "clang-context-hash": "{{.*}}", +// CHECK-NEXT: "clang-module-deps": [ +// CHECK-NEXT: { +// CHECK-NEXT: "context-hash": "[[HASH]]", +// CHECK-NEXT: "module-name": "mod" +// CHECK: } +// CHECK: ], +// CHECK: { +// CHECK-NEXT: "modules": [ +// CHECK-NEXT: { +// CHECK-NEXT: "clang-module-deps": [], +// CHECK: "context-hash": "[[HASH]]", +// CHECK: } +// CHECK: "translation-units": [ +// CHECK: { +// CHECK: "commands": [ +// CHECK: { +// CHECK-NEXT: "clang-context-hash": "{{.*}}", +// CHECK-NEXT: "clang-module-deps": [ +// CHECK-NEXT: { +// CHECK-NEXT: "context-hash": "[[HASH]]", +// CHECK-NEXT: "module-name": "mod" +// CHECK: } +// CHECK: ], + +// Check that result0 and result2 compute different hashes because +// the working directory optmization is turned off for result2. +// SKIPOPT: { +// SKIPOPT-NEXT: "modules": [ +// SKIPOPT-NEXT: { +// SKIPOPT-NEXT: "clang-module-deps": [], +// SKIPOPT: "context-hash": "[[HASH0:.*]]", +// SKIPOPT: } +// SKIPOPT: "translation-units": [ +// SKIPOPT: { +// SKIPOPT: "commands": [ +// SKIPOPT: { +// SKIPOPT-NEXT: "clang-context-hash": "{{.*}}", +// SKIPOPT-NEXT: "clang-module-deps": [ +// SKIPOPT-NEXT: { +// SKIPOPT-NEXT: "context-hash": "[[HASH0]]", +// SKIPOPT-NEXT: "module-name": "mod" +// SKIPOPT: } +// SKIPOPT: ], +// SKIPOPT: { +// SKIPOPT-NEXT: "modules": [ +// SKIPOPT-NEXT: { +// SKIPOPT-NEXT: "clang-module-deps": [], +// SKIPOPT-NOT: "context-hash": "[[HASH0]]", +// SKIPOPT: "context-hash": "[[HASH2:.*]]", +// SKIPOPT: } +// SKIPOPT: "translation-units": [ +// SKIPOPT: { +// SKIPOPT: "commands": [ +// SKIPOPT: { +// SKIPOPT-NEXT: "clang-context-hash": "{{.*}}", +// SKIPOPT-NEXT: "clang-module-deps": [ +// SKIPOPT-NEXT: { +// SKIPOPT-NOT: "context-hash": "[[HASH0]]", +// SKIPOPT-NEXT: "context-hash": "[[HASH2]]" +// SKIPOPT-NEXT: "module-name": "mod" +// SKIPOPT: } +// SKIPOPT: ], + diff --git a/clang/test/ClangScanDeps/working-dir.m b/clang/test/ClangScanDeps/working-dir.m index a04f8c2486b98d..c6b7b1988d3cf7 100644 --- a/clang/test/ClangScanDeps/working-dir.m +++ b/clang/test/ClangScanDeps/working-dir.m @@ -2,7 +2,7 @@ // RUN: split-file %s %t // RUN: sed -e "s|DIR|%/t|g" %t/build/compile-commands.json.in > %t/build/compile-commands.json // RUN: clang-scan-deps -compilation-database %t/build/compile-commands.json \ -// RUN: -j 1 -format experimental-full --optimize-args=all > %t/deps.db +// RUN: -j 1 -format experimental-full --optimize-args=header-search,system-warnings,vfs,canonicalize-macros > %t/deps.db // RUN: cat %t/deps.db | sed 's:\\\\\?:/:g' | FileCheck %s -DPREFIX=%/t // Check that there are two separate modules hashes. One for each working dir. diff --git a/clang/tools/clang-scan-deps/ClangScanDeps.cpp b/clang/tools/clang-scan-deps/ClangScanDeps.cpp index 709dc513be2811..8d429534a20073 100644 --- a/clang/tools/clang-scan-deps/ClangScanDeps.cpp +++ b/clang/tools/clang-scan-deps/ClangScanDeps.cpp @@ -164,6 +164,8 @@ static void ParseArgs(int argc, char **argv) { .Case("system-warnings", ScanningOptimizations::SystemWarnings) .Case("vfs", ScanningOptimizations::VFS) .Case("canonicalize-macros", ScanningOptimizations::Macros) + .Case("ignore-current-working-dir", + ScanningOptimizations::IgnoreCWD) .Case("all", ScanningOptimizations::All) .Default(std::nullopt); if (!Optimization) { >From 1306637c4d7d64386b653dae71b3be1ffc1952d7 Mon Sep 17 00:00:00 2001 From: Qiongsi Wu <qiongsi...@apple.com> Date: Tue, 28 Jan 2025 14:44:04 -0800 Subject: [PATCH 2/2] Fix formatting --- clang/lib/Tooling/DependencyScanning/ModuleDepCollector.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/clang/lib/Tooling/DependencyScanning/ModuleDepCollector.cpp b/clang/lib/Tooling/DependencyScanning/ModuleDepCollector.cpp index 714efb86fa3796..f84e849bce8d1b 100644 --- a/clang/lib/Tooling/DependencyScanning/ModuleDepCollector.cpp +++ b/clang/lib/Tooling/DependencyScanning/ModuleDepCollector.cpp @@ -481,8 +481,7 @@ static bool isSafeToIgnoreCWD(const CowCompilerInvocation &CI) { static std::string getModuleContextHash(const ModuleDeps &MD, const CowCompilerInvocation &CI, - bool EagerLoadModules, - bool IgnoreCWD, + bool EagerLoadModules, bool IgnoreCWD, llvm::vfs::FileSystem &VFS) { llvm::HashBuilder<llvm::TruncatedBLAKE3<16>, llvm::endianness::native> HashBuilder; _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits