https://github.com/qiongsiwu updated 
https://github.com/llvm/llvm-project/pull/124786

>From 7060564de1bb6062639f4b4839fa17958f212755 Mon Sep 17 00:00:00 2001
From: Qiongsi Wu <qiongsi...@apple.com>
Date: Mon, 27 Jan 2025 16:44:30 -0800
Subject: [PATCH 1/2] Initial implementation of clang modules current working
 directory pruning.

---
 .../DependencyScanningService.h               |   5 +-
 .../DependencyScanning/ModuleDepCollector.cpp |  92 ++++++++++++-
 .../ClangScanDeps/modules-context-hash-cwd.c  | 123 ++++++++++++++++++
 clang/test/ClangScanDeps/working-dir.m        |   2 +-
 clang/tools/clang-scan-deps/ClangScanDeps.cpp |   2 +
 5 files changed, 219 insertions(+), 5 deletions(-)
 create mode 100644 clang/test/ClangScanDeps/modules-context-hash-cwd.c

diff --git a/clang/include/clang/Tooling/DependencyScanning/DependencyScanningService.h b/clang/include/clang/Tooling/DependencyScanning/DependencyScanningService.h
index 4a343f2872d8d9..9ad8e68c33eb10 100644
--- a/clang/include/clang/Tooling/DependencyScanning/DependencyScanningService.h
+++ b/clang/include/clang/Tooling/DependencyScanning/DependencyScanningService.h
@@ -63,7 +63,10 @@ enum class ScanningOptimizations {
   /// Canonicalize -D and -U options.
   Macros = 8,
 
-  DSS_LAST_BITMASK_ENUM(Macros),
+  /// Ignore the compiler's working directory if it is safe.
+  IgnoreCWD = 0x10,
+
+  DSS_LAST_BITMASK_ENUM(IgnoreCWD),
   Default = All
 };
 
diff --git a/clang/lib/Tooling/DependencyScanning/ModuleDepCollector.cpp b/clang/lib/Tooling/DependencyScanning/ModuleDepCollector.cpp
index 2e97cac0796cee..714efb86fa3796 100644
--- a/clang/lib/Tooling/DependencyScanning/ModuleDepCollector.cpp
+++ b/clang/lib/Tooling/DependencyScanning/ModuleDepCollector.cpp
@@ -397,9 +397,92 @@ void ModuleDepCollector::applyDiscoveredDependencies(CompilerInvocation &CI) {
   }
 }
 
+static bool isSafeToIgnoreCWD(const CowCompilerInvocation &CI) {
+  // Check if the command line input uses relative paths.
+  // It is not safe to ignore the current working directory if any of the
+  // command line inputs use relative paths.
+#define IF_RELATIVE_RETURN_FALSE(PATH)                                         \
+  do {                                                                         \
+    if (!PATH.empty() && !llvm::sys::path::is_absolute(PATH))                  \
+      return false;                                                            \
+  } while (0)
+
+#define IF_ANY_RELATIVE_RETURN_FALSE(PATHS)                                    \
+  do {                                                                         \
+    if (std::any_of(PATHS.begin(), PATHS.end(), [](const auto &P) {            \
+          return !P.empty() && !llvm::sys::path::is_absolute(P);               \
+        }))                                                                    \
+      return false;                                                            \
+  } while (0)
+
+  // Header search paths.
+  const auto &HeaderSearchOpts = CI.getHeaderSearchOpts();
+  IF_RELATIVE_RETURN_FALSE(HeaderSearchOpts.Sysroot);
+  for (auto &Entry : HeaderSearchOpts.UserEntries)
+    if (Entry.IgnoreSysRoot)
+      IF_RELATIVE_RETURN_FALSE(Entry.Path);
+  IF_RELATIVE_RETURN_FALSE(HeaderSearchOpts.ResourceDir);
+  IF_RELATIVE_RETURN_FALSE(HeaderSearchOpts.ModuleCachePath);
+  IF_RELATIVE_RETURN_FALSE(HeaderSearchOpts.ModuleUserBuildPath);
+  for (auto I = HeaderSearchOpts.PrebuiltModuleFiles.begin(),
+            E = HeaderSearchOpts.PrebuiltModuleFiles.end();
+       I != E;) {
+    auto Current = I++;
+    IF_RELATIVE_RETURN_FALSE(Current->second);
+  }
+  IF_ANY_RELATIVE_RETURN_FALSE(HeaderSearchOpts.PrebuiltModulePaths);
+  IF_ANY_RELATIVE_RETURN_FALSE(HeaderSearchOpts.VFSOverlayFiles);
+
+  // Preprocessor options.
+  const auto &PPOpts = CI.getPreprocessorOpts();
+  IF_ANY_RELATIVE_RETURN_FALSE(PPOpts.MacroIncludes);
+  IF_ANY_RELATIVE_RETURN_FALSE(PPOpts.Includes);
+  IF_RELATIVE_RETURN_FALSE(PPOpts.ImplicitPCHInclude);
+
+  // Frontend options.
+  const auto &FrontendOpts = CI.getFrontendOpts();
+  for (const FrontendInputFile &Input : FrontendOpts.Inputs) {
+    if (Input.isBuffer())
+      continue; // FIXME: Can this happen when parsing command-line?
+
+    IF_RELATIVE_RETURN_FALSE(Input.getFile());
+  }
+  IF_RELATIVE_RETURN_FALSE(FrontendOpts.CodeCompletionAt.FileName);
+  IF_ANY_RELATIVE_RETURN_FALSE(FrontendOpts.ModuleMapFiles);
+  IF_ANY_RELATIVE_RETURN_FALSE(FrontendOpts.ModuleFiles);
+  IF_ANY_RELATIVE_RETURN_FALSE(FrontendOpts.ModulesEmbedFiles);
+  IF_ANY_RELATIVE_RETURN_FALSE(FrontendOpts.ASTMergeFiles);
+  IF_RELATIVE_RETURN_FALSE(FrontendOpts.OverrideRecordLayoutsFile);
+  IF_RELATIVE_RETURN_FALSE(FrontendOpts.StatsFile);
+
+  // Filesystem options.
+  const auto &FileSystemOpts = CI.getFileSystemOpts();
+  IF_RELATIVE_RETURN_FALSE(FileSystemOpts.WorkingDir);
+
+  // Codegen options.
+  const auto &CodeGenOpts = CI.getCodeGenOpts();
+  IF_RELATIVE_RETURN_FALSE(CodeGenOpts.DebugCompilationDir);
+  IF_RELATIVE_RETURN_FALSE(CodeGenOpts.CoverageCompilationDir);
+
+  // Sanitizer options.
+  IF_ANY_RELATIVE_RETURN_FALSE(CI.getLangOpts().NoSanitizeFiles);
+
+  // Coverage mappings.
+  IF_RELATIVE_RETURN_FALSE(CodeGenOpts.ProfileInstrumentUsePath);
+  IF_RELATIVE_RETURN_FALSE(CodeGenOpts.SampleProfileFile);
+  IF_RELATIVE_RETURN_FALSE(CodeGenOpts.ProfileRemappingFile);
+
+  // Dependency output options.
+  for (auto &ExtraDep : CI.getDependencyOutputOpts().ExtraDeps)
+    IF_RELATIVE_RETURN_FALSE(ExtraDep.first);
+
+  return true;
+}
+
 static std::string getModuleContextHash(const ModuleDeps &MD,
                                         const CowCompilerInvocation &CI,
                                         bool EagerLoadModules,
+                                        bool IgnoreCWD,
                                         llvm::vfs::FileSystem &VFS) {
   llvm::HashBuilder<llvm::TruncatedBLAKE3<16>, llvm::endianness::native>
       HashBuilder;
@@ -410,7 +493,7 @@ static std::string getModuleContextHash(const ModuleDeps &MD,
   HashBuilder.add(getClangFullRepositoryVersion());
   HashBuilder.add(serialization::VERSION_MAJOR, serialization::VERSION_MINOR);
   llvm::ErrorOr<std::string> CWD = VFS.getCurrentWorkingDirectory();
-  if (CWD)
+  if (CWD && !IgnoreCWD)
     HashBuilder.add(*CWD);
 
   // Hash the BuildInvocation without any input files.
@@ -443,8 +526,11 @@ static std::string getModuleContextHash(const ModuleDeps &MD,
 
 void ModuleDepCollector::associateWithContextHash(
     const CowCompilerInvocation &CI, ModuleDeps &Deps) {
-  Deps.ID.ContextHash = getModuleContextHash(
-      Deps, CI, EagerLoadModules, ScanInstance.getVirtualFileSystem());
+  bool IgnoreCWD = any(OptimizeArgs & ScanningOptimizations::IgnoreCWD) &&
+                   isSafeToIgnoreCWD(CI);
+  Deps.ID.ContextHash =
+      getModuleContextHash(Deps, CI, EagerLoadModules, IgnoreCWD,
+                           ScanInstance.getVirtualFileSystem());
   bool Inserted = ModuleDepsByID.insert({Deps.ID, &Deps}).second;
   (void)Inserted;
   assert(Inserted && "duplicate module mapping");
diff --git a/clang/test/ClangScanDeps/modules-context-hash-cwd.c b/clang/test/ClangScanDeps/modules-context-hash-cwd.c
new file mode 100644
index 00000000000000..45be72301c635d
--- /dev/null
+++ b/clang/test/ClangScanDeps/modules-context-hash-cwd.c
@@ -0,0 +1,123 @@
+// Test current directory pruning when computing the context hash.
+
+// REQUIRES: shell
+
+// RUN: rm -rf %t
+// RUN: split-file %s %t
+// RUN: sed -e "s|DIR|%/t|g" %t/cdb0.json.in > %t/cdb0.json
+// RUN: sed -e "s|DIR|%/t|g" %t/cdb1.json.in > %t/cdb1.json
+// RUN: sed -e "s|DIR|%/t|g" %t/cdb2.json.in > %t/cdb2.json
+// RUN: clang-scan-deps -compilation-database %t/cdb0.json -format experimental-full > %t/result0.json
+// RUN: clang-scan-deps -compilation-database %t/cdb1.json -format experimental-full > %t/result1.json
+// RUN: clang-scan-deps -compilation-database %t/cdb2.json -format experimental-full -optimize-args=header-search,system-warnings,vfs,canonicalize-macros > %t/result2.json
+// RUN: cat %t/result0.json %t/result1.json | FileCheck %s
+// RUN: cat %t/result0.json %t/result2.json | FileCheck %s -check-prefix=SKIPOPT
+
+//--- cdb0.json.in
+[{
+  "directory": "DIR",
+  "command": "clang -c DIR/tu.c -fmodules -fmodules-cache-path=DIR/cache -IDIR/include/ -o DIR/tu.o",
+  "file": "DIR/tu.c"
+}]
+
+//--- cdb1.json.in
+[{
+  "directory": "DIR/a",
+  "command": "clang -c DIR/tu.c -fmodules -fmodules-cache-path=DIR/cache -IDIR/include/ -o DIR/tu.o",
+  "file": "DIR/tu.c"
+}]
+
+//--- cdb2.json.in
+[{
+  "directory": "DIR/a/",
+  "command": "clang -c DIR/tu.c -fmodules -fmodules-cache-path=DIR/cache -IDIR/include/ -o DIR/tu.o",
+  "file": "DIR/tu.c"
+}]
+
+//--- include/module.modulemap
+module mod {
+  header "mod.h"
+}
+
+//--- include/mod.h
+
+//--- tu.c
+#include "mod.h"
+
+// Check that result0 and result1 compute the same hash with optimization
+// on. The only difference between result0 and result1 is the compiler's
+// working directory.
+// CHECK:     {
+// CHECK-NEXT:  "modules": [
+// CHECK-NEXT:   {
+// CHECK-NEXT:     "clang-module-deps": [],
+// CHECK:          "context-hash": "[[HASH:.*]]",
+// CHECK:        }
+// CHECK:       "translation-units": [
+// CHECK:        {
+// CHECK:          "commands": [
+// CHECK:          {
+// CHECK-NEXT:        "clang-context-hash": "{{.*}}",
+// CHECK-NEXT:        "clang-module-deps": [
+// CHECK-NEXT:          {
+// CHECK-NEXT:            "context-hash": "[[HASH]]",
+// CHECK-NEXT:            "module-name": "mod"
+// CHECK:               }
+// CHECK:             ],
+// CHECK:     {
+// CHECK-NEXT:   "modules": [
+// CHECK-NEXT:    {
+// CHECK-NEXT:      "clang-module-deps": [],
+// CHECK:           "context-hash": "[[HASH]]",
+// CHECK:         }
+// CHECK:        "translation-units": [
+// CHECK:         {
+// CHECK:           "commands": [
+// CHECK:           {
+// CHECK-NEXT:         "clang-context-hash": "{{.*}}",
+// CHECK-NEXT:         "clang-module-deps": [
+// CHECK-NEXT:           {
+// CHECK-NEXT:             "context-hash": "[[HASH]]",
+// CHECK-NEXT:             "module-name": "mod"
+// CHECK:               }
+// CHECK:              ],
+
+// Check that result0 and result2 compute different hashes because
+// the working directory optimization is turned off for result2.
+// SKIPOPT:      {
+// SKIPOPT-NEXT:   "modules": [
+// SKIPOPT-NEXT:    {
+// SKIPOPT-NEXT:      "clang-module-deps": [],
+// SKIPOPT:           "context-hash": "[[HASH0:.*]]",
+// SKIPOPT:         }
+// SKIPOPT:        "translation-units": [
+// SKIPOPT:         {
+// SKIPOPT:            "commands": [
+// SKIPOPT:             {
+// SKIPOPT-NEXT:          "clang-context-hash": "{{.*}}",
+// SKIPOPT-NEXT:          "clang-module-deps": [
+// SKIPOPT-NEXT:            {
+// SKIPOPT-NEXT:              "context-hash": "[[HASH0]]",
+// SKIPOPT-NEXT:              "module-name": "mod"
+// SKIPOPT:            }
+// SKIPOPT:          ],
+// SKIPOPT:      {
+// SKIPOPT-NEXT:   "modules": [
+// SKIPOPT-NEXT:     {
+// SKIPOPT-NEXT:       "clang-module-deps": [],
+// SKIPOPT-NOT:        "context-hash": "[[HASH0]]",
+// SKIPOPT:            "context-hash": "[[HASH2:.*]]",
+// SKIPOPT:          }
+// SKIPOPT:       "translation-units": [
+// SKIPOPT:         {
+// SKIPOPT:           "commands": [
+// SKIPOPT:             {
+// SKIPOPT-NEXT:          "clang-context-hash": "{{.*}}",
+// SKIPOPT-NEXT:          "clang-module-deps": [
+// SKIPOPT-NEXT:            {
+// SKIPOPT-NOT:              "context-hash": "[[HASH0]]",
+// SKIPOPT-NEXT:             "context-hash": "[[HASH2]]"
+// SKIPOPT-NEXT:              "module-name": "mod"
+// SKIPOPT:            }
+// SKIPOPT:          ],
+
diff --git a/clang/test/ClangScanDeps/working-dir.m b/clang/test/ClangScanDeps/working-dir.m
index a04f8c2486b98d..c6b7b1988d3cf7 100644
--- a/clang/test/ClangScanDeps/working-dir.m
+++ b/clang/test/ClangScanDeps/working-dir.m
@@ -2,7 +2,7 @@
 // RUN: split-file %s %t
 // RUN: sed -e "s|DIR|%/t|g" %t/build/compile-commands.json.in > %t/build/compile-commands.json
 // RUN: clang-scan-deps -compilation-database %t/build/compile-commands.json \
-// RUN:   -j 1 -format experimental-full --optimize-args=all > %t/deps.db
+// RUN:   -j 1 -format experimental-full --optimize-args=header-search,system-warnings,vfs,canonicalize-macros > %t/deps.db
 // RUN: cat %t/deps.db | sed 's:\\\\\?:/:g' | FileCheck %s -DPREFIX=%/t
 
 // Check that there are two separate modules hashes. One for each working dir.
diff --git a/clang/tools/clang-scan-deps/ClangScanDeps.cpp b/clang/tools/clang-scan-deps/ClangScanDeps.cpp
index 709dc513be2811..8d429534a20073 100644
--- a/clang/tools/clang-scan-deps/ClangScanDeps.cpp
+++ b/clang/tools/clang-scan-deps/ClangScanDeps.cpp
@@ -164,6 +164,8 @@ static void ParseArgs(int argc, char **argv) {
             .Case("system-warnings", ScanningOptimizations::SystemWarnings)
             .Case("vfs", ScanningOptimizations::VFS)
             .Case("canonicalize-macros", ScanningOptimizations::Macros)
+            .Case("ignore-current-working-dir",
+                  ScanningOptimizations::IgnoreCWD)
             .Case("all", ScanningOptimizations::All)
             .Default(std::nullopt);
     if (!Optimization) {

>From 1306637c4d7d64386b653dae71b3be1ffc1952d7 Mon Sep 17 00:00:00 2001
From: Qiongsi Wu <qiongsi...@apple.com>
Date: Tue, 28 Jan 2025 14:44:04 -0800
Subject: [PATCH 2/2] Fix formatting

---
 clang/lib/Tooling/DependencyScanning/ModuleDepCollector.cpp | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/clang/lib/Tooling/DependencyScanning/ModuleDepCollector.cpp b/clang/lib/Tooling/DependencyScanning/ModuleDepCollector.cpp
index 714efb86fa3796..f84e849bce8d1b 100644
--- a/clang/lib/Tooling/DependencyScanning/ModuleDepCollector.cpp
+++ b/clang/lib/Tooling/DependencyScanning/ModuleDepCollector.cpp
@@ -481,8 +481,7 @@ static bool isSafeToIgnoreCWD(const CowCompilerInvocation &CI) {
 
 static std::string getModuleContextHash(const ModuleDeps &MD,
                                         const CowCompilerInvocation &CI,
-                                        bool EagerLoadModules,
-                                        bool IgnoreCWD,
+                                        bool EagerLoadModules, bool IgnoreCWD,
                                         llvm::vfs::FileSystem &VFS) {
   llvm::HashBuilder<llvm::TruncatedBLAKE3<16>, llvm::endianness::native>
       HashBuilder;

_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to