[clang] [llvm] [HIP][HIPSTDPAR][NFC] Re-order & adapt `hipstdpar` specific passes (PR #134753)

Alex Voicu via cfe-commits Fri, 11 Apr 2025 15:36:04 -0700

https://github.com/AlexVlx updated 
https://github.com/llvm/llvm-project/pull/134753


>From a988ecf63dc79d226c2f7aa1430f65d08256888b Mon Sep 17 00:00:00 2001
From: Alex Voicu <[email protected]>
Date: Tue, 8 Apr 2025 00:20:27 +0100
Subject: [PATCH 1/4] Re-order & adapt `hipstdpar` specific passes.

---
 clang/lib/CodeGen/BackendUtil.cpp             |  8 ++++----
 clang/lib/Driver/ToolChains/HIPAMD.cpp        |  7 ++++---
 .../lib/Target/AMDGPU/AMDGPUTargetMachine.cpp | 20 ++++++++++++-------
 3 files changed, 21 insertions(+), 14 deletions(-)

diff --git a/clang/lib/CodeGen/BackendUtil.cpp 
b/clang/lib/CodeGen/BackendUtil.cpp
index 7557cb8408921..fa5e12d4033a5 100644
--- a/clang/lib/CodeGen/BackendUtil.cpp
+++ b/clang/lib/CodeGen/BackendUtil.cpp
@@ -1115,6 +1115,10 @@ void EmitAssemblyHelper::RunOptimizationPipeline(
   if (CodeGenOpts.LinkBitcodePostopt)
     MPM.addPass(LinkInModulesPass(BC));
 
+  if (LangOpts.HIPStdPar && !LangOpts.CUDAIsDevice &&
+      LangOpts.HIPStdParInterposeAlloc)
+  MPM.addPass(HipStdParAllocationInterpositionPass());
+
   // Add a verifier pass if requested. We don't have to do this if the action
   // requires code generation because there will already be a verifier pass in
   // the code-generation pipeline.
@@ -1178,10 +1182,6 @@ void EmitAssemblyHelper::RunOptimizationPipeline(
     return;
   }
 
-  if (LangOpts.HIPStdPar && !LangOpts.CUDAIsDevice &&
-      LangOpts.HIPStdParInterposeAlloc)
-    MPM.addPass(HipStdParAllocationInterpositionPass());
-
   // Now that we have all of the passes ready, run them.
   {
     PrettyStackTraceString CrashInfo("Optimizer");
diff --git a/clang/lib/Driver/ToolChains/HIPAMD.cpp 
b/clang/lib/Driver/ToolChains/HIPAMD.cpp
index abb83701759ce..52e35a01be58d 100644
--- a/clang/lib/Driver/ToolChains/HIPAMD.cpp
+++ b/clang/lib/Driver/ToolChains/HIPAMD.cpp
@@ -231,10 +231,11 @@ void HIPAMDToolChain::addClangTargetOptions(
   CC1Args.append({"-fcuda-is-device", "-fno-threadsafe-statics"});
 
   if (!DriverArgs.hasFlag(options::OPT_fgpu_rdc, options::OPT_fno_gpu_rdc,
-                          false))
+                          false)) {
     CC1Args.append({"-mllvm", "-amdgpu-internalize-symbols"});
-  if (DriverArgs.hasArgNoClaim(options::OPT_hipstdpar))
-    CC1Args.append({"-mllvm", "-amdgpu-enable-hipstdpar"});
+    if (DriverArgs.hasArgNoClaim(options::OPT_hipstdpar))
+      CC1Args.append({"-mllvm", "-amdgpu-enable-hipstdpar"});
+  }
 
   StringRef MaxThreadsPerBlock =
       DriverArgs.getLastArgValue(options::OPT_gpu_max_threads_per_block_EQ);
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp 
b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index 4b5c70f09155f..03b1693244879 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -802,17 +802,17 @@ void 
AMDGPUTargetMachine::registerPassBuilderCallbacks(PassBuilder &PB) {
 #define GET_PASS_REGISTRY "AMDGPUPassRegistry.def"
 #include "llvm/Passes/TargetPassRegistry.inc"
 
-  PB.registerPipelineStartEPCallback(
-      [](ModulePassManager &PM, OptimizationLevel Level) {
-        if (EnableHipStdPar)
-          PM.addPass(HipStdParAcceleratorCodeSelectionPass());
-      });
-
   PB.registerPipelineEarlySimplificationEPCallback(
       [](ModulePassManager &PM, OptimizationLevel Level,
          ThinOrFullLTOPhase Phase) {
-        if (!isLTOPreLink(Phase))
+        if (!isLTOPreLink(Phase)) {
+          // When we are not using -fgpu-rdc, we can run accelerator code
+          // selection relatively early, but still after linking to prevent
+          // eager removal of potentially reachable symbols.
+          if (EnableHipStdPar)
+            PM.addPass(HipStdParAcceleratorCodeSelectionPass());
           PM.addPass(AMDGPUPrintfRuntimeBindingPass());
+        }
 
         if (Level == OptimizationLevel::O0)
           return;
@@ -883,6 +883,12 @@ void 
AMDGPUTargetMachine::registerPassBuilderCallbacks(PassBuilder &PB) {
 
   PB.registerFullLinkTimeOptimizationLastEPCallback(
       [this](ModulePassManager &PM, OptimizationLevel Level) {
+        // When we are using -fgpu-rdc, we can onky run accelerator code
+        // selection after linking to prevent, otherwise we end up removing
+        // potentially reachable symbols that were exported as external in 
other
+        // modules.
+        if (EnableHipStdPar)
+          PM.addPass(HipStdParAcceleratorCodeSelectionPass());
         // We want to support the -lto-partitions=N option as "best effort".
         // For that, we need to lower LDS earlier in the pipeline before the
         // module is partitioned for codegen.

>From 5cd1abb217d7fb2dd1f33c94a4f285b9aacd8dde Mon Sep 17 00:00:00 2001
From: Alex Voicu <[email protected]>
Date: Tue, 8 Apr 2025 00:27:18 +0100
Subject: [PATCH 2/4] Fix formatting.

---
 clang/lib/CodeGen/BackendUtil.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/clang/lib/CodeGen/BackendUtil.cpp 
b/clang/lib/CodeGen/BackendUtil.cpp
index fa5e12d4033a5..f7eb853beb23c 100644
--- a/clang/lib/CodeGen/BackendUtil.cpp
+++ b/clang/lib/CodeGen/BackendUtil.cpp
@@ -1117,7 +1117,7 @@ void EmitAssemblyHelper::RunOptimizationPipeline(
 
   if (LangOpts.HIPStdPar && !LangOpts.CUDAIsDevice &&
       LangOpts.HIPStdParInterposeAlloc)
-  MPM.addPass(HipStdParAllocationInterpositionPass());
+    MPM.addPass(HipStdParAllocationInterpositionPass());
 
   // Add a verifier pass if requested. We don't have to do this if the action
   // requires code generation because there will already be a verifier pass in

>From 2765739128a30b1dc94a8e8d8ab76a6f91c88e6a Mon Sep 17 00:00:00 2001
From: Alex Voicu <[email protected]>
Date: Tue, 8 Apr 2025 18:54:28 +0100
Subject: [PATCH 3/4] Add tests.

---
 .../rdc-does-not-enable-hipstdpar.cpp         | 11 ++++++++++
 .../select-accelerator-code-pass-ordering.cpp | 21 +++++++++++++++++++
 2 files changed, 32 insertions(+)
 create mode 100644 
clang/test/CodeGenHipStdPar/rdc-does-not-enable-hipstdpar.cpp
 create mode 100644 
clang/test/CodeGenHipStdPar/select-accelerator-code-pass-ordering.cpp

diff --git a/clang/test/CodeGenHipStdPar/rdc-does-not-enable-hipstdpar.cpp 
b/clang/test/CodeGenHipStdPar/rdc-does-not-enable-hipstdpar.cpp
new file mode 100644
index 0000000000000..c737fc9a42423
--- /dev/null
+++ b/clang/test/CodeGenHipStdPar/rdc-does-not-enable-hipstdpar.cpp
@@ -0,0 +1,11 @@
+// Check that if we are compiling with fgpu-rdc amdgpu-enable-hipstdpar is not
+// passed to CC1, to avoid eager, per TU, removal of potentially accessible
+// functions.
+
+// RUN: %clang -### --hipstdpar --offload-arch=gfx906 %s -nogpulib -nogpuinc \
+// RUN:   2>&1 | FileCheck -check-prefix=NORDC %s
+// NORDC: {{".*clang.*".* "-triple" "amdgcn-amd-amdhsa".* "-mllvm" 
"-amdgpu-enable-hipstdpar".*}}
+
+// RUN: %clang -### --hipstdpar --offload-arch=gfx906 %s -nogpulib -nogpuinc 
-fgpu-rdc \
+// RUN:   2>&1 | FileCheck -check-prefix=RDC %s
+// RDC-NOT: {{"-mllvm" "-amdgpu-enable-hipstdpar".*}}
\ No newline at end of file
diff --git 
a/clang/test/CodeGenHipStdPar/select-accelerator-code-pass-ordering.cpp 
b/clang/test/CodeGenHipStdPar/select-accelerator-code-pass-ordering.cpp
new file mode 100644
index 0000000000000..81aedc9cbcf03
--- /dev/null
+++ b/clang/test/CodeGenHipStdPar/select-accelerator-code-pass-ordering.cpp
@@ -0,0 +1,21 @@
+// Test that the accelerator code selection pass only gets invoked after 
linking
+
+// Ensure Pass HipStdParAcceleratorCodeSelectionPass is not invoked in PreLink.
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -mllvm -amdgpu-enable-hipstdpar 
-flto -emit-llvm-bc -fcuda-is-device -fdebug-pass-manager \
+// RUN:  %s -o - 2>&1 | FileCheck --check-prefix=HIPSTDPAR-PRE %s
+// HIPSTDPAR-PRE-NOT: Running pass: HipStdParAcceleratorCodeSelectionPass
+
+// Ensure Pass HipStdParAcceleratorCodeSelectionPass is invoked in PostLink.
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -mllvm -amdgpu-enable-hipstdpar 
-fcuda-is-device -fdebug-pass-manager -emit-llvm \
+// RUN:  %s -o - 2>&1 | FileCheck --check-prefix=HIPSTDPAR-POST %s
+// HIPSTDPAR-POST: Running pass: HipStdParAcceleratorCodeSelection
+
+#define __device__ __attribute__((device))
+
+void foo(float *a, float b) {
+  *a = b;
+}
+
+__device__ void bar(float *a, float b) {
+  *a = b;
+}

>From 9df1e62007e40c91227945b24da7c634d801c587 Mon Sep 17 00:00:00 2001
From: Alex Voicu <[email protected]>
Date: Fri, 11 Apr 2025 23:35:45 +0100
Subject: [PATCH 4/4] Fix typos.

---
 clang/test/CodeGenHipStdPar/rdc-does-not-enable-hipstdpar.cpp | 2 +-
 llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp                | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/clang/test/CodeGenHipStdPar/rdc-does-not-enable-hipstdpar.cpp 
b/clang/test/CodeGenHipStdPar/rdc-does-not-enable-hipstdpar.cpp
index c737fc9a42423..da1c3943553f0 100644
--- a/clang/test/CodeGenHipStdPar/rdc-does-not-enable-hipstdpar.cpp
+++ b/clang/test/CodeGenHipStdPar/rdc-does-not-enable-hipstdpar.cpp
@@ -8,4 +8,4 @@
 
 // RUN: %clang -### --hipstdpar --offload-arch=gfx906 %s -nogpulib -nogpuinc 
-fgpu-rdc \
 // RUN:   2>&1 | FileCheck -check-prefix=RDC %s
-// RDC-NOT: {{"-mllvm" "-amdgpu-enable-hipstdpar".*}}
\ No newline at end of file
+// RDC-NOT: {{"-mllvm" "-amdgpu-enable-hipstdpar".*}}
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp 
b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index dce6e594873aa..5fec974c5f9a7 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -886,7 +886,7 @@ void 
AMDGPUTargetMachine::registerPassBuilderCallbacks(PassBuilder &PB) {
 
   PB.registerFullLinkTimeOptimizationLastEPCallback(
       [this](ModulePassManager &PM, OptimizationLevel Level) {
-        // When we are using -fgpu-rdc, we can onky run accelerator code
+        // When we are using -fgpu-rdc, we can only run accelerator code
         // selection after linking to prevent, otherwise we end up removing
         // potentially reachable symbols that were exported as external in 
other
         // modules.

_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [llvm] [HIP][HIPSTDPAR][NFC] Re-order & adapt `hipstdpar` specific passes (PR #134753)

Reply via email to