[clang] [Clang] Make `-Xarch_` handling generic for all toolchains (PR #125421)

Joseph Huber via cfe-commits Mon, 03 Feb 2025 15:11:36 -0800

https://github.com/jhuber6 updated 
https://github.com/llvm/llvm-project/pull/125421


>From 79000d0a1ecd1312fb9bc06af0369b66a133e5d4 Mon Sep 17 00:00:00 2001
From: Joseph Huber <hube...@outlook.com>
Date: Sun, 2 Feb 2025 10:39:01 -0600
Subject: [PATCH] [Clang] Make `-Xarch_` handling generic for all toolchains

Summary:
Currently, `-Xarch_` is used to forward argument specially to certain
toolchains. Currently, this is only supported by the Darwin toolchain.
We want to be able to use this generically, and for offloading too. This
patch moves the handling out of the Darwin Toolchain and places it in
the `getArgsForToolchain` helper which is run before the arguments get
passed to the tools.

The main benefit here is that we now have a more generic version of
`-Xopenmp-target=`, which should probably just be deprecated.
Additionally, it allows us to specially pass arguments to different
architectures for offloading.

This patch is done in preparation for making selecting offloading
toolchains more generic, this will be helpful while people are moving
toward compile jobs that include multiple toolchins (SPIR-V, AMDGCN,
NVPTX).
---
 clang/include/clang/Driver/Options.td  |  7 ++--
 clang/lib/Driver/Driver.cpp            |  5 +--
 clang/lib/Driver/ToolChain.cpp         | 45 ++++++++++++++++++++------
 clang/lib/Driver/ToolChains/Darwin.cpp | 24 --------------
 clang/test/Driver/Xarch.c              |  8 +++++
 clang/test/Driver/offload-Xarch.c      | 39 ++++++++++++++++++++++
 6 files changed, 89 insertions(+), 39 deletions(-)
 create mode 100644 clang/test/Driver/offload-Xarch.c

diff --git a/clang/include/clang/Driver/Options.td 
b/clang/include/clang/Driver/Options.td
index 0ab923fcdd5838..765b7f882e99a8 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -932,7 +932,9 @@ def W_Joined : Joined<["-"], "W">, Group<W_Group>,
 def Xanalyzer : Separate<["-"], "Xanalyzer">,
   HelpText<"Pass <arg> to the static analyzer">, MetaVarName<"<arg>">,
   Group<StaticAnalyzer_Group>;
-def Xarch__ : JoinedAndSeparate<["-"], "Xarch_">, Flags<[NoXarchOption]>;
+def Xarch__ : JoinedAndSeparate<["-"], "Xarch_">, Flags<[NoXarchOption]>,
+  HelpText<"Pass <arg> to the compiliation if the target matches <arch>">,
+  MetaVarName<"<arch> <arg>">;
 def Xarch_host : Separate<["-"], "Xarch_host">, Flags<[NoXarchOption]>,
   HelpText<"Pass <arg> to the CUDA/HIP host compilation">, 
MetaVarName<"<arg>">;
 def Xarch_device : Separate<["-"], "Xarch_device">, Flags<[NoXarchOption]>,
@@ -1115,14 +1117,13 @@ def fno_convergent_functions : Flag<["-"], 
"fno-convergent-functions">,
 
 // Common offloading options
 let Group = offload_Group in {
-def offload_arch_EQ : Joined<["--"], "offload-arch=">, Flags<[NoXarchOption]>,
+def offload_arch_EQ : Joined<["--"], "offload-arch=">,
   Visibility<[ClangOption, FlangOption]>,
   HelpText<"Specify an offloading device architecture for CUDA, HIP, or 
OpenMP. (e.g. sm_35). "
            "If 'native' is used the compiler will detect locally installed 
architectures. "
            "For HIP offloading, the device architecture can be followed by 
target ID features "
            "delimited by a colon (e.g. gfx908:xnack+:sramecc-). May be 
specified more than once.">;
 def no_offload_arch_EQ : Joined<["--"], "no-offload-arch=">,
-  Flags<[NoXarchOption]>,
   Visibility<[ClangOption, FlangOption]>,
   HelpText<"Remove CUDA/HIP offloading device architecture (e.g. sm_35, 
gfx906) from the list of devices to compile for. "
            "'all' resets the list to its default value.">;
diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp
index 912777a9808b4b..5a4737fb381e6a 100644
--- a/clang/lib/Driver/Driver.cpp
+++ b/clang/lib/Driver/Driver.cpp
@@ -3409,7 +3409,9 @@ class OffloadingActionBuilder final {
       // Collect all offload arch parameters, removing duplicates.
       std::set<StringRef> GpuArchs;
       bool Error = false;
-      for (Arg *A : Args) {
+      const ToolChain &TC = *ToolChains.front();
+      for (Arg *A : C.getArgsForToolChain(&TC, /*BoundArch=*/"",
+                                          AssociatedOffloadKind)) {
         if (!(A->getOption().matches(options::OPT_offload_arch_EQ) ||
               A->getOption().matches(options::OPT_no_offload_arch_EQ)))
           continue;
@@ -3420,7 +3422,6 @@ class OffloadingActionBuilder final {
               ArchStr == "all") {
             GpuArchs.clear();
           } else if (ArchStr == "native") {
-            const ToolChain &TC = *ToolChains.front();
             auto GPUsOrErr = ToolChains.front()->getSystemGPUArchs(Args);
             if (!GPUsOrErr) {
               TC.getDriver().Diag(diag::err_drv_undetermined_gpu_arch)
diff --git a/clang/lib/Driver/ToolChain.cpp b/clang/lib/Driver/ToolChain.cpp
index ebc982096595e6..fc2a07ff26fc02 100644
--- a/clang/lib/Driver/ToolChain.cpp
+++ b/clang/lib/Driver/ToolChain.cpp
@@ -1648,7 +1648,8 @@ void ToolChain::TranslateXarchArgs(
       A->getOption().matches(options::OPT_Xarch_host))
     ValuePos = 0;
 
-  unsigned Index = Args.getBaseArgs().MakeIndex(A->getValue(ValuePos));
+  const InputArgList &BaseArgs = Args.getBaseArgs();
+  unsigned Index = BaseArgs.MakeIndex(A->getValue(ValuePos));
   unsigned Prev = Index;
   std::unique_ptr<llvm::opt::Arg> XarchArg(Opts.ParseOneArg(Args, Index));
 
@@ -1672,8 +1673,31 @@ void ToolChain::TranslateXarchArgs(
     Diags.Report(DiagID) << A->getAsString(Args);
     return;
   }
+
   XarchArg->setBaseArg(A);
   A = XarchArg.release();
+
+  // Linker input arguments require custom handling. The problem is that we
+  // have already constructed the phase actions, so we can not treat them as
+  // "input arguments".
+  if (A->getOption().hasFlag(options::LinkerInput)) {
+    // Convert the argument into individual Zlinker_input_args. Need to do this
+    // manually to avoid memory leaks with the allocated arguments.
+    for (const char *Value : A->getValues()) {
+      auto Opt = Opts.getOption(options::OPT_Zlinker_input);
+      unsigned Index = BaseArgs.MakeIndex(Opt.getName(), Value);
+      auto NewArg =
+          new Arg(Opt, BaseArgs.MakeArgString(Opt.getPrefix() + Opt.getName()),
+                  Index, BaseArgs.getArgString(Index + 1), A);
+
+      DAL->append(NewArg);
+      if (!AllocatedArgs)
+        DAL->AddSynthesizedArg(NewArg);
+      else
+        AllocatedArgs->push_back(NewArg);
+    }
+  }
+
   if (!AllocatedArgs)
     DAL->AddSynthesizedArg(A);
   else
@@ -1697,19 +1721,20 @@ llvm::opt::DerivedArgList 
*ToolChain::TranslateXarchArgs(
     } else if (A->getOption().matches(options::OPT_Xarch_host)) {
       NeedTrans = !IsDevice;
       Skip = IsDevice;
-    } else if (A->getOption().matches(options::OPT_Xarch__) && IsDevice) {
-      // Do not translate -Xarch_ options for non CUDA/HIP toolchain since
-      // they may need special translation.
-      // Skip this argument unless the architecture matches BoundArch
-      if (BoundArch.empty() || A->getValue(0) != BoundArch)
-        Skip = true;
-      else
-        NeedTrans = true;
+    } else if (A->getOption().matches(options::OPT_Xarch__)) {
+      StringRef Arg = A->getValue(1);
+      NeedTrans =
+          A->getValue() == getArchName() ||
+          (!BoundArch.empty() && A->getValue() == BoundArch) ||
+          (Arg.consume_front("--offload-arch=") && A->getValue() == Arg);
+      Skip = !NeedTrans;
     }
     if (NeedTrans || Skip)
       Modified = true;
-    if (NeedTrans)
+    if (NeedTrans) {
+      A->claim();
       TranslateXarchArgs(Args, A, DAL, AllocatedArgs);
+    }
     if (!Skip)
       DAL->append(A);
   }
diff --git a/clang/lib/Driver/ToolChains/Darwin.cpp 
b/clang/lib/Driver/ToolChains/Darwin.cpp
index 9a276c55bf7bcb..b26c5bf1a909ed 100644
--- a/clang/lib/Driver/ToolChains/Darwin.cpp
+++ b/clang/lib/Driver/ToolChains/Darwin.cpp
@@ -2777,30 +2777,6 @@ DerivedArgList *MachO::TranslateArgs(const 
DerivedArgList &Args,
   // and try to push it down into tool specific logic.
 
   for (Arg *A : Args) {
-    if (A->getOption().matches(options::OPT_Xarch__)) {
-      // Skip this argument unless the architecture matches either the 
toolchain
-      // triple arch, or the arch being bound.
-      StringRef XarchArch = A->getValue(0);
-      if (!(XarchArch == getArchName() ||
-            (!BoundArch.empty() && XarchArch == BoundArch)))
-        continue;
-
-      Arg *OriginalArg = A;
-      TranslateXarchArgs(Args, A, DAL);
-
-      // Linker input arguments require custom handling. The problem is that we
-      // have already constructed the phase actions, so we can not treat them 
as
-      // "input arguments".
-      if (A->getOption().hasFlag(options::LinkerInput)) {
-        // Convert the argument into individual Zlinker_input_args.
-        for (const char *Value : A->getValues()) {
-          DAL->AddSeparateArg(
-              OriginalArg, Opts.getOption(options::OPT_Zlinker_input), Value);
-        }
-        continue;
-      }
-    }
-
     // Sob. These is strictly gcc compatible for the time being. Apple
     // gcc translates options twice, which means that self-expanding
     // options add duplicates.
diff --git a/clang/test/Driver/Xarch.c b/clang/test/Driver/Xarch.c
index f7693fb689d588..f35e2926f9c8d5 100644
--- a/clang/test/Driver/Xarch.c
+++ b/clang/test/Driver/Xarch.c
@@ -1,8 +1,13 @@
 // RUN: %clang -target i386-apple-darwin11 -m32 -Xarch_i386 -O3 %s -S -### 
2>&1 | FileCheck -check-prefix=O3ONCE %s
+// RUN: %clang -target x86_64-unknown-linux-gnu -Xarch_x86_64 -O3 %s -S -### 
2>&1 | FileCheck -check-prefix=O3ONCE %s
+// RUN: %clang -target x86_64-unknown-windows-msvc -Xarch_x86_64 -O3 %s -S 
-### 2>&1 | FileCheck -check-prefix=O3ONCE %s
+// RUN: %clang -target aarch64-unknown-linux-gnu -Xarch_aarch64 -O3 %s -S -### 
2>&1 | FileCheck -check-prefix=O3ONCE %s
+// RUN: %clang -target powerpc64le-unknown-linux-gnu -Xarch_powerpc64le -O3 %s 
-S -### 2>&1 | FileCheck -check-prefix=O3ONCE %s
 // O3ONCE: "-O3"
 // O3ONCE-NOT: "-O3"
 
 // RUN: %clang -target i386-apple-darwin11 -m64 -Xarch_i386 -O3 %s -S -### 
2>&1 | FileCheck -check-prefix=O3NONE %s
+// RUN: %clang -target x86_64-unknown-linux-gnu -m64 -Xarch_i386 -O3 %s -S 
-### 2>&1 | FileCheck -check-prefix=O3NONE %s
 // O3NONE-NOT: "-O3"
 // O3NONE: argument unused during compilation: '-Xarch_i386 -O3'
 
@@ -10,3 +15,6 @@
 // INVALID: error: invalid Xarch argument: '-Xarch_i386 -o'
 // INVALID: error: invalid Xarch argument: '-Xarch_i386 -S'
 // INVALID: error: invalid Xarch argument: '-Xarch_i386 -o'
+
+// RUN: %clang -target x86_64-unknown-linux-gnu -Xarch_x86_64 -Wl,foo %s -### 
2>&1 | FileCheck -check-prefix=LINKER %s
+// LINKER: "foo"
diff --git a/clang/test/Driver/offload-Xarch.c 
b/clang/test/Driver/offload-Xarch.c
new file mode 100644
index 00000000000000..262ac1d1e62b49
--- /dev/null
+++ b/clang/test/Driver/offload-Xarch.c
@@ -0,0 +1,39 @@
+// RUN: %clang -x cuda %s -Xarch_nvptx64 -O3 -S -nogpulib -nogpuinc -### 2>&1 
| FileCheck -check-prefix=O3ONCE %s
+// RUN: %clang -x cuda %s -Xarch_device -O3 -S -nogpulib -nogpuinc -### 2>&1 | 
FileCheck -check-prefix=O3ONCE %s
+// RUN: %clang -x hip %s -Xarch_amdgcn -O3 -S -nogpulib -nogpuinc -### 2>&1 | 
FileCheck -check-prefix=O3ONCE %s
+// RUN: %clang -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa -nogpulib -nogpuinc 
\
+// RUN:   -Xarch_amdgcn -march=gfx90a -Xarch_amdgcn -O3 -S -### %s 2>&1 \
+// RUN: | FileCheck -check-prefix=O3ONCE %s
+// RUN: %clang -fopenmp -fopenmp-targets=nvptx64-nvidia-cuda -nogpulib 
-nogpuinc \
+// RUN:   -Xarch_nvptx64 -march=sm_52 -Xarch_nvptx64 -O3 -S -### %s 2>&1 \
+// RUN: | FileCheck -check-prefix=O3ONCE %s
+// O3ONCE: "-O3"
+// O3ONCE-NOT: "-O3"
+
+// RUN: %clang -fopenmp=libomp 
-fopenmp-targets=nvptx64-nvidia-cuda,amdgcn-amd-amdhsa -nogpulib \
+// RUN:   --target=x86_64-unknown-linux-gnu -Xarch_nvptx64 
--offload-arch=sm_52,sm_60 -nogpuinc \
+// RUN:   -Xarch_amdgcn --offload-arch=gfx90a,gfx1030 -ccc-print-bindings -### 
%s 2>&1 \
+// RUN: | FileCheck -check-prefix=OPENMP %s
+//
+// OPENMP: # "x86_64-unknown-linux-gnu" - "clang", inputs: ["[[INPUT:.+]]"], 
output: "[[HOST_BC:.+]]"
+// OPENMP: # "amdgcn-amd-amdhsa" - "clang", inputs: ["[[INPUT]]", 
"[[HOST_BC]]"], output: "[[GFX1030_BC:.+]]"
+// OPENMP: # "amdgcn-amd-amdhsa" - "clang", inputs: ["[[INPUT]]", 
"[[HOST_BC]]"], output: "[[GFX90A_BC:.+]]"
+// OPENMP: # "nvptx64-nvidia-cuda" - "clang", inputs: ["[[INPUT]]", 
"[[HOST_BC]]"], output: "[[SM52_PTX:.+]]"
+// OPENMP: # "nvptx64-nvidia-cuda" - "NVPTX::Assembler", inputs: 
["[[SM52_PTX]]"], output: "[[SM52_CUBIN:.+]]"
+// OPENMP: # "nvptx64-nvidia-cuda" - "clang", inputs: ["[[INPUT]]", 
"[[HOST_BC]]"], output: "[[SM60_PTX:.+]]"
+// OPENMP: # "nvptx64-nvidia-cuda" - "NVPTX::Assembler", inputs: 
["[[SM60_PTX]]"], output: "[[SM60_CUBIN:.+]]"
+// OPENMP: # "x86_64-unknown-linux-gnu" - "Offload::Packager", inputs: 
["[[GFX1030_BC]]", "[[GFX90A_BC]]", "[[SM52_CUBIN]]", "[[SM60_CUBIN]]"], 
output: "[[BINARY:.+]]"
+// OPENMP: # "x86_64-unknown-linux-gnu" - "clang", inputs: ["[[HOST_BC]]", 
"[[BINARY]]"], output: "[[HOST_OBJ:.+]]"
+// OPENMP: # "x86_64-unknown-linux-gnu" - "Offload::Linker", inputs: 
["[[HOST_OBJ]]"], output: "a.out"
+
+// RUN: %clang -x cuda %s --offload-arch=sm_52,sm_60 -Xarch_sm_52 -O3 
-Xarch_sm_60 -O0 \
+// RUN:   --target=x86_64-unknown-linux-gnu -Xarch_host -O3 -S -nogpulib 
-nogpuinc -### 2>&1 \
+// RUN: | FileCheck -check-prefix=CUDA %s
+// CUDA: "-cc1" "-triple" "nvptx64-nvidia-cuda" {{.*}}"-target-cpu" "sm_52" 
{{.*}}"-O3"
+// CUDA: "-cc1" "-triple" "nvptx64-nvidia-cuda" {{.*}}"-target-cpu" "sm_60" 
{{.*}}"-O0"
+// CUDA: "-cc1" "-triple" "x86_64-unknown-linux-gnu" {{.*}}"-O3"
+
+// RUN: %clang -x cuda %s -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda 
\
+// RUN:   -Xarch_sm_52 --offload-arch=sm_52 -S -nogpulib -nogpuinc -### 2>&1 \
+// RUN: | FileCheck -check-prefix=SPECIFIC %s
+// SPECIFIC: "-cc1" "-triple" "nvptx64-nvidia-cuda" {{.*}}"-target-cpu" "sm_52"

_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [Clang] Make `-Xarch_` handling generic for all toolchains (PR #125421)

Reply via email to