https://github.com/jhuber6 updated https://github.com/llvm/llvm-project/pull/79373
>From 145b7bc932ce3ffa46545cd7af29b1c93981429c Mon Sep 17 00:00:00 2001 From: Joseph Huber <hube...@outlook.com> Date: Wed, 24 Jan 2024 15:34:00 -0600 Subject: [PATCH 1/3] [NVPTX] Add support for -march=native in standalone NVPTX Summary: We support `--target=nvptx64-nvidia-cuda` as a way to target the NVPTX architecture from standard CPU. This patch simply uses the existing support for handling `--offload-arch=native` to also apply to the standalone toolchain. --- clang/lib/Driver/ToolChains/Cuda.cpp | 61 +++++++++++++--------- clang/lib/Driver/ToolChains/Cuda.h | 10 ++-- clang/test/Driver/nvptx-cuda-system-arch.c | 5 ++ 3 files changed, 45 insertions(+), 31 deletions(-) diff --git a/clang/lib/Driver/ToolChains/Cuda.cpp b/clang/lib/Driver/ToolChains/Cuda.cpp index 1462576ca870e6f..6215c43b5fc96bd 100644 --- a/clang/lib/Driver/ToolChains/Cuda.cpp +++ b/clang/lib/Driver/ToolChains/Cuda.cpp @@ -738,9 +738,18 @@ NVPTXToolChain::TranslateArgs(const llvm::opt::DerivedArgList &Args, if (!llvm::is_contained(*DAL, A)) DAL->append(A); - if (!DAL->hasArg(options::OPT_march_EQ)) + if (!DAL->hasArg(options::OPT_march_EQ)) { DAL->AddJoinedArg(nullptr, Opts.getOption(options::OPT_march_EQ), CudaArchToString(CudaArch::CudaDefault)); + } else if (DAL->getLastArgValue(options::OPT_march_EQ) == "native") { + auto GPUsOrErr = getSystemGPUArchs(Args); + if (!GPUsOrErr) + getDriver().Diag(diag::err_drv_undetermined_gpu_arch) + << getArchName() << llvm::toString(GPUsOrErr.takeError()) << "-march"; + else + DAL->AddJoinedArg(nullptr, Opts.getOption(options::OPT_march_EQ), + Args.MakeArgString(GPUsOrErr->front())); + } return DAL; } @@ -783,6 +792,31 @@ void NVPTXToolChain::adjustDebugInfoKind( } } +Expected<SmallVector<std::string>> +NVPTXToolChain::getSystemGPUArchs(const ArgList &Args) const { + // Detect NVIDIA GPUs available on the system. 
+ std::string Program; + if (Arg *A = Args.getLastArg(options::OPT_nvptx_arch_tool_EQ)) + Program = A->getValue(); + else + Program = GetProgramPath("nvptx-arch"); + + auto StdoutOrErr = executeToolChainProgram(Program); + if (!StdoutOrErr) + return StdoutOrErr.takeError(); + + SmallVector<std::string, 1> GPUArchs; + for (StringRef Arch : llvm::split((*StdoutOrErr)->getBuffer(), "\n")) + if (!Arch.empty()) + GPUArchs.push_back(Arch.str()); + + if (GPUArchs.empty()) + return llvm::createStringError(std::error_code(), + "No NVIDIA GPU detected in the system"); + + return std::move(GPUArchs); +} + /// CUDA toolchain. Our assembler is ptxas, and our "linker" is fatbinary, /// which isn't properly a linker but nonetheless performs the step of stitching /// together object files from the assembler into a single blob. @@ -948,31 +982,6 @@ CudaToolChain::TranslateArgs(const llvm::opt::DerivedArgList &Args, return DAL; } -Expected<SmallVector<std::string>> -CudaToolChain::getSystemGPUArchs(const ArgList &Args) const { - // Detect NVIDIA GPUs availible on the system. 
- std::string Program; - if (Arg *A = Args.getLastArg(options::OPT_nvptx_arch_tool_EQ)) - Program = A->getValue(); - else - Program = GetProgramPath("nvptx-arch"); - - auto StdoutOrErr = executeToolChainProgram(Program); - if (!StdoutOrErr) - return StdoutOrErr.takeError(); - - SmallVector<std::string, 1> GPUArchs; - for (StringRef Arch : llvm::split((*StdoutOrErr)->getBuffer(), "\n")) - if (!Arch.empty()) - GPUArchs.push_back(Arch.str()); - - if (GPUArchs.empty()) - return llvm::createStringError(std::error_code(), - "No NVIDIA GPU detected in the system"); - - return std::move(GPUArchs); -} - Tool *NVPTXToolChain::buildAssembler() const { return new tools::NVPTX::Assembler(*this); } diff --git a/clang/lib/Driver/ToolChains/Cuda.h b/clang/lib/Driver/ToolChains/Cuda.h index 8a053f3393e1206..43c17ba7c0ba03d 100644 --- a/clang/lib/Driver/ToolChains/Cuda.h +++ b/clang/lib/Driver/ToolChains/Cuda.h @@ -168,6 +168,11 @@ class LLVM_LIBRARY_VISIBILITY NVPTXToolChain : public ToolChain { unsigned GetDefaultDwarfVersion() const override { return 2; } unsigned getMaxDwarfVersion() const override { return 2; } + /// Uses nvptx-arch tool to get arch of the system GPU. Will return error + /// if unable to find one. + virtual Expected<SmallVector<std::string>> + getSystemGPUArchs(const llvm::opt::ArgList &Args) const override; + CudaInstallationDetector CudaInstallation; protected: @@ -223,11 +228,6 @@ class LLVM_LIBRARY_VISIBILITY CudaToolChain : public NVPTXToolChain { const ToolChain &HostTC; - /// Uses nvptx-arch tool to get arch of the system GPU. Will return error - /// if unable to find one. 
- virtual Expected<SmallVector<std::string>> - getSystemGPUArchs(const llvm::opt::ArgList &Args) const override; - protected: Tool *buildAssembler() const override; // ptxas Tool *buildLinker() const override; // fatbinary (ok, not really a linker) diff --git a/clang/test/Driver/nvptx-cuda-system-arch.c b/clang/test/Driver/nvptx-cuda-system-arch.c index 037215fd52a88b2..bd001f82052dc38 100644 --- a/clang/test/Driver/nvptx-cuda-system-arch.c +++ b/clang/test/Driver/nvptx-cuda-system-arch.c @@ -31,3 +31,8 @@ // RUN: %clang -### --target=x86_64-unknown-linux-gnu -nogpulib --offload-arch=native --offload-new-driver --nvptx-arch-tool=%t/nvptx_arch_sm_70 -x cuda --cuda-path=%S/Inputs/CUDA_102/usr/local/cuda %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=ARCH-sm_70 // ARCH-sm_70: "-cc1" "-triple" "nvptx64-nvidia-cuda"{{.*}}"-target-cpu" "sm_70" + +// case when nvptx-arch is used via '-march=native' +// RUN: %clang -### --target=nvptx64-nvidia-cuda -nogpulib -march=native --nvptx-arch-tool=%t/nvptx_arch_sm_70 \ +// RUN: --cuda-path=%S/Inputs/CUDA_102/usr/local/cuda %s 2>&1 | FileCheck %s --check-prefix=MARCH-sm_70 +// MARCH-sm_70: "-cc1" "-triple" "nvptx64-nvidia-cuda"{{.*}}"-target-cpu" "sm_70" >From a73f15b41d54f0bc8e40a4ed3a4877248d2f75d3 Mon Sep 17 00:00:00 2001 From: Joseph Huber <hube...@outlook.com> Date: Thu, 25 Jan 2024 07:50:58 -0600 Subject: [PATCH 2/3] Add test --- clang/lib/Driver/ToolChains/Cuda.cpp | 9 +++++++-- .../test/Driver/Inputs/nvptx-arch/nvptx_arch_sm_89_sm_80 | 4 ++++ clang/test/Driver/nvptx-cuda-system-arch.c | 8 ++++++++ 3 files changed, 19 insertions(+), 2 deletions(-) create mode 100644 clang/test/Driver/Inputs/nvptx-arch/nvptx_arch_sm_89_sm_80 diff --git a/clang/lib/Driver/ToolChains/Cuda.cpp b/clang/lib/Driver/ToolChains/Cuda.cpp index 6215c43b5fc96bd..71e7b46dbd72d8f 100644 --- a/clang/lib/Driver/ToolChains/Cuda.cpp +++ b/clang/lib/Driver/ToolChains/Cuda.cpp @@ -743,12 +743,17 @@ NVPTXToolChain::TranslateArgs(const llvm::opt::DerivedArgList 
&Args, CudaArchToString(CudaArch::CudaDefault)); } else if (DAL->getLastArgValue(options::OPT_march_EQ) == "native") { auto GPUsOrErr = getSystemGPUArchs(Args); - if (!GPUsOrErr) + if (!GPUsOrErr) { getDriver().Diag(diag::err_drv_undetermined_gpu_arch) << getArchName() << llvm::toString(GPUsOrErr.takeError()) << "-march"; - else + } else { + if (GPUsOrErr->size() > 1) + getDriver().Diag(diag::warn_drv_multi_gpu_arch) + << llvm::Triple::getArchTypeName(getArch()) + << llvm::join(*GPUsOrErr, ", ") << "-march"; DAL->AddJoinedArg(nullptr, Opts.getOption(options::OPT_march_EQ), Args.MakeArgString(GPUsOrErr->front())); + } } return DAL; diff --git a/clang/test/Driver/Inputs/nvptx-arch/nvptx_arch_sm_89_sm_80 b/clang/test/Driver/Inputs/nvptx-arch/nvptx_arch_sm_89_sm_80 new file mode 100644 index 000000000000000..c9df565b9f8d7af --- /dev/null +++ b/clang/test/Driver/Inputs/nvptx-arch/nvptx_arch_sm_89_sm_80 @@ -0,0 +1,4 @@ +#!/bin/sh +echo sm_89 +echo sm_80 +exit 0 diff --git a/clang/test/Driver/nvptx-cuda-system-arch.c b/clang/test/Driver/nvptx-cuda-system-arch.c index bd001f82052dc38..81cad3715066927 100644 --- a/clang/test/Driver/nvptx-cuda-system-arch.c +++ b/clang/test/Driver/nvptx-cuda-system-arch.c @@ -6,9 +6,11 @@ // RUN: mkdir -p %t // RUN: cp %S/Inputs/nvptx-arch/nvptx_arch_fail %t/ // RUN: cp %S/Inputs/nvptx-arch/nvptx_arch_sm_70 %t/ +// RUN: cp %S/Inputs/nvptx-arch/nvptx_arch_sm_89_sm_80 %t/ // RUN: echo '#!/bin/sh' > %t/nvptx_arch_empty // RUN: chmod +x %t/nvptx_arch_fail // RUN: chmod +x %t/nvptx_arch_sm_70 +// RUN: chmod +x %t/nvptx_arch_sm_89_sm_80 // RUN: chmod +x %t/nvptx_arch_empty // case when nvptx-arch returns nothing or fails @@ -36,3 +38,9 @@ // RUN: %clang -### --target=nvptx64-nvidia-cuda -nogpulib -march=native --nvptx-arch-tool=%t/nvptx_arch_sm_70 \ // RUN: --cuda-path=%S/Inputs/CUDA_102/usr/local/cuda %s 2>&1 | FileCheck %s --check-prefix=MARCH-sm_70 // MARCH-sm_70: "-cc1" "-triple" "nvptx64-nvidia-cuda"{{.*}}"-target-cpu" "sm_70" + +// case when 
nvptx-arch reports multiple GPUs via '-march=native' +// RUN: %clang -### --target=nvptx64-nvidia-cuda -nogpulib -march=native --nvptx-arch-tool=%t/nvptx_arch_sm_89_sm_80 \ +// RUN: --cuda-path=%S/Inputs/CUDA_102/usr/local/cuda %s 2>&1 | FileCheck %s --check-prefix=MARCH-sm_89 +// MARCH-sm_89: warning: multiple nvptx64 architectures are detected: sm_89, sm_80; only the first one is used for '-march' [-Wmulti-gpu] +// MARCH-sm_89: "-cc1" "-triple" "nvptx64-nvidia-cuda"{{.*}}"-target-cpu" "sm_89" >From b1429fdb3bac32afe04746869d6c5525d750f373 Mon Sep 17 00:00:00 2001 From: Joseph Huber <hube...@outlook.com> Date: Thu, 25 Jan 2024 07:53:15 -0600 Subject: [PATCH 3/3] Format --- clang/lib/Driver/ToolChains/Cuda.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/clang/lib/Driver/ToolChains/Cuda.cpp b/clang/lib/Driver/ToolChains/Cuda.cpp index 71e7b46dbd72d8f..8a9d0caaccf30bb 100644 --- a/clang/lib/Driver/ToolChains/Cuda.cpp +++ b/clang/lib/Driver/ToolChains/Cuda.cpp @@ -749,8 +749,7 @@ NVPTXToolChain::TranslateArgs(const llvm::opt::DerivedArgList &Args, } else { if (GPUsOrErr->size() > 1) getDriver().Diag(diag::warn_drv_multi_gpu_arch) - << llvm::Triple::getArchTypeName(getArch()) - << llvm::join(*GPUsOrErr, ", ") << "-march"; + << getArchName() << llvm::join(*GPUsOrErr, ", ") << "-march"; DAL->AddJoinedArg(nullptr, Opts.getOption(options::OPT_march_EQ), Args.MakeArgString(GPUsOrErr->front())); } _______________________________________________ lldb-commits mailing list lldb-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/lldb-commits