https://github.com/jhuber6 updated https://github.com/llvm/llvm-project/pull/84367
>From afac73145dede37a847064b0bf0b9681c431f7d3 Mon Sep 17 00:00:00 2001 From: Joseph Huber <hube...@outlook.com> Date: Thu, 7 Mar 2024 13:44:50 -0600 Subject: [PATCH] [CUDA] Include PTX in non-RDC mode using the new driver Summary: The old driver embeds PTX in rdc-mode and so does the `nvcc` compiler. The new driver currently does not do this, so we should keep it consistent in this case. This simply requires adding the assembler output as an input to the offloading action that gets fed to fatbin. --- clang/lib/Driver/Driver.cpp | 8 ++++++++ clang/lib/Driver/ToolChains/Cuda.cpp | 22 ++++++++++++---------- clang/test/Driver/cuda-phases.cu | 25 +++++++++++++------------ 3 files changed, 33 insertions(+), 22 deletions(-) diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp index cecd34acbc92c0..96e6ad77f5e50d 100644 --- a/clang/lib/Driver/Driver.cpp +++ b/clang/lib/Driver/Driver.cpp @@ -4625,7 +4625,15 @@ Action *Driver::BuildOffloadingActions(Compilation &C, DDeps.add(*A, *TCAndArch->first, TCAndArch->second.data(), Kind); OffloadAction::DeviceDependences DDep; DDep.add(*A, *TCAndArch->first, TCAndArch->second.data(), Kind); + + // Compiling CUDA in non-RDC mode uses the PTX output if available. 
+ for (Action *Input : A->getInputs()) + if (Kind == Action::OFK_Cuda && A->getType() == types::TY_Object && + !Args.hasFlag(options::OPT_fgpu_rdc, options::OPT_fno_gpu_rdc, + false)) + DDep.add(*Input, *TCAndArch->first, TCAndArch->second.data(), Kind); OffloadActions.push_back(C.MakeAction<OffloadAction>(DDep, A->getType())); + ++TCAndArch; } } diff --git a/clang/lib/Driver/ToolChains/Cuda.cpp b/clang/lib/Driver/ToolChains/Cuda.cpp index 177fd6310e7ee2..196ec29ef1465f 100644 --- a/clang/lib/Driver/ToolChains/Cuda.cpp +++ b/clang/lib/Driver/ToolChains/Cuda.cpp @@ -503,18 +503,20 @@ void NVPTX::Assembler::ConstructJob(Compilation &C, const JobAction &JA, Exec, CmdArgs, Inputs, Output)); } -static bool shouldIncludePTX(const ArgList &Args, const char *gpu_arch) { - bool includePTX = true; - for (Arg *A : Args) { - if (!(A->getOption().matches(options::OPT_cuda_include_ptx_EQ) || - A->getOption().matches(options::OPT_no_cuda_include_ptx_EQ))) - continue; +static bool shouldIncludePTX(const ArgList &Args, StringRef InputArch) { + // The new driver does not include PTX by default. 
+ bool includePTX = !Args.hasFlag(options::OPT_offload_new_driver, + options::OPT_no_offload_new_driver, false); + for (Arg *A : Args.filtered(options::OPT_cuda_include_ptx_EQ, + options::OPT_no_cuda_include_ptx_EQ)) { A->claim(); const StringRef ArchStr = A->getValue(); - if (ArchStr == "all" || ArchStr == gpu_arch) { - includePTX = A->getOption().matches(options::OPT_cuda_include_ptx_EQ); - continue; - } + if (A->getOption().matches(options::OPT_cuda_include_ptx_EQ) && + (ArchStr == "all" || ArchStr == InputArch)) + includePTX = true; + else if (A->getOption().matches(options::OPT_no_cuda_include_ptx_EQ) && + (ArchStr == "all" || ArchStr == InputArch)) + includePTX = false; } return includePTX; } diff --git a/clang/test/Driver/cuda-phases.cu b/clang/test/Driver/cuda-phases.cu index 9a231091de2bdc..a1c3c9b51b1e41 100644 --- a/clang/test/Driver/cuda-phases.cu +++ b/clang/test/Driver/cuda-phases.cu @@ -244,31 +244,32 @@ // NEW-DRIVER-RDC-NEXT: 18: assembler, {17}, object, (host-cuda) // NEW-DRIVER-RDC-NEXT: 19: clang-linker-wrapper, {18}, image, (host-cuda) -// RUN: %clang -### -target powerpc64le-ibm-linux-gnu -ccc-print-phases --offload-new-driver -fgpu-rdc \ +// RUN: %clang -### -target powerpc64le-ibm-linux-gnu -ccc-print-phases --offload-new-driver \ // RUN: --offload-arch=sm_52 --offload-arch=sm_70 %s 2>&1 | FileCheck --check-prefix=NEW-DRIVER %s -// NEW-DRIVER: 0: input, "[[INPUT:.+]]", cuda -// NEW-DRIVER-NEXT: 1: preprocessor, {0}, cuda-cpp-output -// NEW-DRIVER-NEXT: 2: compiler, {1}, ir -// NEW-DRIVER-NEXT: 3: input, "[[INPUT]]", cuda, (device-cuda, sm_52) +// NEW-DRIVER: 0: input, "[[CUDA:.+]]", cuda, (host-cuda) +// NEW-DRIVER-NEXT: 1: preprocessor, {0}, cuda-cpp-output, (host-cuda) +// NEW-DRIVER-NEXT: 2: compiler, {1}, ir, (host-cuda) +// NEW-DRIVER-NEXT: 3: input, "[[CUDA]]", cuda, (device-cuda, sm_52) // NEW-DRIVER-NEXT: 4: preprocessor, {3}, cuda-cpp-output, (device-cuda, sm_52) // NEW-DRIVER-NEXT: 5: compiler, {4}, ir, (device-cuda, sm_52) // 
NEW-DRIVER-NEXT: 6: backend, {5}, assembler, (device-cuda, sm_52) // NEW-DRIVER-NEXT: 7: assembler, {6}, object, (device-cuda, sm_52) -// NEW-DRIVER-NEXT: 8: offload, "device-cuda (nvptx64-nvidia-cuda:sm_52)" {7}, object -// NEW-DRIVER-NEXT: 9: input, "[[INPUT]]", cuda, (device-cuda, sm_70) +// NEW-DRIVER-NEXT: 8: offload, "device-cuda (nvptx64-nvidia-cuda:sm_52)" {7}, "device-cuda (nvptx64-nvidia-cuda:sm_52)" {6}, object +// NEW-DRIVER-NEXT: 9: input, "[[CUDA]]", cuda, (device-cuda, sm_70) // NEW-DRIVER-NEXT: 10: preprocessor, {9}, cuda-cpp-output, (device-cuda, sm_70) // NEW-DRIVER-NEXT: 11: compiler, {10}, ir, (device-cuda, sm_70) // NEW-DRIVER-NEXT: 12: backend, {11}, assembler, (device-cuda, sm_70) // NEW-DRIVER-NEXT: 13: assembler, {12}, object, (device-cuda, sm_70) -// NEW-DRIVER-NEXT: 14: offload, "device-cuda (nvptx64-nvidia-cuda:sm_70)" {13}, object -// NEW-DRIVER-NEXT: 15: clang-offload-packager, {8, 14}, image -// NEW-DRIVER-NEXT: 16: offload, "host-cuda (powerpc64le-ibm-linux-gnu)" {2}, "device-cuda (powerpc64le-ibm-linux-gnu)" {15}, ir +// NEW-DRIVER-NEXT: 14: offload, "device-cuda (nvptx64-nvidia-cuda:sm_70)" {13}, "device-cuda (nvptx64-nvidia-cuda:sm_70)" {12}, object +// NEW-DRIVER-NEXT: 15: linker, {8, 14}, cuda-fatbin, (device-cuda) +// NEW-DRIVER-NEXT: 16: offload, "host-cuda (powerpc64le-ibm-linux-gnu)" {2}, "device-cuda (nvptx64-nvidia-cuda)" {15}, ir // NEW-DRIVER-NEXT: 17: backend, {16}, assembler, (host-cuda) // NEW-DRIVER-NEXT: 18: assembler, {17}, object, (host-cuda) // NEW-DRIVER-NEXT: 19: clang-linker-wrapper, {18}, image, (host-cuda) // RUN: %clang -### --target=powerpc64le-ibm-linux-gnu -ccc-print-phases --offload-new-driver \ // RUN: --offload-arch=sm_52 --offload-arch=sm_70 %s %S/Inputs/empty.cpp 2>&1 | FileCheck --check-prefix=NON-CUDA-INPUT %s + // NON-CUDA-INPUT: 0: input, "[[CUDA:.+]]", cuda, (host-cuda) // NON-CUDA-INPUT-NEXT: 1: preprocessor, {0}, cuda-cpp-output, (host-cuda) // NON-CUDA-INPUT-NEXT: 2: compiler, {1}, ir, 
(host-cuda) @@ -277,13 +278,13 @@ // NON-CUDA-INPUT-NEXT: 5: compiler, {4}, ir, (device-cuda, sm_52) // NON-CUDA-INPUT-NEXT: 6: backend, {5}, assembler, (device-cuda, sm_52) // NON-CUDA-INPUT-NEXT: 7: assembler, {6}, object, (device-cuda, sm_52) -// NON-CUDA-INPUT-NEXT: 8: offload, "device-cuda (nvptx64-nvidia-cuda:sm_52)" {7}, object +// NON-CUDA-INPUT-NEXT: 8: offload, "device-cuda (nvptx64-nvidia-cuda:sm_52)" {7}, "device-cuda (nvptx64-nvidia-cuda:sm_52)" {6}, object // NON-CUDA-INPUT-NEXT: 9: input, "[[CUDA]]", cuda, (device-cuda, sm_70) // NON-CUDA-INPUT-NEXT: 10: preprocessor, {9}, cuda-cpp-output, (device-cuda, sm_70) // NON-CUDA-INPUT-NEXT: 11: compiler, {10}, ir, (device-cuda, sm_70) // NON-CUDA-INPUT-NEXT: 12: backend, {11}, assembler, (device-cuda, sm_70) // NON-CUDA-INPUT-NEXT: 13: assembler, {12}, object, (device-cuda, sm_70) -// NON-CUDA-INPUT-NEXT: 14: offload, "device-cuda (nvptx64-nvidia-cuda:sm_70)" {13}, object +// NON-CUDA-INPUT-NEXT: 14: offload, "device-cuda (nvptx64-nvidia-cuda:sm_70)" {13}, "device-cuda (nvptx64-nvidia-cuda:sm_70)" {12}, object // NON-CUDA-INPUT-NEXT: 15: linker, {8, 14}, cuda-fatbin, (device-cuda) // NON-CUDA-INPUT-NEXT: 16: offload, "host-cuda (powerpc64le-ibm-linux-gnu)" {2}, "device-cuda (nvptx64-nvidia-cuda)" {15}, ir // NON-CUDA-INPUT-NEXT: 17: backend, {16}, assembler, (host-cuda) _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits