https://github.com/Artem-B updated https://github.com/llvm/llvm-project/pull/74895
>From 3ce8e08b94e33480139e13ca9f0fd7b719ff2c3d Mon Sep 17 00:00:00 2001 From: Artem Belevich <t...@google.com> Date: Wed, 6 Dec 2023 12:11:38 -0800 Subject: [PATCH 1/2] [CUDA] Add support for CUDA-12.3 and sm_90a --- clang/docs/ReleaseNotes.rst | 3 +++ clang/include/clang/Basic/BuiltinsNVPTX.def | 13 +++++++++++-- clang/include/clang/Basic/Cuda.h | 7 +++++-- clang/lib/Basic/Cuda.cpp | 5 +++++ clang/lib/Basic/Targets/NVPTX.cpp | 3 +++ clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp | 1 + clang/lib/Driver/ToolChains/Cuda.cpp | 6 ++++++ clang/test/Misc/target-invalid-cpu-note.c | 2 +- llvm/lib/Target/NVPTX/NVPTX.td | 19 ++++++++++--------- llvm/lib/Target/NVPTX/NVPTXSubtarget.cpp | 7 ++++++- llvm/lib/Target/NVPTX/NVPTXSubtarget.h | 11 +++++++++-- 11 files changed, 60 insertions(+), 17 deletions(-) diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index 89ea2f0930cec..1bf68a46a64da 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -937,6 +937,9 @@ CUDA/HIP Language Changes CUDA Support ^^^^^^^^^^^^ +- Clang now supports CUDA SDK up to 12.3 +- Added support for sm_90a + AIX Support ^^^^^^^^^^^ diff --git a/clang/include/clang/Basic/BuiltinsNVPTX.def b/clang/include/clang/Basic/BuiltinsNVPTX.def index d74a7d1e55dd2..0f2e8260143be 100644 --- a/clang/include/clang/Basic/BuiltinsNVPTX.def +++ b/clang/include/clang/Basic/BuiltinsNVPTX.def @@ -26,7 +26,9 @@ #pragma push_macro("SM_87") #pragma push_macro("SM_89") #pragma push_macro("SM_90") -#define SM_90 "sm_90" +#pragma push_macro("SM_90a") +#define SM_90a "sm_90a" +#define SM_90 "sm_90|" SM_90a #define SM_89 "sm_89|" SM_90 #define SM_87 "sm_87|" SM_89 #define SM_86 "sm_86|" SM_87 @@ -56,7 +58,11 @@ #pragma push_macro("PTX78") #pragma push_macro("PTX80") #pragma push_macro("PTX81") -#define PTX81 "ptx81" +#pragma push_macro("PTX82") +#pragma push_macro("PTX83") +#define PTX83 "ptx83" +#define PTX82 "ptx82|" PTX83 +#define PTX81 "ptx81|" PTX82 #define PTX80 "ptx80|" PTX81 #define 
PTX78 "ptx78|" PTX80 #define PTX77 "ptx77|" PTX78 @@ -1055,6 +1061,7 @@ TARGET_BUILTIN(__nvvm_getctarank_shared_cluster, "iv*3", "", AND(SM_90,PTX78)) #pragma pop_macro("SM_87") #pragma pop_macro("SM_89") #pragma pop_macro("SM_90") +#pragma pop_macro("SM_90a") #pragma pop_macro("PTX42") #pragma pop_macro("PTX60") #pragma pop_macro("PTX61") @@ -1072,3 +1079,5 @@ TARGET_BUILTIN(__nvvm_getctarank_shared_cluster, "iv*3", "", AND(SM_90,PTX78)) #pragma pop_macro("PTX78") #pragma pop_macro("PTX80") #pragma pop_macro("PTX81") +#pragma pop_macro("PTX82") +#pragma pop_macro("PTX83") diff --git a/clang/include/clang/Basic/Cuda.h b/clang/include/clang/Basic/Cuda.h index 2d912bdbbd1bc..916cb4b7ef34a 100644 --- a/clang/include/clang/Basic/Cuda.h +++ b/clang/include/clang/Basic/Cuda.h @@ -39,9 +39,11 @@ enum class CudaVersion { CUDA_118, CUDA_120, CUDA_121, - FULLY_SUPPORTED = CUDA_118, + CUDA_122, + CUDA_123, + FULLY_SUPPORTED = CUDA_123, PARTIALLY_SUPPORTED = - CUDA_121, // Partially supported. Proceed with a warning. + CUDA_123, // Partially supported. Proceed with a warning. NEW = 10000, // Too new. Issue a warning, but allow using it. }; const char *CudaVersionToString(CudaVersion V); @@ -71,6 +73,7 @@ enum class CudaArch { SM_87, SM_89, SM_90, + SM_90a, GFX600, GFX601, GFX602, diff --git a/clang/lib/Basic/Cuda.cpp b/clang/lib/Basic/Cuda.cpp index 65840b9f20252..1b1da6a1356f2 100644 --- a/clang/lib/Basic/Cuda.cpp +++ b/clang/lib/Basic/Cuda.cpp @@ -39,6 +39,8 @@ static const CudaVersionMapEntry CudaNameVersionMap[] = { CUDA_ENTRY(11, 8), CUDA_ENTRY(12, 0), CUDA_ENTRY(12, 1), + CUDA_ENTRY(12, 2), + CUDA_ENTRY(12, 3), {"", CudaVersion::NEW, llvm::VersionTuple(std::numeric_limits<int>::max())}, {"unknown", CudaVersion::UNKNOWN, {}} // End of list tombstone. 
}; @@ -93,6 +95,7 @@ static const CudaArchToStringMap arch_names[] = { SM(87), // Jetson/Drive AGX Orin SM(89), // Ada Lovelace SM(90), // Hopper + SM(90a), // Hopper GFX(600), // gfx600 GFX(601), // gfx601 GFX(602), // gfx602 @@ -209,6 +212,8 @@ CudaVersion MinVersionForCudaArch(CudaArch A) { case CudaArch::SM_89: case CudaArch::SM_90: return CudaVersion::CUDA_118; + case CudaArch::SM_90a: + return CudaVersion::CUDA_120; default: llvm_unreachable("invalid enum"); } diff --git a/clang/lib/Basic/Targets/NVPTX.cpp b/clang/lib/Basic/Targets/NVPTX.cpp index 3a4a75b0348f2..5c601812f6175 100644 --- a/clang/lib/Basic/Targets/NVPTX.cpp +++ b/clang/lib/Basic/Targets/NVPTX.cpp @@ -262,11 +262,14 @@ void NVPTXTargetInfo::getTargetDefines(const LangOptions &Opts, case CudaArch::SM_89: return "890"; case CudaArch::SM_90: + case CudaArch::SM_90a: return "900"; } llvm_unreachable("unhandled CudaArch"); }(); Builder.defineMacro("__CUDA_ARCH__", CUDAArchCode); + if (GPU == CudaArch::SM_90a) + Builder.defineMacro("__CUDA_ARCH_FEAT_SM90_ALL", "1"); } } diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp index 293ccaa3413cd..299ee1460b3db 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp @@ -3483,6 +3483,7 @@ void CGOpenMPRuntimeGPU::processRequiresDirective( case CudaArch::SM_87: case CudaArch::SM_89: case CudaArch::SM_90: + case CudaArch::SM_90a: case CudaArch::GFX600: case CudaArch::GFX601: case CudaArch::GFX602: diff --git a/clang/lib/Driver/ToolChains/Cuda.cpp b/clang/lib/Driver/ToolChains/Cuda.cpp index e95ff98e6c940..ef1e77974c1ea 100644 --- a/clang/lib/Driver/ToolChains/Cuda.cpp +++ b/clang/lib/Driver/ToolChains/Cuda.cpp @@ -78,6 +78,10 @@ CudaVersion getCudaVersion(uint32_t raw_version) { return CudaVersion::CUDA_120; if (raw_version < 12020) return CudaVersion::CUDA_121; + if (raw_version < 12030) + return CudaVersion::CUDA_122; + if (raw_version < 12040) + return 
CudaVersion::CUDA_123; return CudaVersion::NEW; } @@ -671,6 +675,8 @@ void NVPTX::getNVPTXTargetFeatures(const Driver &D, const llvm::Triple &Triple, case CudaVersion::CUDA_##CUDA_VER: \ PtxFeature = "+ptx" #PTX_VER; \ break; + CASE_CUDA_VERSION(123, 83); + CASE_CUDA_VERSION(122, 82); CASE_CUDA_VERSION(121, 81); CASE_CUDA_VERSION(120, 80); CASE_CUDA_VERSION(118, 78); diff --git a/clang/test/Misc/target-invalid-cpu-note.c b/clang/test/Misc/target-invalid-cpu-note.c index c7146e63add5f..5475b1d8bd052 100644 --- a/clang/test/Misc/target-invalid-cpu-note.c +++ b/clang/test/Misc/target-invalid-cpu-note.c @@ -29,7 +29,7 @@ // RUN: not %clang_cc1 -triple nvptx--- -target-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix NVPTX // NVPTX: error: unknown target CPU 'not-a-cpu' -// NVPTX-NEXT: note: valid target CPU values are: sm_20, sm_21, sm_30, sm_32, sm_35, sm_37, sm_50, sm_52, sm_53, sm_60, sm_61, sm_62, sm_70, sm_72, sm_75, sm_80, sm_86, sm_87, sm_89, sm_90, gfx600, gfx601, gfx602, gfx700, gfx701, gfx702, gfx703, gfx704, gfx705, gfx801, gfx802, gfx803, gfx805, gfx810, gfx900, gfx902, gfx904, gfx906, gfx908, gfx909, gfx90a, gfx90c, gfx940, gfx941, gfx942, gfx1010, gfx1011, gfx1012, gfx1013, gfx1030, gfx1031, gfx1032, gfx1033, gfx1034, gfx1035, gfx1036, gfx1100, gfx1101, gfx1102, gfx1103, gfx1150, gfx1151, gfx1200, gfx1201{{$}} +// NVPTX-NEXT: note: valid target CPU values are: sm_20, sm_21, sm_30, sm_32, sm_35, sm_37, sm_50, sm_52, sm_53, sm_60, sm_61, sm_62, sm_70, sm_72, sm_75, sm_80, sm_86, sm_87, sm_89, sm_90, sm_90a, gfx600, gfx601, gfx602, gfx700, gfx701, gfx702, gfx703, gfx704, gfx705, gfx801, gfx802, gfx803, gfx805, gfx810, gfx900, gfx902, gfx904, gfx906, gfx908, gfx909, gfx90a, gfx90c, gfx940, gfx941, gfx942, gfx1010, gfx1011, gfx1012, gfx1013, gfx1030, gfx1031, gfx1032, gfx1033, gfx1034, gfx1035, gfx1036, gfx1100, gfx1101, gfx1102, gfx1103, gfx1150, gfx1151, gfx1200, gfx1201{{$}} // RUN: not %clang_cc1 -triple r600--- -target-cpu not-a-cpu 
-fsyntax-only %s 2>&1 | FileCheck %s --check-prefix R600 // R600: error: unknown target CPU 'not-a-cpu' diff --git a/llvm/lib/Target/NVPTX/NVPTX.td b/llvm/lib/Target/NVPTX/NVPTX.td index 02fa2a4ee81ec..f2a4ce381b40b 100644 --- a/llvm/lib/Target/NVPTX/NVPTX.td +++ b/llvm/lib/Target/NVPTX/NVPTX.td @@ -24,23 +24,24 @@ include "NVPTXInstrInfo.td" // TableGen in NVPTXGenSubtarget.inc. //===----------------------------------------------------------------------===// -class FeatureSM<int version>: - SubtargetFeature<"sm_"# version, "SmVersion", - "" # version, - "Target SM " # version>; -def SM90a: FeatureSM<90>; +class FeatureSM<string sm, int value>: + SubtargetFeature<"sm_"# sm, "FullSmVersion", + "" # value, + "Target SM " # sm>; class FeaturePTX<int version>: SubtargetFeature<"ptx"# version, "PTXVersion", "" # version, "Use PTX version " # version>; -foreach version = [20, 21, 30, 32, 35, 37, 50, 52, 53, - 60, 61, 62, 70, 72, 75, 80, 86, 87, 89, 90] in - def SM#version: FeatureSM<version>; +foreach sm = [20, 21, 30, 32, 35, 37, 50, 52, 53, + 60, 61, 62, 70, 72, 75, 80, 86, 87, 89, 90] in + def SM#sm: FeatureSM<""#sm, !mul(sm, 10)>; + +def SM90a: FeatureSM<"90a", 901>; foreach version = [32, 40, 41, 42, 43, 50, 60, 61, 63, 64, 65, - 70, 71, 72, 73, 74, 75, 76, 77, 78, 80, 81] in + 70, 71, 72, 73, 74, 75, 76, 77, 78, 80, 81, 82, 83] in def PTX#version: FeaturePTX<version>; //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/NVPTX/NVPTXSubtarget.cpp b/llvm/lib/Target/NVPTX/NVPTXSubtarget.cpp index 7fa64af196b93..420065585b384 100644 --- a/llvm/lib/Target/NVPTX/NVPTXSubtarget.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXSubtarget.cpp @@ -36,6 +36,11 @@ NVPTXSubtarget &NVPTXSubtarget::initializeSubtargetDependencies(StringRef CPU, ParseSubtargetFeatures(TargetName, /*TuneCPU*/ TargetName, FS); + // Re-map SM version numbers, SmVersion carries the regular SMs which do + // have relative order, while FullSmVersion allows 
distinguishing sm_90 from + // sm_90a, which would *not* be a subset of sm_91. + SmVersion = getSmVersion(); + // Set default to PTX 6.0 (CUDA 9.0) if (PTXVersion == 0) { PTXVersion = 60; @@ -48,7 +53,7 @@ NVPTXSubtarget::NVPTXSubtarget(const Triple &TT, const std::string &CPU, const std::string &FS, const NVPTXTargetMachine &TM) : NVPTXGenSubtargetInfo(TT, CPU, /*TuneCPU*/ CPU, FS), PTXVersion(0), - SmVersion(20), TM(TM), + FullSmVersion(200), SmVersion(getSmVersion()), TM(TM), TLInfo(TM, initializeSubtargetDependencies(CPU, FS)) {} bool NVPTXSubtarget::hasImageHandles() const { diff --git a/llvm/lib/Target/NVPTX/NVPTXSubtarget.h b/llvm/lib/Target/NVPTX/NVPTXSubtarget.h index 93af11c258b48..951962d1e68be 100644 --- a/llvm/lib/Target/NVPTX/NVPTXSubtarget.h +++ b/llvm/lib/Target/NVPTX/NVPTXSubtarget.h @@ -35,7 +35,12 @@ class NVPTXSubtarget : public NVPTXGenSubtargetInfo { // PTX version x.y is represented as 10*x+y, e.g. 3.1 == 31 unsigned PTXVersion; - // SM version x.y is represented as 10*x+y, e.g. 3.1 == 31 + // Full SM version x.y is represented as 100*x+10*y+feature, e.g. 3.1 == 310 + // sm_90a == 901 + unsigned int FullSmVersion; + + // SM version x.y is represented as 10*x+y, e.g. 3.1 == 31. Derived from + // FullSmVersion. unsigned int SmVersion; const NVPTXTargetMachine &TM; @@ -80,8 +85,10 @@ class NVPTXSubtarget : public NVPTXGenSubtargetInfo { bool allowFP16Math() const; bool hasMaskOperator() const { return PTXVersion >= 71; } bool hasNoReturn() const { return SmVersion >= 30 && PTXVersion >= 64; } - unsigned int getSmVersion() const { return SmVersion; } + unsigned int getSmVersion() const { return FullSmVersion / 10; } + unsigned int getFullSmVersion() const { return FullSmVersion; } std::string getTargetName() const { return TargetName; } + bool isSm90a() const { return getFullSmVersion() == 901; } // Get maximum value of required alignments among the supported data types. 
// From the PTX ISA doc, section 8.2.3: >From 6bd838c0f60e050cad79bc8b198808eb5e7c1586 Mon Sep 17 00:00:00 2001 From: Artem Belevich <t...@google.com> Date: Fri, 8 Dec 2023 15:49:23 -0800 Subject: [PATCH 2/2] use hasAAFeatures() --- llvm/lib/Target/NVPTX/NVPTXSubtarget.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/llvm/lib/Target/NVPTX/NVPTXSubtarget.h b/llvm/lib/Target/NVPTX/NVPTXSubtarget.h index 951962d1e68be..63e3b50a09fa0 100644 --- a/llvm/lib/Target/NVPTX/NVPTXSubtarget.h +++ b/llvm/lib/Target/NVPTX/NVPTXSubtarget.h @@ -88,7 +88,11 @@ class NVPTXSubtarget : public NVPTXGenSubtargetInfo { unsigned int getSmVersion() const { return FullSmVersion / 10; } unsigned int getFullSmVersion() const { return FullSmVersion; } std::string getTargetName() const { return TargetName; } - bool isSm90a() const { return getFullSmVersion() == 901; } + + // GPUs with "a" suffix include architecture-accelerated features that + // are supported on the specified architecture only, hence such targets do not + // follow the onion layer model. + bool hasAAFeatures() const { return getFullSmVersion() % 10; } // Get maximum value of required alignments among the supported data types. // From the PTX ISA doc, section 8.2.3: _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits