llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT--> @llvm/pr-subscribers-clang @llvm/pr-subscribers-clang-driver Author: Sergey Kozub (sergey-kozub) <details> <summary>Changes</summary> Adds support for sm_100 (Blackwell), similar to https://github.com/llvm/llvm-project/pull/74895. One important aspect is that `sm_100` is not compatible with `sm_90a`, only with `sm_90` (note the `SM_90` and `SM_100` defines in "BuiltinsNVPTX.def"). --- Full diff: https://github.com/llvm/llvm-project/pull/97402.diff 9 Files Affected: - (modified) clang/docs/ReleaseNotes.rst (+2-1) - (modified) clang/include/clang/Basic/BuiltinsNVPTX.def (+8-2) - (modified) clang/include/clang/Basic/Cuda.h (+3-1) - (modified) clang/lib/Basic/Cuda.cpp (+4) - (modified) clang/lib/Basic/Targets/NVPTX.cpp (+2) - (modified) clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp (+1) - (modified) clang/lib/Driver/ToolChains/Cuda.cpp (+3) - (modified) clang/test/Misc/target-invalid-cpu-note.c (+1-1) - (modified) llvm/lib/Target/NVPTX/NVPTX.td (+3-2) ``````````diff diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index c720e47dbe35b..3c10ee51550d9 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -1057,7 +1057,8 @@ CUDA/HIP Language Changes CUDA Support ^^^^^^^^^^^^ -- Clang now supports CUDA SDK up to 12.5 +- Clang now supports CUDA SDK up to 12.6 +- Added support for sm_100 AIX Support ^^^^^^^^^^^ diff --git a/clang/include/clang/Basic/BuiltinsNVPTX.def b/clang/include/clang/Basic/BuiltinsNVPTX.def index 504314d8d96e9..3f383bc89ee70 100644 --- a/clang/include/clang/Basic/BuiltinsNVPTX.def +++ b/clang/include/clang/Basic/BuiltinsNVPTX.def @@ -27,8 +27,10 @@ #pragma push_macro("SM_89") #pragma push_macro("SM_90") #pragma push_macro("SM_90a") +#pragma push_macro("SM_100") +#define SM_100 "sm_100" #define SM_90a "sm_90a" -#define SM_90 "sm_90|" SM_90a +#define SM_90 "sm_90|" SM_90a "|" SM_100 #define SM_89 "sm_89|" SM_90 #define SM_87 "sm_87|" SM_89 #define SM_86 "sm_86|" SM_87 @@ -63,7 +65,9 @@ #pragma 
push_macro("PTX83") #pragma push_macro("PTX84") #pragma push_macro("PTX85") -#define PTX85 "ptx85" +#pragma push_macro("PTX86") +#define PTX86 "ptx86" +#define PTX85 "ptx85|" PTX86 #define PTX84 "ptx84|" PTX85 #define PTX83 "ptx83|" PTX84 #define PTX82 "ptx82|" PTX83 @@ -1075,6 +1079,7 @@ TARGET_BUILTIN(__nvvm_getctarank_shared_cluster, "iv*3", "", AND(SM_90,PTX78)) #pragma pop_macro("SM_89") #pragma pop_macro("SM_90") #pragma pop_macro("SM_90a") +#pragma pop_macro("SM_100") #pragma pop_macro("PTX42") #pragma pop_macro("PTX60") #pragma pop_macro("PTX61") @@ -1097,3 +1102,4 @@ TARGET_BUILTIN(__nvvm_getctarank_shared_cluster, "iv*3", "", AND(SM_90,PTX78)) #pragma pop_macro("PTX83") #pragma pop_macro("PTX84") #pragma pop_macro("PTX85") +#pragma pop_macro("PTX86") diff --git a/clang/include/clang/Basic/Cuda.h b/clang/include/clang/Basic/Cuda.h index 83699f8897f66..a18e62620dd5d 100644 --- a/clang/include/clang/Basic/Cuda.h +++ b/clang/include/clang/Basic/Cuda.h @@ -43,9 +43,10 @@ enum class CudaVersion { CUDA_123, CUDA_124, CUDA_125, + CUDA_126, FULLY_SUPPORTED = CUDA_123, PARTIALLY_SUPPORTED = - CUDA_125, // Partially supported. Proceed with a warning. + CUDA_126, // Partially supported. Proceed with a warning. NEW = 10000, // Too new. Issue a warning, but allow using it. }; const char *CudaVersionToString(CudaVersion V); @@ -78,6 +79,7 @@ enum class OffloadArch { SM_89, SM_90, SM_90a, + SM_100, GFX600, GFX601, GFX602, diff --git a/clang/lib/Basic/Cuda.cpp b/clang/lib/Basic/Cuda.cpp index faf3878f064d2..72d9bd89c36e7 100644 --- a/clang/lib/Basic/Cuda.cpp +++ b/clang/lib/Basic/Cuda.cpp @@ -43,6 +43,7 @@ static const CudaVersionMapEntry CudaNameVersionMap[] = { CUDA_ENTRY(12, 3), CUDA_ENTRY(12, 4), CUDA_ENTRY(12, 5), + CUDA_ENTRY(12, 6), {"", CudaVersion::NEW, llvm::VersionTuple(std::numeric_limits<int>::max())}, {"unknown", CudaVersion::UNKNOWN, {}} // End of list tombstone. 
}; @@ -96,6 +97,7 @@ static const OffloadArchToStringMap arch_names[] = { SM(89), // Ada Lovelace SM(90), // Hopper SM(90a), // Hopper + SM(100), // Blackwell GFX(600), // gfx600 GFX(601), // gfx601 GFX(602), // gfx602 @@ -221,6 +223,8 @@ CudaVersion MinVersionForOffloadArch(OffloadArch A) { return CudaVersion::CUDA_118; case OffloadArch::SM_90a: return CudaVersion::CUDA_120; + case OffloadArch::SM_100: + return CudaVersion::CUDA_126; default: llvm_unreachable("invalid enum"); } diff --git a/clang/lib/Basic/Targets/NVPTX.cpp b/clang/lib/Basic/Targets/NVPTX.cpp index 43b653dc52ce0..88a0dbde52d52 100644 --- a/clang/lib/Basic/Targets/NVPTX.cpp +++ b/clang/lib/Basic/Targets/NVPTX.cpp @@ -281,6 +281,8 @@ void NVPTXTargetInfo::getTargetDefines(const LangOptions &Opts, case OffloadArch::SM_90: case OffloadArch::SM_90a: return "900"; + case OffloadArch::SM_100: + return "1000"; } llvm_unreachable("unhandled OffloadArch"); }(); diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp index f5bd4a141cc2d..198b64bd7e1d4 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp @@ -2276,6 +2276,7 @@ void CGOpenMPRuntimeGPU::processRequiresDirective(const OMPRequiresDecl *D) { case OffloadArch::SM_89: case OffloadArch::SM_90: case OffloadArch::SM_90a: + case OffloadArch::SM_100: case OffloadArch::GFX600: case OffloadArch::GFX601: case OffloadArch::GFX602: diff --git a/clang/lib/Driver/ToolChains/Cuda.cpp b/clang/lib/Driver/ToolChains/Cuda.cpp index 08a4633902654..81a3703dc0da7 100644 --- a/clang/lib/Driver/ToolChains/Cuda.cpp +++ b/clang/lib/Driver/ToolChains/Cuda.cpp @@ -86,6 +86,8 @@ CudaVersion getCudaVersion(uint32_t raw_version) { return CudaVersion::CUDA_124; if (raw_version < 12060) return CudaVersion::CUDA_125; + if (raw_version < 12070) + return CudaVersion::CUDA_126; return CudaVersion::NEW; } @@ -692,6 +694,7 @@ void NVPTX::getNVPTXTargetFeatures(const Driver &D, const llvm::Triple 
&Triple, case CudaVersion::CUDA_##CUDA_VER: \ PtxFeature = "+ptx" #PTX_VER; \ break; + CASE_CUDA_VERSION(126, 86); CASE_CUDA_VERSION(125, 85); CASE_CUDA_VERSION(124, 84); CASE_CUDA_VERSION(123, 83); diff --git a/clang/test/Misc/target-invalid-cpu-note.c b/clang/test/Misc/target-invalid-cpu-note.c index a5f9ffa21220a..92b28c9a16520 100644 --- a/clang/test/Misc/target-invalid-cpu-note.c +++ b/clang/test/Misc/target-invalid-cpu-note.c @@ -29,7 +29,7 @@ // RUN: not %clang_cc1 -triple nvptx--- -target-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix NVPTX // NVPTX: error: unknown target CPU 'not-a-cpu' -// NVPTX-NEXT: note: valid target CPU values are: sm_20, sm_21, sm_30, sm_32, sm_35, sm_37, sm_50, sm_52, sm_53, sm_60, sm_61, sm_62, sm_70, sm_72, sm_75, sm_80, sm_86, sm_87, sm_89, sm_90, sm_90a, gfx600, gfx601, gfx602, gfx700, gfx701, gfx702, gfx703, gfx704, gfx705, gfx801, gfx802, gfx803, gfx805, gfx810, gfx9-generic, gfx900, gfx902, gfx904, gfx906, gfx908, gfx909, gfx90a, gfx90c, gfx940, gfx941, gfx942, gfx10-1-generic, gfx1010, gfx1011, gfx1012, gfx1013, gfx10-3-generic, gfx1030, gfx1031, gfx1032, gfx1033, gfx1034, gfx1035, gfx1036, gfx11-generic, gfx1100, gfx1101, gfx1102, gfx1103, gfx1150, gfx1151, gfx1152, gfx12-generic, gfx1200, gfx1201, amdgcnspirv{{$}} +// NVPTX-NEXT: note: valid target CPU values are: sm_20, sm_21, sm_30, sm_32, sm_35, sm_37, sm_50, sm_52, sm_53, sm_60, sm_61, sm_62, sm_70, sm_72, sm_75, sm_80, sm_86, sm_87, sm_89, sm_90, sm_90a, sm_100, gfx600, gfx601, gfx602, gfx700, gfx701, gfx702, gfx703, gfx704, gfx705, gfx801, gfx802, gfx803, gfx805, gfx810, gfx9-generic, gfx900, gfx902, gfx904, gfx906, gfx908, gfx909, gfx90a, gfx90c, gfx940, gfx941, gfx942, gfx10-1-generic, gfx1010, gfx1011, gfx1012, gfx1013, gfx10-3-generic, gfx1030, gfx1031, gfx1032, gfx1033, gfx1034, gfx1035, gfx1036, gfx11-generic, gfx1100, gfx1101, gfx1102, gfx1103, gfx1150, gfx1151, gfx1152, gfx12-generic, gfx1200, gfx1201, amdgcnspirv{{$}} // RUN: not 
%clang_cc1 -triple r600--- -target-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix R600 // R600: error: unknown target CPU 'not-a-cpu' diff --git a/llvm/lib/Target/NVPTX/NVPTX.td b/llvm/lib/Target/NVPTX/NVPTX.td index bb4549a5e6078..9af8715ef52ae 100644 --- a/llvm/lib/Target/NVPTX/NVPTX.td +++ b/llvm/lib/Target/NVPTX/NVPTX.td @@ -35,14 +35,14 @@ class FeaturePTX<int version>: "Use PTX version " # version>; foreach sm = [20, 21, 30, 32, 35, 37, 50, 52, 53, - 60, 61, 62, 70, 72, 75, 80, 86, 87, 89, 90] in + 60, 61, 62, 70, 72, 75, 80, 86, 87, 89, 90, 100] in def SM#sm: FeatureSM<""#sm, !mul(sm, 10)>; def SM90a: FeatureSM<"90a", 901>; foreach version = [32, 40, 41, 42, 43, 50, 60, 61, 62, 63, 64, 65, 70, 71, 72, 73, 74, 75, 76, 77, 78, - 80, 81, 82, 83, 84, 85] in + 80, 81, 82, 83, 84, 85, 86] in def PTX#version: FeaturePTX<version>; //===----------------------------------------------------------------------===// @@ -73,6 +73,7 @@ def : Proc<"sm_87", [SM87, PTX74]>; def : Proc<"sm_89", [SM89, PTX78]>; def : Proc<"sm_90", [SM90, PTX78]>; def : Proc<"sm_90a", [SM90a, PTX80]>; +def : Proc<"sm_100", [SM100, PTX86]>; def NVPTXInstrInfo : InstrInfo { } `````````` </details> https://github.com/llvm/llvm-project/pull/97402 _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits