llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT--> @llvm/pr-subscribers-llvm-ir @llvm/pr-subscribers-clang Author: Joseph Huber (jhuber6) <details> <summary>Changes</summary> Summary: This patch adds a builtin for the `nanosleep` PTX function. It takes either an immediate or a register and sleeps for [0, 2t] nanoseconds given t. More information at the documentation: https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#miscellaneous-instructions-nanosleep --- Full diff: https://github.com/llvm/llvm-project/pull/79888.diff 5 Files Affected: - (modified) clang/include/clang/Basic/BuiltinsNVPTX.def (+1) - (modified) clang/test/CodeGen/builtins-nvptx.c (+11) - (modified) llvm/include/llvm/IR/IntrinsicsNVVM.td (+4) - (modified) llvm/lib/Target/NVPTX/NVPTXIntrinsics.td (+6) - (added) llvm/test/CodeGen/NVPTX/nanosleep.ll (+20) ``````````diff diff --git a/clang/include/clang/Basic/BuiltinsNVPTX.def b/clang/include/clang/Basic/BuiltinsNVPTX.def index 0f2e8260143be78..ef3a37c8753d162 100644 --- a/clang/include/clang/Basic/BuiltinsNVPTX.def +++ b/clang/include/clang/Basic/BuiltinsNVPTX.def @@ -155,6 +155,7 @@ BUILTIN(__nvvm_read_ptx_sreg_pm3, "i", "n") // MISC BUILTIN(__nvvm_prmt, "UiUiUiUi", "") +TARGET_BUILTIN(__nvvm_nanosleep, "vi", "n", AND(SM_70, PTX63)) // Min Max diff --git a/clang/test/CodeGen/builtins-nvptx.c b/clang/test/CodeGen/builtins-nvptx.c index 353f3ebb608c2b1..b209e2fbad98fb0 100644 --- a/clang/test/CodeGen/builtins-nvptx.c +++ b/clang/test/CodeGen/builtins-nvptx.c @@ -810,6 +810,17 @@ __device__ void nvvm_vote(int pred) { // CHECK: ret void } +// CHECK-LABEL: nvvm_nanosleep +__device__ void nvvm_nanosleep(int d) { +#if __CUDA_ARCH__ >= 700 + // CHECK_PTX70_SM80: call void @llvm.nvvm.nanosleep + __nvvm_nanosleep(d); + + // CHECK_PTX70_SM80: call void @llvm.nvvm.nanosleep + __nvvm_nanosleep(1); +#endif +} + // CHECK-LABEL: nvvm_mbarrier __device__ void nvvm_mbarrier(long long* addr, __attribute__((address_space(3))) long long* sharedAddr, int count, long long state) { #if __CUDA_ARCH__ >= 800 diff --git a/llvm/include/llvm/IR/IntrinsicsNVVM.td b/llvm/include/llvm/IR/IntrinsicsNVVM.td index 5a5ba2592e1467e..5d863b283d0466e 100644 --- a/llvm/include/llvm/IR/IntrinsicsNVVM.td +++ b/llvm/include/llvm/IR/IntrinsicsNVVM.td @@ -557,6 +557,10 @@ let TargetPrefix = "nvvm" in { DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem, IntrSpeculatable]>; + def int_nvvm_nanosleep : ClangBuiltin<"__nvvm_nanosleep">, + DefaultAttrsIntrinsic<[], [llvm_i32_ty], + [IntrConvergent, IntrNoMem, IntrHasSideEffects]>; + // // Min Max // diff --git a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td index 33f1e4a43e072af..133514f4f48024e 100644 --- a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td +++ b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td @@ -634,6 +634,12 @@ class F_MATH_3<string OpcStr, NVPTXRegClass t_regclass, def INT_NVVM_PRMT : F_MATH_3<"prmt.b32 \t$dst, $src0, $src1, $src2;", Int32Regs, Int32Regs, Int32Regs, Int32Regs, int_nvvm_prmt>; +def INT_NVVM_NANOSLEEP_I : NVPTXInst<(outs), (ins i32imm:$i), "nanosleep.u32 \t$i;", + [(int_nvvm_nanosleep imm:$i)]>, + Requires<[hasPTX<63>, hasSM<70>]>; +def INT_NVVM_NANOSLEEP_R : NVPTXInst<(outs), (ins Int32Regs:$i), "nanosleep.u32 \t$i;", + [(int_nvvm_nanosleep Int32Regs:$i)]>, + Requires<[hasPTX<63>, hasSM<70>]>; // // Min Max // diff --git a/llvm/test/CodeGen/NVPTX/nanosleep.ll b/llvm/test/CodeGen/NVPTX/nanosleep.ll new file mode 100644 index 000000000000000..1b2a7bf9476cf5f --- /dev/null +++ b/llvm/test/CodeGen/NVPTX/nanosleep.ll @@ -0,0 +1,20 @@ +; RUN: llc < %s -march=nvptx64 -O2 -mcpu=sm_70 -mattr=+ptx63 | FileCheck %s +; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_70 -mattr=+ptx63 | %ptxas-verify %} + +declare void @llvm.nvvm.nanosleep(i32) + +; CHECK-LABEL: test_nanosleep_r +define void @test_nanosleep_r(i32 noundef %d) { +entry: +; CHECK: nanosleep.u32 %[[REG:.+]]; + call void @llvm.nvvm.nanosleep(i32 %d) + ret void +} + +; CHECK-LABEL: test_nanosleep_i +define void @test_nanosleep_i() { +entry: +; CHECK: nanosleep.u32 42; + call void @llvm.nvvm.nanosleep(i32 42) + ret void +} `````````` </details> https://github.com/llvm/llvm-project/pull/79888 _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits