https://github.com/DenisGZM created https://github.com/llvm/llvm-project/pull/99646
Supported `__usAtomicCAS` builtin originally defined in `/usr/local/cuda/inlcude/crt/sm_70_rt.hpp` >From 2cc142d85e706a2a4921e831fd78c8e609b70ae2 Mon Sep 17 00:00:00 2001 From: Denis Gerasimov <denis.gerasi...@baikalelectronics.ru> Date: Fri, 19 Jul 2024 15:47:57 +0300 Subject: [PATCH] [NVPTX] Support __usAtomicCAS builtin --- clang/include/clang/Basic/BuiltinsNVPTX.def | 3 +++ clang/lib/CodeGen/CGBuiltin.cpp | 3 +++ clang/lib/Headers/__clang_cuda_device_functions.h | 12 ++++++++++++ clang/test/CodeGen/builtins-nvptx.c | 3 +++ llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp | 2 +- llvm/lib/Target/NVPTX/NVPTXIntrinsics.td | 15 +++++++++++++++ 6 files changed, 37 insertions(+), 1 deletion(-) diff --git a/clang/include/clang/Basic/BuiltinsNVPTX.def b/clang/include/clang/Basic/BuiltinsNVPTX.def index 504314d8d96e9..3cf683a2f2129 100644 --- a/clang/include/clang/Basic/BuiltinsNVPTX.def +++ b/clang/include/clang/Basic/BuiltinsNVPTX.def @@ -829,6 +829,9 @@ BUILTIN(__nvvm_atom_xor_gen_ll, "LLiLLiD*LLi", "n") TARGET_BUILTIN(__nvvm_atom_cta_xor_gen_ll, "LLiLLiD*LLi", "n", SM_60) TARGET_BUILTIN(__nvvm_atom_sys_xor_gen_ll, "LLiLLiD*LLi", "n", SM_60) +TARGET_BUILTIN(__nvvm_atom_cas_gen_us, "UsUsD*UsUs", "n", SM_70) +TARGET_BUILTIN(__nvvm_atom_cta_cas_gen_us, "UsUsD*UsUs", "n", SM_70) +TARGET_BUILTIN(__nvvm_atom_sys_cas_gen_us, "UsUsD*UsUs", "n", SM_70) BUILTIN(__nvvm_atom_cas_gen_i, "iiD*ii", "n") TARGET_BUILTIN(__nvvm_atom_cta_cas_gen_i, "iiD*ii", "n", SM_60) TARGET_BUILTIN(__nvvm_atom_sys_cas_gen_i, "iiD*ii", "n", SM_60) diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index 2d32a8582cfdf..9812a48ad4258 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -20100,6 +20100,7 @@ Value *CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID, case NVPTX::BI__nvvm_atom_min_gen_ull: return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::UMin, E); + case NVPTX::BI__nvvm_atom_cas_gen_us: case NVPTX::BI__nvvm_atom_cas_gen_i: case NVPTX::BI__nvvm_atom_cas_gen_l: case NVPTX::BI__nvvm_atom_cas_gen_ll: @@ -20291,6 +20292,7 @@ Value *CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID, case NVPTX::BI__nvvm_atom_sys_xor_gen_l: case NVPTX::BI__nvvm_atom_sys_xor_gen_ll: return MakeScopedAtomic(Intrinsic::nvvm_atomic_xor_gen_i_sys, *this, E); + case NVPTX::BI__nvvm_atom_cta_cas_gen_us: case NVPTX::BI__nvvm_atom_cta_cas_gen_i: case NVPTX::BI__nvvm_atom_cta_cas_gen_l: case NVPTX::BI__nvvm_atom_cta_cas_gen_ll: { @@ -20302,6 +20304,7 @@ Value *CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID, Intrinsic::nvvm_atomic_cas_gen_i_cta, {ElemTy, Ptr->getType()}), {Ptr, EmitScalarExpr(E->getArg(1)), EmitScalarExpr(E->getArg(2))}); } + case NVPTX::BI__nvvm_atom_sys_cas_gen_us: case NVPTX::BI__nvvm_atom_sys_cas_gen_i: case NVPTX::BI__nvvm_atom_sys_cas_gen_l: case NVPTX::BI__nvvm_atom_sys_cas_gen_ll: { diff --git a/clang/lib/Headers/__clang_cuda_device_functions.h b/clang/lib/Headers/__clang_cuda_device_functions.h index f8a12cefdb81b..f66fe625a3967 100644 --- a/clang/lib/Headers/__clang_cuda_device_functions.h +++ b/clang/lib/Headers/__clang_cuda_device_functions.h @@ -529,6 +529,18 @@ __DEVICE__ void __threadfence(void) { __nvvm_membar_gl(); } __DEVICE__ void __threadfence_block(void) { __nvvm_membar_cta(); }; __DEVICE__ void __threadfence_system(void) { __nvvm_membar_sys(); }; __DEVICE__ void __trap(void) { __asm__ __volatile__("trap;"); } +__DEVICE__ unsigned short __usAtomicCAS(unsigned short *__p, unsigned short __cmp, + unsigned short __v) { + return __nvvm_atom_cas_gen_us(__p, __cmp, __v); +} +__DEVICE__ unsigned short __usAtomicCAS_block(unsigned short *__p, unsigned short __cmp, + unsigned short __v) { + return __nvvm_atom_cta_cas_gen_us(__p, __cmp, __v); +} +__DEVICE__ unsigned short __usAtomicCAS_system(unsigned short *__p, unsigned short __cmp, + unsigned short __v) { + return __nvvm_atom_sys_cas_gen_us(__p, __cmp, __v); +} __DEVICE__ unsigned int __uAtomicAdd(unsigned int *__p, unsigned int __v) { return __nvvm_atom_add_gen_i((int *)__p, __v); } diff --git a/clang/test/CodeGen/builtins-nvptx.c b/clang/test/CodeGen/builtins-nvptx.c index 75b9d6d1fe190..3ba1fabd05335 100644 --- a/clang/test/CodeGen/builtins-nvptx.c +++ b/clang/test/CodeGen/builtins-nvptx.c @@ -306,6 +306,9 @@ __device__ void nvvm_atom(float *fp, float f, double *dfp, double df, int *ip, // CHECK: atomicrmw umin ptr {{.*}} seq_cst, align 8 __nvvm_atom_min_gen_ull((unsigned long long *)&sll, ll); + // CHECK: cmpxchg ptr {{.*}} seq_cst seq_cst, align 2 + // CHECK-NEXT: extractvalue { i16, i1 } {{%[0-9]+}}, 0 + __nvvm_atom_cas_gen_us(ip, 0, i); // CHECK: cmpxchg ptr {{.*}} seq_cst seq_cst, align 4 // CHECK-NEXT: extractvalue { i32, i1 } {{%[0-9]+}}, 0 __nvvm_atom_cas_gen_i(ip, 0, i); diff --git a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp index bc23998455a68..7c63f9215b55e 100644 --- a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp @@ -871,7 +871,7 @@ NVPTXTargetLowering::NVPTXTargetLowering(const NVPTXTargetMachine &TM, // actions computeRegisterProperties(STI.getRegisterInfo()); - setMinCmpXchgSizeInBits(32); + setMinCmpXchgSizeInBits(16); setMaxAtomicSizeInBitsSupported(64); setMaxDivRemBitWidthSupported(64); } diff --git a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td index c81dfa68e4bd4..e35eb8f55376b 100644 --- a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td +++ b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td @@ -1997,6 +1997,12 @@ defm INT_PTX_ATOM_XOR_GEN_64_USE_G : F_ATOMIC_2<i64, Int64Regs, ".global", ".b64 // atom_cas +def atomic_cmp_swap_i16_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b, node:$c), + (atomic_cmp_swap_i16 node:$a, node:$b, node:$c)>; +def atomic_cmp_swap_i16_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b, node:$c), + (atomic_cmp_swap_i16 node:$a, node:$b, node:$c)>; +def atomic_cmp_swap_i16_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b, node:$c), + (atomic_cmp_swap_i16 node:$a, node:$b, node:$c)>; def atomic_cmp_swap_i32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b, node:$c), (atomic_cmp_swap_i32 node:$a, node:$b, node:$c)>; def atomic_cmp_swap_i32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b, node:$c), @@ -2010,6 +2016,14 @@ def atomic_cmp_swap_i64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b, node:$c), def atomic_cmp_swap_i64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b, node:$c), (atomic_cmp_swap_i64 node:$a, node:$b, node:$c)>; +defm INT_PTX_ATOM_CAS_G_16 : F_ATOMIC_3<i16, Int16Regs, ".global", ".b16", ".cas", + atomic_cmp_swap_i16_g, i16imm>; +defm INT_PTX_ATOM_CAS_S_16 : F_ATOMIC_3<i16, Int16Regs, ".shared", ".b16", ".cas", + atomic_cmp_swap_i16_s, i16imm>; +defm INT_PTX_ATOM_CAS_GEN_16 : F_ATOMIC_3<i16, Int16Regs, "", ".b16", ".cas", + atomic_cmp_swap_i16_gen, i16imm>; +defm INT_PTX_ATOM_CAS_GEN_16_USE_G : F_ATOMIC_3<i16, Int16Regs, ".global", ".b16", ".cas", + atomic_cmp_swap_i16_gen, i16imm>; defm INT_PTX_ATOM_CAS_G_32 : F_ATOMIC_3<i32, Int32Regs, ".global", ".b32", ".cas", atomic_cmp_swap_i32_g, i32imm>; defm INT_PTX_ATOM_CAS_S_32 : F_ATOMIC_3<i32, Int32Regs, ".shared", ".b32", ".cas", @@ -2221,6 +2235,7 @@ multiclass ATOM2_incdec_impl<string OpStr> { // atom.cas multiclass ATOM3_cas_impl<string OpStr> { + defm _b16 : ATOM3S_impl<OpStr, "i", "b16", i16, Int16Regs, i16imm, imm, i16, []>; defm _b32 : ATOM3S_impl<OpStr, "i", "b32", i32, Int32Regs, i32imm, imm, i32, []>; defm _b64 : ATOM3S_impl<OpStr, "i", "b64", i64, Int64Regs, i64imm, imm, i64, []>; } _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits