https://github.com/DenisGZM created 
https://github.com/llvm/llvm-project/pull/99646

Supported `__usAtomicCAS` builtin originally defined in 
`/usr/local/cuda/inlcude/crt/sm_70_rt.hpp`


>From 2cc142d85e706a2a4921e831fd78c8e609b70ae2 Mon Sep 17 00:00:00 2001
From: Denis Gerasimov <denis.gerasi...@baikalelectronics.ru>
Date: Fri, 19 Jul 2024 15:47:57 +0300
Subject: [PATCH] [NVPTX] Support __usAtomicCAS builtin

---
 clang/include/clang/Basic/BuiltinsNVPTX.def       |  3 +++
 clang/lib/CodeGen/CGBuiltin.cpp                   |  3 +++
 clang/lib/Headers/__clang_cuda_device_functions.h | 12 ++++++++++++
 clang/test/CodeGen/builtins-nvptx.c               |  3 +++
 llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp       |  2 +-
 llvm/lib/Target/NVPTX/NVPTXIntrinsics.td          | 15 +++++++++++++++
 6 files changed, 37 insertions(+), 1 deletion(-)

diff --git a/clang/include/clang/Basic/BuiltinsNVPTX.def 
b/clang/include/clang/Basic/BuiltinsNVPTX.def
index 504314d8d96e9..3cf683a2f2129 100644
--- a/clang/include/clang/Basic/BuiltinsNVPTX.def
+++ b/clang/include/clang/Basic/BuiltinsNVPTX.def
@@ -829,6 +829,9 @@ BUILTIN(__nvvm_atom_xor_gen_ll, "LLiLLiD*LLi", "n")
 TARGET_BUILTIN(__nvvm_atom_cta_xor_gen_ll, "LLiLLiD*LLi", "n", SM_60)
 TARGET_BUILTIN(__nvvm_atom_sys_xor_gen_ll, "LLiLLiD*LLi", "n", SM_60)
 
+TARGET_BUILTIN(__nvvm_atom_cas_gen_us, "UsUsD*UsUs", "n", SM_70)
+TARGET_BUILTIN(__nvvm_atom_cta_cas_gen_us, "UsUsD*UsUs", "n", SM_70)
+TARGET_BUILTIN(__nvvm_atom_sys_cas_gen_us, "UsUsD*UsUs", "n", SM_70)
 BUILTIN(__nvvm_atom_cas_gen_i, "iiD*ii", "n")
 TARGET_BUILTIN(__nvvm_atom_cta_cas_gen_i, "iiD*ii", "n", SM_60)
 TARGET_BUILTIN(__nvvm_atom_sys_cas_gen_i, "iiD*ii", "n", SM_60)
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 2d32a8582cfdf..9812a48ad4258 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -20100,6 +20100,7 @@ Value *CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned 
BuiltinID,
   case NVPTX::BI__nvvm_atom_min_gen_ull:
     return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::UMin, E);
 
+  case NVPTX::BI__nvvm_atom_cas_gen_us:
   case NVPTX::BI__nvvm_atom_cas_gen_i:
   case NVPTX::BI__nvvm_atom_cas_gen_l:
   case NVPTX::BI__nvvm_atom_cas_gen_ll:
@@ -20291,6 +20292,7 @@ Value *CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned 
BuiltinID,
   case NVPTX::BI__nvvm_atom_sys_xor_gen_l:
   case NVPTX::BI__nvvm_atom_sys_xor_gen_ll:
     return MakeScopedAtomic(Intrinsic::nvvm_atomic_xor_gen_i_sys, *this, E);
+  case NVPTX::BI__nvvm_atom_cta_cas_gen_us:
   case NVPTX::BI__nvvm_atom_cta_cas_gen_i:
   case NVPTX::BI__nvvm_atom_cta_cas_gen_l:
   case NVPTX::BI__nvvm_atom_cta_cas_gen_ll: {
@@ -20302,6 +20304,7 @@ Value *CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned 
BuiltinID,
             Intrinsic::nvvm_atomic_cas_gen_i_cta, {ElemTy, Ptr->getType()}),
         {Ptr, EmitScalarExpr(E->getArg(1)), EmitScalarExpr(E->getArg(2))});
   }
+  case NVPTX::BI__nvvm_atom_sys_cas_gen_us:
   case NVPTX::BI__nvvm_atom_sys_cas_gen_i:
   case NVPTX::BI__nvvm_atom_sys_cas_gen_l:
   case NVPTX::BI__nvvm_atom_sys_cas_gen_ll: {
diff --git a/clang/lib/Headers/__clang_cuda_device_functions.h 
b/clang/lib/Headers/__clang_cuda_device_functions.h
index f8a12cefdb81b..f66fe625a3967 100644
--- a/clang/lib/Headers/__clang_cuda_device_functions.h
+++ b/clang/lib/Headers/__clang_cuda_device_functions.h
@@ -529,6 +529,18 @@ __DEVICE__ void __threadfence(void) { __nvvm_membar_gl(); }
 __DEVICE__ void __threadfence_block(void) { __nvvm_membar_cta(); };
 __DEVICE__ void __threadfence_system(void) { __nvvm_membar_sys(); };
 __DEVICE__ void __trap(void) { __asm__ __volatile__("trap;"); }
+__DEVICE__ unsigned short __usAtomicCAS(unsigned short *__p, unsigned short 
__cmp,
+                                        unsigned short __v) {
+  return __nvvm_atom_cas_gen_us(__p, __cmp, __v);
+}
+__DEVICE__ unsigned short __usAtomicCAS_block(unsigned short *__p, unsigned 
short __cmp,
+                                              unsigned short __v) {
+  return __nvvm_atom_cta_cas_gen_us(__p, __cmp, __v);
+}
+__DEVICE__ unsigned short __usAtomicCAS_system(unsigned short *__p, unsigned 
short __cmp,
+                                               unsigned short __v) {
+  return __nvvm_atom_sys_cas_gen_us(__p, __cmp, __v);
+}
 __DEVICE__ unsigned int __uAtomicAdd(unsigned int *__p, unsigned int __v) {
   return __nvvm_atom_add_gen_i((int *)__p, __v);
 }
diff --git a/clang/test/CodeGen/builtins-nvptx.c 
b/clang/test/CodeGen/builtins-nvptx.c
index 75b9d6d1fe190..3ba1fabd05335 100644
--- a/clang/test/CodeGen/builtins-nvptx.c
+++ b/clang/test/CodeGen/builtins-nvptx.c
@@ -306,6 +306,9 @@ __device__ void nvvm_atom(float *fp, float f, double *dfp, 
double df, int *ip,
   // CHECK: atomicrmw umin ptr {{.*}} seq_cst, align 8
   __nvvm_atom_min_gen_ull((unsigned long long *)&sll, ll);
 
+  // CHECK: cmpxchg ptr {{.*}} seq_cst seq_cst, align 2
+  // CHECK-NEXT: extractvalue { i16, i1 } {{%[0-9]+}}, 0
+  __nvvm_atom_cas_gen_us(ip, 0, i);
   // CHECK: cmpxchg ptr {{.*}} seq_cst seq_cst, align 4
   // CHECK-NEXT: extractvalue { i32, i1 } {{%[0-9]+}}, 0
   __nvvm_atom_cas_gen_i(ip, 0, i);
diff --git a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp 
b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
index bc23998455a68..7c63f9215b55e 100644
--- a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
@@ -871,7 +871,7 @@ NVPTXTargetLowering::NVPTXTargetLowering(const 
NVPTXTargetMachine &TM,
   // actions
   computeRegisterProperties(STI.getRegisterInfo());
 
-  setMinCmpXchgSizeInBits(32);
+  setMinCmpXchgSizeInBits(16);
   setMaxAtomicSizeInBitsSupported(64);
   setMaxDivRemBitWidthSupported(64);
 }
diff --git a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td 
b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
index c81dfa68e4bd4..e35eb8f55376b 100644
--- a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
+++ b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
@@ -1997,6 +1997,12 @@ defm INT_PTX_ATOM_XOR_GEN_64_USE_G : F_ATOMIC_2<i64, 
Int64Regs, ".global", ".b64
 
 // atom_cas
 
+def atomic_cmp_swap_i16_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b, node:$c),
+  (atomic_cmp_swap_i16 node:$a, node:$b, node:$c)>;
+def atomic_cmp_swap_i16_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b, node:$c),
+  (atomic_cmp_swap_i16 node:$a, node:$b, node:$c)>;
+def atomic_cmp_swap_i16_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b, 
node:$c),
+  (atomic_cmp_swap_i16 node:$a, node:$b, node:$c)>;
 def atomic_cmp_swap_i32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b, node:$c),
   (atomic_cmp_swap_i32 node:$a, node:$b, node:$c)>;
 def atomic_cmp_swap_i32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b, node:$c),
@@ -2010,6 +2016,14 @@ def atomic_cmp_swap_i64_s: ATOMIC_SHARED_CHK<(ops 
node:$a, node:$b, node:$c),
 def atomic_cmp_swap_i64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b, 
node:$c),
   (atomic_cmp_swap_i64 node:$a, node:$b, node:$c)>;
 
+defm INT_PTX_ATOM_CAS_G_16 : F_ATOMIC_3<i16, Int16Regs, ".global", ".b16", 
".cas",
+  atomic_cmp_swap_i16_g, i16imm>;
+defm INT_PTX_ATOM_CAS_S_16 : F_ATOMIC_3<i16, Int16Regs, ".shared", ".b16", 
".cas",
+  atomic_cmp_swap_i16_s, i16imm>;
+defm INT_PTX_ATOM_CAS_GEN_16 : F_ATOMIC_3<i16, Int16Regs, "", ".b16", ".cas",
+  atomic_cmp_swap_i16_gen, i16imm>;
+defm INT_PTX_ATOM_CAS_GEN_16_USE_G : F_ATOMIC_3<i16, Int16Regs, ".global", 
".b16", ".cas",
+  atomic_cmp_swap_i16_gen, i16imm>;
 defm INT_PTX_ATOM_CAS_G_32 : F_ATOMIC_3<i32, Int32Regs, ".global", ".b32", 
".cas",
   atomic_cmp_swap_i32_g, i32imm>;
 defm INT_PTX_ATOM_CAS_S_32 : F_ATOMIC_3<i32, Int32Regs, ".shared", ".b32", 
".cas",
@@ -2221,6 +2235,7 @@ multiclass ATOM2_incdec_impl<string OpStr> {
 
 // atom.cas
 multiclass ATOM3_cas_impl<string OpStr> {
+   defm _b16  : ATOM3S_impl<OpStr, "i", "b16", i16, Int16Regs, i16imm, imm, 
i16, []>;
    defm _b32  : ATOM3S_impl<OpStr, "i", "b32", i32, Int32Regs, i32imm, imm, 
i32, []>;
    defm _b64  : ATOM3S_impl<OpStr, "i", "b64", i64, Int64Regs, i64imm, imm, 
i64, []>;
 }

_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to