yaxunl created this revision.
yaxunl added reviewers: rjmccall, tra.
Herald added subscribers: kerbowa, jvesely.
Herald added a project: All.
yaxunl requested review of this revision.

LLVM IR already allows floating point type in atomicrmw.
Update clang atomic fetch max/min builtins to accept
floating point type like we did for fetch add/sub.


https://reviews.llvm.org/D150985

Files:
  clang/lib/CodeGen/CGAtomic.cpp
  clang/lib/Sema/SemaChecking.cpp
  clang/test/CodeGenCUDA/amdgpu-atomic-ops.cu
  clang/test/Sema/atomic-ops.c
  clang/test/SemaOpenCL/atomic-ops.cl

Index: clang/test/SemaOpenCL/atomic-ops.cl
===================================================================
--- clang/test/SemaOpenCL/atomic-ops.cl
+++ clang/test/SemaOpenCL/atomic-ops.cl
@@ -61,8 +61,8 @@
 
   __opencl_atomic_fetch_min(i, 1, memory_order_seq_cst, memory_scope_work_group);
   __opencl_atomic_fetch_max(i, 1, memory_order_seq_cst, memory_scope_work_group);
-  __opencl_atomic_fetch_min(f, 1, memory_order_seq_cst, memory_scope_work_group); // expected-error {{address argument to atomic operation must be a pointer to atomic integer ('__generic atomic_float *' (aka '__generic _Atomic(float) *') invalid)}}
-  __opencl_atomic_fetch_max(f, 1, memory_order_seq_cst, memory_scope_work_group); // expected-error {{address argument to atomic operation must be a pointer to atomic integer ('__generic atomic_float *' (aka '__generic _Atomic(float) *') invalid)}}
+  __opencl_atomic_fetch_min(f, 1, memory_order_seq_cst, memory_scope_work_group);
+  __opencl_atomic_fetch_max(f, 1, memory_order_seq_cst, memory_scope_work_group);
 
   bool cmpexch_1 = __opencl_atomic_compare_exchange_strong(i, I, 1, memory_order_seq_cst, memory_order_seq_cst, memory_scope_work_group);
   bool cmpexch_2 = __opencl_atomic_compare_exchange_strong(p, P, 1, memory_order_seq_cst, memory_order_seq_cst, memory_scope_work_group);
Index: clang/test/Sema/atomic-ops.c
===================================================================
--- clang/test/Sema/atomic-ops.c
+++ clang/test/Sema/atomic-ops.c
@@ -205,8 +205,8 @@
   __atomic_fetch_sub(P, 3, memory_order_seq_cst);
   __atomic_fetch_sub(D, 3, memory_order_seq_cst);
   __atomic_fetch_sub(s1, 3, memory_order_seq_cst); // expected-error {{must be a pointer to integer, pointer or supported floating point type}}
-  __atomic_fetch_min(D, 3, memory_order_seq_cst); // expected-error {{must be a pointer to integer}}
-  __atomic_fetch_max(P, 3, memory_order_seq_cst); // expected-error {{must be a pointer to integer}}
+  __atomic_fetch_min(D, 3, memory_order_seq_cst);
+  __atomic_fetch_max(P, 3, memory_order_seq_cst);
   __atomic_fetch_max(p, 3);                       // expected-error {{too few arguments to function call, expected 3, have 2}}
 
   __c11_atomic_fetch_and(i, 1, memory_order_seq_cst);
Index: clang/test/CodeGenCUDA/amdgpu-atomic-ops.cu
===================================================================
--- clang/test/CodeGenCUDA/amdgpu-atomic-ops.cu
+++ clang/test/CodeGenCUDA/amdgpu-atomic-ops.cu
@@ -1,29 +1,98 @@
-// RUN: %clang_cc1 %s -emit-llvm -o - -triple=amdgcn-amd-amdhsa \
+// RUN: %clang_cc1 -x hip %s -emit-llvm -o - -triple=amdgcn-amd-amdhsa \
 // RUN:   -fcuda-is-device -target-cpu gfx906 -fnative-half-type \
 // RUN:   -fnative-half-arguments-and-returns | FileCheck %s
 
+// RUN: %clang_cc1 -x hip %s -O3 -S -o - -triple=amdgcn-amd-amdhsa \
+// RUN:   -fcuda-is-device -target-cpu gfx1100 -fnative-half-type \
+// RUN:   -fnative-half-arguments-and-returns | FileCheck -check-prefix=SAFE %s
+
+// RUN: %clang_cc1 -x hip %s -O3 -S -o - -triple=amdgcn-amd-amdhsa \
+// RUN:   -fcuda-is-device -target-cpu gfx940 -fnative-half-type \
+// RUN:   -fnative-half-arguments-and-returns -munsafe-fp-atomics \
+// RUN:   | FileCheck -check-prefix=UNSAFE %s
+
 // REQUIRES: amdgpu-registered-target
 
 #include "Inputs/cuda.h"
 #include <stdatomic.h>
 
-__device__ float ffp1(float *p) {
+__global__ void ffp1(float *p) {
   // CHECK-LABEL: @_Z4ffp1Pf
   // CHECK: atomicrmw fadd ptr {{.*}} monotonic
-  return __atomic_fetch_add(p, 1.0f, memory_order_relaxed);
+  // CHECK: atomicrmw fmax ptr {{.*}} monotonic
+  // CHECK: atomicrmw fmin ptr {{.*}} monotonic
+  // CHECK: atomicrmw fmax ptr {{.*}} syncscope("agent-one-as") monotonic
+  // CHECK: atomicrmw fmin ptr {{.*}} syncscope("workgroup-one-as") monotonic
+  // SAFE: _Z4ffp1Pf
+  // SAFE: global_atomic_cmpswap
+  // SAFE: global_atomic_cmpswap
+  // SAFE: global_atomic_cmpswap
+  // SAFE: global_atomic_cmpswap
+  // SAFE: global_atomic_cmpswap
+  // UNSAFE: _Z4ffp1Pf
+  // UNSAFE: global_atomic_add_f32
+  // UNSAFE: global_atomic_cmpswap
+  // UNSAFE: global_atomic_cmpswap
+  // UNSAFE: global_atomic_cmpswap
+  // UNSAFE: global_atomic_cmpswap
+  __atomic_fetch_add(p, 1.0f, memory_order_relaxed);
+  __atomic_fetch_max(p, 1.0f, memory_order_relaxed);
+  __atomic_fetch_min(p, 1.0f, memory_order_relaxed);
+  __hip_atomic_fetch_max(p, 1.0f, memory_order_relaxed, __HIP_MEMORY_SCOPE_AGENT);
+  __hip_atomic_fetch_min(p, 1.0f, memory_order_relaxed, __HIP_MEMORY_SCOPE_WORKGROUP);
 }
 
-__device__ double ffp2(double *p) {
+__global__ void ffp2(double *p) {
   // CHECK-LABEL: @_Z4ffp2Pd
   // CHECK: atomicrmw fsub ptr {{.*}} monotonic
-  return __atomic_fetch_sub(p, 1.0, memory_order_relaxed);
+  // CHECK: atomicrmw fmax ptr {{.*}} monotonic
+  // CHECK: atomicrmw fmin ptr {{.*}} monotonic
+  // CHECK: atomicrmw fmax ptr {{.*}} syncscope("agent-one-as") monotonic
+  // CHECK: atomicrmw fmin ptr {{.*}} syncscope("workgroup-one-as") monotonic
+  // SAFE: _Z4ffp2Pd
+  // SAFE: global_atomic_cmpswap_b64
+  // SAFE: global_atomic_cmpswap_b64
+  // SAFE: global_atomic_cmpswap_b64
+  // SAFE: global_atomic_cmpswap_b64
+  // SAFE: global_atomic_cmpswap_b64
+  // UNSAFE: _Z4ffp2Pd
+  // UNSAFE: global_atomic_cmpswap_x2
+  // UNSAFE: global_atomic_cmpswap_x2
+  // UNSAFE: global_atomic_cmpswap_x2
+  // UNSAFE: global_atomic_cmpswap_x2
+  // UNSAFE: global_atomic_cmpswap_x2
+  __atomic_fetch_sub(p, 1.0, memory_order_relaxed);
+  __atomic_fetch_max(p, 1.0, memory_order_relaxed);
+  __atomic_fetch_min(p, 1.0, memory_order_relaxed);
+  __hip_atomic_fetch_max(p, 1.0f, memory_order_relaxed, __HIP_MEMORY_SCOPE_AGENT);
+  __hip_atomic_fetch_min(p, 1.0f, memory_order_relaxed, __HIP_MEMORY_SCOPE_WORKGROUP);
 }
 
 // long double is the same as double for amdgcn.
-__device__ long double ffp3(long double *p) {
+__global__ void ffp3(long double *p) {
   // CHECK-LABEL: @_Z4ffp3Pe
   // CHECK: atomicrmw fsub ptr {{.*}} monotonic
-  return __atomic_fetch_sub(p, 1.0L, memory_order_relaxed);
+  // CHECK: atomicrmw fmax ptr {{.*}} monotonic
+  // CHECK: atomicrmw fmin ptr {{.*}} monotonic
+  // CHECK: atomicrmw fmax ptr {{.*}} syncscope("agent-one-as") monotonic
+  // CHECK: atomicrmw fmin ptr {{.*}} syncscope("workgroup-one-as") monotonic
+  // SAFE: _Z4ffp3Pe
+  // SAFE: global_atomic_cmpswap_b64
+  // SAFE: global_atomic_cmpswap_b64
+  // SAFE: global_atomic_cmpswap_b64
+  // SAFE: global_atomic_cmpswap_b64
+  // SAFE: global_atomic_cmpswap_b64
+  // UNSAFE: _Z4ffp3Pe
+  // UNSAFE: global_atomic_cmpswap_x2
+  // UNSAFE: global_atomic_cmpswap_x2
+  // UNSAFE: global_atomic_cmpswap_x2
+  // UNSAFE: global_atomic_cmpswap_x2
+  // UNSAFE: global_atomic_cmpswap_x2
+  __atomic_fetch_sub(p, 1.0L, memory_order_relaxed);
+  __atomic_fetch_max(p, 1.0L, memory_order_relaxed);
+  __atomic_fetch_min(p, 1.0L, memory_order_relaxed);
+  __hip_atomic_fetch_max(p, 1.0f, memory_order_relaxed, __HIP_MEMORY_SCOPE_AGENT);
+  __hip_atomic_fetch_min(p, 1.0f, memory_order_relaxed, __HIP_MEMORY_SCOPE_WORKGROUP);
 }
 
 __device__ double ffp4(double *p, float f) {
@@ -39,3 +108,29 @@
   // CHECK: atomicrmw fsub ptr {{.*}} monotonic
   return __atomic_fetch_sub(p, i, memory_order_relaxed);
 }
+
+__global__ void ffp6(_Float16 *p) {
+  // CHECK-LABEL: @_Z4ffp6PDF16
+  // CHECK: atomicrmw fadd ptr {{.*}} monotonic
+  // CHECK: atomicrmw fmax ptr {{.*}} monotonic
+  // CHECK: atomicrmw fmin ptr {{.*}} monotonic
+  // CHECK: atomicrmw fmax ptr {{.*}} syncscope("agent-one-as") monotonic
+  // CHECK: atomicrmw fmin ptr {{.*}} syncscope("workgroup-one-as") monotonic
+  // SAFE: _Z4ffp6PDF16
+  // SAFE: global_atomic_cmpswap
+  // SAFE: global_atomic_cmpswap
+  // SAFE: global_atomic_cmpswap
+  // SAFE: global_atomic_cmpswap
+  // SAFE: global_atomic_cmpswap
+  // UNSAFE: _Z4ffp6PDF16
+  // UNSAFE: global_atomic_cmpswap
+  // UNSAFE: global_atomic_cmpswap
+  // UNSAFE: global_atomic_cmpswap
+  // UNSAFE: global_atomic_cmpswap
+  // UNSAFE: global_atomic_cmpswap
+  __atomic_fetch_add(p, 1.0, memory_order_relaxed);
+  __atomic_fetch_max(p, 1.0, memory_order_relaxed);
+  __atomic_fetch_min(p, 1.0, memory_order_relaxed);
+  __hip_atomic_fetch_max(p, 1.0f, memory_order_relaxed, __HIP_MEMORY_SCOPE_AGENT);
+  __hip_atomic_fetch_min(p, 1.0f, memory_order_relaxed, __HIP_MEMORY_SCOPE_WORKGROUP);
+}
Index: clang/lib/Sema/SemaChecking.cpp
===================================================================
--- clang/lib/Sema/SemaChecking.cpp
+++ clang/lib/Sema/SemaChecking.cpp
@@ -6377,7 +6377,7 @@
              Op == AtomicExpr::AO__atomic_store_n ||
              Op == AtomicExpr::AO__atomic_exchange_n ||
              Op == AtomicExpr::AO__atomic_compare_exchange_n;
-  bool IsAddSub = false;
+  bool AllowFP = false;
 
   switch (Op) {
   case AtomicExpr::AO__c11_atomic_init:
@@ -6403,18 +6403,26 @@
   case AtomicExpr::AO__atomic_store_n:
     Form = Copy;
     break;
-  case AtomicExpr::AO__hip_atomic_fetch_add:
-  case AtomicExpr::AO__hip_atomic_fetch_min:
-  case AtomicExpr::AO__hip_atomic_fetch_max:
-  case AtomicExpr::AO__c11_atomic_fetch_add:
-  case AtomicExpr::AO__c11_atomic_fetch_sub:
-  case AtomicExpr::AO__opencl_atomic_fetch_add:
-  case AtomicExpr::AO__opencl_atomic_fetch_sub:
   case AtomicExpr::AO__atomic_fetch_add:
+  case AtomicExpr::AO__atomic_fetch_max:
+  case AtomicExpr::AO__atomic_fetch_min:
   case AtomicExpr::AO__atomic_fetch_sub:
   case AtomicExpr::AO__atomic_add_fetch:
+  case AtomicExpr::AO__atomic_max_fetch:
+  case AtomicExpr::AO__atomic_min_fetch:
   case AtomicExpr::AO__atomic_sub_fetch:
-    IsAddSub = true;
+  case AtomicExpr::AO__c11_atomic_fetch_add:
+  case AtomicExpr::AO__c11_atomic_fetch_max:
+  case AtomicExpr::AO__c11_atomic_fetch_min:
+  case AtomicExpr::AO__c11_atomic_fetch_sub:
+  case AtomicExpr::AO__opencl_atomic_fetch_add:
+  case AtomicExpr::AO__opencl_atomic_fetch_max:
+  case AtomicExpr::AO__opencl_atomic_fetch_min:
+  case AtomicExpr::AO__opencl_atomic_fetch_sub:
+  case AtomicExpr::AO__hip_atomic_fetch_add:
+  case AtomicExpr::AO__hip_atomic_fetch_max:
+  case AtomicExpr::AO__hip_atomic_fetch_min:
+    AllowFP = true;
     Form = Arithmetic;
     break;
   case AtomicExpr::AO__c11_atomic_fetch_and:
@@ -6437,16 +6445,6 @@
   case AtomicExpr::AO__atomic_nand_fetch:
     Form = Arithmetic;
     break;
-  case AtomicExpr::AO__c11_atomic_fetch_min:
-  case AtomicExpr::AO__c11_atomic_fetch_max:
-  case AtomicExpr::AO__opencl_atomic_fetch_min:
-  case AtomicExpr::AO__opencl_atomic_fetch_max:
-  case AtomicExpr::AO__atomic_min_fetch:
-  case AtomicExpr::AO__atomic_max_fetch:
-  case AtomicExpr::AO__atomic_fetch_min:
-  case AtomicExpr::AO__atomic_fetch_max:
-    Form = Arithmetic;
-    break;
 
   case AtomicExpr::AO__c11_atomic_exchange:
   case AtomicExpr::AO__hip_atomic_exchange:
@@ -6548,12 +6546,12 @@
         return false;
       return true;
     };
-    if (IsAddSub && !IsAllowedValueType(ValType)) {
+    if (AllowFP && !IsAllowedValueType(ValType)) {
       Diag(ExprRange.getBegin(), diag::err_atomic_op_needs_atomic_int_ptr_or_fp)
           << IsC11 << Ptr->getType() << Ptr->getSourceRange();
       return ExprError();
     }
-    if (!IsAddSub && !ValType->isIntegerType()) {
+    if (!AllowFP && !ValType->isIntegerType()) {
       Diag(ExprRange.getBegin(), diag::err_atomic_op_needs_atomic_int)
           << IsC11 << Ptr->getType() << Ptr->getSourceRange();
       return ExprError();
Index: clang/lib/CodeGen/CGAtomic.cpp
===================================================================
--- clang/lib/CodeGen/CGAtomic.cpp
+++ clang/lib/CodeGen/CGAtomic.cpp
@@ -636,8 +636,11 @@
   case AtomicExpr::AO__hip_atomic_fetch_min:
   case AtomicExpr::AO__opencl_atomic_fetch_min:
   case AtomicExpr::AO__atomic_fetch_min:
-    Op = E->getValueType()->isSignedIntegerType() ? llvm::AtomicRMWInst::Min
-                                                  : llvm::AtomicRMWInst::UMin;
+    Op = E->getValueType()->isFloatingType()
+             ? llvm::AtomicRMWInst::FMin
+             : (E->getValueType()->isSignedIntegerType()
+                    ? llvm::AtomicRMWInst::Min
+                    : llvm::AtomicRMWInst::UMin);
     break;
 
   case AtomicExpr::AO__atomic_max_fetch:
@@ -647,8 +650,11 @@
   case AtomicExpr::AO__hip_atomic_fetch_max:
   case AtomicExpr::AO__opencl_atomic_fetch_max:
   case AtomicExpr::AO__atomic_fetch_max:
-    Op = E->getValueType()->isSignedIntegerType() ? llvm::AtomicRMWInst::Max
-                                                  : llvm::AtomicRMWInst::UMax;
+    Op = E->getValueType()->isFloatingType()
+             ? llvm::AtomicRMWInst::FMax
+             : (E->getValueType()->isSignedIntegerType()
+                    ? llvm::AtomicRMWInst::Max
+                    : llvm::AtomicRMWInst::UMax);
     break;
 
   case AtomicExpr::AO__atomic_and_fetch:
@@ -916,9 +922,19 @@
     }
     [[fallthrough]];
   case AtomicExpr::AO__atomic_fetch_add:
+  case AtomicExpr::AO__atomic_fetch_max:
+  case AtomicExpr::AO__atomic_fetch_min:
   case AtomicExpr::AO__atomic_fetch_sub:
   case AtomicExpr::AO__atomic_add_fetch:
+  case AtomicExpr::AO__atomic_max_fetch:
+  case AtomicExpr::AO__atomic_min_fetch:
   case AtomicExpr::AO__atomic_sub_fetch:
+  case AtomicExpr::AO__c11_atomic_fetch_max:
+  case AtomicExpr::AO__c11_atomic_fetch_min:
+  case AtomicExpr::AO__opencl_atomic_fetch_max:
+  case AtomicExpr::AO__opencl_atomic_fetch_min:
+  case AtomicExpr::AO__hip_atomic_fetch_max:
+  case AtomicExpr::AO__hip_atomic_fetch_min:
     ShouldCastToIntPtrTy = !MemTy->isFloatingType();
     [[fallthrough]];
 
@@ -934,13 +950,9 @@
   case AtomicExpr::AO__c11_atomic_fetch_or:
   case AtomicExpr::AO__c11_atomic_fetch_xor:
   case AtomicExpr::AO__c11_atomic_fetch_nand:
-  case AtomicExpr::AO__c11_atomic_fetch_max:
-  case AtomicExpr::AO__c11_atomic_fetch_min:
   case AtomicExpr::AO__opencl_atomic_fetch_and:
   case AtomicExpr::AO__opencl_atomic_fetch_or:
   case AtomicExpr::AO__opencl_atomic_fetch_xor:
-  case AtomicExpr::AO__opencl_atomic_fetch_min:
-  case AtomicExpr::AO__opencl_atomic_fetch_max:
   case AtomicExpr::AO__atomic_fetch_and:
   case AtomicExpr::AO__hip_atomic_fetch_and:
   case AtomicExpr::AO__atomic_fetch_or:
@@ -952,12 +964,6 @@
   case AtomicExpr::AO__atomic_or_fetch:
   case AtomicExpr::AO__atomic_xor_fetch:
   case AtomicExpr::AO__atomic_nand_fetch:
-  case AtomicExpr::AO__atomic_max_fetch:
-  case AtomicExpr::AO__atomic_min_fetch:
-  case AtomicExpr::AO__atomic_fetch_max:
-  case AtomicExpr::AO__hip_atomic_fetch_max:
-  case AtomicExpr::AO__atomic_fetch_min:
-  case AtomicExpr::AO__hip_atomic_fetch_min:
     Val1 = EmitValToTemp(*this, E->getVal1());
     break;
   }
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to