gandhi21299 updated this revision to Diff 389834.
gandhi21299 added a comment.
- Applied clang-format.
Repository:
rG LLVM Github Monorepo
CHANGES SINCE LAST ACTION
https://reviews.llvm.org/D114553/new/
https://reviews.llvm.org/D114553
Files:
clang/include/clang/AST/Expr.h
clang/include/clang/Basic/Builtins.def
clang/lib/AST/Expr.cpp
clang/lib/AST/StmtPrinter.cpp
clang/lib/CodeGen/CGAtomic.cpp
clang/lib/Sema/SemaChecking.cpp
clang/test/CodeGenCUDA/atomic-ops.cu
clang/test/SemaCUDA/atomic-ops.cu
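For context before the diff itself: this revision adds __hip_atomic_load, __hip_atomic_store and __hip_atomic_compare_exchange_weak alongside the existing __hip_atomic_* builtins. Below is a minimal usage sketch under HIP device compilation, mirroring the argument order exercised by the new tests; the function and variable names are illustrative only and not part of the patch.

  __device__ int hip_atomic_sketch(int *ptr, int *expected, int val, int desired) {
    // Relaxed atomic load and store at agent (device) scope.
    int old = __hip_atomic_load(ptr, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
    __hip_atomic_store(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
    // The weak compare-exchange takes the same six operands as the existing
    // strong variant: pointer, expected, desired, success order, failure
    // order, memory scope.
    bool ok = __hip_atomic_compare_exchange_weak(
        ptr, expected, desired, __ATOMIC_RELAXED, __ATOMIC_RELAXED,
        __HIP_MEMORY_SCOPE_AGENT);
    return ok ? old : desired;
  }
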
Index: clang/test/SemaCUDA/atomic-ops.cu
===================================================================
--- /dev/null
+++ clang/test/SemaCUDA/atomic-ops.cu
@@ -0,0 +1,73 @@
+// RUN: %clang_cc1 -x hip -std=c++11 -triple amdgcn -fcuda-is-device -verify -fsyntax-only %s
+
+#include "Inputs/cuda.h"
+
+__device__ int test_hip_atomic_load(int *pi32, unsigned int *pu32, long long *pll, unsigned long long *pull, float *fp, double *dbl) {
+ int val = __hip_atomic_load(0); // expected-error {{too few arguments to function call, expected 3, have 1}}
+ val = __hip_atomic_load(0, 0, 0, 0); // expected-error {{too many arguments to function call, expected 3, have 4}}
+ val = __hip_atomic_load(0, 0, 0); // expected-error {{address argument to atomic builtin must be a pointer ('int' invalid)}}
+ val = __hip_atomic_load(pi32, 0, 0); // expected-error {{synchronization scope argument to atomic operation is invalid}}
+ val = __hip_atomic_load(pi32, 0, __HIP_MEMORY_SCOPE_SINGLETHREAD);
+ val = __hip_atomic_load(pi32, 0, __HIP_MEMORY_SCOPE_WAVEFRONT);
+ val = __hip_atomic_load(pi32, 0, __HIP_MEMORY_SCOPE_WORKGROUP);
+ val = __hip_atomic_load(pi32, 0, __HIP_MEMORY_SCOPE_AGENT);
+ val = __hip_atomic_load(pi32, 0, __HIP_MEMORY_SCOPE_SYSTEM);
+ val = __hip_atomic_load(pi32, 0, 6); // expected-error {{synchronization scope argument to atomic operation is invalid}}
+ val = __hip_atomic_load(pi32, 0, __HIP_MEMORY_SCOPE_SINGLETHREAD);
+ val = __hip_atomic_load(pi32, 1, __HIP_MEMORY_SCOPE_SINGLETHREAD);
+ val = __hip_atomic_load(pi32, 2, __HIP_MEMORY_SCOPE_SINGLETHREAD);
+ val = __hip_atomic_load(pi32, 3, __HIP_MEMORY_SCOPE_SINGLETHREAD); // expected-warning {{memory order argument to atomic operation is invalid}}
+ val = __hip_atomic_load(pu32, 2, __HIP_MEMORY_SCOPE_SINGLETHREAD);
+ val = __hip_atomic_load(pll, 2, __HIP_MEMORY_SCOPE_SINGLETHREAD);
+ val = __hip_atomic_load(pull, 2, __HIP_MEMORY_SCOPE_SINGLETHREAD);
+ val = __hip_atomic_load(fp, 2, __HIP_MEMORY_SCOPE_SINGLETHREAD);
+ val = __hip_atomic_load(dbl, 2, __HIP_MEMORY_SCOPE_SINGLETHREAD);
+ return val;
+}
+
+__device__ int test_hip_atomic_store(int *pi32, unsigned int *pu32, long long *pll, unsigned long long *pull, float *fp, double *dbl,
+ int i32, unsigned int u32, long long i64, unsigned long long u64, float f32, double f64) {
+ __hip_atomic_store(0); // expected-error {{too few arguments to function call, expected 4, have 1}}
+ __hip_atomic_store(0, 0, 0, 0, 0); // expected-error {{too many arguments to function call, expected 4, have 5}}
+ __hip_atomic_store(0, 0, 0, 0); // expected-error {{address argument to atomic builtin must be a pointer ('int' invalid)}}
+ __hip_atomic_store(pi32, 0, 0, 0); // expected-error {{synchronization scope argument to atomic operation is invalid}}
+ __hip_atomic_store(pi32, 0, 0, __HIP_MEMORY_SCOPE_SINGLETHREAD);
+ __hip_atomic_store(pi32, 0, 0, __HIP_MEMORY_SCOPE_WAVEFRONT);
+ __hip_atomic_store(pi32, 0, 0, __HIP_MEMORY_SCOPE_WORKGROUP);
+ __hip_atomic_store(pi32, 0, 0, __HIP_MEMORY_SCOPE_AGENT);
+ __hip_atomic_store(pi32, 0, 0, __HIP_MEMORY_SCOPE_SYSTEM);
+ __hip_atomic_store(pi32, 0, 0, 6); // expected-error {{synchronization scope argument to atomic operation is invalid}}
+ __hip_atomic_store(pi32, 0, 0, __HIP_MEMORY_SCOPE_SINGLETHREAD);
+ __hip_atomic_store(pi32, i32, 1, __HIP_MEMORY_SCOPE_SINGLETHREAD); // expected-warning {{memory order argument to atomic operation is invalid}}
+ __hip_atomic_store(pi32, i32, 0, __HIP_MEMORY_SCOPE_SINGLETHREAD);
+ __hip_atomic_store(pi32, i32, 0, __HIP_MEMORY_SCOPE_SINGLETHREAD);
+ __hip_atomic_store(pu32, u32, 0, __HIP_MEMORY_SCOPE_SINGLETHREAD);
+ __hip_atomic_store(pll, i64, 0, __HIP_MEMORY_SCOPE_SINGLETHREAD);
+ __hip_atomic_store(pull, u64, 0, __HIP_MEMORY_SCOPE_SINGLETHREAD);
+ __hip_atomic_store(fp, f32, 0, __HIP_MEMORY_SCOPE_SINGLETHREAD);
+ __hip_atomic_store(dbl, f64, 0, __HIP_MEMORY_SCOPE_SINGLETHREAD);
+ __hip_atomic_store(pi32, u32, 0, __HIP_MEMORY_SCOPE_SINGLETHREAD);
+ __hip_atomic_store(pi32, i64, 0, __HIP_MEMORY_SCOPE_SINGLETHREAD);
+ __hip_atomic_store(pi32, u64, 0, __HIP_MEMORY_SCOPE_SINGLETHREAD);
+ __hip_atomic_store(pll, i32, 0, __HIP_MEMORY_SCOPE_SINGLETHREAD);
+ __hip_atomic_store(fp, i32, 0, __HIP_MEMORY_SCOPE_SINGLETHREAD);
+ __hip_atomic_store(fp, i64, 0, __HIP_MEMORY_SCOPE_SINGLETHREAD);
+ __hip_atomic_store(dbl, i64, 0, __HIP_MEMORY_SCOPE_SINGLETHREAD);
+ __hip_atomic_store(dbl, i32, 0, __HIP_MEMORY_SCOPE_SINGLETHREAD);
+ return 0;
+}
+
+__device__ bool test_hip_atomic_cmpxchg_weak(int *ptr, int *ptr2, int val, int desired) {
+ //__hip_atomic_compare_exchange_weak(ptr, ptr2, val, desired, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
+ bool flag = __hip_atomic_compare_exchange_weak(0); // expected-error {{too few arguments to function call, expected 6, have 1}}
+ flag = __hip_atomic_compare_exchange_weak(0, 0, 0, 0, 0, 0, 0); // expected-error {{too many arguments to function call, expected 6, have 7}}
+ flag = __hip_atomic_compare_exchange_weak(0, 0, 0, 0, 0, 0); // expected-error {{address argument to atomic builtin must be a pointer ('int' invalid)}}
+ flag = __hip_atomic_compare_exchange_weak(ptr, 0, 0, 0, 0, 0); // expected-error {{synchronization scope argument to atomic operation is invalid}}, expected-warning {{null passed to a callee that requires a non-null argument}}
+ flag = __hip_atomic_compare_exchange_weak(ptr, 0, 0, 0, 0, __HIP_MEMORY_SCOPE_SYSTEM); // expected-warning {{null passed to a callee that requires a non-null argument}}
+ flag = __hip_atomic_compare_exchange_weak(ptr, ptr2, val, desired, 0, __HIP_MEMORY_SCOPE_SYSTEM);
+ flag = __hip_atomic_compare_exchange_weak(ptr, ptr2, val, desired, 1, __HIP_MEMORY_SCOPE_SINGLETHREAD);
+ flag = __hip_atomic_compare_exchange_weak(ptr, ptr2, val, desired, 1, __HIP_MEMORY_SCOPE_WAVEFRONT);
+ flag = __hip_atomic_compare_exchange_weak(ptr, ptr2, val, desired, 1, __HIP_MEMORY_SCOPE_WORKGROUP);
+ flag = __hip_atomic_compare_exchange_weak(ptr, ptr2, val, desired, 1, __HIP_MEMORY_SCOPE_AGENT);
+ return flag;
+}
Index: clang/test/CodeGenCUDA/atomic-ops.cu
===================================================================
--- clang/test/CodeGenCUDA/atomic-ops.cu
+++ clang/test/CodeGenCUDA/atomic-ops.cu
@@ -1,7 +1,7 @@
// RUN: %clang_cc1 -x hip -std=c++11 -triple amdgcn -fcuda-is-device -emit-llvm %s -o - | FileCheck %s
#include "Inputs/cuda.h"
-// CHECK-LABEL: @_Z24atomic32_op_singlethreadPiii
+// CHECK-LABEL: @_Z24atomic32_op_singlethreadPiS_ii
// CHECK: cmpxchg i32* {{%[0-9]+}}, i32 {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("singlethread-one-as")
// CHECK: atomicrmw xchg i32* {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("singlethread-one-as")
// CHECK: atomicrmw add i32* {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("singlethread-one-as")
@@ -10,7 +10,14 @@
// CHECK: atomicrmw xor i32* {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("singlethread-one-as")
// CHECK: atomicrmw min i32* {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("singlethread-one-as")
// CHECK: atomicrmw max i32* {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("singlethread-one-as")
-__device__ int atomic32_op_singlethread(int *ptr, int val, int desired) {
+// CHECK: load atomic i32, i32* {{%[0-9]+}} syncscope("singlethread-one-as") monotonic, align 4
+// CHECK: store i32 {{%[0-9]+}}, i32* %{{.*}}, align 4
+// CHECK: cmpxchg weak i32* {{%[0-9]+}}, i32 {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("singlethread-one-as") monotonic monotonic, align 4
+// CHECK: cmpxchg weak i32* {{%[0-9]+}}, i32 {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("singlethread-one-as") acquire monotonic, align 4
+// CHECK: cmpxchg weak i32* {{%[0-9]+}}, i32 {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("singlethread-one-as") release monotonic, align 4
+// CHECK: cmpxchg weak i32* {{%[0-9]+}}, i32 {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("singlethread-one-as") acq_rel monotonic, align 4
+// CHECK: cmpxchg weak i32* {{%[0-9]+}}, i32 {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("singlethread") seq_cst monotonic, align 4
+__device__ int atomic32_op_singlethread(int *ptr, int *ptr2, int val, int desired) {
bool flag = __hip_atomic_compare_exchange_strong(ptr, &val, desired, __ATOMIC_RELAXED, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SINGLETHREAD);
val = __hip_atomic_exchange(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SINGLETHREAD);
val = __hip_atomic_fetch_add(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SINGLETHREAD);
@@ -19,6 +26,9 @@
val = __hip_atomic_fetch_xor(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SINGLETHREAD);
val = __hip_atomic_fetch_min(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SINGLETHREAD);
val = __hip_atomic_fetch_max(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SINGLETHREAD);
+ val = __hip_atomic_load(ptr, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SINGLETHREAD);
+ __hip_atomic_store(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SINGLETHREAD);
+ flag = __hip_atomic_compare_exchange_weak(ptr, ptr2, val, desired, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SINGLETHREAD);
return flag ? val : desired;
}
@@ -31,7 +41,7 @@
return val;
}
-// CHECK-LABEL: @_Z21atomic32_op_wavefrontPiii
+// CHECK-LABEL: @_Z21atomic32_op_wavefrontPiS_ii
// CHECK: cmpxchg i32* {{%[0-9]+}}, i32 {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("wavefront-one-as")
// CHECK: atomicrmw xchg i32* {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("wavefront-one-as")
// CHECK: atomicrmw add i32* {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("wavefront-one-as")
@@ -40,7 +50,14 @@
// CHECK: atomicrmw xor i32* {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("wavefront-one-as")
// CHECK: atomicrmw min i32* {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("wavefront-one-as")
// CHECK: atomicrmw max i32* {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("wavefront-one-as")
-__device__ int atomic32_op_wavefront(int *ptr, int val, int desired) {
+// CHECK: load atomic i32, i32* {{%[0-9]+}} syncscope("wavefront-one-as") monotonic, align 4
+// CHECK: store i32 %{{.*}}, i32* %{{.*}}, align 4
+// CHECK: cmpxchg weak i32* {{%[0-9]+}}, i32 {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("wavefront-one-as") monotonic monotonic, align 4
+// CHECK: cmpxchg weak i32* {{%[0-9]+}}, i32 {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("wavefront-one-as") acquire monotonic, align 4
+// CHECK: cmpxchg weak i32* {{%[0-9]+}}, i32 {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("wavefront-one-as") release monotonic, align 4
+// CHECK: cmpxchg weak i32* {{%[0-9]+}}, i32 {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("wavefront-one-as") acq_rel monotonic, align 4
+// CHECK: cmpxchg weak i32* {{%[0-9]+}}, i32 {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("wavefront") seq_cst monotonic, align 4
+__device__ int atomic32_op_wavefront(int *ptr, int *ptr2, int val, int desired) {
bool flag = __hip_atomic_compare_exchange_strong(ptr, &val, desired, __ATOMIC_RELAXED, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WAVEFRONT);
val = __hip_atomic_exchange(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WAVEFRONT);
val = __hip_atomic_fetch_add(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WAVEFRONT);
@@ -49,6 +66,9 @@
val = __hip_atomic_fetch_xor(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WAVEFRONT);
val = __hip_atomic_fetch_min(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WAVEFRONT);
val = __hip_atomic_fetch_max(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WAVEFRONT);
+ val = __hip_atomic_load(ptr, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WAVEFRONT);
+ __hip_atomic_store(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WAVEFRONT);
+ flag = __hip_atomic_compare_exchange_weak(ptr, ptr2, val, desired, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WAVEFRONT);
return flag ? val : desired;
}
@@ -61,7 +81,7 @@
return val;
}
-// CHECK-LABEL: @_Z21atomic32_op_workgroupPiii
+// CHECK-LABEL: @_Z21atomic32_op_workgroupPiS_ii
// CHECK: cmpxchg i32* {{%[0-9]+}}, i32 {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("workgroup-one-as")
// CHECK: atomicrmw xchg i32* {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("workgroup-one-as")
// CHECK: atomicrmw add i32* {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("workgroup-one-as")
@@ -70,7 +90,13 @@
// CHECK: atomicrmw xor i32* {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("workgroup-one-as")
// CHECK: atomicrmw min i32* {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("workgroup-one-as")
// CHECK: atomicrmw max i32* {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("workgroup-one-as")
-__device__ int atomic32_op_workgroup(int *ptr, int val, int desired) {
+// CHECK: store i32 %{{.*}}, i32* %{{.*}}, align 4
+// CHECK: cmpxchg weak i32* {{%[0-9]+}}, i32 {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("workgroup-one-as") monotonic monotonic, align 4
+// CHECK: cmpxchg weak i32* {{%[0-9]+}}, i32 {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("workgroup-one-as") acquire monotonic, align 4
+// CHECK: cmpxchg weak i32* {{%[0-9]+}}, i32 {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("workgroup-one-as") release monotonic, align 4
+// CHECK: cmpxchg weak i32* {{%[0-9]+}}, i32 {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("workgroup-one-as") acq_rel monotonic, align 4
+// CHECK: cmpxchg weak i32* {{%[0-9]+}}, i32 {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("workgroup") seq_cst monotonic, align 4
+__device__ int atomic32_op_workgroup(int *ptr, int *ptr2, int val, int desired) {
bool flag = __hip_atomic_compare_exchange_strong(ptr, &val, desired, __ATOMIC_RELAXED, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WORKGROUP);
val = __hip_atomic_exchange(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WORKGROUP);
val = __hip_atomic_fetch_add(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WORKGROUP);
@@ -79,6 +105,8 @@
val = __hip_atomic_fetch_xor(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WORKGROUP);
val = __hip_atomic_fetch_min(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WORKGROUP);
val = __hip_atomic_fetch_max(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WORKGROUP);
+  __hip_atomic_store(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WORKGROUP);
+ flag = __hip_atomic_compare_exchange_weak(ptr, ptr2, val, desired, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WORKGROUP);
return flag ? val : desired;
}
@@ -91,7 +119,7 @@
return val;
}
-// CHECK-LABEL: @_Z17atomic32_op_agentPiii
+// CHECK-LABEL: @_Z17atomic32_op_agentPiS_ii
// CHECK: cmpxchg i32* {{%[0-9]+}}, i32 {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("agent-one-as")
// CHECK: atomicrmw xchg i32* {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("agent-one-as")
// CHECK: atomicrmw add i32* {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("agent-one-as")
@@ -100,7 +128,13 @@
// CHECK: atomicrmw xor i32* {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("agent-one-as")
// CHECK: atomicrmw min i32* {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("agent-one-as")
// CHECK: atomicrmw max i32* {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("agent-one-as")
-__device__ int atomic32_op_agent(int *ptr, int val, int desired) {
+// CHECK: store i32 %{{.*}}, i32* %{{.*}}, align 4
+// CHECK: cmpxchg weak i32* {{%[0-9]+}}, i32 {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("agent-one-as") monotonic monotonic, align 4
+// CHECK: cmpxchg weak i32* {{%[0-9]+}}, i32 {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("agent-one-as") acquire monotonic, align 4
+// CHECK: cmpxchg weak i32* {{%[0-9]+}}, i32 {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("agent-one-as") release monotonic, align 4
+// CHECK: cmpxchg weak i32* {{%[0-9]+}}, i32 {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("agent-one-as") acq_rel monotonic, align 4
+// CHECK: cmpxchg weak i32* {{%[0-9]+}}, i32 {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("agent") seq_cst monotonic, align 4
+__device__ int atomic32_op_agent(int *ptr, int *ptr2, int val, int desired) {
bool flag = __hip_atomic_compare_exchange_strong(ptr, &val, desired, __ATOMIC_RELAXED, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
val = __hip_atomic_exchange(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
val = __hip_atomic_fetch_add(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
@@ -109,6 +143,8 @@
val = __hip_atomic_fetch_xor(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
val = __hip_atomic_fetch_min(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
val = __hip_atomic_fetch_max(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
+ __hip_atomic_store(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
+ flag = __hip_atomic_compare_exchange_weak(ptr, ptr2, val, desired, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
return flag ? val : desired;
}
@@ -121,7 +157,7 @@
return val;
}
-// CHECK-LABEL: @_Z18atomic32_op_systemPiii
+// CHECK-LABEL: @_Z18atomic32_op_systemPiS_ii
// CHECK: cmpxchg i32* {{%[0-9]+}}, i32 {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("one-as")
// CHECK: atomicrmw xchg i32* {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("one-as")
// CHECK: atomicrmw add i32* {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("one-as")
@@ -130,7 +166,14 @@
// CHECK: atomicrmw xor i32* {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("one-as")
// CHECK: atomicrmw min i32* {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("one-as")
// CHECK: atomicrmw max i32* {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("one-as")
-__device__ int atomic32_op_system(int *ptr, int val, int desired) {
+// CHECK: load i32, i32* %{{.*}}, align 4
+// CHECK: store i32 %{{.*}}, i32* %{{.*}}, align 4
+// CHECK: cmpxchg weak i32* {{%[0-9]+}}, i32 {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("one-as") monotonic monotonic, align 4
+// CHECK: cmpxchg weak i32* {{%[0-9]+}}, i32 {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("one-as") acquire monotonic, align 4
+// CHECK: cmpxchg weak i32* {{%[0-9]+}}, i32 {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("one-as") release monotonic, align 4
+// CHECK: cmpxchg weak i32* {{%[0-9]+}}, i32 {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("one-as") acq_rel monotonic, align 4
+// CHECK: cmpxchg weak i32* {{%[0-9]+}}, i32 {{%[0-9]+}}, i32 {{%[0-9]+}} seq_cst monotonic, align 4
+__device__ int atomic32_op_system(int *ptr, int *ptr2, int val, int desired) {
bool flag = __hip_atomic_compare_exchange_strong(ptr, &val, desired, __ATOMIC_RELAXED, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
val = __hip_atomic_exchange(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
val = __hip_atomic_fetch_add(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
@@ -139,6 +182,9 @@
val = __hip_atomic_fetch_xor(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
val = __hip_atomic_fetch_min(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
val = __hip_atomic_fetch_max(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
+ val = __hip_atomic_load(ptr, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
+ __hip_atomic_store(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
+ flag = __hip_atomic_compare_exchange_weak(ptr, ptr2, val, desired, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
return flag ? val : desired;
}
@@ -151,7 +197,7 @@
return val;
}
-// CHECK-LABEL: @_Z24atomic64_op_singlethreadPxxx
+// CHECK-LABEL: @_Z24atomic64_op_singlethreadPxS_xx
// CHECK: cmpxchg i64* {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("singlethread-one-as")
// CHECK: atomicrmw xchg i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("singlethread-one-as")
// CHECK: atomicrmw add i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("singlethread-one-as")
@@ -160,7 +206,13 @@
// CHECK: atomicrmw xor i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("singlethread-one-as")
// CHECK: atomicrmw min i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("singlethread-one-as")
// CHECK: atomicrmw max i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("singlethread-one-as")
-__device__ long long atomic64_op_singlethread(long long *ptr, long long val, long long desired) {
+// CHECK: store i64 %{{.*}}, i64* %{{.*}}, align 8
+// CHECK: cmpxchg weak i64* {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("singlethread-one-as") monotonic monotonic, align 8
+// CHECK: cmpxchg weak i64* {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("singlethread-one-as") acquire monotonic, align 8
+// CHECK: cmpxchg weak i64* {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("singlethread-one-as") release monotonic, align 8
+// CHECK: cmpxchg weak i64* {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("singlethread-one-as") acq_rel monotonic, align 8
+// CHECK: cmpxchg weak i64* {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("singlethread") seq_cst monotonic, align 8
+__device__ long long atomic64_op_singlethread(long long *ptr, long long *ptr2, long long val, long long desired) {
bool flag = __hip_atomic_compare_exchange_strong(ptr, &val, desired, __ATOMIC_RELAXED, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SINGLETHREAD);
val = __hip_atomic_exchange(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SINGLETHREAD);
val = __hip_atomic_fetch_add(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SINGLETHREAD);
@@ -169,19 +221,25 @@
val = __hip_atomic_fetch_xor(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SINGLETHREAD);
val = __hip_atomic_fetch_min(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SINGLETHREAD);
val = __hip_atomic_fetch_max(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SINGLETHREAD);
+ __hip_atomic_store(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SINGLETHREAD);
+ flag = __hip_atomic_compare_exchange_weak(ptr, ptr2, val, desired, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SINGLETHREAD);
return flag ? val : desired;
}
-// CHECK-LABEL: @_Z25atomicu64_op_singlethreadPyyy
+// CHECK-LABEL: @_Z25atomicu64_op_singlethreadPyS_yy
// CHECK: atomicrmw umin i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("singlethread-one-as")
// CHECK: atomicrmw umax i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("singlethread-one-as")
-__device__ unsigned long long atomicu64_op_singlethread(unsigned long long *ptr, unsigned long long val, unsigned long long desired) {
+// CHECK: load atomic i64, i64* %{{.*}} syncscope("singlethread-one-as") monotonic, align 8
+// CHECK: store i64 %{{.*}}, i64* %{{.*}}, align 8
+__device__ unsigned long long atomicu64_op_singlethread(unsigned long long *ptr, unsigned long long *ptr2, unsigned long long val, unsigned long long desired) {
val = __hip_atomic_fetch_min(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SINGLETHREAD);
val = __hip_atomic_fetch_max(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SINGLETHREAD);
+ val = __hip_atomic_load(ptr, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SINGLETHREAD);
+ __hip_atomic_store(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SINGLETHREAD);
return val;
}
-// CHECK-LABEL: @_Z21atomic64_op_wavefrontPxxx
+// CHECK-LABEL: @_Z21atomic64_op_wavefrontPxS_xx
// CHECK: cmpxchg i64* {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("wavefront-one-as")
// CHECK: atomicrmw xchg i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("wavefront-one-as")
// CHECK: atomicrmw add i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("wavefront-one-as")
@@ -190,7 +248,14 @@
// CHECK: atomicrmw xor i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("wavefront-one-as")
// CHECK: atomicrmw min i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("wavefront-one-as")
// CHECK: atomicrmw max i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("wavefront-one-as")
-__device__ long long atomic64_op_wavefront(long long *ptr, long long val, long long desired) {
+// CHECK: load atomic i64, i64* {{%[0-9]+}} syncscope("wavefront-one-as") monotonic, align 8
+// CHECK: store i64 %{{.*}}, i64* %{{.*}}, align 8
+// CHECK: cmpxchg weak i64* {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("wavefront-one-as") monotonic monotonic, align 8
+// CHECK: cmpxchg weak i64* {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("wavefront-one-as") acquire monotonic, align 8
+// CHECK: cmpxchg weak i64* {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("wavefront-one-as") release monotonic, align 8
+// CHECK: cmpxchg weak i64* {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("wavefront-one-as") acq_rel monotonic, align 8
+// CHECK: cmpxchg weak i64* {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("wavefront") seq_cst monotonic, align 8
+__device__ long long atomic64_op_wavefront(long long *ptr, long long *ptr2, long long val, long long desired) {
bool flag = __hip_atomic_compare_exchange_strong(ptr, &val, desired, __ATOMIC_RELAXED, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WAVEFRONT);
val = __hip_atomic_exchange(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WAVEFRONT);
val = __hip_atomic_fetch_add(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WAVEFRONT);
@@ -199,19 +264,26 @@
val = __hip_atomic_fetch_xor(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WAVEFRONT);
val = __hip_atomic_fetch_min(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WAVEFRONT);
val = __hip_atomic_fetch_max(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WAVEFRONT);
+ val = __hip_atomic_load(ptr, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WAVEFRONT);
+ __hip_atomic_store(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WAVEFRONT);
+ flag = __hip_atomic_compare_exchange_weak(ptr, ptr2, val, desired, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WAVEFRONT);
return flag ? val : desired;
}
-// CHECK-LABEL: @_Z22atomicu64_op_wavefrontPyyy
+// CHECK-LABEL: @_Z22atomicu64_op_wavefrontPyS_yy
// CHECK: atomicrmw umin i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("wavefront-one-as")
// CHECK: atomicrmw umax i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("wavefront-one-as")
-__device__ unsigned long long atomicu64_op_wavefront(unsigned long long *ptr, unsigned long long val, unsigned long long desired) {
+// CHECK: load atomic i64, i64* {{%[0-9]+}} syncscope("wavefront-one-as") monotonic, align 8
+// CHECK: store i64 %{{.*}}, i64* %{{.*}}, align 8
+__device__ unsigned long long atomicu64_op_wavefront(unsigned long long *ptr, unsigned long long *ptr2, unsigned long long val, unsigned long long desired) {
val = __hip_atomic_fetch_min(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WAVEFRONT);
val = __hip_atomic_fetch_max(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WAVEFRONT);
+ val = __hip_atomic_load(ptr, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WAVEFRONT);
+ __hip_atomic_store(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WAVEFRONT);
return val;
}
-// CHECK-LABEL: @_Z21atomic64_op_workgroupPxxx
+// CHECK-LABEL: @_Z21atomic64_op_workgroupPxS_xx
// CHECK: cmpxchg i64* {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("workgroup-one-as")
// CHECK: atomicrmw xchg i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("workgroup-one-as")
// CHECK: atomicrmw add i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("workgroup-one-as")
@@ -220,7 +292,13 @@
// CHECK: atomicrmw xor i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("workgroup-one-as")
// CHECK: atomicrmw min i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("workgroup-one-as")
// CHECK: atomicrmw max i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("workgroup-one-as")
-__device__ long long atomic64_op_workgroup(long long *ptr, long long val, long long desired) {
+// CHECK: store i64 %{{.*}}, i64* %{{.*}}, align 8
+// CHECK: cmpxchg weak i64* {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("workgroup-one-as") monotonic monotonic, align 8
+// CHECK: cmpxchg weak i64* {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("workgroup-one-as") acquire monotonic, align 8
+// CHECK: cmpxchg weak i64* {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("workgroup-one-as") release monotonic, align 8
+// CHECK: cmpxchg weak i64* {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("workgroup-one-as") acq_rel monotonic, align 8
+// CHECK: cmpxchg weak i64* {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("workgroup") seq_cst monotonic, align 8
+__device__ long long atomic64_op_workgroup(long long *ptr, long long *ptr2, long long val, long long desired) {
bool flag = __hip_atomic_compare_exchange_strong(ptr, &val, desired, __ATOMIC_RELAXED, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WORKGROUP);
val = __hip_atomic_exchange(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WORKGROUP);
val = __hip_atomic_fetch_add(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WORKGROUP);
@@ -229,19 +307,23 @@
val = __hip_atomic_fetch_xor(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WORKGROUP);
val = __hip_atomic_fetch_min(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WORKGROUP);
val = __hip_atomic_fetch_max(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WORKGROUP);
+ __hip_atomic_store(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WORKGROUP);
+ flag = __hip_atomic_compare_exchange_weak(ptr, ptr2, val, desired, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WORKGROUP);
return flag ? val : desired;
}
-// CHECK-LABEL: @_Z22atomicu64_op_workgroupPyyy
+// CHECK-LABEL: @_Z22atomicu64_op_workgroupPyS_yy
// CHECK: atomicrmw umin i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("workgroup-one-as")
// CHECK: atomicrmw umax i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("workgroup-one-as")
-__device__ unsigned long long atomicu64_op_workgroup(unsigned long long *ptr, unsigned long long val, unsigned long long desired) {
+// CHECK: store i64 %{{.*}}, i64* %{{.*}}, align 8
+__device__ unsigned long long atomicu64_op_workgroup(unsigned long long *ptr, unsigned long long *ptr2, unsigned long long val, unsigned long long desired) {
val = __hip_atomic_fetch_min(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WORKGROUP);
val = __hip_atomic_fetch_max(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WORKGROUP);
+ __hip_atomic_store(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WORKGROUP);
return val;
}
-// CHECK-LABEL: @_Z17atomic64_op_agentPxxx
+// CHECK-LABEL: @_Z17atomic64_op_agentPxS_xx
// CHECK: cmpxchg i64* {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("agent-one-as")
// CHECK: atomicrmw xchg i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("agent-one-as")
// CHECK: atomicrmw add i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("agent-one-as")
@@ -250,7 +332,13 @@
// CHECK: atomicrmw xor i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("agent-one-as")
// CHECK: atomicrmw min i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("agent-one-as")
// CHECK: atomicrmw max i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("agent-one-as")
-__device__ long long atomic64_op_agent(long long *ptr, long long val, long long desired) {
+// CHECK: store i64 %{{.*}}, i64* %{{.*}}, align 8
+// CHECK: cmpxchg weak i64* {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("agent-one-as") monotonic monotonic, align 8
+// CHECK: cmpxchg weak i64* {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("agent-one-as") acquire monotonic, align 8
+// CHECK: cmpxchg weak i64* {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("agent-one-as") release monotonic, align 8
+// CHECK: cmpxchg weak i64* {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("agent-one-as") acq_rel monotonic, align 8
+// CHECK: cmpxchg weak i64* {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("agent") seq_cst monotonic, align 8
+__device__ long long atomic64_op_agent(long long *ptr, long long *ptr2, long long val, long long desired) {
bool flag = __hip_atomic_compare_exchange_strong(ptr, &val, desired, __ATOMIC_RELAXED, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
val = __hip_atomic_exchange(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
val = __hip_atomic_fetch_add(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
@@ -259,19 +347,23 @@
val = __hip_atomic_fetch_xor(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
val = __hip_atomic_fetch_min(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
val = __hip_atomic_fetch_max(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
+ __hip_atomic_store(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
+ flag = __hip_atomic_compare_exchange_weak(ptr, ptr2, val, desired, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
return flag ? val : desired;
}
-// CHECK-LABEL: @_Z18atomicu64_op_agentPyyy
+// CHECK-LABEL: @_Z18atomicu64_op_agentPyS_yy
// CHECK: atomicrmw umin i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("agent-one-as")
// CHECK: atomicrmw umax i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("agent-one-as")
-__device__ unsigned long long atomicu64_op_agent(unsigned long long *ptr, unsigned long long val, unsigned long long desired) {
+// CHECK: store i64 %{{.*}}, i64* %{{.*}}, align 8
+__device__ unsigned long long atomicu64_op_agent(unsigned long long *ptr, unsigned long long *ptr2, unsigned long long val, unsigned long long desired) {
val = __hip_atomic_fetch_min(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
val = __hip_atomic_fetch_max(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
+ __hip_atomic_store(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
return val;
}
-// CHECK-LABEL: @_Z18atomic64_op_systemPxxx
+// CHECK-LABEL: @_Z18atomic64_op_systemPxS_xx
// CHECK: cmpxchg i64* {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("one-as")
// CHECK: atomicrmw xchg i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("one-as")
// CHECK: atomicrmw add i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("one-as")
@@ -280,7 +372,14 @@
// CHECK: atomicrmw xor i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("one-as")
// CHECK: atomicrmw min i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("one-as")
// CHECK: atomicrmw max i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("one-as")
-__device__ long long atomic64_op_system(long long *ptr, long long val, long long desired) {
+// CHECK: load i64, i64* %{{.*}}, align 8
+// CHECK: store i64 %{{.*}}, i64* %{{.*}}, align 8
+// CHECK: cmpxchg weak i64* {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("one-as") monotonic monotonic, align 8
+// CHECK: cmpxchg weak i64* {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("one-as") acquire monotonic, align 8
+// CHECK: cmpxchg weak i64* {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("one-as") release monotonic, align 8
+// CHECK: cmpxchg weak i64* {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("one-as") acq_rel monotonic, align 8
+// CHECK: cmpxchg weak i64* {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}} seq_cst monotonic, align 8
+__device__ long long atomic64_op_system(long long *ptr, long long *ptr2, long long val, long long desired) {
bool flag = __hip_atomic_compare_exchange_strong(ptr, &val, desired, __ATOMIC_RELAXED, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
val = __hip_atomic_exchange(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
val = __hip_atomic_fetch_add(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
@@ -289,14 +388,21 @@
val = __hip_atomic_fetch_xor(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
val = __hip_atomic_fetch_min(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
val = __hip_atomic_fetch_max(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
+ val = __hip_atomic_load(ptr, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
+ __hip_atomic_store(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
+ flag = __hip_atomic_compare_exchange_weak(ptr, ptr2, val, desired, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
return flag ? val : desired;
}
-// CHECK-LABEL: @_Z19atomicu64_op_systemPyyy
+// CHECK-LABEL: @_Z19atomicu64_op_systemPyS_yy
// CHECK: atomicrmw umin i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("one-as")
// CHECK: atomicrmw umax i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("one-as")
-__device__ unsigned long long atomicu64_op_system(unsigned long long *ptr, unsigned long long val, unsigned long long desired) {
+// CHECK: load i64, i64* %{{.*}}, align 8
+// CHECK: store i64 %{{.*}}, i64* %{{.*}}, align 8
+__device__ unsigned long long atomicu64_op_system(unsigned long long *ptr, unsigned long long *ptr2, unsigned long long val, unsigned long long desired) {
val = __hip_atomic_fetch_min(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
val = __hip_atomic_fetch_max(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
+ val = __hip_atomic_load(ptr, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
+ __hip_atomic_store(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
return val;
}
Index: clang/lib/Sema/SemaChecking.cpp
===================================================================
--- clang/lib/Sema/SemaChecking.cpp
+++ clang/lib/Sema/SemaChecking.cpp
@@ -5297,6 +5297,7 @@
case AtomicExpr::AO__c11_atomic_load:
case AtomicExpr::AO__opencl_atomic_load:
+ case AtomicExpr::AO__hip_atomic_load:
case AtomicExpr::AO__atomic_load_n:
case AtomicExpr::AO__atomic_load:
return OrderingCABI != llvm::AtomicOrderingCABI::release &&
@@ -5304,6 +5305,7 @@
case AtomicExpr::AO__c11_atomic_store:
case AtomicExpr::AO__opencl_atomic_store:
+ case AtomicExpr::AO__hip_atomic_store:
case AtomicExpr::AO__atomic_store:
case AtomicExpr::AO__atomic_store_n:
return OrderingCABI != llvm::AtomicOrderingCABI::consume &&
@@ -5380,7 +5382,7 @@
"need to update code for modified C11 atomics");
bool IsOpenCL = Op >= AtomicExpr::AO__opencl_atomic_init &&
Op <= AtomicExpr::AO__opencl_atomic_fetch_max;
- bool IsHIP = Op >= AtomicExpr::AO__hip_atomic_compare_exchange_strong &&
+ bool IsHIP = Op >= AtomicExpr::AO__hip_atomic_load &&
Op <= AtomicExpr::AO__hip_atomic_fetch_max;
bool IsC11 = (Op >= AtomicExpr::AO__c11_atomic_init &&
Op <= AtomicExpr::AO__c11_atomic_fetch_min) ||
@@ -5399,6 +5401,7 @@
case AtomicExpr::AO__c11_atomic_load:
case AtomicExpr::AO__opencl_atomic_load:
+ case AtomicExpr::AO__hip_atomic_load:
case AtomicExpr::AO__atomic_load_n:
Form = Load;
break;
@@ -5409,6 +5412,7 @@
case AtomicExpr::AO__c11_atomic_store:
case AtomicExpr::AO__opencl_atomic_store:
+ case AtomicExpr::AO__hip_atomic_store:
case AtomicExpr::AO__atomic_store:
case AtomicExpr::AO__atomic_store_n:
Form = Copy;
@@ -5474,6 +5478,7 @@
case AtomicExpr::AO__hip_atomic_compare_exchange_strong:
case AtomicExpr::AO__opencl_atomic_compare_exchange_strong:
case AtomicExpr::AO__opencl_atomic_compare_exchange_weak:
+ case AtomicExpr::AO__hip_atomic_compare_exchange_weak:
Form = C11CmpXchg;
break;
@@ -5802,11 +5807,14 @@
if ((Op == AtomicExpr::AO__c11_atomic_load ||
Op == AtomicExpr::AO__c11_atomic_store ||
Op == AtomicExpr::AO__opencl_atomic_load ||
- Op == AtomicExpr::AO__opencl_atomic_store ) &&
+ Op == AtomicExpr::AO__hip_atomic_load ||
+ Op == AtomicExpr::AO__opencl_atomic_store ||
+ Op == AtomicExpr::AO__hip_atomic_store) &&
Context.AtomicUsesUnsupportedLibcall(AE))
Diag(AE->getBeginLoc(), diag::err_atomic_load_store_uses_lib)
<< ((Op == AtomicExpr::AO__c11_atomic_load ||
- Op == AtomicExpr::AO__opencl_atomic_load)
+ Op == AtomicExpr::AO__opencl_atomic_load ||
+ Op == AtomicExpr::AO__hip_atomic_load)
? 0
: 1);
Index: clang/lib/CodeGen/CGAtomic.cpp
===================================================================
--- clang/lib/CodeGen/CGAtomic.cpp
+++ clang/lib/CodeGen/CGAtomic.cpp
@@ -531,6 +531,7 @@
return;
case AtomicExpr::AO__c11_atomic_compare_exchange_weak:
case AtomicExpr::AO__opencl_atomic_compare_exchange_weak:
+ case AtomicExpr::AO__hip_atomic_compare_exchange_weak:
emitAtomicCmpXchgFailureSet(CGF, E, true, Dest, Ptr, Val1, Val2,
FailureOrder, Size, Order, Scope);
return;
@@ -566,6 +567,7 @@
}
case AtomicExpr::AO__c11_atomic_load:
case AtomicExpr::AO__opencl_atomic_load:
+ case AtomicExpr::AO__hip_atomic_load:
case AtomicExpr::AO__atomic_load_n:
case AtomicExpr::AO__atomic_load: {
llvm::LoadInst *Load = CGF.Builder.CreateLoad(Ptr);
@@ -577,6 +579,7 @@
case AtomicExpr::AO__c11_atomic_store:
case AtomicExpr::AO__opencl_atomic_store:
+ case AtomicExpr::AO__hip_atomic_store:
case AtomicExpr::AO__atomic_store:
case AtomicExpr::AO__atomic_store_n: {
llvm::Value *LoadVal1 = CGF.Builder.CreateLoad(Val1);
@@ -846,6 +849,7 @@
case AtomicExpr::AO__c11_atomic_load:
case AtomicExpr::AO__opencl_atomic_load:
+ case AtomicExpr::AO__hip_atomic_load:
case AtomicExpr::AO__atomic_load_n:
break;
@@ -867,6 +871,7 @@
case AtomicExpr::AO__opencl_atomic_compare_exchange_strong:
case AtomicExpr::AO__hip_atomic_compare_exchange_strong:
case AtomicExpr::AO__opencl_atomic_compare_exchange_weak:
+ case AtomicExpr::AO__hip_atomic_compare_exchange_weak:
case AtomicExpr::AO__atomic_compare_exchange_n:
case AtomicExpr::AO__atomic_compare_exchange:
Val1 = EmitPointerWithAlignment(E->getVal1());
@@ -911,6 +916,7 @@
case AtomicExpr::AO__c11_atomic_store:
case AtomicExpr::AO__c11_atomic_exchange:
case AtomicExpr::AO__opencl_atomic_store:
+ case AtomicExpr::AO__hip_atomic_store:
case AtomicExpr::AO__opencl_atomic_exchange:
case AtomicExpr::AO__hip_atomic_exchange:
case AtomicExpr::AO__atomic_store_n:
@@ -1038,10 +1044,13 @@
case AtomicExpr::AO__c11_atomic_compare_exchange_strong:
case AtomicExpr::AO__hip_atomic_compare_exchange_strong:
case AtomicExpr::AO__opencl_atomic_load:
+ case AtomicExpr::AO__hip_atomic_load:
case AtomicExpr::AO__opencl_atomic_store:
+ case AtomicExpr::AO__hip_atomic_store:
case AtomicExpr::AO__opencl_atomic_exchange:
case AtomicExpr::AO__hip_atomic_exchange:
case AtomicExpr::AO__opencl_atomic_compare_exchange_weak:
+ case AtomicExpr::AO__hip_atomic_compare_exchange_weak:
case AtomicExpr::AO__opencl_atomic_compare_exchange_strong:
case AtomicExpr::AO__atomic_load_n:
case AtomicExpr::AO__atomic_store_n:
@@ -1103,6 +1112,7 @@
case AtomicExpr::AO__c11_atomic_compare_exchange_weak:
case AtomicExpr::AO__c11_atomic_compare_exchange_strong:
case AtomicExpr::AO__opencl_atomic_compare_exchange_weak:
+ case AtomicExpr::AO__hip_atomic_compare_exchange_weak:
case AtomicExpr::AO__opencl_atomic_compare_exchange_strong:
case AtomicExpr::AO__hip_atomic_compare_exchange_strong:
case AtomicExpr::AO__atomic_compare_exchange:
@@ -1135,6 +1145,7 @@
// void __atomic_store_N(T *mem, T val, int order)
case AtomicExpr::AO__c11_atomic_store:
case AtomicExpr::AO__opencl_atomic_store:
+ case AtomicExpr::AO__hip_atomic_store:
case AtomicExpr::AO__atomic_store:
case AtomicExpr::AO__atomic_store_n:
LibCallName = "__atomic_store";
@@ -1147,6 +1158,7 @@
// T __atomic_load_N(T *mem, int order)
case AtomicExpr::AO__c11_atomic_load:
case AtomicExpr::AO__opencl_atomic_load:
+ case AtomicExpr::AO__hip_atomic_load:
case AtomicExpr::AO__atomic_load:
case AtomicExpr::AO__atomic_load_n:
LibCallName = "__atomic_load";
@@ -1323,10 +1335,12 @@
bool IsStore = E->getOp() == AtomicExpr::AO__c11_atomic_store ||
E->getOp() == AtomicExpr::AO__opencl_atomic_store ||
+ E->getOp() == AtomicExpr::AO__hip_atomic_store ||
E->getOp() == AtomicExpr::AO__atomic_store ||
E->getOp() == AtomicExpr::AO__atomic_store_n;
bool IsLoad = E->getOp() == AtomicExpr::AO__c11_atomic_load ||
E->getOp() == AtomicExpr::AO__opencl_atomic_load ||
+ E->getOp() == AtomicExpr::AO__hip_atomic_load ||
E->getOp() == AtomicExpr::AO__atomic_load ||
E->getOp() == AtomicExpr::AO__atomic_load_n;
Index: clang/lib/AST/StmtPrinter.cpp
===================================================================
--- clang/lib/AST/StmtPrinter.cpp
+++ clang/lib/AST/StmtPrinter.cpp
@@ -1691,7 +1691,8 @@
PrintExpr(Node->getPtr());
if (Node->getOp() != AtomicExpr::AO__c11_atomic_load &&
Node->getOp() != AtomicExpr::AO__atomic_load_n &&
- Node->getOp() != AtomicExpr::AO__opencl_atomic_load) {
+ Node->getOp() != AtomicExpr::AO__opencl_atomic_load &&
+ Node->getOp() != AtomicExpr::AO__hip_atomic_load) {
OS << ", ";
PrintExpr(Node->getVal1());
}
Index: clang/lib/AST/Expr.cpp
===================================================================
--- clang/lib/AST/Expr.cpp
+++ clang/lib/AST/Expr.cpp
@@ -4681,6 +4681,7 @@
return 2;
case AO__opencl_atomic_load:
+ case AO__hip_atomic_load:
case AO__c11_atomic_store:
case AO__c11_atomic_exchange:
case AO__atomic_load:
@@ -4721,6 +4722,7 @@
case AO__hip_atomic_fetch_min:
case AO__hip_atomic_fetch_max:
case AO__opencl_atomic_store:
+ case AO__hip_atomic_store:
case AO__opencl_atomic_exchange:
case AO__opencl_atomic_fetch_add:
case AO__opencl_atomic_fetch_sub:
@@ -4738,6 +4740,7 @@
case AO__hip_atomic_compare_exchange_strong:
case AO__opencl_atomic_compare_exchange_strong:
case AO__opencl_atomic_compare_exchange_weak:
+ case AO__hip_atomic_compare_exchange_weak:
case AO__atomic_compare_exchange:
case AO__atomic_compare_exchange_n:
return 6;
Index: clang/include/clang/Basic/Builtins.def
===================================================================
--- clang/include/clang/Basic/Builtins.def
+++ clang/include/clang/Basic/Builtins.def
@@ -855,8 +855,9 @@
ATOMIC_BUILTIN(__atomic_fetch_max, "v.", "t")
// HIP atomic builtins.
-// FIXME: Is `__hip_atomic_compare_exchange_n` or
-// `__hip_atomic_compare_exchange_weak` needed?
+ATOMIC_BUILTIN(__hip_atomic_load, "v.", "t")
+ATOMIC_BUILTIN(__hip_atomic_store, "v.", "t")
+ATOMIC_BUILTIN(__hip_atomic_compare_exchange_weak, "v.", "t")
ATOMIC_BUILTIN(__hip_atomic_compare_exchange_strong, "v.", "t")
ATOMIC_BUILTIN(__hip_atomic_exchange, "v.", "t")
ATOMIC_BUILTIN(__hip_atomic_fetch_add, "v.", "t")
Index: clang/include/clang/AST/Expr.h
===================================================================
--- clang/include/clang/AST/Expr.h
+++ clang/include/clang/AST/Expr.h
@@ -6308,6 +6308,7 @@
getOp() == AO__hip_atomic_compare_exchange_strong ||
getOp() == AO__opencl_atomic_compare_exchange_strong ||
getOp() == AO__opencl_atomic_compare_exchange_weak ||
+ getOp() == AO__hip_atomic_compare_exchange_weak ||
getOp() == AO__atomic_compare_exchange ||
getOp() == AO__atomic_compare_exchange_n;
}
@@ -6342,8 +6343,7 @@
auto Kind =
(Op >= AO__opencl_atomic_load && Op <= AO__opencl_atomic_fetch_max)
? AtomicScopeModelKind::OpenCL
- : (Op >= AO__hip_atomic_compare_exchange_strong &&
- Op <= AO__hip_atomic_fetch_max)
+ : (Op >= AO__hip_atomic_load && Op <= AO__hip_atomic_fetch_max)
? AtomicScopeModelKind::HIP
: AtomicScopeModelKind::None;
return AtomicScopeModel::create(Kind);