t.p.northover created this revision.
t.p.northover added a subscriber: cfe-commits.
t.p.northover set the repository for this revision to rL LLVM.
When a struct's size is not a power of 2, the corresponding _Atomic() type is promoted to the nearest power-of-2 size. We already handled normal C++ expressions of this form correctly, but direct calls to the __c11_atomic_whatever builtins ended up performing dodgy operations on the smaller, non-atomic types (e.g. memcpy'ing too many bytes). Later optimisations removed this as undefined behaviour.

This patch converts EmitAtomicExpr to allocate its temporaries at the full atomic width, sidestepping the issue. It also tidies up that function a little: previously there was a confusing dual-return situation, where sometimes the result was returned as an RValue and other times it was stored into a user-provided destination. I don't think this is necessary (it dates back to the very beginning of CGAtomic.cpp).

Repository:
  rL LLVM

http://reviews.llvm.org/D13874

Files:
  lib/CodeGen/CGAtomic.cpp
  lib/CodeGen/CGExprAgg.cpp
  lib/CodeGen/CodeGenFunction.h
  test/CodeGen/atomic-arm64.c
  test/CodeGen/atomic-ops.c
  test/CodeGen/c11atomics-ios.c
  test/CodeGen/c11atomics.c
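For context when reading the tests in the diff below: PS is a 6-byte struct (the tests use three shorts), so _Atomic(PS) is padded out to the 8-byte "{ %struct.PS, [2 x i8] }" type that shows up in the CHECK lines. The following sketch is illustrative only; it is not part of the patch, assumes that PS layout, and uses the literal ordering value 5 (memory_order_seq_cst) as the tests do:

  /* A 6-byte struct; its _Atomic version is promoted to 8 bytes,
     so the atomic operation itself works on an i64. */
  typedef struct {
    short x, y, z;
  } PS;

  void store_it(_Atomic(PS) *p, PS *val) {
    /* Previously codegen could build the temporary for *val at the
       unpromoted 6-byte size and still read a full 8 bytes from it when
       forming the i64 for the atomic operation; with this patch the value
       is first copied into a full-width (8-byte) temporary. */
    __c11_atomic_store(p, *val, 5 /* memory_order_seq_cst */);
  }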
Index: test/CodeGen/c11atomics.c
===================================================================
--- test/CodeGen/c11atomics.c
+++ test/CodeGen/c11atomics.c
@@ -367,14 +367,111 @@
 // CHECK-NEXT: ret void
 }
 
-// CHECK: define arm_aapcscc void @testPromotedStructOps([[APS:.*]]*
-
-// FIXME: none of these look right, but we can leave the "test" here
-// to make sure they at least don't crash.
-void testPromotedStructOps(_Atomic(PS) *p) {
-  PS a = __c11_atomic_load(p, 5);
-  __c11_atomic_store(p, a, 5);
-  PS b = __c11_atomic_exchange(p, a, 5);
-  _Bool v = __c11_atomic_compare_exchange_strong(p, &b, a, 5, 5);
-  v = __c11_atomic_compare_exchange_weak(p, &b, a, 5, 5);
+PS test_promoted_load(_Atomic(PS) *addr) {
+  // CHECK-LABEL: @test_promoted_load(%struct.PS* noalias sret %agg.result, { %struct.PS, [2 x i8] }* %addr)
+  // CHECK: [[ADDR_ARG:%.*]] = alloca { %struct.PS, [2 x i8] }*, align 4
+  // CHECK: [[ATOMIC_RES:%.*]] = alloca { %struct.PS, [2 x i8] }, align 8
+  // CHECK: store { %struct.PS, [2 x i8] }* %addr, { %struct.PS, [2 x i8] }** [[ADDR_ARG]], align 4
+  // CHECK: [[ADDR:%.*]] = load { %struct.PS, [2 x i8] }*, { %struct.PS, [2 x i8] }** [[ADDR_ARG]], align 4
+  // CHECK: [[ADDR64:%.*]] = bitcast { %struct.PS, [2 x i8] }* [[ADDR]] to i64*
+  // CHECK: [[ATOMIC_RES64:%.*]] = bitcast { %struct.PS, [2 x i8] }* [[ATOMIC_RES]] to i64*
+  // CHECK: [[ADDR8:%.*]] = bitcast i64* [[ADDR64]] to i8*
+  // CHECK: [[RES:%.*]] = call arm_aapcscc i64 @__atomic_load_8(i8* [[ADDR8]], i32 5)
+  // CHECK: store i64 [[RES]], i64* [[ATOMIC_RES64]], align 8
+  // CHECK: [[ATOMIC_RES_STRUCT:%.*]] = bitcast i64* [[ATOMIC_RES64]] to %struct.PS*
+  // CHECK: [[AGG_RESULT8:%.*]] = bitcast %struct.PS* %agg.result to i8*
+  // CHECK: [[ATOMIC_RES8:%.*]] = bitcast %struct.PS* [[ATOMIC_RES_STRUCT]] to i8*
+  // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[AGG_RESULT8]], i8* [[ATOMIC_RES8]], i32 6, i32 2, i1 false)
+
+  return __c11_atomic_load(addr, 5);
+}
+
+void test_promoted_store(_Atomic(PS) *addr, PS *val) {
+  // CHECK-LABEL: @test_promoted_store({ %struct.PS, [2 x i8] }* %addr, %struct.PS* %val)
+  // CHECK: [[ADDR_ARG:%.*]] = alloca { %struct.PS, [2 x i8] }*, align 4
+  // CHECK: [[VAL_ARG:%.*]] = alloca %struct.PS*, align 4
+  // CHECK: [[NONATOMIC_TMP:%.*]] = alloca %struct.PS, align 2
+  // CHECK: [[ATOMIC_VAL:%.*]] = alloca { %struct.PS, [2 x i8] }, align 8
+  // CHECK: store { %struct.PS, [2 x i8] }* %addr, { %struct.PS, [2 x i8] }** [[ADDR_ARG]], align 4
+  // CHECK: store %struct.PS* %val, %struct.PS** [[VAL_ARG]], align 4
+  // CHECK: [[ADDR:%.*]] = load { %struct.PS, [2 x i8] }*, { %struct.PS, [2 x i8] }** [[ADDR_ARG]], align 4
+  // CHECK: [[VAL:%.*]] = load %struct.PS*, %struct.PS** [[VAL_ARG]], align 4
+  // CHECK: [[NONATOMIC_TMP8:%.*]] = bitcast %struct.PS* [[NONATOMIC_TMP]] to i8*
+  // CHECK: [[VAL8:%.*]] = bitcast %struct.PS* [[VAL]] to i8*
+  // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[NONATOMIC_TMP8]], i8* [[VAL8]], i32 6, i32 2, i1 false)
+  // CHECK: [[ADDR64:%.*]] = bitcast { %struct.PS, [2 x i8] }* [[ADDR]] to i64*
+  // CHECK: [[ATOMIC_VAL8:%.*]] = bitcast { %struct.PS, [2 x i8] }* [[ATOMIC_VAL]] to i8*
+  // CHECK: [[NONATOMIC_TMP8:%.*]] = bitcast %struct.PS* [[NONATOMIC_TMP]] to i8*
+  // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[ATOMIC_VAL8]], i8* [[NONATOMIC_TMP8]], i64 6, i32 2, i1 false)
+  // CHECK: [[ATOMIC_VAL64:%.*]] = bitcast { %struct.PS, [2 x i8] }* [[ATOMIC_VAL]] to i64*
+  // CHECK: [[ADDR8:%.*]] = bitcast i64* [[ADDR64]] to i8*
+  // CHECK: [[VAL64:%.*]] = load i64, i64* [[ATOMIC_VAL64]], align 2
+  // CHECK: call arm_aapcscc void @__atomic_store_8(i8* [[ADDR8]], i64 [[VAL64]], i32 5)
+  __c11_atomic_store(addr, *val, 5);
+}
+
+PS test_promoted_exchange(_Atomic(PS) *addr, PS *val) {
+  // CHECK-LABEL: @test_promoted_exchange(%struct.PS* noalias sret %agg.result, { %struct.PS, [2 x i8] }* %addr, %struct.PS* %val)
+  // CHECK: [[ADDR_ARG:%.*]] = alloca { %struct.PS, [2 x i8] }*, align 4
+  // CHECK: [[VAL_ARG:%.*]] = alloca %struct.PS*, align 4
+  // CHECK: [[NONATOMIC_TMP:%.*]] = alloca %struct.PS, align 2
+  // CHECK: [[ATOMIC_VAL:%.*]] = alloca { %struct.PS, [2 x i8] }, align 8
+  // CHECK: [[ATOMIC_RES:%.*]] = alloca { %struct.PS, [2 x i8] }, align 8
+  // CHECK: store { %struct.PS, [2 x i8] }* %addr, { %struct.PS, [2 x i8] }** [[ADDR_ARG]], align 4
+  // CHECK: store %struct.PS* %val, %struct.PS** [[VAL_ARG]], align 4
+  // CHECK: [[ADDR:%.*]] = load { %struct.PS, [2 x i8] }*, { %struct.PS, [2 x i8] }** [[ADDR_ARG]], align 4
+  // CHECK: [[VAL:%.*]] = load %struct.PS*, %struct.PS** [[VAL_ARG]], align 4
+  // CHECK: [[NONATOMIC_TMP8:%.*]] = bitcast %struct.PS* [[NONATOMIC_TMP]] to i8*
+  // CHECK: [[VAL8:%.*]] = bitcast %struct.PS* [[VAL]] to i8*
+  // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[NONATOMIC_TMP8]], i8* [[VAL8]], i32 6, i32 2, i1 false)
+  // CHECK: [[ADDR64:%.*]] = bitcast { %struct.PS, [2 x i8] }* [[ADDR]] to i64*
+  // CHECK: [[ATOMIC_VAL8:%.*]] = bitcast { %struct.PS, [2 x i8] }* [[ATOMIC_VAL]] to i8*
+  // CHECK: [[NONATOMIC_TMP8:%.*]] = bitcast %struct.PS* [[NONATOMIC_TMP]] to i8*
+  // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[ATOMIC_VAL8]], i8* [[NONATOMIC_TMP8]], i64 6, i32 2, i1 false)
+  // CHECK: [[ATOMIC_VAL64:%.*]] = bitcast { %struct.PS, [2 x i8] }* [[ATOMIC_VAL]] to i64*
+  // CHECK: [[ATOMIC_RES64:%.*]] = bitcast { %struct.PS, [2 x i8] }* [[ATOMIC_RES]] to i64*
+  // CHECK: [[ADDR8:%.*]] = bitcast i64* [[ADDR64]] to i8*
+  // CHECK: [[VAL64:%.*]] = load i64, i64* [[ATOMIC_VAL64]], align 2
+  // CHECK: [[RES:%.*]] = call arm_aapcscc i64 @__atomic_exchange_8(i8* [[ADDR8]], i64 [[VAL64]], i32 5)
+  // CHECK: store i64 [[RES]], i64* [[ATOMIC_RES64]], align 8
+  // CHECK: [[ATOMIC_RES_STRUCT:%.*]] = bitcast i64* [[ATOMIC_RES64]] to %struct.PS*
+  // CHECK: [[AGG_RESULT8:%.*]] = bitcast %struct.PS* %agg.result to i8*
+  // CHECK: [[ATOMIC_RES8:%.*]] = bitcast %struct.PS* [[ATOMIC_RES_STRUCT]] to i8*
+  // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[AGG_RESULT8]], i8* [[ATOMIC_RES8]], i32 6, i32 2, i1 false)
+  return __c11_atomic_exchange(addr, *val, 5);
+}
+
+_Bool test_promoted_cmpxchg(_Atomic(PS) *addr, PS *desired, PS *new) {
+  // CHECK-LABEL: i1 @test_promoted_cmpxchg({ %struct.PS, [2 x i8] }* %addr, %struct.PS* %desired, %struct.PS* %new) #0 {
+  // CHECK: [[ADDR_ARG:%.*]] = alloca { %struct.PS, [2 x i8] }*, align 4
+  // CHECK: [[DESIRED_ARG:%.*]] = alloca %struct.PS*, align 4
+  // CHECK: [[NEW_ARG:%.*]] = alloca %struct.PS*, align 4
+  // CHECK: [[NONATOMIC_TMP:%.*]] = alloca %struct.PS, align 2
+  // CHECK: [[ATOMIC_DESIRED:%.*]] = alloca { %struct.PS, [2 x i8] }, align 8
+  // CHECK: [[ATOMIC_NEW:%.*]] = alloca { %struct.PS, [2 x i8] }, align 8
+  // %cmpxchg.bool = alloca i8, align 1
+  // CHECK: store { %struct.PS, [2 x i8] }* %addr, { %struct.PS, [2 x i8] }** [[ADDR_ARG]], align 4
+  // CHECK: store %struct.PS* %desired, %struct.PS** [[DESIRED_ARG]], align 4
+  // CHECK: store %struct.PS* %new, %struct.PS** [[NEW_ARG]], align 4
+  // CHECK: [[ADDR:%.*]] = load { %struct.PS, [2 x i8] }*, { %struct.PS, [2 x i8] }** [[ADDR_ARG]], align 4
+  // CHECK: [[DESIRED:%.*]] = load %struct.PS*, %struct.PS** [[DESIRED_ARG]], align 4
+  // CHECK: [[NEW:%.*]] = load %struct.PS*, %struct.PS** [[NEW_ARG]], align 4
+  // CHECK: [[NONATOMIC_TMP8:%.*]] = bitcast %struct.PS* [[NONATOMIC_TMP]] to i8*
+  // CHECK: [[NEW8:%.*]] = bitcast %struct.PS* [[NEW]] to i8*
+  // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[NONATOMIC_TMP8]], i8* [[NEW8]], i32 6, i32 2, i1 false)
+  // CHECK: [[ADDR64:%.*]] = bitcast { %struct.PS, [2 x i8] }* [[ADDR]] to i64*
+  // CHECK: [[ATOMIC_DESIRED8:%.*]] = bitcast { %struct.PS, [2 x i8] }* [[ATOMIC_DESIRED]] to i8*
+  // CHECK: [[DESIRED8:%.*]] = bitcast %struct.PS* [[DESIRED]] to i8*
+  // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[ATOMIC_DESIRED8]], i8* [[DESIRED8]], i64 6, i32 2, i1 false)
+  // CHECK: [[ATOMIC_DESIRED64:%.*]] = bitcast { %struct.PS, [2 x i8] }* [[ATOMIC_DESIRED]] to i64*
+  // CHECK: [[ATOMIC_NEW8:%.*]] = bitcast { %struct.PS, [2 x i8] }* [[ATOMIC_NEW]] to i8*
+  // CHECK: [[NONATOMIC_TMP8:%.*]] = bitcast %struct.PS* [[NONATOMIC_TMP]] to i8*
+  // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[ATOMIC_NEW8]], i8* [[NONATOMIC_TMP8]], i64 6, i32 2, i1 false)
+  // CHECK: [[ATOMIC_NEW64:%.*]] = bitcast { %struct.PS, [2 x i8] }* [[ATOMIC_NEW]] to i64*
+  // CHECK: [[ADDR8:%.*]] = bitcast i64* [[ADDR64]] to i8*
+  // CHECK: [[ATOMIC_DESIRED8:%.*]] = bitcast i64* [[ATOMIC_DESIRED64]] to i8*
+  // CHECK: [[NEW64:%.*]] = load i64, i64* [[ATOMIC_NEW64]], align 2
+  // CHECK: [[RES:%.*]] = call arm_aapcscc zeroext i1 @__atomic_compare_exchange_8(i8* [[ADDR8]], i8* [[ATOMIC_DESIRED8]], i64 [[NEW64]], i32 5, i32 5)
+  // CHECK: ret i1 [[RES]]
+  return __c11_atomic_compare_exchange_strong(addr, desired, *new, 5, 5);
 }
Index: test/CodeGen/c11atomics-ios.c
===================================================================
--- test/CodeGen/c11atomics-ios.c
+++ test/CodeGen/c11atomics-ios.c
@@ -202,11 +202,120 @@
 // CHECK-NEXT: ret void
 }
 
-void testPromotedStructOps(_Atomic(PS) *p) {
-  PS a = __c11_atomic_load(p, 5);
-  __c11_atomic_store(p, a, 5);
-  PS b = __c11_atomic_exchange(p, a, 5);
+PS test_promoted_load(_Atomic(PS) *addr) {
+  // CHECK-LABEL: @test_promoted_load(%struct.PS* noalias sret %agg.result, { %struct.PS, [2 x i8] }* %addr)
+  // CHECK: [[ADDR_ARG:%.*]] = alloca { %struct.PS, [2 x i8] }*, align 4
+  // CHECK: [[ATOMIC_RES:%.*]] = alloca { %struct.PS, [2 x i8] }, align 8
+  // CHECK: store { %struct.PS, [2 x i8] }* %addr, { %struct.PS, [2 x i8] }** [[ADDR_ARG]], align 4
+  // CHECK: [[ADDR:%.*]] = load { %struct.PS, [2 x i8] }*, { %struct.PS, [2 x i8] }** [[ADDR_ARG]], align 4
+  // CHECK: [[ADDR64:%.*]] = bitcast { %struct.PS, [2 x i8] }* [[ADDR]] to i64*
+  // CHECK: [[ATOMIC_RES64:%.*]] = bitcast { %struct.PS, [2 x i8] }* [[ATOMIC_RES]] to i64*
+  // CHECK: [[VAL:%.*]] = load atomic i64, i64* [[ADDR64]] seq_cst, align 8
+  // CHECK: store i64 [[VAL]], i64* [[ATOMIC_RES64]], align 8
+  // CHECK: [[ATOMIC_RES_STRUCT:%.*]] = bitcast i64* [[ATOMIC_RES64]] to %struct.PS*
+  // CHECK: [[AGG_RESULT8:%.*]] = bitcast %struct.PS* %agg.result to i8*
+  // CHECK: [[ATOMIC_RES8:%.*]] = bitcast %struct.PS* [[ATOMIC_RES_STRUCT]] to i8*
+  // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[AGG_RESULT8]], i8* [[ATOMIC_RES8]], i32 6, i32 2, i1 false)
 
-  _Bool v = __c11_atomic_compare_exchange_strong(p, &b, a, 5, 5);
-  v = __c11_atomic_compare_exchange_weak(p, &b, a, 5, 5);
+  return __c11_atomic_load(addr, 5);
+}
+
+void test_promoted_store(_Atomic(PS) *addr, PS *val) {
+  // CHECK-LABEL: @test_promoted_store({ %struct.PS, [2 x i8] }* %addr, %struct.PS* %val)
+  // CHECK: [[ADDR_ARG:%.*]] = alloca { %struct.PS, [2 x i8] }*, align 4
+  // CHECK: [[VAL_ARG:%.*]] = alloca %struct.PS*, align 4
+  // CHECK: [[NONATOMIC_TMP:%.*]] = alloca %struct.PS, align 2
+  // CHECK: [[ATOMIC_VAL:%.*]] = alloca { %struct.PS, [2 x i8] }, align 8
+  // CHECK: store { %struct.PS, [2 x i8] }* %addr, { %struct.PS, [2 x i8] }** [[ADDR_ARG]], align 4
+  // CHECK: store %struct.PS* %val, %struct.PS** [[VAL_ARG]], align 4
+  // CHECK: [[ADDR:%.*]] = load { %struct.PS, [2 x i8] }*, { %struct.PS, [2 x i8] }** [[ADDR_ARG]], align 4
+  // CHECK: [[VAL:%.*]] = load %struct.PS*, %struct.PS** [[VAL_ARG]], align 4
+  // CHECK: [[NONATOMIC_TMP8:%.*]] = bitcast %struct.PS* [[NONATOMIC_TMP]] to i8*
+  // CHECK: [[VAL8:%.*]] = bitcast %struct.PS* [[VAL]] to i8*
+  // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[NONATOMIC_TMP8]], i8* [[VAL8]], i32 6, i32 2, i1 false)
+  // CHECK: [[ADDR64:%.*]] = bitcast { %struct.PS, [2 x i8] }* [[ADDR]] to i64*
+  // CHECK: [[ATOMIC_VAL8:%.*]] = bitcast { %struct.PS, [2 x i8] }* [[ATOMIC_VAL]] to i8*
+  // CHECK: [[NONATOMIC_TMP8:%.*]] = bitcast %struct.PS* [[NONATOMIC_TMP]] to i8*
+  // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[ATOMIC_VAL8]], i8* [[NONATOMIC_TMP8]], i64 6, i32 2, i1 false)
+  // CHECK: [[ATOMIC_VAL64:%.*]] = bitcast { %struct.PS, [2 x i8] }* [[ATOMIC_VAL]] to i64*
+  // CHECK: [[VAL64:%.*]] = load i64, i64* [[ATOMIC_VAL64]], align 8
+  // CHECK: store atomic i64 [[VAL64]], i64* [[ADDR64]] seq_cst, align 8
+
+  __c11_atomic_store(addr, *val, 5);
+}
+
+PS test_promoted_exchange(_Atomic(PS) *addr, PS *val) {
+  // CHECK-LABEL: @test_promoted_exchange(%struct.PS* noalias sret %agg.result, { %struct.PS, [2 x i8] }* %addr, %struct.PS* %val)
+  // CHECK: [[ADDR_ARG:%.*]] = alloca { %struct.PS, [2 x i8] }*, align 4
+  // CHECK: [[VAL_ARG:%.*]] = alloca %struct.PS*, align 4
+  // CHECK: [[NONATOMIC_TMP:%.*]] = alloca %struct.PS, align 2
+  // CHECK: [[ATOMIC_VAL:%.*]] = alloca { %struct.PS, [2 x i8] }, align 8
+  // CHECK: [[ATOMIC_RES:%.*]] = alloca { %struct.PS, [2 x i8] }, align 8
+  // CHECK: store { %struct.PS, [2 x i8] }* %addr, { %struct.PS, [2 x i8] }** [[ADDR_ARG]], align 4
+  // CHECK: store %struct.PS* %val, %struct.PS** [[VAL_ARG]], align 4
+  // CHECK: [[ADDR:%.*]] = load { %struct.PS, [2 x i8] }*, { %struct.PS, [2 x i8] }** [[ADDR_ARG]], align 4
+  // CHECK: [[VAL:%.*]] = load %struct.PS*, %struct.PS** [[VAL_ARG]], align 4
+  // CHECK: [[NONATOMIC_TMP8:%.*]] = bitcast %struct.PS* [[NONATOMIC_TMP]] to i8*
+  // CHECK: [[VAL8:%.*]] = bitcast %struct.PS* [[VAL]] to i8*
+  // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[NONATOMIC_TMP8]], i8* [[VAL8]], i32 6, i32 2, i1 false)
+  // CHECK: [[ADDR64:%.*]] = bitcast { %struct.PS, [2 x i8] }* [[ADDR]] to i64*
+  // CHECK: [[ATOMIC_VAL8:%.*]] = bitcast { %struct.PS, [2 x i8] }* [[ATOMIC_VAL]] to i8*
+  // CHECK: [[NONATOMIC_TMP8:%.*]] = bitcast %struct.PS* [[NONATOMIC_TMP]] to i8*
+  // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[ATOMIC_VAL8]], i8* [[NONATOMIC_TMP8]], i64 6, i32 2, i1 false)
+  // CHECK: [[ATOMIC_VAL64:%.*]] = bitcast { %struct.PS, [2 x i8] }* [[ATOMIC_VAL]] to i64*
+  // CHECK: [[ATOMIC_RES64:%.*]] = bitcast { %struct.PS, [2 x i8] }* [[ATOMIC_RES]] to i64*
+  // CHECK: [[VAL64:%.*]] = load i64, i64* [[ATOMIC_VAL64]], align 8
+  // CHECK: [[RES:%.*]] = atomicrmw xchg i64* [[ADDR64]], i64 [[VAL64]] seq_cst
+  // CHECK: store i64 [[RES]], i64* [[ATOMIC_RES64]], align 8
+  // CHECK: [[ATOMIC_RES_STRUCT:%.*]] = bitcast i64* [[ATOMIC_RES64]] to %struct.PS*
+  // CHECK: [[AGG_RESULT8:%.*]] = bitcast %struct.PS* %agg.result to i8*
+  // CHECK: [[ATOMIC_RES8:%.*]] = bitcast %struct.PS* [[ATOMIC_RES_STRUCT]] to i8*
+  // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[AGG_RESULT8]], i8* [[ATOMIC_RES8]], i32 6, i32 2, i1 false)
+  return __c11_atomic_exchange(addr, *val, 5);
+}
+
+_Bool test_promoted_cmpxchg(_Atomic(PS) *addr, PS *desired, PS *new) {
+  // CHECK: define zeroext i1 @test_promoted_cmpxchg({ %struct.PS, [2 x i8] }* %addr, %struct.PS* %desired, %struct.PS* %new) #0 {
+  // CHECK: [[ADDR_ARG:%.*]] = alloca { %struct.PS, [2 x i8] }*, align 4
+  // CHECK: [[DESIRED_ARG:%.*]] = alloca %struct.PS*, align 4
+  // CHECK: [[NEW_ARG:%.*]] = alloca %struct.PS*, align 4
+  // CHECK: [[NONATOMIC_TMP:%.*]] = alloca %struct.PS, align 2
+  // CHECK: [[ATOMIC_DESIRED:%.*]] = alloca { %struct.PS, [2 x i8] }, align 8
+  // CHECK: [[ATOMIC_NEW:%.*]] = alloca { %struct.PS, [2 x i8] }, align 8
+  // CHECK: [[RES_ADDR:%.*]] = alloca i8, align 1
+  // CHECK: store { %struct.PS, [2 x i8] }* %addr, { %struct.PS, [2 x i8] }** [[ADDR_ARG]], align 4
+  // CHECK: store %struct.PS* %desired, %struct.PS** [[DESIRED_ARG]], align 4
+  // CHECK: store %struct.PS* %new, %struct.PS** [[NEW_ARG]], align 4
+  // CHECK: [[ADDR:%.*]] = load { %struct.PS, [2 x i8] }*, { %struct.PS, [2 x i8] }** [[ADDR_ARG]], align 4
+  // CHECK: [[DESIRED:%.*]] = load %struct.PS*, %struct.PS** [[DESIRED_ARG]], align 4
+  // CHECK: [[NEW:%.*]] = load %struct.PS*, %struct.PS** [[NEW_ARG]], align 4
+  // CHECK: [[NONATOMIC_TMP8:%.*]] = bitcast %struct.PS* [[NONATOMIC_TMP]] to i8*
+  // CHECK: [[NEW8:%.*]] = bitcast %struct.PS* [[NEW]] to i8*
+  // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[NONATOMIC_TMP8]], i8* [[NEW8]], i32 6, i32 2, i1 false)
+  // CHECK: [[ADDR64:%.*]] = bitcast { %struct.PS, [2 x i8] }* [[ADDR]] to i64*
+  // CHECK: [[ATOMIC_DESIRED8:%.*]] = bitcast { %struct.PS, [2 x i8] }* [[ATOMIC_DESIRED]] to i8*
+  // CHECK: [[DESIRED8:%.*]] = bitcast %struct.PS* [[DESIRED]] to i8*
+  // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[ATOMIC_DESIRED8]], i8* [[DESIRED8]], i64 6, i32 2, i1 false)
+  // CHECK: [[ATOMIC_DESIRED64:%.*]] = bitcast { %struct.PS, [2 x i8] }* [[ATOMIC_DESIRED]] to i64*
+  // CHECK: [[ATOMIC_NEW8:%.*]] = bitcast { %struct.PS, [2 x i8] }* [[ATOMIC_NEW]] to i8*
+  // CHECK: [[NONATOMIC_TMP8:%.*]] = bitcast %struct.PS* [[NONATOMIC_TMP]] to i8*
+  // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[ATOMIC_NEW8]], i8* [[NONATOMIC_TMP8]], i64 6, i32 2, i1 false)
+  // CHECK: [[ATOMIC_NEW64:%.*]] = bitcast { %struct.PS, [2 x i8] }* [[ATOMIC_NEW]] to i64*
+  // CHECK: [[ATOMIC_DESIRED_VAL64:%.*]] = load i64, i64* [[ATOMIC_DESIRED64]], align 8
+  // CHECK: [[ATOMIC_NEW_VAL64:%.*]] = load i64, i64* [[ATOMIC_NEW64]], align 8
+  // CHECK: [[RES:%.*]] = cmpxchg i64* [[ADDR64]], i64 [[ATOMIC_DESIRED_VAL64]], i64 [[ATOMIC_NEW_VAL64]] seq_cst seq_cst
+  // CHECK: [[RES_VAL64:%.*]] = extractvalue { i64, i1 } [[RES]], 0
+  // CHECK: [[RES_BOOL:%.*]] = extractvalue { i64, i1 } [[RES]], 1
+  // CHECK: br i1 [[RES_BOOL]], label {{%.*}}, label {{%.*}}
+
+  // CHECK: store i64 [[RES_VAL64]], i64* [[ATOMIC_DESIRED64]], align 8
+  // CHECK: br label {{%.*}}
+
+  // CHECK: [[RES_BOOL8:%.*]] = zext i1 [[RES_BOOL]] to i8
+  // CHECK: store i8 [[RES_BOOL8]], i8* [[RES_ADDR]], align 1
+  // CHECK: [[RES_BOOL8:%.*]] = load i8, i8* [[RES_ADDR]], align 1
+  // CHECK: [[RETVAL:%.*]] = trunc i8 [[RES_BOOL8]] to i1
+  // CHECK: ret i1 [[RETVAL]]
+
+  return __c11_atomic_compare_exchange_strong(addr, desired, *new, 5, 5);
 }
Index: test/CodeGen/atomic-ops.c
===================================================================
--- test/CodeGen/atomic-ops.c
+++ test/CodeGen/atomic-ops.c
@@ -179,8 +179,8 @@
   // CHECK-LABEL: @fd1
   // CHECK: [[RETVAL:%.*]] = alloca %struct.S, align 4
   // CHECK: [[RET:%.*]] = alloca %struct.S, align 4
-  // CHECK: [[CALL:%.*]] = call i64 @__atomic_load_8(
   // CHECK: [[CAST:%.*]] = bitcast %struct.S* [[RET]] to i64*
+  // CHECK: [[CALL:%.*]] = call i64 @__atomic_load_8(
   // CHECK: store i64 [[CALL]], i64* [[CAST]], align 4
   struct S ret;
   __atomic_load(a, &ret, memory_order_seq_cst);
@@ -195,8 +195,9 @@
   // CHECK-NEXT: store %struct.S* %b, %struct.S** [[B_ADDR]], align 4
   // CHECK-NEXT: [[LOAD_A_PTR:%.*]] = load %struct.S*, %struct.S** [[A_ADDR]], align 4
   // CHECK-NEXT: [[LOAD_B_PTR:%.*]] = load %struct.S*, %struct.S** [[B_ADDR]], align 4
-  // CHECK-NEXT: [[COERCED_A:%.*]] = bitcast %struct.S* [[LOAD_A_PTR]] to i8*
+  // CHECK-NEXT: [[COERCED_A_TMP:%.*]] = bitcast %struct.S* [[LOAD_A_PTR]] to i64*
   // CHECK-NEXT: [[COERCED_B:%.*]] = bitcast %struct.S* [[LOAD_B_PTR]] to i64*
+  // CHECK-NEXT: [[COERCED_A:%.*]] = bitcast i64* [[COERCED_A_TMP]] to i8*
   // CHECK-NEXT: [[LOAD_B:%.*]] = load i64, i64* [[COERCED_B]], align 4
   // CHECK-NEXT: call void @__atomic_store_8(i8* [[COERCED_A]], i64 [[LOAD_B]],
   // CHECK-NEXT: ret void
@@ -214,11 +215,12 @@
   // CHECK-NEXT: [[LOAD_A_PTR:%.*]] = load %struct.S*, %struct.S** [[A_ADDR]], align 4
   // CHECK-NEXT: [[LOAD_B_PTR:%.*]] = load %struct.S*, %struct.S** [[B_ADDR]], align 4
   // CHECK-NEXT: [[LOAD_C_PTR:%.*]] = load %struct.S*, %struct.S** [[C_ADDR]], align 4
-  // CHECK-NEXT: [[COERCED_A:%.*]] = bitcast %struct.S* [[LOAD_A_PTR]] to i8*
+  // CHECK-NEXT: [[COERCED_A_TMP:%.*]] = bitcast %struct.S* [[LOAD_A_PTR]] to i64*
   // CHECK-NEXT: [[COERCED_B:%.*]] = bitcast %struct.S* [[LOAD_B_PTR]] to i64*
+  // CHECK-NEXT: [[COERCED_C:%.*]] = bitcast %struct.S* [[LOAD_C_PTR]] to i64*
+  // CHECK-NEXT: [[COERCED_A:%.*]] = bitcast i64* [[COERCED_A_TMP]] to i8*
   // CHECK-NEXT: [[LOAD_B:%.*]] = load i64, i64* [[COERCED_B]], align 4
   // CHECK-NEXT: [[CALL:%.*]] = call i64 @__atomic_exchange_8(i8* [[COERCED_A]], i64 [[LOAD_B]],
-  // CHECK-NEXT: [[COERCED_C:%.*]] = bitcast %struct.S* [[LOAD_C_PTR]] to i64*
   // CHECK-NEXT: store i64 [[CALL]], i64* [[COERCED_C]], align 4
 
   __atomic_exchange(a, b, c, memory_order_seq_cst);
@@ -235,9 +237,11 @@
   // CHECK-NEXT: [[LOAD_A_PTR:%.*]] = load %struct.S*, %struct.S** [[A_ADDR]], align 4
   // CHECK-NEXT: [[LOAD_B_PTR:%.*]] = load %struct.S*, %struct.S** [[B_ADDR]], align 4
   // CHECK-NEXT: [[LOAD_C_PTR:%.*]] = load %struct.S*, %struct.S** [[C_ADDR]], align 4
-  // CHECK-NEXT: [[COERCED_A:%.*]] = bitcast %struct.S* [[LOAD_A_PTR]] to i8*
-  // CHECK-NEXT: [[COERCED_B:%.*]] = bitcast %struct.S* [[LOAD_B_PTR]] to i8*
+  // CHECK-NEXT: [[COERCED_A_TMP:%.*]] = bitcast %struct.S* [[LOAD_A_PTR]] to i64*
+  // CHECK-NEXT: [[COERCED_B_TMP:%.*]] = bitcast %struct.S* [[LOAD_B_PTR]] to i64*
   // CHECK-NEXT: [[COERCED_C:%.*]] = bitcast %struct.S* [[LOAD_C_PTR]] to i64*
+  // CHECK-NEXT: [[COERCED_A:%.*]] = bitcast i64* [[COERCED_A_TMP]] to i8*
+  // CHECK-NEXT: [[COERCED_B:%.*]] = bitcast i64* [[COERCED_B_TMP]] to i8*
   // CHECK-NEXT: [[LOAD_C:%.*]] = load i64, i64* [[COERCED_C]], align 4
   // CHECK-NEXT: [[CALL:%.*]] = call zeroext i1 @__atomic_compare_exchange_8(i8* [[COERCED_A]], i8* [[COERCED_B]], i64 [[LOAD_C]]
   // CHECK-NEXT: ret i1 [[CALL]]
@@ -312,6 +316,8 @@
   char c[17];
 } seventeen;
 
+struct Incomplete;
+
 int lock_free(struct Incomplete *incomplete) {
   // CHECK-LABEL: @lock_free
Index: test/CodeGen/atomic-arm64.c
===================================================================
--- test/CodeGen/atomic-arm64.c
+++ test/CodeGen/atomic-arm64.c
@@ -66,8 +66,9 @@
 // CHECK-NEXT: [[T0:%.*]] = bitcast [[QUAD_T]]* [[TEMP]] to i8*
 // CHECK-NEXT: [[T1:%.*]] = bitcast [[QUAD_T]]* {{%.*}} to i8*
 // CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[T0]], i8* [[T1]], i64 32, i32 8, i1 false)
-// CHECK-NEXT: [[T0:%.*]] = bitcast [[QUAD_T]]* [[TEMP]] to i8*
-// CHECK-NEXT: call void @__atomic_store(i64 32, i8* bitcast ([[QUAD_T]]* @a_pointer_quad to i8*), i8* [[T0]], i32 5)
+// CHECK-NEXT: [[T0:%.*]] = bitcast [[QUAD_T]]* [[TEMP]] to i256*
+// CHECK-NEXT: [[T1:%.*]] = bitcast i256* [[T0]] to i8*
+// CHECK-NEXT: call void @__atomic_store(i64 32, i8* bitcast ([[QUAD_T]]* @a_pointer_quad to i8*), i8* [[T1]], i32 5)
 void test4(pointer_quad_t quad) {
   __c11_atomic_store(&a_pointer_quad, quad, memory_order_seq_cst);
 }
Index: lib/CodeGen/CodeGenFunction.h
===================================================================
--- lib/CodeGen/CodeGenFunction.h
+++ lib/CodeGen/CodeGenFunction.h
@@ -2930,7 +2930,7 @@
 
   void EmitLambdaExpr(const LambdaExpr *E, AggValueSlot Dest);
 
-  RValue EmitAtomicExpr(AtomicExpr *E, Address Dest = Address::invalid());
+  RValue EmitAtomicExpr(AtomicExpr *E);
 
   //===--------------------------------------------------------------------===//
   //                         Annotations Emission
Index: lib/CodeGen/CGExprAgg.cpp
===================================================================
--- lib/CodeGen/CGExprAgg.cpp
+++ lib/CodeGen/CGExprAgg.cpp
@@ -199,7 +199,8 @@
   //  case Expr::ChooseExprClass:
   void VisitCXXThrowExpr(const CXXThrowExpr *E) { CGF.EmitCXXThrowExpr(E); }
   void VisitAtomicExpr(AtomicExpr *E) {
-    CGF.EmitAtomicExpr(E, EnsureSlot(E->getType()).getAddress());
+    RValue Res = CGF.EmitAtomicExpr(E);
+    EmitFinalDestCopy(E->getType(), Res);
   }
 };
 } // end anonymous namespace.
Index: lib/CodeGen/CGAtomic.cpp
===================================================================
--- lib/CodeGen/CGAtomic.cpp
+++ lib/CodeGen/CGAtomic.cpp
@@ -173,9 +173,14 @@
       return CGF.CGM.getSize(size);
     }
 
-    /// Cast the given pointer to an integer pointer suitable for
-    /// atomic operations.
-    Address emitCastToAtomicIntPointer(Address addr) const;
+    /// Cast the given pointer to an integer pointer suitable for atomic
+    /// operations.
+    Address emitCastToAtomicIntPointer(Address Addr) const;
+
+    /// If Addr is compatible with the iN that will be used for an atomic
+    /// operation, bitcast it. Otherwise, create a temporary that is suitable
+    /// and copy the value across.
+    Address convertToAtomicIntPointer(Address Addr) const;
 
     /// Turn an atomic-layout object into an r-value.
     RValue convertAtomicTempToRValue(Address addr, AggValueSlot resultSlot,
@@ -241,11 +246,11 @@
     static AtomicExpr::AtomicOrderingKind
     translateAtomicOrdering(const llvm::AtomicOrdering AO);
 
+    /// \brief Creates temp alloca for intermediate operations on atomic value.
+    Address CreateTempAlloca() const;
   private:
     bool requiresMemSetZero(llvm::Type *type) const;
 
-    /// \brief Creates temp alloca for intermediate operations on atomic value.
-    Address CreateTempAlloca() const;
 
     /// \brief Emits atomic load as a libcall.
     void EmitAtomicLoadLibcall(llvm::Value *AddForLoaded,
@@ -554,7 +559,6 @@
   case AtomicExpr::AO__c11_atomic_store:
   case AtomicExpr::AO__atomic_store:
   case AtomicExpr::AO__atomic_store_n: {
-    assert(!Dest.isValid() && "Store does not return a value");
     llvm::Value *LoadVal1 = CGF.Builder.CreateLoad(Val1);
     llvm::StoreInst *Store = CGF.Builder.CreateStore(LoadVal1, Ptr);
     Store->setAtomic(Order);
@@ -666,7 +670,7 @@
   }
 }
 
-RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E, Address Dest) {
+RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) {
   QualType AtomicTy = E->getPtr()->getType()->getPointeeType();
   QualType MemTy = AtomicTy;
   if (const AtomicType *AT = AtomicTy->getAs<AtomicType>())
@@ -682,10 +686,10 @@
 
   Address Val1 = Address::invalid();
   Address Val2 = Address::invalid();
+  Address Dest = Address::invalid();
   Address Ptr(EmitScalarExpr(E->getPtr()), alignChars);
 
   if (E->getOp() == AtomicExpr::AO__c11_atomic_init) {
-    assert(!Dest.isValid() && "Init does not return a value");
     LValue lvalue = MakeAddrLValue(Ptr, AtomicTy);
     EmitAtomicInit(E->getVal1(), lvalue);
     return RValue::get(nullptr);
@@ -771,12 +775,21 @@
 
   QualType RValTy = E->getType().getUnqualifiedType();
 
-  auto GetDest = [&] {
-    if (!RValTy->isVoidType() && !Dest.isValid()) {
-      Dest = CreateMemTemp(RValTy, ".atomicdst");
-    }
-    return Dest;
-  };
+  // The inlined atomics only function on iN types, where N is a power of 2. We
+  // need to make sure (via temporaries if necessary) that all incoming values
+  // are compatible.
+  LValue AtomicVal = MakeAddrLValue(Ptr, AtomicTy);
+  AtomicInfo Atomics(*this, AtomicVal);
+
+  Ptr = Atomics.emitCastToAtomicIntPointer(Ptr);
+  if (Val1.isValid()) Val1 = Atomics.convertToAtomicIntPointer(Val1);
+  if (Val2.isValid()) Val2 = Atomics.convertToAtomicIntPointer(Val2);
+  if (Dest.isValid())
+    Dest = Atomics.emitCastToAtomicIntPointer(Dest);
+  else if (E->isCmpXChg())
+    Dest = CreateMemTemp(RValTy, "cmpxchg.bool");
+  else if (!RValTy->isVoidType())
+    Dest = Atomics.emitCastToAtomicIntPointer(Atomics.CreateTempAlloca());
 
   // Use a library call. See: http://gcc.gnu.org/wiki/Atomic/GCCMM/LIbrary .
   if (UseLibcall) {
@@ -996,19 +1009,24 @@
 
   RValue Res = emitAtomicLibcall(*this, LibCallName, RetTy, Args);
   // The value is returned directly from the libcall.
-  if (HaveRetTy && !RetTy->isVoidType())
+  if (E->isCmpXChg())
     return Res;
-  // The value is returned via an explicit out param.
-  if (RetTy->isVoidType())
-    return RValue::get(nullptr);
-  // The value is returned directly for optimized libcalls but the caller is
-  // expected an out-param.
-  if (UseOptimizedLibcall) {
+
+  // The value is returned directly for optimized libcalls but the expr
+  // provided an out-param.
+  if (UseOptimizedLibcall && Res.getScalarVal()) {
     llvm::Value *ResVal = Res.getScalarVal();
-    Builder.CreateStore(ResVal,
-        Builder.CreateBitCast(GetDest(), ResVal->getType()->getPointerTo()));
+    Builder.CreateStore(
+        ResVal,
+        Builder.CreateBitCast(Dest, ResVal->getType()->getPointerTo()));
   }
-  return convertTempToRValue(Dest, RValTy, E->getExprLoc());
+
+  if (RValTy->isVoidType())
+    return RValue::get(nullptr);
+
+  return convertTempToRValue(
+      Builder.CreateBitCast(Dest, ConvertTypeForMem(RValTy)->getPointerTo()),
+      RValTy, E->getExprLoc());
 }
 
   bool IsStore = E->getOp() == AtomicExpr::AO__c11_atomic_store ||
@@ -1018,16 +1036,6 @@
                  E->getOp() == AtomicExpr::AO__atomic_load ||
                  E->getOp() == AtomicExpr::AO__atomic_load_n;
 
-  llvm::Type *ITy =
-      llvm::IntegerType::get(getLLVMContext(), Size * 8);
-  Address OrigDest = GetDest();
-  Ptr = Builder.CreateBitCast(
-      Ptr, ITy->getPointerTo(Ptr.getType()->getPointerAddressSpace()));
-  if (Val1.isValid()) Val1 = Builder.CreateBitCast(Val1, ITy->getPointerTo());
-  if (Val2.isValid()) Val2 = Builder.CreateBitCast(Val2, ITy->getPointerTo());
-  if (Dest.isValid() && !E->isCmpXChg())
-    Dest = Builder.CreateBitCast(Dest, ITy->getPointerTo());
-
   if (isa<llvm::ConstantInt>(Order)) {
     int ord = cast<llvm::ConstantInt>(Order)->getZExtValue();
     switch (ord) {
@@ -1065,7 +1073,10 @@
     }
     if (RValTy->isVoidType())
       return RValue::get(nullptr);
-    return convertTempToRValue(OrigDest, RValTy, E->getExprLoc());
+
+    return convertTempToRValue(
+        Builder.CreateBitCast(Dest, ConvertTypeForMem(RValTy)->getPointerTo()),
+        RValTy, E->getExprLoc());
   }
 
   // Long case, when Order isn't obviously constant.
@@ -1133,7 +1144,11 @@
   Builder.SetInsertPoint(ContBB);
   if (RValTy->isVoidType())
     return RValue::get(nullptr);
-  return convertTempToRValue(OrigDest, RValTy, E->getExprLoc());
+
+  assert(Atomics.getValueSizeInBits() <= Atomics.getAtomicSizeInBits());
+  return convertTempToRValue(
+      Builder.CreateBitCast(Dest, ConvertTypeForMem(RValTy)->getPointerTo()),
+      RValTy, E->getExprLoc());
 }
 
 Address AtomicInfo::emitCastToAtomicIntPointer(Address addr) const {
@@ -1144,6 +1159,19 @@
   return CGF.Builder.CreateBitCast(addr, ty->getPointerTo(addrspace));
 }
 
+Address AtomicInfo::convertToAtomicIntPointer(Address Addr) const {
+  llvm::Type *Ty = Addr.getElementType();
+  uint64_t SourceSizeInBits = CGF.CGM.getDataLayout().getTypeSizeInBits(Ty);
+  if (SourceSizeInBits != AtomicSizeInBits) {
+    Address Tmp = CreateTempAlloca();
+    CGF.Builder.CreateMemCpy(Tmp, Addr,
+                             std::min(AtomicSizeInBits, SourceSizeInBits) / 8);
+    Addr = Tmp;
+  }
+
+  return emitCastToAtomicIntPointer(Addr);
+}
+
 RValue AtomicInfo::convertAtomicTempToRValue(Address addr,
                                              AggValueSlot resultSlot,
                                              SourceLocation loc,