t.p.northover created this revision.
t.p.northover added a subscriber: cfe-commits.
t.p.northover set the repository for this revision to rL LLVM.
When a struct's size is not a power of 2, the corresponding _Atomic() type is promoted to the nearest power-of-2 size. We already handled normal C++ expressions of this form correctly, but direct calls to the __c11_atomic_whatever builtins ended up performing dodgy operations on the smaller, non-atomic types (e.g. memcpy'ing too many bytes). Later optimisations removed this as undefined behaviour.

This patch converts EmitAtomicExpr to allocate its temporaries at the full atomic width, sidestepping the issue. It also tidies up that function a little: previously there was a confusing dual-return situation, where sometimes the result was returned as an RValue and other times it was stored into a user-provided destination. I don't think this is necessary (it dates back to the very beginning of CGAtomic.cpp).

Repository:
  rL LLVM

http://reviews.llvm.org/D13874

Files:
  lib/CodeGen/CGAtomic.cpp
  lib/CodeGen/CGExprAgg.cpp
  lib/CodeGen/CodeGenFunction.h
  test/CodeGen/atomic-arm64.c
  test/CodeGen/atomic-ops.c
  test/CodeGen/c11atomics-ios.c
  test/CodeGen/c11atomics.c
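For context when reading the tests in the diff below: PS is a 6-byte struct (the tests use three shorts), so _Atomic(PS) is padded out to the 8-byte "{ %struct.PS, [2 x i8] }" type that shows up in the CHECK lines. The following sketch is illustrative only; it is not part of the patch, assumes that PS layout, and uses the literal ordering value 5 (memory_order_seq_cst) as the tests do:

  /* A 6-byte struct; its _Atomic version is promoted to 8 bytes,
     so the atomic operation itself works on an i64. */
  typedef struct {
    short x, y, z;
  } PS;

  void store_it(_Atomic(PS) *p, PS *val) {
    /* Previously codegen could build the temporary for *val at the
       unpromoted 6-byte size and still read a full 8 bytes from it when
       forming the i64 for the atomic operation; with this patch the value
       is first copied into a full-width (8-byte) temporary. */
    __c11_atomic_store(p, *val, 5 /* memory_order_seq_cst */);
  }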
Index: test/CodeGen/c11atomics.c
===================================================================
--- test/CodeGen/c11atomics.c
+++ test/CodeGen/c11atomics.c
@@ -367,14 +367,111 @@
 // CHECK-NEXT: ret void
 }
 
-// CHECK: define arm_aapcscc void @testPromotedStructOps([[APS:.*]]*
-
-// FIXME: none of these look right, but we can leave the "test" here
-// to make sure they at least don't crash.
-void testPromotedStructOps(_Atomic(PS) *p) {
-  PS a = __c11_atomic_load(p, 5);
-  __c11_atomic_store(p, a, 5);
-  PS b = __c11_atomic_exchange(p, a, 5);
-  _Bool v = __c11_atomic_compare_exchange_strong(p, &b, a, 5, 5);
-  v = __c11_atomic_compare_exchange_weak(p, &b, a, 5, 5);
+PS test_promoted_load(_Atomic(PS) *addr) {
+  // CHECK-LABEL: @test_promoted_load(%struct.PS* noalias sret %agg.result, { %struct.PS, [2 x i8] }* %addr)
+  // CHECK: [[ADDR_ARG:%.*]] = alloca { %struct.PS, [2 x i8] }*, align 4
+  // CHECK: [[ATOMIC_RES:%.*]] = alloca { %struct.PS, [2 x i8] }, align 8
+  // CHECK: store { %struct.PS, [2 x i8] }* %addr, { %struct.PS, [2 x i8] }** [[ADDR_ARG]], align 4
+  // CHECK: [[ADDR:%.*]] = load { %struct.PS, [2 x i8] }*, { %struct.PS, [2 x i8] }** [[ADDR_ARG]], align 4
+  // CHECK: [[ADDR64:%.*]] = bitcast { %struct.PS, [2 x i8] }* [[ADDR]] to i64*
+  // CHECK: [[ATOMIC_RES64:%.*]] = bitcast { %struct.PS, [2 x i8] }* [[ATOMIC_RES]] to i64*
+  // CHECK: [[ADDR8:%.*]] = bitcast i64* [[ADDR64]] to i8*
+  // CHECK: [[RES:%.*]] = call arm_aapcscc i64 @__atomic_load_8(i8* [[ADDR8]], i32 5)
+  // CHECK: store i64 [[RES]], i64* [[ATOMIC_RES64]], align 8
+  // CHECK: [[ATOMIC_RES_STRUCT:%.*]] = bitcast i64* [[ATOMIC_RES64]] to %struct.PS*
+  // CHECK: [[AGG_RESULT8:%.*]] = bitcast %struct.PS* %agg.result to i8*
+  // CHECK: [[ATOMIC_RES8:%.*]] = bitcast %struct.PS* [[ATOMIC_RES_STRUCT]] to i8*
+  // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[AGG_RESULT8]], i8* [[ATOMIC_RES8]], i32 6, i32 2, i1 false)
+
+  return __c11_atomic_load(addr, 5);
+}
+
+void test_promoted_store(_Atomic(PS) *addr, PS *val) {
+  // CHECK-LABEL: @test_promoted_store({ %struct.PS, [2 x i8] }* %addr, %struct.PS* %val)
+  // CHECK: [[ADDR_ARG:%.*]] = alloca { %struct.PS, [2 x i8] }*, align 4
+  // CHECK: [[VAL_ARG:%.*]] = alloca %struct.PS*, align 4
+  // CHECK: [[NONATOMIC_TMP:%.*]] = alloca %struct.PS, align 2
+  // CHECK: [[ATOMIC_VAL:%.*]] = alloca { %struct.PS, [2 x i8] }, align 8
+  // CHECK: store { %struct.PS, [2 x i8] }* %addr, { %struct.PS, [2 x i8] }** [[ADDR_ARG]], align 4
+  // CHECK: store %struct.PS* %val, %struct.PS** [[VAL_ARG]], align 4
+  // CHECK: [[ADDR:%.*]] = load { %struct.PS, [2 x i8] }*, { %struct.PS, [2 x i8] }** [[ADDR_ARG]], align 4
+  // CHECK: [[VAL:%.*]] = load %struct.PS*, %struct.PS** [[VAL_ARG]], align 4
+  // CHECK: [[NONATOMIC_TMP8:%.*]] = bitcast %struct.PS* [[NONATOMIC_TMP]] to i8*
+  // CHECK: [[VAL8:%.*]] = bitcast %struct.PS* [[VAL]] to i8*
+  // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[NONATOMIC_TMP8]], i8* [[VAL8]], i32 6, i32 2, i1 false)
+  // CHECK: [[ADDR64:%.*]] = bitcast { %struct.PS, [2 x i8] }* [[ADDR]] to i64*
+  // CHECK: [[ATOMIC_VAL8:%.*]] = bitcast { %struct.PS, [2 x i8] }* [[ATOMIC_VAL]] to i8*
+  // CHECK: [[NONATOMIC_TMP8:%.*]] = bitcast %struct.PS* [[NONATOMIC_TMP]] to i8*
+  // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[ATOMIC_VAL8]], i8* [[NONATOMIC_TMP8]], i64 6, i32 2, i1 false)
+  // CHECK: [[ATOMIC_VAL64:%.*]] = bitcast { %struct.PS, [2 x i8] }* [[ATOMIC_VAL]] to i64*
+  // CHECK: [[ADDR8:%.*]] = bitcast i64* [[ADDR64]] to i8*
+  // CHECK: [[VAL64:%.*]] = load i64, i64* [[ATOMIC_VAL64]], align 2
+  // CHECK: call arm_aapcscc void @__atomic_store_8(i8* [[ADDR8]], i64 [[VAL64]], i32 5)
+  __c11_atomic_store(addr, *val, 5);
+}
+
+PS test_promoted_exchange(_Atomic(PS) *addr, PS *val) {
+  // CHECK-LABEL: @test_promoted_exchange(%struct.PS* noalias sret %agg.result, { %struct.PS, [2 x i8] }* %addr, %struct.PS* %val)
+  // CHECK: [[ADDR_ARG:%.*]] = alloca { %struct.PS, [2 x i8] }*, align 4
+  // CHECK: [[VAL_ARG:%.*]] = alloca %struct.PS*, align 4
+  // CHECK: [[NONATOMIC_TMP:%.*]] = alloca %struct.PS, align 2
+  // CHECK: [[ATOMIC_VAL:%.*]] = alloca { %struct.PS, [2 x i8] }, align 8
+  // CHECK: [[ATOMIC_RES:%.*]] = alloca { %struct.PS, [2 x i8] }, align 8
+  // CHECK: store { %struct.PS, [2 x i8] }* %addr, { %struct.PS, [2 x i8] }** [[ADDR_ARG]], align 4
+  // CHECK: store %struct.PS* %val, %struct.PS** [[VAL_ARG]], align 4
+  // CHECK: [[ADDR:%.*]] = load { %struct.PS, [2 x i8] }*, { %struct.PS, [2 x i8] }** [[ADDR_ARG]], align 4
+  // CHECK: [[VAL:%.*]] = load %struct.PS*, %struct.PS** [[VAL_ARG]], align 4
+  // CHECK: [[NONATOMIC_TMP8:%.*]] = bitcast %struct.PS* [[NONATOMIC_TMP]] to i8*
+  // CHECK: [[VAL8:%.*]] = bitcast %struct.PS* [[VAL]] to i8*
+  // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[NONATOMIC_TMP8]], i8* [[VAL8]], i32 6, i32 2, i1 false)
+  // CHECK: [[ADDR64:%.*]] = bitcast { %struct.PS, [2 x i8] }* [[ADDR]] to i64*
+  // CHECK: [[ATOMIC_VAL8:%.*]] = bitcast { %struct.PS, [2 x i8] }* [[ATOMIC_VAL]] to i8*
+  // CHECK: [[NONATOMIC_TMP8:%.*]] = bitcast %struct.PS* [[NONATOMIC_TMP]] to i8*
+  // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[ATOMIC_VAL8]], i8* [[NONATOMIC_TMP8]], i64 6, i32 2, i1 false)
+  // CHECK: [[ATOMIC_VAL64:%.*]] = bitcast { %struct.PS, [2 x i8] }* [[ATOMIC_VAL]] to i64*
+  // CHECK: [[ATOMIC_RES64:%.*]] = bitcast { %struct.PS, [2 x i8] }* [[ATOMIC_RES]] to i64*
+  // CHECK: [[ADDR8:%.*]] = bitcast i64* [[ADDR64]] to i8*
+  // CHECK: [[VAL64:%.*]] = load i64, i64* [[ATOMIC_VAL64]], align 2
+  // CHECK: [[RES:%.*]] = call arm_aapcscc i64 @__atomic_exchange_8(i8* [[ADDR8]], i64 [[VAL64]], i32 5)
+  // CHECK: store i64 [[RES]], i64* [[ATOMIC_RES64]], align 8
+  // CHECK: [[ATOMIC_RES_STRUCT:%.*]] = bitcast i64* [[ATOMIC_RES64]] to %struct.PS*
+  // CHECK: [[AGG_RESULT8:%.*]] = bitcast %struct.PS* %agg.result to i8*
+  // CHECK: [[ATOMIC_RES8:%.*]] = bitcast %struct.PS* [[ATOMIC_RES_STRUCT]] to i8*
+  // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[AGG_RESULT8]], i8* [[ATOMIC_RES8]], i32 6, i32 2, i1 false)
+  return __c11_atomic_exchange(addr, *val, 5);
+}
+
+_Bool test_promoted_cmpxchg(_Atomic(PS) *addr, PS *desired, PS *new) {
+  // CHECK-LABEL: i1 @test_promoted_cmpxchg({ %struct.PS, [2 x i8] }* %addr, %struct.PS* %desired, %struct.PS* %new) #0 {
+  // CHECK: [[ADDR_ARG:%.*]] = alloca { %struct.PS, [2 x i8] }*, align 4
+  // CHECK: [[DESIRED_ARG:%.*]] = alloca %struct.PS*, align 4
+  // CHECK: [[NEW_ARG:%.*]] = alloca %struct.PS*, align 4
+  // CHECK: [[NONATOMIC_TMP:%.*]] = alloca %struct.PS, align 2
+  // CHECK: [[ATOMIC_DESIRED:%.*]] = alloca { %struct.PS, [2 x i8] }, align 8
+  // CHECK: [[ATOMIC_NEW:%.*]] = alloca { %struct.PS, [2 x i8] }, align 8
+  // %cmpxchg.bool = alloca i8, align 1
+  // CHECK: store { %struct.PS, [2 x i8] }* %addr, { %struct.PS, [2 x i8] }** [[ADDR_ARG]], align 4
+  // CHECK: store %struct.PS* %desired, %struct.PS** [[DESIRED_ARG]], align 4
+  // CHECK: store %struct.PS* %new, %struct.PS** [[NEW_ARG]], align 4
+  // CHECK: [[ADDR:%.*]] = load { %struct.PS, [2 x i8] }*, { %struct.PS, [2 x i8] }** [[ADDR_ARG]], align 4
+  // CHECK: [[DESIRED:%.*]] = load %struct.PS*, %struct.PS** [[DESIRED_ARG]], align 4
+  // CHECK: [[NEW:%.*]] = load %struct.PS*, %struct.PS** [[NEW_ARG]], align 4
+  // CHECK: [[NONATOMIC_TMP8:%.*]] = bitcast %struct.PS* [[NONATOMIC_TMP]] to i8*
+  // CHECK: [[NEW8:%.*]] = bitcast %struct.PS* [[NEW]] to i8*
+  // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[NONATOMIC_TMP8]], i8* [[NEW8]], i32 6, i32 2, i1 false)
+  // CHECK: [[ADDR64:%.*]] = bitcast { %struct.PS, [2 x i8] }* [[ADDR]] to i64*
+  // CHECK: [[ATOMIC_DESIRED8:%.*]] = bitcast { %struct.PS, [2 x i8] }* [[ATOMIC_DESIRED]] to i8*
+  // CHECK: [[DESIRED8:%.*]] = bitcast %struct.PS* [[DESIRED]] to i8*
+  // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[ATOMIC_DESIRED8]], i8* [[DESIRED8]], i64 6, i32 2, i1 false)
+  // CHECK: [[ATOMIC_DESIRED64:%.*]] = bitcast { %struct.PS, [2 x i8] }* [[ATOMIC_DESIRED]] to i64*
+  // CHECK: [[ATOMIC_NEW8:%.*]] = bitcast { %struct.PS, [2 x i8] }* [[ATOMIC_NEW]] to i8*
+  // CHECK: [[NONATOMIC_TMP8:%.*]] = bitcast %struct.PS* [[NONATOMIC_TMP]] to i8*
+  // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[ATOMIC_NEW8]], i8* [[NONATOMIC_TMP8]], i64 6, i32 2, i1 false)
+  // CHECK: [[ATOMIC_NEW64:%.*]] = bitcast { %struct.PS, [2 x i8] }* [[ATOMIC_NEW]] to i64*
+  // CHECK: [[ADDR8:%.*]] = bitcast i64* [[ADDR64]] to i8*
+  // CHECK: [[ATOMIC_DESIRED8:%.*]] = bitcast i64* [[ATOMIC_DESIRED64]] to i8*
+  // CHECK: [[NEW64:%.*]] = load i64, i64* [[ATOMIC_NEW64]], align 2
+  // CHECK: [[RES:%.*]] = call arm_aapcscc zeroext i1 @__atomic_compare_exchange_8(i8* [[ADDR8]], i8* [[ATOMIC_DESIRED8]], i64 [[NEW64]], i32 5, i32 5)
+  // CHECK: ret i1 [[RES]]
+  return __c11_atomic_compare_exchange_strong(addr, desired, *new, 5, 5);
 }
Index: test/CodeGen/c11atomics-ios.c
===================================================================
--- test/CodeGen/c11atomics-ios.c
+++ test/CodeGen/c11atomics-ios.c
@@ -202,11 +202,120 @@
 // CHECK-NEXT: ret void
 }
 
-void testPromotedStructOps(_Atomic(PS) *p) {
-  PS a = __c11_atomic_load(p, 5);
-  __c11_atomic_store(p, a, 5);
-  PS b = __c11_atomic_exchange(p, a, 5);
+PS test_promoted_load(_Atomic(PS) *addr) {
+  // CHECK-LABEL: @test_promoted_load(%struct.PS* noalias sret %agg.result, { %struct.PS, [2 x i8] }* %addr)
+  // CHECK: [[ADDR_ARG:%.*]] = alloca { %struct.PS, [2 x i8] }*, align 4
+  // CHECK: [[ATOMIC_RES:%.*]] = alloca { %struct.PS, [2 x i8] }, align 8
+  // CHECK: store { %struct.PS, [2 x i8] }* %addr, { %struct.PS, [2 x i8] }** [[ADDR_ARG]], align 4
+  // CHECK: [[ADDR:%.*]] = load { %struct.PS, [2 x i8] }*, { %struct.PS, [2 x i8] }** [[ADDR_ARG]], align 4
+  // CHECK: [[ADDR64:%.*]] = bitcast { %struct.PS, [2 x i8] }* [[ADDR]] to i64*
+  // CHECK: [[ATOMIC_RES64:%.*]] = bitcast { %struct.PS, [2 x i8] }* [[ATOMIC_RES]] to i64*
+  // CHECK: [[VAL:%.*]] = load atomic i64, i64* [[ADDR64]] seq_cst, align 8
+  // CHECK: store i64 [[VAL]], i64* [[ATOMIC_RES64]], align 8
+  // CHECK: [[ATOMIC_RES_STRUCT:%.*]] = bitcast i64* [[ATOMIC_RES64]] to %struct.PS*
+  // CHECK: [[AGG_RESULT8:%.*]] = bitcast %struct.PS* %agg.result to i8*
+  // CHECK: [[ATOMIC_RES8:%.*]] = bitcast %struct.PS* [[ATOMIC_RES_STRUCT]] to i8*
+  // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[AGG_RESULT8]], i8* [[ATOMIC_RES8]], i32 6, i32 2, i1 false)
 
-  _Bool v = __c11_atomic_compare_exchange_strong(p, &b, a, 5, 5);
-  v = __c11_atomic_compare_exchange_weak(p, &b, a, 5, 5);
+  return __c11_atomic_load(addr, 5);
+}
+
+void test_promoted_store(_Atomic(PS) *addr, PS *val) {
+  // CHECK-LABEL: @test_promoted_store({ %struct.PS, [2 x i8] }* %addr, %struct.PS* %val)
+  // CHECK: [[ADDR_ARG:%.*]] = alloca { %struct.PS, [2 x i8] }*, align 4
+  // CHECK: [[VAL_ARG:%.*]] = alloca %struct.PS*, align 4
+  // CHECK: [[NONATOMIC_TMP:%.*]] = alloca %struct.PS, align 2
+  // CHECK: [[ATOMIC_VAL:%.*]] = alloca { %struct.PS, [2 x i8] }, align 8
+  // CHECK: store { %struct.PS, [2 x i8] }* %addr, { %struct.PS, [2 x i8] }** [[ADDR_ARG]], align 4
+  // CHECK: store %struct.PS* %val, %struct.PS** [[VAL_ARG]], align 4
+  // CHECK: [[ADDR:%.*]] = load { %struct.PS, [2 x i8] }*, { %struct.PS, [2 x i8] }** [[ADDR_ARG]], align 4
+  // CHECK: [[VAL:%.*]] = load %struct.PS*, %struct.PS** [[VAL_ARG]], align 4
+  // CHECK: [[NONATOMIC_TMP8:%.*]] = bitcast %struct.PS* [[NONATOMIC_TMP]] to i8*
+  // CHECK: [[VAL8:%.*]] = bitcast %struct.PS* [[VAL]] to i8*
+  // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[NONATOMIC_TMP8]], i8* [[VAL8]], i32 6, i32 2, i1 false)
+  // CHECK: [[ADDR64:%.*]] = bitcast { %struct.PS, [2 x i8] }* [[ADDR]] to i64*
+  // CHECK: [[ATOMIC_VAL8:%.*]] = bitcast { %struct.PS, [2 x i8] }* [[ATOMIC_VAL]] to i8*
+  // CHECK: [[NONATOMIC_TMP8:%.*]] = bitcast %struct.PS* [[NONATOMIC_TMP]] to i8*
+  // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[ATOMIC_VAL8]], i8* [[NONATOMIC_TMP8]], i64 6, i32 2, i1 false)
+  // CHECK: [[ATOMIC_VAL64:%.*]] = bitcast { %struct.PS, [2 x i8] }* [[ATOMIC_VAL]] to i64*
+  // CHECK: [[VAL64:%.*]] = load i64, i64* [[ATOMIC_VAL64]], align 8
+  // CHECK: store atomic i64 [[VAL64]], i64* [[ADDR64]] seq_cst, align 8
+
+  __c11_atomic_store(addr, *val, 5);
+}
+
+PS test_promoted_exchange(_Atomic(PS) *addr, PS *val) {
+  // CHECK-LABEL: @test_promoted_exchange(%struct.PS* noalias sret %agg.result, { %struct.PS, [2 x i8] }* %addr, %struct.PS* %val)
+  // CHECK: [[ADDR_ARG:%.*]] = alloca { %struct.PS, [2 x i8] }*, align 4
+  // CHECK: [[VAL_ARG:%.*]] = alloca %struct.PS*, align 4
+  // CHECK: [[NONATOMIC_TMP:%.*]] = alloca %struct.PS, align 2
+  // CHECK: [[ATOMIC_VAL:%.*]] = alloca { %struct.PS, [2 x i8] }, align 8
+  // CHECK: [[ATOMIC_RES:%.*]] = alloca { %struct.PS, [2 x i8] }, align 8
+  // CHECK: store { %struct.PS, [2 x i8] }* %addr, { %struct.PS, [2 x i8] }** [[ADDR_ARG]], align 4
+  // CHECK: store %struct.PS* %val, %struct.PS** [[VAL_ARG]], align 4
+  // CHECK: [[ADDR:%.*]] = load { %struct.PS, [2 x i8] }*, { %struct.PS, [2 x i8] }** [[ADDR_ARG]], align 4
+  // CHECK: [[VAL:%.*]] = load %struct.PS*, %struct.PS** [[VAL_ARG]], align 4
+  // CHECK: [[NONATOMIC_TMP8:%.*]] = bitcast %struct.PS* [[NONATOMIC_TMP]] to i8*
+  // CHECK: [[VAL8:%.*]] = bitcast %struct.PS* [[VAL]] to i8*
+  // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[NONATOMIC_TMP8]], i8* [[VAL8]], i32 6, i32 2, i1 false)
+  // CHECK: [[ADDR64:%.*]] = bitcast { %struct.PS, [2 x i8] }* [[ADDR]] to i64*
+  // CHECK: [[ATOMIC_VAL8:%.*]] = bitcast { %struct.PS, [2 x i8] }* [[ATOMIC_VAL]] to i8*
+  // CHECK: [[NONATOMIC_TMP8:%.*]] = bitcast %struct.PS* [[NONATOMIC_TMP]] to i8*
+  // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[ATOMIC_VAL8]], i8* [[NONATOMIC_TMP8]], i64 6, i32 2, i1 false)
+  // CHECK: [[ATOMIC_VAL64:%.*]] = bitcast { %struct.PS, [2 x i8] }* [[ATOMIC_VAL]] to i64*
+  // CHECK: [[ATOMIC_RES64:%.*]] = bitcast { %struct.PS, [2 x i8] }* [[ATOMIC_RES]] to i64*
+  // CHECK: [[VAL64:%.*]] = load i64, i64* [[ATOMIC_VAL64]], align 8
+  // CHECK: [[RES:%.*]] = atomicrmw xchg i64* [[ADDR64]], i64 [[VAL64]] seq_cst
+  // CHECK: store i64 [[RES]], i64* [[ATOMIC_RES64]], align 8
+  // CHECK: [[ATOMIC_RES_STRUCT:%.*]] = bitcast i64* [[ATOMIC_RES64]] to %struct.PS*
+  // CHECK: [[AGG_RESULT8:%.*]] = bitcast %struct.PS* %agg.result to i8*
+  // CHECK: [[ATOMIC_RES8:%.*]] = bitcast %struct.PS* [[ATOMIC_RES_STRUCT]] to i8*
+  // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[AGG_RESULT8]], i8* [[ATOMIC_RES8]], i32 6, i32 2, i1 false)
+  return __c11_atomic_exchange(addr, *val, 5);
+}
+
+_Bool test_promoted_cmpxchg(_Atomic(PS) *addr, PS *desired, PS *new) {
+  // CHECK: define zeroext i1 @test_promoted_cmpxchg({ %struct.PS, [2 x i8] }* %addr, %struct.PS* %desired, %struct.PS* %new) #0 {
+  // CHECK: [[ADDR_ARG:%.*]] = alloca { %struct.PS, [2 x i8] }*, align 4
+  // CHECK: [[DESIRED_ARG:%.*]] = alloca %struct.PS*, align 4
+  // CHECK: [[NEW_ARG:%.*]] = alloca %struct.PS*, align 4
+  // CHECK: [[NONATOMIC_TMP:%.*]] = alloca %struct.PS, align 2
+  // CHECK: [[ATOMIC_DESIRED:%.*]] = alloca { %struct.PS, [2 x i8] }, align 8
+  // CHECK: [[ATOMIC_NEW:%.*]] = alloca { %struct.PS, [2 x i8] }, align 8
+  // CHECK: [[RES_ADDR:%.*]] = alloca i8, align 1
+  // CHECK: store { %struct.PS, [2 x i8] }* %addr, { %struct.PS, [2 x i8] }** [[ADDR_ARG]], align 4
+  // CHECK: store %struct.PS* %desired, %struct.PS** [[DESIRED_ARG]], align 4
+  // CHECK: store %struct.PS* %new, %struct.PS** [[NEW_ARG]], align 4
+  // CHECK: [[ADDR:%.*]] = load { %struct.PS, [2 x i8] }*, { %struct.PS, [2 x i8] }** [[ADDR_ARG]], align 4
+  // CHECK: [[DESIRED:%.*]] = load %struct.PS*, %struct.PS** [[DESIRED_ARG]], align 4
+  // CHECK: [[NEW:%.*]] = load %struct.PS*, %struct.PS** [[NEW_ARG]], align 4
+  // CHECK: [[NONATOMIC_TMP8:%.*]] = bitcast %struct.PS* [[NONATOMIC_TMP]] to i8*
+  // CHECK: [[NEW8:%.*]] = bitcast %struct.PS* [[NEW]] to i8*
+  // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[NONATOMIC_TMP8]], i8* [[NEW8]], i32 6, i32 2, i1 false)
+  // CHECK: [[ADDR64:%.*]] = bitcast { %struct.PS, [2 x i8] }* [[ADDR]] to i64*
+  // CHECK: [[ATOMIC_DESIRED8:%.*]] = bitcast { %struct.PS, [2 x i8] }* [[ATOMIC_DESIRED]] to i8*
+  // CHECK: [[DESIRED8:%.*]] = bitcast %struct.PS* [[DESIRED]] to i8*
+  // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[ATOMIC_DESIRED8]], i8* [[DESIRED8]], i64 6, i32 2, i1 false)
+  // CHECK: [[ATOMIC_DESIRED64:%.*]] = bitcast { %struct.PS, [2 x i8] }* [[ATOMIC_DESIRED]] to i64*
+  // CHECK: [[ATOMIC_NEW8:%.*]] = bitcast { %struct.PS, [2 x i8] }* [[ATOMIC_NEW]] to i8*
+  // CHECK: [[NONATOMIC_TMP8:%.*]] = bitcast %struct.PS* [[NONATOMIC_TMP]] to i8*
+  // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[ATOMIC_NEW8]], i8* [[NONATOMIC_TMP8]], i64 6, i32 2, i1 false)
+  // CHECK: [[ATOMIC_NEW64:%.*]] = bitcast { %struct.PS, [2 x i8] }* [[ATOMIC_NEW]] to i64*
+  // CHECK: [[ATOMIC_DESIRED_VAL64:%.*]] = load i64, i64* [[ATOMIC_DESIRED64]], align 8
+  // CHECK: [[ATOMIC_NEW_VAL64:%.*]] = load i64, i64* [[ATOMIC_NEW64]], align 8
+  // CHECK: [[RES:%.*]] = cmpxchg i64* [[ADDR64]], i64 [[ATOMIC_DESIRED_VAL64]], i64 [[ATOMIC_NEW_VAL64]] seq_cst seq_cst
+  // CHECK: [[RES_VAL64:%.*]] = extractvalue { i64, i1 } [[RES]], 0
+  // CHECK: [[RES_BOOL:%.*]] = extractvalue { i64, i1 } [[RES]], 1
+  // CHECK: br i1 [[RES_BOOL]], label {{%.*}}, label {{%.*}}
+
+  // CHECK: store i64 [[RES_VAL64]], i64* [[ATOMIC_DESIRED64]], align 8
+  // CHECK: br label {{%.*}}
+
+  // CHECK: [[RES_BOOL8:%.*]] = zext i1 [[RES_BOOL]] to i8
+  // CHECK: store i8 [[RES_BOOL8]], i8* [[RES_ADDR]], align 1
+  // CHECK: [[RES_BOOL8:%.*]] = load i8, i8* [[RES_ADDR]], align 1
+  // CHECK: [[RETVAL:%.*]] = trunc i8 [[RES_BOOL8]] to i1
+  // CHECK: ret i1 [[RETVAL]]
+
+  return __c11_atomic_compare_exchange_strong(addr, desired, *new, 5, 5);
 }
Index: test/CodeGen/atomic-ops.c
===================================================================
--- test/CodeGen/atomic-ops.c
+++ test/CodeGen/atomic-ops.c
@@ -179,8 +179,8 @@
   // CHECK-LABEL: @fd1
   // CHECK: [[RETVAL:%.*]] = alloca %struct.S, align 4
   // CHECK: [[RET:%.*]] = alloca %struct.S, align 4
-  // CHECK: [[CALL:%.*]] = call i64 @__atomic_load_8(
   // CHECK: [[CAST:%.*]] = bitcast %struct.S* [[RET]] to i64*
+  // CHECK: [[CALL:%.*]] = call i64 @__atomic_load_8(
   // CHECK: store i64 [[CALL]], i64* [[CAST]], align 4
   struct S ret;
   __atomic_load(a, &ret, memory_order_seq_cst);
@@ -195,8 +195,9 @@
   // CHECK-NEXT: store %struct.S* %b, %struct.S** [[B_ADDR]], align 4
   // CHECK-NEXT: [[LOAD_A_PTR:%.*]] = load %struct.S*, %struct.S** [[A_ADDR]], align 4
   // CHECK-NEXT: [[LOAD_B_PTR:%.*]] = load %struct.S*, %struct.S** [[B_ADDR]], align 4
-  // CHECK-NEXT: [[COERCED_A:%.*]] = bitcast %struct.S* [[LOAD_A_PTR]] to i8*
+  // CHECK-NEXT: [[COERCED_A_TMP:%.*]] = bitcast %struct.S* [[LOAD_A_PTR]] to i64*
   // CHECK-NEXT: [[COERCED_B:%.*]] = bitcast %struct.S* [[LOAD_B_PTR]] to i64*
+  // CHECK-NEXT: [[COERCED_A:%.*]] = bitcast i64* [[COERCED_A_TMP]] to i8*
   // CHECK-NEXT: [[LOAD_B:%.*]] = load i64, i64* [[COERCED_B]], align 4
   // CHECK-NEXT: call void @__atomic_store_8(i8* [[COERCED_A]], i64 [[LOAD_B]],
   // CHECK-NEXT: ret void
@@ -214,11 +215,12 @@
   // CHECK-NEXT: [[LOAD_A_PTR:%.*]] = load %struct.S*, %struct.S** [[A_ADDR]], align 4
   // CHECK-NEXT: [[LOAD_B_PTR:%.*]] = load %struct.S*, %struct.S** [[B_ADDR]], align 4
   // CHECK-NEXT: [[LOAD_C_PTR:%.*]] = load %struct.S*, %struct.S** [[C_ADDR]], align 4
-  // CHECK-NEXT: [[COERCED_A:%.*]] = bitcast %struct.S* [[LOAD_A_PTR]] to i8*
+  // CHECK-NEXT: [[COERCED_A_TMP:%.*]] = bitcast %struct.S* [[LOAD_A_PTR]] to i64*
   // CHECK-NEXT: [[COERCED_B:%.*]] = bitcast %struct.S* [[LOAD_B_PTR]] to i64*
+  // CHECK-NEXT: [[COERCED_C:%.*]] = bitcast %struct.S* [[LOAD_C_PTR]] to i64*
+  // CHECK-NEXT: [[COERCED_A:%.*]] = bitcast i64* [[COERCED_A_TMP]] to i8*
   // CHECK-NEXT: [[LOAD_B:%.*]] = load i64, i64* [[COERCED_B]], align 4
   // CHECK-NEXT: [[CALL:%.*]] = call i64 @__atomic_exchange_8(i8* [[COERCED_A]], i64 [[LOAD_B]],
-  // CHECK-NEXT: [[COERCED_C:%.*]] = bitcast %struct.S* [[LOAD_C_PTR]] to i64*
   // CHECK-NEXT: store i64 [[CALL]], i64* [[COERCED_C]], align 4
 
   __atomic_exchange(a, b, c, memory_order_seq_cst);
@@ -235,9 +237,11 @@
   // CHECK-NEXT: [[LOAD_A_PTR:%.*]] = load %struct.S*, %struct.S** [[A_ADDR]], align 4
   // CHECK-NEXT: [[LOAD_B_PTR:%.*]] = load %struct.S*, %struct.S** [[B_ADDR]], align 4
   // CHECK-NEXT: [[LOAD_C_PTR:%.*]] = load %struct.S*, %struct.S** [[C_ADDR]], align 4
-  // CHECK-NEXT: [[COERCED_A:%.*]] = bitcast %struct.S* [[LOAD_A_PTR]] to i8*
-  // CHECK-NEXT: [[COERCED_B:%.*]] = bitcast %struct.S* [[LOAD_B_PTR]] to i8*
+  // CHECK-NEXT: [[COERCED_A_TMP:%.*]] = bitcast %struct.S* [[LOAD_A_PTR]] to i64*
+  // CHECK-NEXT: [[COERCED_B_TMP:%.*]] = bitcast %struct.S* [[LOAD_B_PTR]] to i64*
   // CHECK-NEXT: [[COERCED_C:%.*]] = bitcast %struct.S* [[LOAD_C_PTR]] to i64*
+  // CHECK-NEXT: [[COERCED_A:%.*]] = bitcast i64* [[COERCED_A_TMP]] to i8*
+  // CHECK-NEXT: [[COERCED_B:%.*]] = bitcast i64* [[COERCED_B_TMP]] to i8*
   // CHECK-NEXT: [[LOAD_C:%.*]] = load i64, i64* [[COERCED_C]], align 4
   // CHECK-NEXT: [[CALL:%.*]] = call zeroext i1 @__atomic_compare_exchange_8(i8* [[COERCED_A]], i8* [[COERCED_B]], i64 [[LOAD_C]]
   // CHECK-NEXT: ret i1 [[CALL]]
@@ -312,6 +316,8 @@
   char c[17];
 } seventeen;
 
+struct Incomplete;
+
 int lock_free(struct Incomplete *incomplete) {
   // CHECK-LABEL: @lock_free
Index: test/CodeGen/atomic-arm64.c
===================================================================
--- test/CodeGen/atomic-arm64.c
+++ test/CodeGen/atomic-arm64.c
@@ -66,8 +66,9 @@
 // CHECK-NEXT: [[T0:%.*]] = bitcast [[QUAD_T]]* [[TEMP]] to i8*
 // CHECK-NEXT: [[T1:%.*]] = bitcast [[QUAD_T]]* {{%.*}} to i8*
 // CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[T0]], i8* [[T1]], i64 32, i32 8, i1 false)
-// CHECK-NEXT: [[T0:%.*]] = bitcast [[QUAD_T]]* [[TEMP]] to i8*
-// CHECK-NEXT: call void @__atomic_store(i64 32, i8* bitcast ([[QUAD_T]]* @a_pointer_quad to i8*), i8* [[T0]], i32 5)
+// CHECK-NEXT: [[T0:%.*]] = bitcast [[QUAD_T]]* [[TEMP]] to i256*
+// CHECK-NEXT: [[T1:%.*]] = bitcast i256* [[T0]] to i8*
+// CHECK-NEXT: call void @__atomic_store(i64 32, i8* bitcast ([[QUAD_T]]* @a_pointer_quad to i8*), i8* [[T1]], i32 5)
 void test4(pointer_quad_t quad) {
   __c11_atomic_store(&a_pointer_quad, quad, memory_order_seq_cst);
 }
Index: lib/CodeGen/CodeGenFunction.h
===================================================================
--- lib/CodeGen/CodeGenFunction.h
+++ lib/CodeGen/CodeGenFunction.h
@@ -2930,7 +2930,7 @@
 
   void EmitLambdaExpr(const LambdaExpr *E, AggValueSlot Dest);
 
-  RValue EmitAtomicExpr(AtomicExpr *E, Address Dest = Address::invalid());
+  RValue EmitAtomicExpr(AtomicExpr *E);
 
   //===--------------------------------------------------------------------===//
   //                         Annotations Emission
Index: lib/CodeGen/CGExprAgg.cpp
===================================================================
--- lib/CodeGen/CGExprAgg.cpp
+++ lib/CodeGen/CGExprAgg.cpp
@@ -199,7 +199,8 @@
   //  case Expr::ChooseExprClass:
   void VisitCXXThrowExpr(const CXXThrowExpr *E) { CGF.EmitCXXThrowExpr(E); }
   void VisitAtomicExpr(AtomicExpr *E) {
-    CGF.EmitAtomicExpr(E, EnsureSlot(E->getType()).getAddress());
+    RValue Res = CGF.EmitAtomicExpr(E);
+    EmitFinalDestCopy(E->getType(), Res);
   }
 };
 } // end anonymous namespace.
Index: lib/CodeGen/CGAtomic.cpp
===================================================================
--- lib/CodeGen/CGAtomic.cpp
+++ lib/CodeGen/CGAtomic.cpp
@@ -173,9 +173,14 @@
       return CGF.CGM.getSize(size);
     }
 
-    /// Cast the given pointer to an integer pointer suitable for
-    /// atomic operations.
-    Address emitCastToAtomicIntPointer(Address addr) const;
+    /// Cast the given pointer to an integer pointer suitable for atomic
+    /// operations.
+    Address emitCastToAtomicIntPointer(Address Addr) const;
+
+    /// If Addr is compatible with the iN that will be used for an atomic
+    /// operation, bitcast it. Otherwise, create a temporary that is suitable
+    /// and copy the value across.
+    Address convertToAtomicIntPointer(Address Addr) const;
 
     /// Turn an atomic-layout object into an r-value.
     RValue convertAtomicTempToRValue(Address addr, AggValueSlot resultSlot,
@@ -241,11 +246,11 @@
     static AtomicExpr::AtomicOrderingKind
     translateAtomicOrdering(const llvm::AtomicOrdering AO);
 
+    /// \brief Creates temp alloca for intermediate operations on atomic value.
+    Address CreateTempAlloca() const;
   private:
     bool requiresMemSetZero(llvm::Type *type) const;
 
-    /// \brief Creates temp alloca for intermediate operations on atomic value.
-    Address CreateTempAlloca() const;
 
     /// \brief Emits atomic load as a libcall.
     void EmitAtomicLoadLibcall(llvm::Value *AddForLoaded,
@@ -554,7 +559,6 @@
   case AtomicExpr::AO__c11_atomic_store:
   case AtomicExpr::AO__atomic_store:
   case AtomicExpr::AO__atomic_store_n: {
-    assert(!Dest.isValid() && "Store does not return a value");
     llvm::Value *LoadVal1 = CGF.Builder.CreateLoad(Val1);
     llvm::StoreInst *Store = CGF.Builder.CreateStore(LoadVal1, Ptr);
     Store->setAtomic(Order);
@@ -666,7 +670,7 @@
   }
 }
 
-RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E, Address Dest) {
+RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) {
   QualType AtomicTy = E->getPtr()->getType()->getPointeeType();
   QualType MemTy = AtomicTy;
   if (const AtomicType *AT = AtomicTy->getAs<AtomicType>())
@@ -682,10 +686,10 @@
 
   Address Val1 = Address::invalid();
   Address Val2 = Address::invalid();
+  Address Dest = Address::invalid();
   Address Ptr(EmitScalarExpr(E->getPtr()), alignChars);
 
   if (E->getOp() == AtomicExpr::AO__c11_atomic_init) {
-    assert(!Dest.isValid() && "Init does not return a value");
     LValue lvalue = MakeAddrLValue(Ptr, AtomicTy);
     EmitAtomicInit(E->getVal1(), lvalue);
     return RValue::get(nullptr);
@@ -771,12 +775,21 @@
 
   QualType RValTy = E->getType().getUnqualifiedType();
 
-  auto GetDest = [&] {
-    if (!RValTy->isVoidType() && !Dest.isValid()) {
-      Dest = CreateMemTemp(RValTy, ".atomicdst");
-    }
-    return Dest;
-  };
+  // The inlined atomics only function on iN types, where N is a power of 2. We
+  // need to make sure (via temporaries if necessary) that all incoming values
+  // are compatible.
+  LValue AtomicVal = MakeAddrLValue(Ptr, AtomicTy);
+  AtomicInfo Atomics(*this, AtomicVal);
+
+  Ptr = Atomics.emitCastToAtomicIntPointer(Ptr);
+  if (Val1.isValid()) Val1 = Atomics.convertToAtomicIntPointer(Val1);
+  if (Val2.isValid()) Val2 = Atomics.convertToAtomicIntPointer(Val2);
+  if (Dest.isValid())
+    Dest = Atomics.emitCastToAtomicIntPointer(Dest);
+  else if (E->isCmpXChg())
+    Dest = CreateMemTemp(RValTy, "cmpxchg.bool");
+  else if (!RValTy->isVoidType())
+    Dest = Atomics.emitCastToAtomicIntPointer(Atomics.CreateTempAlloca());
 
   // Use a library call. See: http://gcc.gnu.org/wiki/Atomic/GCCMM/LIbrary .
   if (UseLibcall) {
@@ -996,19 +1009,24 @@
 
   RValue Res = emitAtomicLibcall(*this, LibCallName, RetTy, Args);
   // The value is returned directly from the libcall.
-  if (HaveRetTy && !RetTy->isVoidType())
+  if (E->isCmpXChg())
     return Res;
-  // The value is returned via an explicit out param.
-  if (RetTy->isVoidType())
-    return RValue::get(nullptr);
-  // The value is returned directly for optimized libcalls but the caller is
-  // expected an out-param.
-  if (UseOptimizedLibcall) {
+
+  // The value is returned directly for optimized libcalls but the expr
+  // provided an out-param.
+  if (UseOptimizedLibcall && Res.getScalarVal()) {
     llvm::Value *ResVal = Res.getScalarVal();
-    Builder.CreateStore(ResVal,
-        Builder.CreateBitCast(GetDest(), ResVal->getType()->getPointerTo()));
+    Builder.CreateStore(
+        ResVal,
+        Builder.CreateBitCast(Dest, ResVal->getType()->getPointerTo()));
   }
-  return convertTempToRValue(Dest, RValTy, E->getExprLoc());
+
+  if (RValTy->isVoidType())
+    return RValue::get(nullptr);
+
+  return convertTempToRValue(
+      Builder.CreateBitCast(Dest, ConvertTypeForMem(RValTy)->getPointerTo()),
+      RValTy, E->getExprLoc());
 }
 
   bool IsStore = E->getOp() == AtomicExpr::AO__c11_atomic_store ||
@@ -1018,16 +1036,6 @@
                  E->getOp() == AtomicExpr::AO__atomic_load ||
                  E->getOp() == AtomicExpr::AO__atomic_load_n;
 
-  llvm::Type *ITy =
-      llvm::IntegerType::get(getLLVMContext(), Size * 8);
-  Address OrigDest = GetDest();
-  Ptr = Builder.CreateBitCast(
-      Ptr, ITy->getPointerTo(Ptr.getType()->getPointerAddressSpace()));
-  if (Val1.isValid()) Val1 = Builder.CreateBitCast(Val1, ITy->getPointerTo());
-  if (Val2.isValid()) Val2 = Builder.CreateBitCast(Val2, ITy->getPointerTo());
-  if (Dest.isValid() && !E->isCmpXChg())
-    Dest = Builder.CreateBitCast(Dest, ITy->getPointerTo());
-
   if (isa<llvm::ConstantInt>(Order)) {
     int ord = cast<llvm::ConstantInt>(Order)->getZExtValue();
     switch (ord) {
@@ -1065,7 +1073,10 @@
     }
     if (RValTy->isVoidType())
       return RValue::get(nullptr);
-    return convertTempToRValue(OrigDest, RValTy, E->getExprLoc());
+
+    return convertTempToRValue(
+        Builder.CreateBitCast(Dest, ConvertTypeForMem(RValTy)->getPointerTo()),
+        RValTy, E->getExprLoc());
   }
 
   // Long case, when Order isn't obviously constant.
@@ -1133,7 +1144,11 @@
   Builder.SetInsertPoint(ContBB);
   if (RValTy->isVoidType())
     return RValue::get(nullptr);
-  return convertTempToRValue(OrigDest, RValTy, E->getExprLoc());
+
+  assert(Atomics.getValueSizeInBits() <= Atomics.getAtomicSizeInBits());
+  return convertTempToRValue(
+      Builder.CreateBitCast(Dest, ConvertTypeForMem(RValTy)->getPointerTo()),
+      RValTy, E->getExprLoc());
 }
 
 Address AtomicInfo::emitCastToAtomicIntPointer(Address addr) const {
@@ -1144,6 +1159,19 @@
   return CGF.Builder.CreateBitCast(addr, ty->getPointerTo(addrspace));
 }
 
+Address AtomicInfo::convertToAtomicIntPointer(Address Addr) const {
+  llvm::Type *Ty = Addr.getElementType();
+  uint64_t SourceSizeInBits = CGF.CGM.getDataLayout().getTypeSizeInBits(Ty);
+  if (SourceSizeInBits != AtomicSizeInBits) {
+    Address Tmp = CreateTempAlloca();
+    CGF.Builder.CreateMemCpy(Tmp, Addr,
+                             std::min(AtomicSizeInBits, SourceSizeInBits) / 8);
+    Addr = Tmp;
+  }
+
+  return emitCastToAtomicIntPointer(Addr);
+}
+
 RValue AtomicInfo::convertAtomicTempToRValue(Address addr,
                                              AggValueSlot resultSlot,
                                              SourceLocation loc,