https://github.com/jofrn updated https://github.com/llvm/llvm-project/pull/120716
>From 45ccff21b3332127625a76eae10e62f7e8fe3389 Mon Sep 17 00:00:00 2001 From: jofrn <jofer...@amd.com> Date: Fri, 20 Dec 2024 06:14:28 -0500 Subject: [PATCH] [AtomicExpand] Add bitcasts when expanding load atomic vector AtomicExpand fails for aligned `load atomic <n x T>` because it does not find a compatible library call. This change adds appropriate bitcasts so that the call can be lowered. commit-id:f430c1af --- llvm/lib/CodeGen/AtomicExpandPass.cpp | 18 ++++- llvm/test/CodeGen/ARM/atomic-load-store.ll | 51 +++++++++++++++ llvm/test/CodeGen/X86/atomic-load-store.ll | 30 +++++++++ .../X86/expand-atomic-non-integer.ll | 65 +++++++++++++++++++ 4 files changed, 161 insertions(+), 3 deletions(-) diff --git a/llvm/lib/CodeGen/AtomicExpandPass.cpp b/llvm/lib/CodeGen/AtomicExpandPass.cpp index a75fa688d87a8d..644977f225d834 100644 --- a/llvm/lib/CodeGen/AtomicExpandPass.cpp +++ b/llvm/lib/CodeGen/AtomicExpandPass.cpp @@ -2060,9 +2060,21 @@ bool AtomicExpandImpl::expandAtomicOpToLibcall( I->replaceAllUsesWith(V); } else if (HasResult) { Value *V; - if (UseSizedLibcall) - V = Builder.CreateBitOrPointerCast(Result, I->getType()); - else { + if (UseSizedLibcall) { + // Add bitcasts from Result's scalar type to I's <n x ptr> vector type + if (I->getType()->getScalarType()->isPointerTy() && + I->getType()->isVectorTy() && !Result->getType()->isVectorTy()) { + unsigned AS = cast<PointerType>( + I->getType()->getScalarType())->getAddressSpace(); + ElementCount EC = cast<VectorType>(I->getType())->getElementCount(); + Value *BC = Builder.CreateBitCast(Result, VectorType::get( + IntegerType::get(Ctx, DL.getPointerSizeInBits(AS)), EC)); + Value *IntToPtr = Builder.CreateIntToPtr(BC, VectorType::get( + PointerType::get(Ctx, AS), EC)); + V = Builder.CreateBitOrPointerCast(IntToPtr, I->getType()); + } else + V = Builder.CreateBitOrPointerCast(Result, I->getType()); + } else { V = Builder.CreateAlignedLoad(I->getType(), AllocaResult, AllocaAlignment); Builder.CreateLifetimeEnd(AllocaResult, SizeVal64); diff --git a/llvm/test/CodeGen/ARM/atomic-load-store.ll b/llvm/test/CodeGen/ARM/atomic-load-store.ll index 560dfde356c29d..36c1305a7c5df3 100644 --- a/llvm/test/CodeGen/ARM/atomic-load-store.ll +++ b/llvm/test/CodeGen/ARM/atomic-load-store.ll @@ -983,3 +983,54 @@ define void @store_atomic_f64__seq_cst(ptr %ptr, double %val1) { store atomic double %val1, ptr %ptr seq_cst, align 8 ret void } + +define <1 x ptr> @atomic_vec1_ptr(ptr %x) #0 { +; ARM-LABEL: atomic_vec1_ptr: +; ARM: @ %bb.0: +; ARM-NEXT: ldr r0, [r0] +; ARM-NEXT: dmb ish +; ARM-NEXT: bx lr +; +; ARMOPTNONE-LABEL: atomic_vec1_ptr: +; ARMOPTNONE: @ %bb.0: +; ARMOPTNONE-NEXT: ldr r0, [r0] +; ARMOPTNONE-NEXT: dmb ish +; ARMOPTNONE-NEXT: bx lr +; +; THUMBTWO-LABEL: atomic_vec1_ptr: +; THUMBTWO: @ %bb.0: +; THUMBTWO-NEXT: ldr r0, [r0] +; THUMBTWO-NEXT: dmb ish +; THUMBTWO-NEXT: bx lr +; +; THUMBONE-LABEL: atomic_vec1_ptr: +; THUMBONE: @ %bb.0: +; THUMBONE-NEXT: push {r7, lr} +; THUMBONE-NEXT: movs r1, #0 +; THUMBONE-NEXT: mov r2, r1 +; THUMBONE-NEXT: bl __sync_val_compare_and_swap_4 +; THUMBONE-NEXT: pop {r7, pc} +; +; ARMV4-LABEL: atomic_vec1_ptr: +; ARMV4: @ %bb.0: +; ARMV4-NEXT: push {r11, lr} +; ARMV4-NEXT: mov r1, #2 +; ARMV4-NEXT: bl __atomic_load_4 +; ARMV4-NEXT: pop {r11, lr} +; ARMV4-NEXT: mov pc, lr +; +; ARMV6-LABEL: atomic_vec1_ptr: +; ARMV6: @ %bb.0: +; ARMV6-NEXT: mov r1, #0 +; ARMV6-NEXT: mcr p15, #0, r1, c7, c10, #5 +; ARMV6-NEXT: ldr r0, [r0] +; ARMV6-NEXT: bx lr +; +; THUMBM-LABEL: atomic_vec1_ptr: +; THUMBM: @ %bb.0: +; THUMBM-NEXT: ldr r0, [r0] +; THUMBM-NEXT: dmb sy +; THUMBM-NEXT: bx lr + %ret = load atomic <1 x ptr>, ptr %x acquire, align 4 + ret <1 x ptr> %ret +} diff --git a/llvm/test/CodeGen/X86/atomic-load-store.ll b/llvm/test/CodeGen/X86/atomic-load-store.ll index 08d0405345f573..4293df8c13571f 100644 --- a/llvm/test/CodeGen/X86/atomic-load-store.ll +++ b/llvm/test/CodeGen/X86/atomic-load-store.ll @@ -371,6 +371,21 @@ define <2 x i32> @atomic_vec2_i32(ptr %x) nounwind { ret <2 x i32> %ret } +define <2 x ptr> @atomic_vec2_ptr_align(ptr %x) nounwind { +; CHECK-LABEL: atomic_vec2_ptr_align: +; CHECK: ## %bb.0: +; CHECK-NEXT: pushq %rax +; CHECK-NEXT: movl $2, %esi +; CHECK-NEXT: callq ___atomic_load_16 +; CHECK-NEXT: movq %rdx, %xmm1 +; CHECK-NEXT: movq %rax, %xmm0 +; CHECK-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; CHECK-NEXT: popq %rax +; CHECK-NEXT: retq + %ret = load atomic <2 x ptr>, ptr %x acquire, align 16 + ret <2 x ptr> %ret +} + define <4 x i8> @atomic_vec4_i8(ptr %x) nounwind { ; CHECK3-LABEL: atomic_vec4_i8: ; CHECK3: ## %bb.0: @@ -394,6 +409,21 @@ define <4 x i16> @atomic_vec4_i16(ptr %x) nounwind { ret <4 x i16> %ret } +define <4 x ptr addrspace(270)> @atomic_vec4_ptr270(ptr %x) nounwind { +; CHECK-LABEL: atomic_vec4_ptr270: +; CHECK: ## %bb.0: +; CHECK-NEXT: pushq %rax +; CHECK-NEXT: movl $2, %esi +; CHECK-NEXT: callq ___atomic_load_16 +; CHECK-NEXT: movq %rdx, %xmm1 +; CHECK-NEXT: movq %rax, %xmm0 +; CHECK-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; CHECK-NEXT: popq %rax +; CHECK-NEXT: retq + %ret = load atomic <4 x ptr addrspace(270)>, ptr %x acquire, align 16 + ret <4 x ptr addrspace(270)> %ret +} + define <4 x half> @atomic_vec4_half(ptr %x) nounwind { ; CHECK-LABEL: atomic_vec4_half: ; CHECK: ## %bb.0: diff --git a/llvm/test/Transforms/AtomicExpand/X86/expand-atomic-non-integer.ll b/llvm/test/Transforms/AtomicExpand/X86/expand-atomic-non-integer.ll index 5929c153d5961d..322f74f539ca36 100644 --- a/llvm/test/Transforms/AtomicExpand/X86/expand-atomic-non-integer.ll +++ b/llvm/test/Transforms/AtomicExpand/X86/expand-atomic-non-integer.ll @@ -151,3 +151,68 @@ define void @pointer_cmpxchg_expand6(ptr addrspace(1) %ptr, ret void } +define <2 x ptr> @atomic_vec2_ptr_align(ptr %x) nounwind { +; CHECK-LABEL: define <2 x ptr> @atomic_vec2_ptr_align( +; CHECK-SAME: ptr [[X:%.*]]) #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: [[TMP1:%.*]] = call i128 @__atomic_load_16(ptr [[X]], i32 2) +; CHECK-NEXT: [[TMP6:%.*]] = bitcast i128 [[TMP1]] to <2 x i64> +; CHECK-NEXT: [[TMP7:%.*]] = inttoptr <2 x i64> [[TMP6]] to <2 x ptr> +; CHECK-NEXT: ret <2 x ptr> [[TMP7]] +; + %ret = load atomic <2 x ptr>, ptr %x acquire, align 16 + ret <2 x ptr> %ret +} + +define <4 x ptr addrspace(270)> @atomic_vec4_ptr_align(ptr %x) nounwind { +; CHECK-LABEL: define <4 x ptr addrspace(270)> @atomic_vec4_ptr_align( +; CHECK-SAME: ptr [[X:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = call i128 @__atomic_load_16(ptr [[X]], i32 2) +; CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[TMP1]] to <4 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = inttoptr <4 x i32> [[TMP2]] to <4 x ptr addrspace(270)> +; CHECK-NEXT: ret <4 x ptr addrspace(270)> [[TMP3]] +; + %ret = load atomic <4 x ptr addrspace(270)>, ptr %x acquire, align 16 + ret <4 x ptr addrspace(270)> %ret +} + +define <2 x i16> @atomic_vec2_i16(ptr %x) nounwind { +; CHECK-LABEL: define <2 x i16> @atomic_vec2_i16( +; CHECK-SAME: ptr [[X:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[RET:%.*]] = load atomic <2 x i16>, ptr [[X]] acquire, align 8 +; CHECK-NEXT: ret <2 x i16> [[RET]] +; + %ret = load atomic <2 x i16>, ptr %x acquire, align 8 + ret <2 x i16> %ret +} + +define <2 x half> @atomic_vec2_half(ptr %x) nounwind { +; CHECK-LABEL: define <2 x half> @atomic_vec2_half( +; CHECK-SAME: ptr [[X:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[RET:%.*]] = load atomic <2 x half>, ptr [[X]] acquire, align 8 +; CHECK-NEXT: ret <2 x half> [[RET]] +; + %ret = load atomic <2 x half>, ptr %x acquire, align 8 + ret <2 x half> %ret +} + +define <4 x i32> @atomic_vec4_i32(ptr %x) nounwind { +; CHECK-LABEL: define <4 x i32> @atomic_vec4_i32( +; CHECK-SAME: ptr [[X:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = call i128 @__atomic_load_16(ptr [[X]], i32 2) +; CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[TMP1]] to <4 x i32> +; CHECK-NEXT: ret <4 x i32> [[TMP2]] +; + %ret = load atomic <4 x i32>, ptr %x acquire, align 16 + ret <4 x i32> %ret +} + +define <4 x float> @atomic_vec4_float(ptr %x) nounwind { +; CHECK-LABEL: define <4 x float> @atomic_vec4_float( +; CHECK-SAME: ptr [[X:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = call i128 @__atomic_load_16(ptr [[X]], i32 2) +; CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[TMP1]] to <4 x float> +; CHECK-NEXT: ret <4 x float> [[TMP2]] +; + %ret = load atomic <4 x float>, ptr %x acquire, align 16 + ret <4 x float> %ret +} _______________________________________________ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits