https://github.com/jofrn updated 
https://github.com/llvm/llvm-project/pull/148900

>From a9e62b36fb879b7b0278d299df64e11ba6605041 Mon Sep 17 00:00:00 2001
From: jofrn <[email protected]>
Date: Tue, 15 Jul 2025 13:03:15 -0400
Subject: [PATCH] [AtomicExpand] Add bitcasts when expanding load atomic vector

AtomicExpand fails for aligned `load atomic <n x T>` because it
does not find a compatible library call. This change adds appropriate
bitcasts so that the call can be lowered. It also adds 128-bit
lowering support in tablegen for SSE/AVX.
---
 llvm/lib/CodeGen/AtomicExpandPass.cpp         |  19 +-
 llvm/test/CodeGen/ARM/atomic-load-store.ll    |  51 ++++
 llvm/test/CodeGen/X86/atomic-load-store.ll    |  91 +++++-
 .../X86/expand-atomic-non-integer.ll          | 287 ++++++++++++++----
 4 files changed, 382 insertions(+), 66 deletions(-)

diff --git a/llvm/lib/CodeGen/AtomicExpandPass.cpp 
b/llvm/lib/CodeGen/AtomicExpandPass.cpp
index 53f1cfe24a68d..8dc14bb416345 100644
--- a/llvm/lib/CodeGen/AtomicExpandPass.cpp
+++ b/llvm/lib/CodeGen/AtomicExpandPass.cpp
@@ -483,7 +483,9 @@ LoadInst 
*AtomicExpandImpl::convertAtomicLoadToIntegerType(LoadInst *LI) {
   NewLI->setAtomic(LI->getOrdering(), LI->getSyncScopeID());
   LLVM_DEBUG(dbgs() << "Replaced " << *LI << " with " << *NewLI << "\n");
 
-  Value *NewVal = Builder.CreateBitCast(NewLI, LI->getType());
+  Value *NewVal = LI->getType()->isPtrOrPtrVectorTy()
+                      ? Builder.CreateIntToPtr(NewLI, LI->getType())
+                      : Builder.CreateBitCast(NewLI, LI->getType());
   LI->replaceAllUsesWith(NewVal);
   LI->eraseFromParent();
   return NewLI;
@@ -2093,9 +2095,18 @@ bool AtomicExpandImpl::expandAtomicOpToLibcall(
     I->replaceAllUsesWith(V);
   } else if (HasResult) {
     Value *V;
-    if (UseSizedLibcall)
-      V = Builder.CreateBitOrPointerCast(Result, I->getType());
-    else {
+    if (UseSizedLibcall) {
+      // Add bitcasts from Result's scalar type to I's <n x ptr> vector type
+      auto *PtrTy = dyn_cast<PointerType>(I->getType()->getScalarType());
+      auto *VTy = dyn_cast<VectorType>(I->getType());
+      if (VTy && PtrTy && !Result->getType()->isVectorTy()) {
+        unsigned AS = PtrTy->getAddressSpace();
+        Value *BC = Builder.CreateBitCast(
+            Result, VTy->getWithNewType(DL.getIntPtrType(Ctx, AS)));
+        V = Builder.CreateIntToPtr(BC, I->getType());
+      } else
+        V = Builder.CreateBitOrPointerCast(Result, I->getType());
+    } else {
       V = Builder.CreateAlignedLoad(I->getType(), AllocaResult,
                                     AllocaAlignment);
       Builder.CreateLifetimeEnd(AllocaResult);
diff --git a/llvm/test/CodeGen/ARM/atomic-load-store.ll 
b/llvm/test/CodeGen/ARM/atomic-load-store.ll
index 560dfde356c29..eaa2ffd9b2731 100644
--- a/llvm/test/CodeGen/ARM/atomic-load-store.ll
+++ b/llvm/test/CodeGen/ARM/atomic-load-store.ll
@@ -983,3 +983,54 @@ define void @store_atomic_f64__seq_cst(ptr %ptr, double 
%val1) {
   store atomic double %val1, ptr %ptr seq_cst, align 8
   ret void
 }
+
+define <1 x ptr> @atomic_vec1_ptr(ptr %x) #0 {
+; ARM-LABEL: atomic_vec1_ptr:
+; ARM:       @ %bb.0:
+; ARM-NEXT:    ldr r0, [r0]
+; ARM-NEXT:    dmb ish
+; ARM-NEXT:    bx lr
+;
+; ARMOPTNONE-LABEL: atomic_vec1_ptr:
+; ARMOPTNONE:       @ %bb.0:
+; ARMOPTNONE-NEXT:    ldr r0, [r0]
+; ARMOPTNONE-NEXT:    dmb ish
+; ARMOPTNONE-NEXT:    bx lr
+;
+; THUMBTWO-LABEL: atomic_vec1_ptr:
+; THUMBTWO:       @ %bb.0:
+; THUMBTWO-NEXT:    ldr r0, [r0]
+; THUMBTWO-NEXT:    dmb ish
+; THUMBTWO-NEXT:    bx lr
+;
+; THUMBONE-LABEL: atomic_vec1_ptr:
+; THUMBONE:       @ %bb.0:
+; THUMBONE-NEXT:    push {r7, lr}
+; THUMBONE-NEXT:    movs r1, #0
+; THUMBONE-NEXT:    mov r2, r1
+; THUMBONE-NEXT:    bl __sync_val_compare_and_swap_4
+; THUMBONE-NEXT:    pop {r7, pc}
+;
+; ARMV4-LABEL: atomic_vec1_ptr:
+; ARMV4:       @ %bb.0:
+; ARMV4-NEXT:    push {r11, lr}
+; ARMV4-NEXT:    mov r1, #2
+; ARMV4-NEXT:    bl __atomic_load_4
+; ARMV4-NEXT:    pop {r11, lr}
+; ARMV4-NEXT:    mov pc, lr
+;
+; ARMV6-LABEL: atomic_vec1_ptr:
+; ARMV6:       @ %bb.0:
+; ARMV6-NEXT:    ldr r0, [r0]
+; ARMV6-NEXT:    mov r1, #0
+; ARMV6-NEXT:    mcr p15, #0, r1, c7, c10, #5
+; ARMV6-NEXT:    bx lr
+;
+; THUMBM-LABEL: atomic_vec1_ptr:
+; THUMBM:       @ %bb.0:
+; THUMBM-NEXT:    ldr r0, [r0]
+; THUMBM-NEXT:    dmb sy
+; THUMBM-NEXT:    bx lr
+  %ret = load atomic <1 x ptr>, ptr %x acquire, align 4
+  ret <1 x ptr> %ret
+}
diff --git a/llvm/test/CodeGen/X86/atomic-load-store.ll 
b/llvm/test/CodeGen/X86/atomic-load-store.ll
index 00310f6d1f219..867a4acb791bc 100644
--- a/llvm/test/CodeGen/X86/atomic-load-store.ll
+++ b/llvm/test/CodeGen/X86/atomic-load-store.ll
@@ -244,6 +244,96 @@ define <2 x ptr addrspace(270)> @atomic_vec2_ptr270(ptr 
%x) {
   %ret = load atomic <2 x ptr addrspace(270)>, ptr %x acquire, align 8
   ret <2 x ptr addrspace(270)> %ret
 }
+define <2 x ptr> @atomic_vec2_ptr_align(ptr %x) nounwind {
+; CHECK-SSE2-O3-LABEL: atomic_vec2_ptr_align:
+; CHECK-SSE2-O3:       # %bb.0:
+; CHECK-SSE2-O3-NEXT:    pushq %rax
+; CHECK-SSE2-O3-NEXT:    movl $2, %esi
+; CHECK-SSE2-O3-NEXT:    callq __atomic_load_16@PLT
+; CHECK-SSE2-O3-NEXT:    movq %rdx, %xmm1
+; CHECK-SSE2-O3-NEXT:    movq %rax, %xmm0
+; CHECK-SSE2-O3-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; CHECK-SSE2-O3-NEXT:    popq %rax
+; CHECK-SSE2-O3-NEXT:    retq
+;
+; CHECK-SSE4-O3-LABEL: atomic_vec2_ptr_align:
+; CHECK-SSE4-O3:       # %bb.0:
+; CHECK-SSE4-O3-NEXT:    movaps (%rdi), %xmm0
+; CHECK-SSE4-O3-NEXT:    retq
+;
+; CHECK-AVX-O3-LABEL: atomic_vec2_ptr_align:
+; CHECK-AVX-O3:       # %bb.0:
+; CHECK-AVX-O3-NEXT:    vmovaps (%rdi), %xmm0
+; CHECK-AVX-O3-NEXT:    retq
+;
+; CHECK-SSE2-O0-LABEL: atomic_vec2_ptr_align:
+; CHECK-SSE2-O0:       # %bb.0:
+; CHECK-SSE2-O0-NEXT:    pushq %rax
+; CHECK-SSE2-O0-NEXT:    movl $2, %esi
+; CHECK-SSE2-O0-NEXT:    callq __atomic_load_16@PLT
+; CHECK-SSE2-O0-NEXT:    movq %rdx, %xmm1
+; CHECK-SSE2-O0-NEXT:    movq %rax, %xmm0
+; CHECK-SSE2-O0-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; CHECK-SSE2-O0-NEXT:    popq %rax
+; CHECK-SSE2-O0-NEXT:    retq
+;
+; CHECK-SSE4-O0-LABEL: atomic_vec2_ptr_align:
+; CHECK-SSE4-O0:       # %bb.0:
+; CHECK-SSE4-O0-NEXT:    movapd (%rdi), %xmm0
+; CHECK-SSE4-O0-NEXT:    retq
+;
+; CHECK-AVX-O0-LABEL: atomic_vec2_ptr_align:
+; CHECK-AVX-O0:       # %bb.0:
+; CHECK-AVX-O0-NEXT:    vmovapd (%rdi), %xmm0
+; CHECK-AVX-O0-NEXT:    retq
+  %ret = load atomic <2 x ptr>, ptr %x acquire, align 16
+  ret <2 x ptr> %ret
+}
+define <4 x ptr addrspace(270)> @atomic_vec4_ptr270(ptr %x) nounwind {
+; CHECK-SSE2-O3-LABEL: atomic_vec4_ptr270:
+; CHECK-SSE2-O3:       # %bb.0:
+; CHECK-SSE2-O3-NEXT:    pushq %rax
+; CHECK-SSE2-O3-NEXT:    movl $2, %esi
+; CHECK-SSE2-O3-NEXT:    callq __atomic_load_16@PLT
+; CHECK-SSE2-O3-NEXT:    movq %rdx, %xmm1
+; CHECK-SSE2-O3-NEXT:    movq %rax, %xmm0
+; CHECK-SSE2-O3-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; CHECK-SSE2-O3-NEXT:    popq %rax
+; CHECK-SSE2-O3-NEXT:    retq
+;
+; CHECK-SSE4-O3-LABEL: atomic_vec4_ptr270:
+; CHECK-SSE4-O3:       # %bb.0:
+; CHECK-SSE4-O3-NEXT:    movaps (%rdi), %xmm0
+; CHECK-SSE4-O3-NEXT:    retq
+;
+; CHECK-AVX-O3-LABEL: atomic_vec4_ptr270:
+; CHECK-AVX-O3:       # %bb.0:
+; CHECK-AVX-O3-NEXT:    vmovaps (%rdi), %xmm0
+; CHECK-AVX-O3-NEXT:    retq
+;
+; CHECK-SSE2-O0-LABEL: atomic_vec4_ptr270:
+; CHECK-SSE2-O0:       # %bb.0:
+; CHECK-SSE2-O0-NEXT:    pushq %rax
+; CHECK-SSE2-O0-NEXT:    movl $2, %esi
+; CHECK-SSE2-O0-NEXT:    callq __atomic_load_16@PLT
+; CHECK-SSE2-O0-NEXT:    movq %rdx, %xmm1
+; CHECK-SSE2-O0-NEXT:    movq %rax, %xmm0
+; CHECK-SSE2-O0-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; CHECK-SSE2-O0-NEXT:    popq %rax
+; CHECK-SSE2-O0-NEXT:    retq
+;
+; CHECK-SSE4-O0-LABEL: atomic_vec4_ptr270:
+; CHECK-SSE4-O0:       # %bb.0:
+; CHECK-SSE4-O0-NEXT:    movapd (%rdi), %xmm0
+; CHECK-SSE4-O0-NEXT:    retq
+;
+; CHECK-AVX-O0-LABEL: atomic_vec4_ptr270:
+; CHECK-AVX-O0:       # %bb.0:
+; CHECK-AVX-O0-NEXT:    vmovapd (%rdi), %xmm0
+; CHECK-AVX-O0-NEXT:    retq
+  %ret = load atomic <4 x ptr addrspace(270)>, ptr %x acquire, align 16
+  ret <4 x ptr addrspace(270)> %ret
+}
 
 define <2 x i32> @atomic_vec2_i32_align(ptr %x) {
 ; CHECK-SSE-O3-LABEL: atomic_vec2_i32_align:
@@ -727,7 +817,6 @@ define <4 x float> @atomic_vec4_float(ptr %x) nounwind {
 }
 
 define <4 x float> @atomic_vec4_float_align(ptr %x) nounwind {
-;
 ; CHECK-SSE2-O3-LABEL: atomic_vec4_float_align:
 ; CHECK-SSE2-O3:       # %bb.0:
 ; CHECK-SSE2-O3-NEXT:    pushq %rax
diff --git a/llvm/test/Transforms/AtomicExpand/X86/expand-atomic-non-integer.ll 
b/llvm/test/Transforms/AtomicExpand/X86/expand-atomic-non-integer.ll
index 84c7df120e32f..17d99c6459604 100644
--- a/llvm/test/Transforms/AtomicExpand/X86/expand-atomic-non-integer.ll
+++ b/llvm/test/Transforms/AtomicExpand/X86/expand-atomic-non-integer.ll
@@ -1,5 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py 
UTC_ARGS: --version 6
-; RUN: opt -S %s -passes=atomic-expand -mtriple=x86_64-linux-gnu | FileCheck %s
+; RUN: opt -S %s -passes=atomic-expand -mtriple=x86_64-linux-gnu | FileCheck 
%s --check-prefixes=CHECK,CHECK64
+; RUN: opt -S %s -passes=atomic-expand -mtriple=i686-linux-gnu | FileCheck %s 
--check-prefixes=CHECK,CHECK32
 
 ; This file tests the functions `llvm::convertAtomicLoadToIntegerType` and
 ; `llvm::convertAtomicStoreToIntegerType`. If X86 stops using this
@@ -94,98 +95,262 @@ define void @float_store_expand_addr1(ptr addrspace(1) 
%ptr, float %v) {
 }
 
 define void @pointer_cmpxchg_expand(ptr %ptr, ptr %v) {
-; CHECK-LABEL: define void @pointer_cmpxchg_expand(
-; CHECK-SAME: ptr [[PTR:%.*]], ptr [[V:%.*]]) {
-; CHECK-NEXT:    [[TMP1:%.*]] = ptrtoint ptr [[V]] to i64
-; CHECK-NEXT:    [[TMP2:%.*]] = cmpxchg ptr [[PTR]], i64 0, i64 [[TMP1]] 
seq_cst monotonic, align 8
-; CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { i64, i1 } [[TMP2]], 0
-; CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { i64, i1 } [[TMP2]], 1
-; CHECK-NEXT:    [[TMP5:%.*]] = inttoptr i64 [[TMP3]] to ptr
-; CHECK-NEXT:    [[TMP6:%.*]] = insertvalue { ptr, i1 } poison, ptr [[TMP5]], 0
-; CHECK-NEXT:    [[TMP7:%.*]] = insertvalue { ptr, i1 } [[TMP6]], i1 [[TMP4]], 
1
-; CHECK-NEXT:    ret void
+; CHECK64-LABEL: define void @pointer_cmpxchg_expand(
+; CHECK64-SAME: ptr [[PTR:%.*]], ptr [[V:%.*]]) {
+; CHECK64-NEXT:    [[TMP1:%.*]] = ptrtoint ptr [[V]] to i64
+; CHECK64-NEXT:    [[TMP2:%.*]] = cmpxchg ptr [[PTR]], i64 0, i64 [[TMP1]] 
seq_cst monotonic, align 8
+; CHECK64-NEXT:    [[TMP3:%.*]] = extractvalue { i64, i1 } [[TMP2]], 0
+; CHECK64-NEXT:    [[TMP4:%.*]] = extractvalue { i64, i1 } [[TMP2]], 1
+; CHECK64-NEXT:    [[TMP5:%.*]] = inttoptr i64 [[TMP3]] to ptr
+; CHECK64-NEXT:    [[TMP6:%.*]] = insertvalue { ptr, i1 } poison, ptr 
[[TMP5]], 0
+; CHECK64-NEXT:    [[TMP7:%.*]] = insertvalue { ptr, i1 } [[TMP6]], i1 
[[TMP4]], 1
+; CHECK64-NEXT:    ret void
+;
+; CHECK32-LABEL: define void @pointer_cmpxchg_expand(
+; CHECK32-SAME: ptr [[PTR:%.*]], ptr [[V:%.*]]) {
+; CHECK32-NEXT:    [[TMP1:%.*]] = ptrtoint ptr [[V]] to i32
+; CHECK32-NEXT:    [[TMP2:%.*]] = cmpxchg ptr [[PTR]], i32 0, i32 [[TMP1]] 
seq_cst monotonic, align 4
+; CHECK32-NEXT:    [[TMP3:%.*]] = extractvalue { i32, i1 } [[TMP2]], 0
+; CHECK32-NEXT:    [[TMP4:%.*]] = extractvalue { i32, i1 } [[TMP2]], 1
+; CHECK32-NEXT:    [[TMP5:%.*]] = inttoptr i32 [[TMP3]] to ptr
+; CHECK32-NEXT:    [[TMP6:%.*]] = insertvalue { ptr, i1 } poison, ptr 
[[TMP5]], 0
+; CHECK32-NEXT:    [[TMP7:%.*]] = insertvalue { ptr, i1 } [[TMP6]], i1 
[[TMP4]], 1
+; CHECK32-NEXT:    ret void
 ;
   cmpxchg ptr %ptr, ptr null, ptr %v seq_cst monotonic
   ret void
 }
 
 define void @pointer_cmpxchg_expand2(ptr %ptr, ptr %v) {
-; CHECK-LABEL: define void @pointer_cmpxchg_expand2(
-; CHECK-SAME: ptr [[PTR:%.*]], ptr [[V:%.*]]) {
-; CHECK-NEXT:    [[TMP1:%.*]] = ptrtoint ptr [[V]] to i64
-; CHECK-NEXT:    [[TMP2:%.*]] = cmpxchg ptr [[PTR]], i64 0, i64 [[TMP1]] 
release monotonic, align 8
-; CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { i64, i1 } [[TMP2]], 0
-; CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { i64, i1 } [[TMP2]], 1
-; CHECK-NEXT:    [[TMP5:%.*]] = inttoptr i64 [[TMP3]] to ptr
-; CHECK-NEXT:    [[TMP6:%.*]] = insertvalue { ptr, i1 } poison, ptr [[TMP5]], 0
-; CHECK-NEXT:    [[TMP7:%.*]] = insertvalue { ptr, i1 } [[TMP6]], i1 [[TMP4]], 
1
-; CHECK-NEXT:    ret void
+; CHECK64-LABEL: define void @pointer_cmpxchg_expand2(
+; CHECK64-SAME: ptr [[PTR:%.*]], ptr [[V:%.*]]) {
+; CHECK64-NEXT:    [[TMP1:%.*]] = ptrtoint ptr [[V]] to i64
+; CHECK64-NEXT:    [[TMP2:%.*]] = cmpxchg ptr [[PTR]], i64 0, i64 [[TMP1]] 
release monotonic, align 8
+; CHECK64-NEXT:    [[TMP3:%.*]] = extractvalue { i64, i1 } [[TMP2]], 0
+; CHECK64-NEXT:    [[TMP4:%.*]] = extractvalue { i64, i1 } [[TMP2]], 1
+; CHECK64-NEXT:    [[TMP5:%.*]] = inttoptr i64 [[TMP3]] to ptr
+; CHECK64-NEXT:    [[TMP6:%.*]] = insertvalue { ptr, i1 } poison, ptr 
[[TMP5]], 0
+; CHECK64-NEXT:    [[TMP7:%.*]] = insertvalue { ptr, i1 } [[TMP6]], i1 
[[TMP4]], 1
+; CHECK64-NEXT:    ret void
+;
+; CHECK32-LABEL: define void @pointer_cmpxchg_expand2(
+; CHECK32-SAME: ptr [[PTR:%.*]], ptr [[V:%.*]]) {
+; CHECK32-NEXT:    [[TMP1:%.*]] = ptrtoint ptr [[V]] to i32
+; CHECK32-NEXT:    [[TMP2:%.*]] = cmpxchg ptr [[PTR]], i32 0, i32 [[TMP1]] 
release monotonic, align 4
+; CHECK32-NEXT:    [[TMP3:%.*]] = extractvalue { i32, i1 } [[TMP2]], 0
+; CHECK32-NEXT:    [[TMP4:%.*]] = extractvalue { i32, i1 } [[TMP2]], 1
+; CHECK32-NEXT:    [[TMP5:%.*]] = inttoptr i32 [[TMP3]] to ptr
+; CHECK32-NEXT:    [[TMP6:%.*]] = insertvalue { ptr, i1 } poison, ptr 
[[TMP5]], 0
+; CHECK32-NEXT:    [[TMP7:%.*]] = insertvalue { ptr, i1 } [[TMP6]], i1 
[[TMP4]], 1
+; CHECK32-NEXT:    ret void
 ;
   cmpxchg ptr %ptr, ptr null, ptr %v release monotonic
   ret void
 }
 
 define void @pointer_cmpxchg_expand3(ptr %ptr, ptr %v) {
-; CHECK-LABEL: define void @pointer_cmpxchg_expand3(
-; CHECK-SAME: ptr [[PTR:%.*]], ptr [[V:%.*]]) {
-; CHECK-NEXT:    [[TMP1:%.*]] = ptrtoint ptr [[V]] to i64
-; CHECK-NEXT:    [[TMP2:%.*]] = cmpxchg ptr [[PTR]], i64 0, i64 [[TMP1]] 
seq_cst seq_cst, align 8
-; CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { i64, i1 } [[TMP2]], 0
-; CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { i64, i1 } [[TMP2]], 1
-; CHECK-NEXT:    [[TMP5:%.*]] = inttoptr i64 [[TMP3]] to ptr
-; CHECK-NEXT:    [[TMP6:%.*]] = insertvalue { ptr, i1 } poison, ptr [[TMP5]], 0
-; CHECK-NEXT:    [[TMP7:%.*]] = insertvalue { ptr, i1 } [[TMP6]], i1 [[TMP4]], 
1
-; CHECK-NEXT:    ret void
+; CHECK64-LABEL: define void @pointer_cmpxchg_expand3(
+; CHECK64-SAME: ptr [[PTR:%.*]], ptr [[V:%.*]]) {
+; CHECK64-NEXT:    [[TMP1:%.*]] = ptrtoint ptr [[V]] to i64
+; CHECK64-NEXT:    [[TMP2:%.*]] = cmpxchg ptr [[PTR]], i64 0, i64 [[TMP1]] 
seq_cst seq_cst, align 8
+; CHECK64-NEXT:    [[TMP3:%.*]] = extractvalue { i64, i1 } [[TMP2]], 0
+; CHECK64-NEXT:    [[TMP4:%.*]] = extractvalue { i64, i1 } [[TMP2]], 1
+; CHECK64-NEXT:    [[TMP5:%.*]] = inttoptr i64 [[TMP3]] to ptr
+; CHECK64-NEXT:    [[TMP6:%.*]] = insertvalue { ptr, i1 } poison, ptr 
[[TMP5]], 0
+; CHECK64-NEXT:    [[TMP7:%.*]] = insertvalue { ptr, i1 } [[TMP6]], i1 
[[TMP4]], 1
+; CHECK64-NEXT:    ret void
+;
+; CHECK32-LABEL: define void @pointer_cmpxchg_expand3(
+; CHECK32-SAME: ptr [[PTR:%.*]], ptr [[V:%.*]]) {
+; CHECK32-NEXT:    [[TMP1:%.*]] = ptrtoint ptr [[V]] to i32
+; CHECK32-NEXT:    [[TMP2:%.*]] = cmpxchg ptr [[PTR]], i32 0, i32 [[TMP1]] 
seq_cst seq_cst, align 4
+; CHECK32-NEXT:    [[TMP3:%.*]] = extractvalue { i32, i1 } [[TMP2]], 0
+; CHECK32-NEXT:    [[TMP4:%.*]] = extractvalue { i32, i1 } [[TMP2]], 1
+; CHECK32-NEXT:    [[TMP5:%.*]] = inttoptr i32 [[TMP3]] to ptr
+; CHECK32-NEXT:    [[TMP6:%.*]] = insertvalue { ptr, i1 } poison, ptr 
[[TMP5]], 0
+; CHECK32-NEXT:    [[TMP7:%.*]] = insertvalue { ptr, i1 } [[TMP6]], i1 
[[TMP4]], 1
+; CHECK32-NEXT:    ret void
 ;
   cmpxchg ptr %ptr, ptr null, ptr %v seq_cst seq_cst
   ret void
 }
 
 define void @pointer_cmpxchg_expand4(ptr %ptr, ptr %v) {
-; CHECK-LABEL: define void @pointer_cmpxchg_expand4(
-; CHECK-SAME: ptr [[PTR:%.*]], ptr [[V:%.*]]) {
-; CHECK-NEXT:    [[TMP1:%.*]] = ptrtoint ptr [[V]] to i64
-; CHECK-NEXT:    [[TMP2:%.*]] = cmpxchg weak ptr [[PTR]], i64 0, i64 [[TMP1]] 
seq_cst seq_cst, align 8
-; CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { i64, i1 } [[TMP2]], 0
-; CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { i64, i1 } [[TMP2]], 1
-; CHECK-NEXT:    [[TMP5:%.*]] = inttoptr i64 [[TMP3]] to ptr
-; CHECK-NEXT:    [[TMP6:%.*]] = insertvalue { ptr, i1 } poison, ptr [[TMP5]], 0
-; CHECK-NEXT:    [[TMP7:%.*]] = insertvalue { ptr, i1 } [[TMP6]], i1 [[TMP4]], 
1
-; CHECK-NEXT:    ret void
+; CHECK64-LABEL: define void @pointer_cmpxchg_expand4(
+; CHECK64-SAME: ptr [[PTR:%.*]], ptr [[V:%.*]]) {
+; CHECK64-NEXT:    [[TMP1:%.*]] = ptrtoint ptr [[V]] to i64
+; CHECK64-NEXT:    [[TMP2:%.*]] = cmpxchg weak ptr [[PTR]], i64 0, i64 
[[TMP1]] seq_cst seq_cst, align 8
+; CHECK64-NEXT:    [[TMP3:%.*]] = extractvalue { i64, i1 } [[TMP2]], 0
+; CHECK64-NEXT:    [[TMP4:%.*]] = extractvalue { i64, i1 } [[TMP2]], 1
+; CHECK64-NEXT:    [[TMP5:%.*]] = inttoptr i64 [[TMP3]] to ptr
+; CHECK64-NEXT:    [[TMP6:%.*]] = insertvalue { ptr, i1 } poison, ptr 
[[TMP5]], 0
+; CHECK64-NEXT:    [[TMP7:%.*]] = insertvalue { ptr, i1 } [[TMP6]], i1 
[[TMP4]], 1
+; CHECK64-NEXT:    ret void
+;
+; CHECK32-LABEL: define void @pointer_cmpxchg_expand4(
+; CHECK32-SAME: ptr [[PTR:%.*]], ptr [[V:%.*]]) {
+; CHECK32-NEXT:    [[TMP1:%.*]] = ptrtoint ptr [[V]] to i32
+; CHECK32-NEXT:    [[TMP2:%.*]] = cmpxchg weak ptr [[PTR]], i32 0, i32 
[[TMP1]] seq_cst seq_cst, align 4
+; CHECK32-NEXT:    [[TMP3:%.*]] = extractvalue { i32, i1 } [[TMP2]], 0
+; CHECK32-NEXT:    [[TMP4:%.*]] = extractvalue { i32, i1 } [[TMP2]], 1
+; CHECK32-NEXT:    [[TMP5:%.*]] = inttoptr i32 [[TMP3]] to ptr
+; CHECK32-NEXT:    [[TMP6:%.*]] = insertvalue { ptr, i1 } poison, ptr 
[[TMP5]], 0
+; CHECK32-NEXT:    [[TMP7:%.*]] = insertvalue { ptr, i1 } [[TMP6]], i1 
[[TMP4]], 1
+; CHECK32-NEXT:    ret void
 ;
   cmpxchg weak ptr %ptr, ptr null, ptr %v seq_cst seq_cst
   ret void
 }
 
 define void @pointer_cmpxchg_expand5(ptr %ptr, ptr %v) {
-; CHECK-LABEL: define void @pointer_cmpxchg_expand5(
-; CHECK-SAME: ptr [[PTR:%.*]], ptr [[V:%.*]]) {
-; CHECK-NEXT:    [[TMP1:%.*]] = ptrtoint ptr [[V]] to i64
-; CHECK-NEXT:    [[TMP2:%.*]] = cmpxchg volatile ptr [[PTR]], i64 0, i64 
[[TMP1]] seq_cst seq_cst, align 8
-; CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { i64, i1 } [[TMP2]], 0
-; CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { i64, i1 } [[TMP2]], 1
-; CHECK-NEXT:    [[TMP5:%.*]] = inttoptr i64 [[TMP3]] to ptr
-; CHECK-NEXT:    [[TMP6:%.*]] = insertvalue { ptr, i1 } poison, ptr [[TMP5]], 0
-; CHECK-NEXT:    [[TMP7:%.*]] = insertvalue { ptr, i1 } [[TMP6]], i1 [[TMP4]], 
1
-; CHECK-NEXT:    ret void
+; CHECK64-LABEL: define void @pointer_cmpxchg_expand5(
+; CHECK64-SAME: ptr [[PTR:%.*]], ptr [[V:%.*]]) {
+; CHECK64-NEXT:    [[TMP1:%.*]] = ptrtoint ptr [[V]] to i64
+; CHECK64-NEXT:    [[TMP2:%.*]] = cmpxchg volatile ptr [[PTR]], i64 0, i64 
[[TMP1]] seq_cst seq_cst, align 8
+; CHECK64-NEXT:    [[TMP3:%.*]] = extractvalue { i64, i1 } [[TMP2]], 0
+; CHECK64-NEXT:    [[TMP4:%.*]] = extractvalue { i64, i1 } [[TMP2]], 1
+; CHECK64-NEXT:    [[TMP5:%.*]] = inttoptr i64 [[TMP3]] to ptr
+; CHECK64-NEXT:    [[TMP6:%.*]] = insertvalue { ptr, i1 } poison, ptr 
[[TMP5]], 0
+; CHECK64-NEXT:    [[TMP7:%.*]] = insertvalue { ptr, i1 } [[TMP6]], i1 
[[TMP4]], 1
+; CHECK64-NEXT:    ret void
+;
+; CHECK32-LABEL: define void @pointer_cmpxchg_expand5(
+; CHECK32-SAME: ptr [[PTR:%.*]], ptr [[V:%.*]]) {
+; CHECK32-NEXT:    [[TMP1:%.*]] = ptrtoint ptr [[V]] to i32
+; CHECK32-NEXT:    [[TMP2:%.*]] = cmpxchg volatile ptr [[PTR]], i32 0, i32 
[[TMP1]] seq_cst seq_cst, align 4
+; CHECK32-NEXT:    [[TMP3:%.*]] = extractvalue { i32, i1 } [[TMP2]], 0
+; CHECK32-NEXT:    [[TMP4:%.*]] = extractvalue { i32, i1 } [[TMP2]], 1
+; CHECK32-NEXT:    [[TMP5:%.*]] = inttoptr i32 [[TMP3]] to ptr
+; CHECK32-NEXT:    [[TMP6:%.*]] = insertvalue { ptr, i1 } poison, ptr 
[[TMP5]], 0
+; CHECK32-NEXT:    [[TMP7:%.*]] = insertvalue { ptr, i1 } [[TMP6]], i1 
[[TMP4]], 1
+; CHECK32-NEXT:    ret void
 ;
   cmpxchg volatile ptr %ptr, ptr null, ptr %v seq_cst seq_cst
   ret void
 }
 
 define void @pointer_cmpxchg_expand6(ptr addrspace(1) %ptr, ptr addrspace(2) 
%v) {
-; CHECK-LABEL: define void @pointer_cmpxchg_expand6(
-; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]], ptr addrspace(2) [[V:%.*]]) {
-; CHECK-NEXT:    [[TMP1:%.*]] = ptrtoint ptr addrspace(2) [[V]] to i64
-; CHECK-NEXT:    [[TMP2:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 0, i64 
[[TMP1]] seq_cst seq_cst, align 8
-; CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { i64, i1 } [[TMP2]], 0
-; CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { i64, i1 } [[TMP2]], 1
-; CHECK-NEXT:    [[TMP5:%.*]] = inttoptr i64 [[TMP3]] to ptr addrspace(2)
-; CHECK-NEXT:    [[TMP6:%.*]] = insertvalue { ptr addrspace(2), i1 } poison, 
ptr addrspace(2) [[TMP5]], 0
-; CHECK-NEXT:    [[TMP7:%.*]] = insertvalue { ptr addrspace(2), i1 } [[TMP6]], 
i1 [[TMP4]], 1
-; CHECK-NEXT:    ret void
+; CHECK64-LABEL: define void @pointer_cmpxchg_expand6(
+; CHECK64-SAME: ptr addrspace(1) [[PTR:%.*]], ptr addrspace(2) [[V:%.*]]) {
+; CHECK64-NEXT:    [[TMP1:%.*]] = ptrtoint ptr addrspace(2) [[V]] to i64
+; CHECK64-NEXT:    [[TMP2:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 0, i64 
[[TMP1]] seq_cst seq_cst, align 8
+; CHECK64-NEXT:    [[TMP3:%.*]] = extractvalue { i64, i1 } [[TMP2]], 0
+; CHECK64-NEXT:    [[TMP4:%.*]] = extractvalue { i64, i1 } [[TMP2]], 1
+; CHECK64-NEXT:    [[TMP5:%.*]] = inttoptr i64 [[TMP3]] to ptr addrspace(2)
+; CHECK64-NEXT:    [[TMP6:%.*]] = insertvalue { ptr addrspace(2), i1 } poison, 
ptr addrspace(2) [[TMP5]], 0
+; CHECK64-NEXT:    [[TMP7:%.*]] = insertvalue { ptr addrspace(2), i1 } 
[[TMP6]], i1 [[TMP4]], 1
+; CHECK64-NEXT:    ret void
+;
+; CHECK32-LABEL: define void @pointer_cmpxchg_expand6(
+; CHECK32-SAME: ptr addrspace(1) [[PTR:%.*]], ptr addrspace(2) [[V:%.*]]) {
+; CHECK32-NEXT:    [[TMP1:%.*]] = ptrtoint ptr addrspace(2) [[V]] to i32
+; CHECK32-NEXT:    [[TMP2:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 0, i32 
[[TMP1]] seq_cst seq_cst, align 4
+; CHECK32-NEXT:    [[TMP3:%.*]] = extractvalue { i32, i1 } [[TMP2]], 0
+; CHECK32-NEXT:    [[TMP4:%.*]] = extractvalue { i32, i1 } [[TMP2]], 1
+; CHECK32-NEXT:    [[TMP5:%.*]] = inttoptr i32 [[TMP3]] to ptr addrspace(2)
+; CHECK32-NEXT:    [[TMP6:%.*]] = insertvalue { ptr addrspace(2), i1 } poison, 
ptr addrspace(2) [[TMP5]], 0
+; CHECK32-NEXT:    [[TMP7:%.*]] = insertvalue { ptr addrspace(2), i1 } 
[[TMP6]], i1 [[TMP4]], 1
+; CHECK32-NEXT:    ret void
 ;
   cmpxchg ptr addrspace(1) %ptr, ptr addrspace(2) null, ptr addrspace(2) %v 
seq_cst seq_cst
   ret void
 }
 
+define <2 x ptr> @atomic_vec2_ptr_align(ptr %x) nounwind {
+; CHECK64-LABEL: define <2 x ptr> @atomic_vec2_ptr_align(
+; CHECK64-SAME: ptr [[X:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK64-NEXT:    [[TMP1:%.*]] = call i128 @__atomic_load_16(ptr [[X]], i32 2)
+; CHECK64-NEXT:    [[TMP2:%.*]] = bitcast i128 [[TMP1]] to <2 x i64>
+; CHECK64-NEXT:    [[TMP3:%.*]] = inttoptr <2 x i64> [[TMP2]] to <2 x ptr>
+; CHECK64-NEXT:    ret <2 x ptr> [[TMP3]]
+;
+; CHECK32-LABEL: define <2 x ptr> @atomic_vec2_ptr_align(
+; CHECK32-SAME: ptr [[X:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK32-NEXT:    [[RET:%.*]] = load atomic <2 x ptr>, ptr [[X]] acquire, 
align 16
+; CHECK32-NEXT:    ret <2 x ptr> [[RET]]
+;
+  %ret = load atomic <2 x ptr>, ptr %x acquire, align 16
+  ret <2 x ptr> %ret
+}
+
+define <4 x ptr addrspace(270)> @atomic_vec4_ptr_align(ptr %x) nounwind {
+; CHECK64-LABEL: define <4 x ptr addrspace(270)> @atomic_vec4_ptr_align(
+; CHECK64-SAME: ptr [[X:%.*]]) #[[ATTR0]] {
+; CHECK64-NEXT:    [[TMP1:%.*]] = call i128 @__atomic_load_16(ptr [[X]], i32 2)
+; CHECK64-NEXT:    [[TMP2:%.*]] = bitcast i128 [[TMP1]] to <4 x i32>
+; CHECK64-NEXT:    [[TMP3:%.*]] = inttoptr <4 x i32> [[TMP2]] to <4 x ptr 
addrspace(270)>
+; CHECK64-NEXT:    ret <4 x ptr addrspace(270)> [[TMP3]]
+;
+; CHECK32-LABEL: define <4 x ptr addrspace(270)> @atomic_vec4_ptr_align(
+; CHECK32-SAME: ptr [[X:%.*]]) #[[ATTR0]] {
+; CHECK32-NEXT:    [[TMP1:%.*]] = alloca <4 x ptr addrspace(270)>, align 16
+; CHECK32-NEXT:    call void @llvm.lifetime.start.p0(ptr [[TMP1]])
+; CHECK32-NEXT:    call void @__atomic_load(i32 16, ptr [[X]], ptr [[TMP1]], 
i32 2)
+; CHECK32-NEXT:    [[TMP2:%.*]] = load <4 x ptr addrspace(270)>, ptr [[TMP1]], 
align 16
+; CHECK32-NEXT:    call void @llvm.lifetime.end.p0(ptr [[TMP1]])
+; CHECK32-NEXT:    ret <4 x ptr addrspace(270)> [[TMP2]]
+;
+  %ret = load atomic <4 x ptr addrspace(270)>, ptr %x acquire, align 16
+  ret <4 x ptr addrspace(270)> %ret
+}
+
+define <2 x i16> @atomic_vec2_i16(ptr %x) nounwind {
+; CHECK-LABEL: define <2 x i16> @atomic_vec2_i16(
+; CHECK-SAME: ptr [[X:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT:    [[RET:%.*]] = load atomic <2 x i16>, ptr [[X]] acquire, align 
8
+; CHECK-NEXT:    ret <2 x i16> [[RET]]
+;
+  %ret = load atomic <2 x i16>, ptr %x acquire, align 8
+  ret <2 x i16> %ret
+}
+
+define <2 x half> @atomic_vec2_half(ptr %x) nounwind {
+; CHECK-LABEL: define <2 x half> @atomic_vec2_half(
+; CHECK-SAME: ptr [[X:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = load atomic i32, ptr [[X]] acquire, align 8
+; CHECK-NEXT:    [[RET:%.*]] = bitcast i32 [[TMP1]] to <2 x half>
+; CHECK-NEXT:    ret <2 x half> [[RET]]
+;
+  %ret = load atomic <2 x half>, ptr %x acquire, align 8
+  ret <2 x half> %ret
+}
+
+define <4 x i32> @atomic_vec4_i32(ptr %x) nounwind {
+; CHECK64-LABEL: define <4 x i32> @atomic_vec4_i32(
+; CHECK64-SAME: ptr [[X:%.*]]) #[[ATTR0]] {
+; CHECK64-NEXT:    [[TMP1:%.*]] = call i128 @__atomic_load_16(ptr [[X]], i32 2)
+; CHECK64-NEXT:    [[TMP2:%.*]] = bitcast i128 [[TMP1]] to <4 x i32>
+; CHECK64-NEXT:    ret <4 x i32> [[TMP2]]
+;
+; CHECK32-LABEL: define <4 x i32> @atomic_vec4_i32(
+; CHECK32-SAME: ptr [[X:%.*]]) #[[ATTR0]] {
+; CHECK32-NEXT:    [[TMP1:%.*]] = alloca <4 x i32>, align 16
+; CHECK32-NEXT:    call void @llvm.lifetime.start.p0(ptr [[TMP1]])
+; CHECK32-NEXT:    call void @__atomic_load(i32 16, ptr [[X]], ptr [[TMP1]], 
i32 2)
+; CHECK32-NEXT:    [[TMP2:%.*]] = load <4 x i32>, ptr [[TMP1]], align 16
+; CHECK32-NEXT:    call void @llvm.lifetime.end.p0(ptr [[TMP1]])
+; CHECK32-NEXT:    ret <4 x i32> [[TMP2]]
+;
+  %ret = load atomic <4 x i32>, ptr %x acquire, align 16
+  ret <4 x i32> %ret
+}
+
+define <4 x float> @atomic_vec4_float(ptr %x) nounwind {
+; CHECK64-LABEL: define <4 x float> @atomic_vec4_float(
+; CHECK64-SAME: ptr [[X:%.*]]) #[[ATTR0]] {
+; CHECK64-NEXT:    [[TMP1:%.*]] = call i128 @__atomic_load_16(ptr [[X]], i32 2)
+; CHECK64-NEXT:    [[TMP2:%.*]] = bitcast i128 [[TMP1]] to <4 x float>
+; CHECK64-NEXT:    ret <4 x float> [[TMP2]]
+;
+; CHECK32-LABEL: define <4 x float> @atomic_vec4_float(
+; CHECK32-SAME: ptr [[X:%.*]]) #[[ATTR0]] {
+; CHECK32-NEXT:    [[TMP1:%.*]] = alloca <4 x float>, align 16
+; CHECK32-NEXT:    call void @llvm.lifetime.start.p0(ptr [[TMP1]])
+; CHECK32-NEXT:    call void @__atomic_load(i32 16, ptr [[X]], ptr [[TMP1]], 
i32 2)
+; CHECK32-NEXT:    [[TMP2:%.*]] = load <4 x float>, ptr [[TMP1]], align 16
+; CHECK32-NEXT:    call void @llvm.lifetime.end.p0(ptr [[TMP1]])
+; CHECK32-NEXT:    ret <4 x float> [[TMP2]]
+;
+  %ret = load atomic <4 x float>, ptr %x acquire, align 16
+  ret <4 x float> %ret
+}

_______________________________________________
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

Reply via email to