Thank you!
On Wed, Nov 23, 2022 at 3:11 PM Benjamin Kramer via cfe-commits <cfe-commits@lists.llvm.org> wrote: > > > Author: Benjamin Kramer > Date: 2022-11-23T13:11:16+01:00 > New Revision: 5cfc22cafe3f2465e0bb324f8daba82ffcabd0df > > URL: > https://github.com/llvm/llvm-project/commit/5cfc22cafe3f2465e0bb324f8daba82ffcabd0df > DIFF: > https://github.com/llvm/llvm-project/commit/5cfc22cafe3f2465e0bb324f8daba82ffcabd0df.diff > > LOG: Revert "[SROA] `isVectorPromotionViable()`: memory intrinsics operate on > vectors of bytes" > > This reverts commit cf624b23bc5d5a6161706d1663def49380ff816a. It > triggers crashes in clang, see the comments on github on the original > change. > > Added: > > > Modified: > clang/test/CodeGenOpenCL/amdgpu-nullptr.cl > llvm/lib/Transforms/Scalar/SROA.cpp > llvm/test/CodeGen/AMDGPU/v1024.ll > llvm/test/DebugInfo/X86/sroasplit-1.ll > llvm/test/DebugInfo/X86/sroasplit-4.ll > llvm/test/Transforms/PhaseOrdering/instcombine-sroa-inttoptr.ll > llvm/test/Transforms/SROA/address-spaces.ll > llvm/test/Transforms/SROA/alignment.ll > llvm/test/Transforms/SROA/alloca-address-space.ll > llvm/test/Transforms/SROA/basictest.ll > llvm/test/Transforms/SROA/pointer-offset-size.ll > llvm/test/Transforms/SROA/scalable-vectors.ll > llvm/test/Transforms/SROA/slice-width.ll > llvm/test/Transforms/SROA/sroa-common-type-fail-promotion.ll > llvm/test/Transforms/SROA/tbaa-struct.ll > llvm/test/Transforms/SROA/tbaa-struct2.ll > llvm/test/Transforms/SROA/vector-promotion.ll > > Removed: > > > > ################################################################################ > diff --git a/clang/test/CodeGenOpenCL/amdgpu-nullptr.cl > b/clang/test/CodeGenOpenCL/amdgpu-nullptr.cl > index 859e81f08d6bd..65f6f2e7d8c24 100644 > --- a/clang/test/CodeGenOpenCL/amdgpu-nullptr.cl > +++ b/clang/test/CodeGenOpenCL/amdgpu-nullptr.cl > @@ -515,17 +515,13 @@ typedef struct { > private char *p; > } StructTy3; > > -// CHECK-LABEL: @test_memset_private( > -// CHECK-NEXT: entry: > -// CHECK-NEXT: [[TMP0:%.*]] = bitcast [[STRUCT_STRUCTTY3:%.*]] > addrspace(5)* [[PTR:%.*]] to i8 addrspace(5)* > -// CHECK-NEXT: > [[S3_SROA_0_SROA_0_0_S3_SROA_0_0__SROA_CAST2_SROA_CAST:%.*]] = bitcast > [[STRUCT_STRUCTTY3]] addrspace(5)* [[PTR]] to <32 x i8> addrspace(5)* > -// CHECK-NEXT: store <32 x i8> zeroinitializer, <32 x i8> addrspace(5)* > [[S3_SROA_0_SROA_0_0_S3_SROA_0_0__SROA_CAST2_SROA_CAST]], align 8, > !tbaa.struct !9 > -// CHECK-NEXT: [[S3_SROA_4_0__SROA_IDX6:%.*]] = getelementptr inbounds > [[STRUCT_STRUCTTY3]], [[STRUCT_STRUCTTY3]] addrspace(5)* [[PTR]], i32 0, i32 4 > -// CHECK-NEXT: store i8 addrspace(5)* addrspacecast (i8* null to i8 > addrspace(5)*), i8 addrspace(5)* addrspace(5)* [[S3_SROA_4_0__SROA_IDX6]], > align 8, !tbaa.struct !12 > -// CHECK-NEXT: [[S3_SROA_5_0__SROA_IDX:%.*]] = getelementptr inbounds i8, > i8 addrspace(5)* [[TMP0]], i32 36 > -// CHECK-NEXT: [[S3_SROA_5_0__SROA_CAST8:%.*]] = bitcast i8 addrspace(5)* > [[S3_SROA_5_0__SROA_IDX]] to i32 addrspace(5)* > -// CHECK-NEXT: store i32 0, i32 addrspace(5)* > [[S3_SROA_5_0__SROA_CAST8]], align 4, !tbaa.struct !13 > -// CHECK-NEXT: ret void > +// CHECK-LABEL: test_memset_private > +// CHECK: call void @llvm.memset.p5i8.i64(i8 addrspace(5)* noundef align 8 > {{.*}}, i8 0, i64 32, i1 false) > +// CHECK: [[GEP:%.*]] = getelementptr inbounds %struct.StructTy3, > %struct.StructTy3 addrspace(5)* %ptr, i32 0, i32 4 > +// CHECK: store i8 addrspace(5)* addrspacecast (i8* null to i8 > addrspace(5)*), i8 addrspace(5)* addrspace(5)* [[GEP]] > +// CHECK: [[GEP1:%.*]] = getelementptr inbounds i8, i8 addrspace(5)* {{.*}}, > i32 36 > +// CHECK: [[GEP1_CAST:%.*]] = bitcast i8 addrspace(5)* [[GEP1]] to i32 > addrspace(5)* > +// CHECK: store i32 0, i32 addrspace(5)* [[GEP1_CAST]], align 4 > void test_memset_private(private StructTy3 *ptr) { > StructTy3 S3 = {0, 0, 0, 0, 0}; > *ptr = S3; > > diff --git a/llvm/lib/Transforms/Scalar/SROA.cpp > b/llvm/lib/Transforms/Scalar/SROA.cpp > index 09a445c236fa7..6dcdd630b6bae 100644 > --- a/llvm/lib/Transforms/Scalar/SROA.cpp > +++ b/llvm/lib/Transforms/Scalar/SROA.cpp > @@ -1806,10 +1806,8 @@ static bool isVectorPromotionViableForSlice(Partition > &P, const Slice &S, > ? Ty->getElementType() > : FixedVectorType::get(Ty->getElementType(), > NumElements); > > - Type *SplitIntTy = nullptr; > - if (uint64_t Bitwidth = NumElements * ElementSize * 8; > - Bitwidth <= IntegerType::MAX_INT_BITS) > - SplitIntTy = Type::getIntNTy(Ty->getContext(), Bitwidth); > + Type *SplitIntTy = > + Type::getIntNTy(Ty->getContext(), NumElements * ElementSize * 8); > > Use *U = S.getUse(); > > @@ -1828,8 +1826,7 @@ static bool isVectorPromotionViableForSlice(Partition > &P, const Slice &S, > // Disable vector promotion when there are loads or stores of an FCA. > if (LTy->isStructTy()) > return false; > - if (SplitIntTy && > - (P.beginOffset() > S.beginOffset() || P.endOffset() < > S.endOffset())) { > + if (P.beginOffset() > S.beginOffset() || P.endOffset() < S.endOffset()) { > assert(LTy->isIntegerTy()); > LTy = SplitIntTy; > } > @@ -1842,8 +1839,7 @@ static bool isVectorPromotionViableForSlice(Partition > &P, const Slice &S, > // Disable vector promotion when there are loads or stores of an FCA. > if (STy->isStructTy()) > return false; > - if (SplitIntTy && > - (P.beginOffset() > S.beginOffset() || P.endOffset() < > S.endOffset())) { > + if (P.beginOffset() > S.beginOffset() || P.endOffset() < S.endOffset()) { > assert(STy->isIntegerTy()); > STy = SplitIntTy; > } > @@ -1938,9 +1934,6 @@ static VectorType *isVectorPromotionViable(Partition > &P, const DataLayout &DL) { > CheckCandidateType(LI->getType()); > else if (auto *SI = dyn_cast<StoreInst>(S.getUse()->getUser())) > CheckCandidateType(SI->getValueOperand()->getType()); > - else if (auto *MTI = dyn_cast<MemIntrinsic>(S.getUse()->getUser())) > - CheckCandidateType(FixedVectorType::get( > - IntegerType::getInt8Ty(MTI->getContext()), P.size())); > } > > // If we didn't find a vector type, nothing to do here. > > diff --git a/llvm/test/CodeGen/AMDGPU/v1024.ll > b/llvm/test/CodeGen/AMDGPU/v1024.ll > index 6dbb9443fd5cf..1326ba437f94f 100644 > --- a/llvm/test/CodeGen/AMDGPU/v1024.ll > +++ b/llvm/test/CodeGen/AMDGPU/v1024.ll > @@ -4,7 +4,7 @@ > > ; GCN-LABEL: {{^}}test_v1024: > ; GCN-NOT: v_accvgpr > -; GCN-COUNT-10: v_mov_b32_e32 > +; GCN-COUNT-32: v_mov_b32_e32 > ; GCN-NOT: v_accvgpr > define amdgpu_kernel void @test_v1024() { > entry: > > diff --git a/llvm/test/DebugInfo/X86/sroasplit-1.ll > b/llvm/test/DebugInfo/X86/sroasplit-1.ll > index 5a80b56950122..0ec368130da28 100644 > --- a/llvm/test/DebugInfo/X86/sroasplit-1.ll > +++ b/llvm/test/DebugInfo/X86/sroasplit-1.ll > @@ -20,8 +20,10 @@ > ; > > ; Verify that SROA creates a variable piece when splitting i1. > -; CHECK: %[[I1:.*]] = load <12 x i8>, > -; CHECK: call void @llvm.dbg.value(metadata <12 x i8> %[[I1]], metadata > ![[VAR:.*]], metadata !DIExpression(DW_OP_LLVM_fragment, 32, 96)) > +; CHECK: %[[I1:.*]] = alloca [12 x i8], align 4 > +; CHECK: call void @llvm.dbg.declare(metadata [12 x i8]* %[[I1]], metadata > ![[VAR:[0-9]+]], metadata !DIExpression(DW_OP_LLVM_fragment, 32, 96)) > +; CHECK: call void @llvm.dbg.value(metadata i32 %[[A:.*]], metadata > ![[VAR]], metadata !DIExpression(DW_OP_LLVM_fragment, 0, 32)) > +; CHECK: ret i32 %[[A]] > ; Read Var and Piece: > ; CHECK: ![[VAR]] = !DILocalVariable(name: "i1",{{.*}} line: 11, > > > diff --git a/llvm/test/DebugInfo/X86/sroasplit-4.ll > b/llvm/test/DebugInfo/X86/sroasplit-4.ll > index a3b35b820a96c..0d5594ef867dd 100644 > --- a/llvm/test/DebugInfo/X86/sroasplit-4.ll > +++ b/llvm/test/DebugInfo/X86/sroasplit-4.ll > @@ -1,28 +1,28 @@ > ; RUN: opt -sroa < %s -S -o - | FileCheck %s > ; > ; Test that recursively splitting an alloca updates the debug info correctly. > -; CHECK: call void @llvm.dbg.value(metadata <16 x i8> %[[Y_VEC:.*]], > metadata ![[Y:.*]], metadata !DIExpression()) > -; CHECK: call void @llvm.dbg.value(metadata <16 x i8> %[[Y_VEC1:.*]], > metadata ![[Y]], metadata !DIExpression()) > -; CHECK: call void @llvm.dbg.value(metadata i32 0, metadata ![[R:.*]], > metadata !DIExpression(DW_OP_LLVM_fragment, 0, 32)) > -; CHECK: call void @llvm.dbg.value(metadata i64 0, metadata ![[R]], metadata > !DIExpression(DW_OP_LLVM_fragment, 64, 64)) > -; CHECK: call void @llvm.dbg.value(metadata i64 0, metadata ![[R]], metadata > !DIExpression(DW_OP_LLVM_fragment, 128, 64)) > -; CHECK: call void @llvm.dbg.value(metadata <16 x i8> %[[Y_VEC1]], metadata > ![[R]], metadata !DIExpression(DW_OP_LLVM_fragment, 192, 128)) > -; > +; CHECK: %[[T:.*]] = load i64, i64* @t, align 8 > +; CHECK: call void @llvm.dbg.value(metadata i64 %[[T]], metadata ![[Y:.*]], > metadata !DIExpression(DW_OP_LLVM_fragment, 0, 64)) > +; CHECK: %[[T1:.*]] = load i64, i64* @t, align 8 > +; CHECK: call void @llvm.dbg.value(metadata i64 %[[T1]], metadata ![[Y]], > metadata !DIExpression(DW_OP_LLVM_fragment, 64, 64)) > +; CHECK: call void @llvm.dbg.value(metadata i64 %[[T]], metadata ![[R:.*]], > metadata !DIExpression(DW_OP_LLVM_fragment, 192, 64)) > +; CHECK: call void @llvm.dbg.value(metadata i64 %[[T1]], metadata ![[R]], > metadata !DIExpression(DW_OP_LLVM_fragment, 256, 64)) > +; > ; struct p { > ; __SIZE_TYPE__ s; > ; __SIZE_TYPE__ t; > ; }; > -; > +; > ; struct r { > ; int i; > ; struct p x; > ; struct p y; > ; }; > -; > +; > ; extern int call_me(struct r); > ; extern int maybe(); > ; extern __SIZE_TYPE__ t; > -; > +; > ; int test() { > ; if (maybe()) > ; return 0; > > diff --git a/llvm/test/Transforms/PhaseOrdering/instcombine-sroa-inttoptr.ll > b/llvm/test/Transforms/PhaseOrdering/instcombine-sroa-inttoptr.ll > index 3361ad11c244b..886bde2686bd9 100644 > --- a/llvm/test/Transforms/PhaseOrdering/instcombine-sroa-inttoptr.ll > +++ b/llvm/test/Transforms/PhaseOrdering/instcombine-sroa-inttoptr.ll > @@ -68,13 +68,12 @@ define dso_local i32* @_Z3foo1S(%0* byval(%0) align 8 > %arg) { > ; CHECK-LABEL: @_Z3foo1S( > ; CHECK-NEXT: bb: > ; CHECK-NEXT: [[I2:%.*]] = alloca [[TMP0:%.*]], align 8 > -; CHECK-NEXT: [[TMP0]] = bitcast %0* [[ARG:%.*]] to i64* > -; CHECK-NEXT: [[I11_SROA_0_0_VEC_EXTRACT_EXTRACT:%.*]] = load i64, i64* > [[TMP0]], align 8 > -; CHECK-NEXT: [[TMP1:%.*]] = inttoptr i64 > [[I11_SROA_0_0_VEC_EXTRACT_EXTRACT]] to i32* > +; CHECK-NEXT: [[I1_SROA_0_0_I5_SROA_IDX:%.*]] = getelementptr inbounds > [[TMP0]], %0* [[ARG:%.*]], i64 0, i32 0 > +; CHECK-NEXT: [[I1_SROA_0_0_COPYLOAD:%.*]] = load i32*, i32** > [[I1_SROA_0_0_I5_SROA_IDX]], align 8 > ; CHECK-NEXT: [[I_SROA_0_0_I6_SROA_IDX:%.*]] = getelementptr inbounds > [[TMP0]], %0* [[I2]], i64 0, i32 0 > -; CHECK-NEXT: store i32* [[TMP1]], i32** [[I_SROA_0_0_I6_SROA_IDX]], > align 8 > +; CHECK-NEXT: store i32* [[I1_SROA_0_0_COPYLOAD]], i32** > [[I_SROA_0_0_I6_SROA_IDX]], align 8 > ; CHECK-NEXT: tail call void @_Z7escape01S(%0* nonnull byval([[TMP0]]) > align 8 [[I2]]) > -; CHECK-NEXT: ret i32* [[TMP1]] > +; CHECK-NEXT: ret i32* [[I1_SROA_0_0_COPYLOAD]] > ; > bb: > %i = alloca %0, align 8 > @@ -108,22 +107,21 @@ declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* > nocapture) > define dso_local i32* @_Z3bar1S(%0* byval(%0) align 8 %arg) { > ; CHECK-LABEL: @_Z3bar1S( > ; CHECK-NEXT: bb: > -; CHECK-NEXT: [[TMP0:%.*]] = bitcast %0* [[ARG:%.*]] to i64* > -; CHECK-NEXT: [[I13_SROA_0_0_VEC_EXTRACT_EXTRACT:%.*]] = load i64, i64* > [[TMP0]], align 8 > -; CHECK-NEXT: [[TMP1:%.*]] = inttoptr i64 > [[I13_SROA_0_0_VEC_EXTRACT_EXTRACT]] to i32* > +; CHECK-NEXT: [[I1_SROA_0_0_I4_SROA_IDX:%.*]] = getelementptr inbounds > [[TMP0:%.*]], %0* [[ARG:%.*]], i64 0, i32 0 > +; CHECK-NEXT: [[I1_SROA_0_0_COPYLOAD:%.*]] = load i32*, i32** > [[I1_SROA_0_0_I4_SROA_IDX]], align 8 > ; CHECK-NEXT: [[I5:%.*]] = tail call i32 @_Z4condv() > ; CHECK-NEXT: [[I6_NOT:%.*]] = icmp eq i32 [[I5]], 0 > ; CHECK-NEXT: br i1 [[I6_NOT]], label [[BB10:%.*]], label [[BB7:%.*]] > ; CHECK: bb7: > ; CHECK-NEXT: tail call void @_Z5sync0v() > -; CHECK-NEXT: tail call void @_Z7escape0Pi(i32* [[TMP1]]) > +; CHECK-NEXT: tail call void @_Z7escape0Pi(i32* [[I1_SROA_0_0_COPYLOAD]]) > ; CHECK-NEXT: br label [[BB13:%.*]] > ; CHECK: bb10: > ; CHECK-NEXT: tail call void @_Z5sync1v() > -; CHECK-NEXT: tail call void @_Z7escape1Pi(i32* [[TMP1]]) > +; CHECK-NEXT: tail call void @_Z7escape1Pi(i32* [[I1_SROA_0_0_COPYLOAD]]) > ; CHECK-NEXT: br label [[BB13]] > ; CHECK: bb13: > -; CHECK-NEXT: ret i32* [[TMP1]] > +; CHECK-NEXT: ret i32* [[I1_SROA_0_0_COPYLOAD]] > ; > bb: > %i = alloca %0, align 8 > > diff --git a/llvm/test/Transforms/SROA/address-spaces.ll > b/llvm/test/Transforms/SROA/address-spaces.ll > index 0300e99f9a217..70e1a682d7bfc 100644 > --- a/llvm/test/Transforms/SROA/address-spaces.ll > +++ b/llvm/test/Transforms/SROA/address-spaces.ll > @@ -11,8 +11,8 @@ declare void @llvm.memcpy.p1.p1.i32(ptr addrspace(1) > nocapture, ptr addrspace(1) > ; Make sure an illegal bitcast isn't introduced > define void @test_address_space_1_1(ptr addrspace(1) %a, ptr addrspace(1) > %b) { > ; CHECK-LABEL: @test_address_space_1_1( > -; CHECK-NEXT: [[AA_SROA_0_0_COPYLOAD:%.*]] = load <16 x i8>, ptr > addrspace(1) [[A:%.*]], align 2 > -; CHECK-NEXT: store <16 x i8> [[AA_SROA_0_0_COPYLOAD]], ptr addrspace(1) > [[B:%.*]], align 2 > +; CHECK-NEXT: [[AA_0_COPYLOAD:%.*]] = load <2 x i64>, ptr addrspace(1) > [[A:%.*]], align 2 > +; CHECK-NEXT: store <2 x i64> [[AA_0_COPYLOAD]], ptr addrspace(1) > [[B:%.*]], align 2 > ; CHECK-NEXT: ret void > ; > %aa = alloca <2 x i64>, align 16 > @@ -23,8 +23,8 @@ define void @test_address_space_1_1(ptr addrspace(1) %a, > ptr addrspace(1) %b) { > > define void @test_address_space_1_0(ptr addrspace(1) %a, ptr %b) { > ; CHECK-LABEL: @test_address_space_1_0( > -; CHECK-NEXT: [[AA_SROA_0_0_COPYLOAD:%.*]] = load <16 x i8>, ptr > addrspace(1) [[A:%.*]], align 2 > -; CHECK-NEXT: store <16 x i8> [[AA_SROA_0_0_COPYLOAD]], ptr [[B:%.*]], > align 2 > +; CHECK-NEXT: [[AA_0_COPYLOAD:%.*]] = load <2 x i64>, ptr addrspace(1) > [[A:%.*]], align 2 > +; CHECK-NEXT: store <2 x i64> [[AA_0_COPYLOAD]], ptr [[B:%.*]], align 2 > ; CHECK-NEXT: ret void > ; > %aa = alloca <2 x i64>, align 16 > @@ -35,8 +35,8 @@ define void @test_address_space_1_0(ptr addrspace(1) %a, > ptr %b) { > > define void @test_address_space_0_1(ptr %a, ptr addrspace(1) %b) { > ; CHECK-LABEL: @test_address_space_0_1( > -; CHECK-NEXT: [[AA_SROA_0_0_COPYLOAD:%.*]] = load <16 x i8>, ptr > [[A:%.*]], align 2 > -; CHECK-NEXT: store <16 x i8> [[AA_SROA_0_0_COPYLOAD]], ptr addrspace(1) > [[B:%.*]], align 2 > +; CHECK-NEXT: [[AA_0_COPYLOAD:%.*]] = load <2 x i64>, ptr [[A:%.*]], > align 2 > +; CHECK-NEXT: store <2 x i64> [[AA_0_COPYLOAD]], ptr addrspace(1) > [[B:%.*]], align 2 > ; CHECK-NEXT: ret void > ; > %aa = alloca <2 x i64>, align 16 > > diff --git a/llvm/test/Transforms/SROA/alignment.ll > b/llvm/test/Transforms/SROA/alignment.ll > index ba673b1d16465..66da09cd52d27 100644 > --- a/llvm/test/Transforms/SROA/alignment.ll > +++ b/llvm/test/Transforms/SROA/alignment.ll > @@ -92,15 +92,15 @@ define void @PR13920(ptr %a, ptr %b) { > ; Test that alignments on memcpy intrinsics get propagated to loads and > stores. > ; CHECK-LABEL: @PR13920( > ; CHECK-NEXT: entry: > -; CHECK-NEXT: [[AA_SROA_0_0_COPYLOAD:%.*]] = load <16 x i8>, ptr > [[A:%.*]], align 2 > -; CHECK-NEXT: store <16 x i8> [[AA_SROA_0_0_COPYLOAD]], ptr [[B:%.*]], > align 2 > +; CHECK-NEXT: [[AA_0_COPYLOAD:%.*]] = load <2 x i64>, ptr [[A:%.*]], > align 2 > +; CHECK-NEXT: store <2 x i64> [[AA_0_COPYLOAD]], ptr [[B:%.*]], align 2 > ; CHECK-NEXT: ret void > ; > ; DEBUGLOC-LABEL: @PR13920( > ; DEBUGLOC-NEXT: entry: > ; DEBUGLOC-NEXT: call void @llvm.dbg.value(metadata ptr undef, metadata > [[META37:![0-9]+]], metadata !DIExpression()), !dbg [[DBG38:![0-9]+]] > -; DEBUGLOC-NEXT: [[AA_SROA_0_0_COPYLOAD:%.*]] = load <16 x i8>, ptr > [[A:%.*]], align 2, !dbg [[DBG39:![0-9]+]] > -; DEBUGLOC-NEXT: store <16 x i8> [[AA_SROA_0_0_COPYLOAD]], ptr [[B:%.*]], > align 2, !dbg [[DBG40:![0-9]+]] > +; DEBUGLOC-NEXT: [[AA_0_COPYLOAD:%.*]] = load <2 x i64>, ptr [[A:%.*]], > align 2, !dbg [[DBG39:![0-9]+]] > +; DEBUGLOC-NEXT: store <2 x i64> [[AA_0_COPYLOAD]], ptr [[B:%.*]], align > 2, !dbg [[DBG40:![0-9]+]] > ; DEBUGLOC-NEXT: ret void, !dbg [[DBG41:![0-9]+]] > ; > > @@ -118,17 +118,21 @@ define void @test3(ptr %x) { > ; reduce the alignment. > ; CHECK-LABEL: @test3( > ; CHECK-NEXT: entry: > -; CHECK-NEXT: [[A_SROA_0_0_COPYLOAD:%.*]] = load <22 x i8>, ptr > [[X:%.*]], align 8 > -; CHECK-NEXT: [[B_SROA_0_6_COPYLOAD:%.*]] = load <18 x i8>, ptr [[X]], > align 2 > +; CHECK-NEXT: [[A_SROA_0:%.*]] = alloca [22 x i8], align 8 > +; CHECK-NEXT: [[B_SROA_0:%.*]] = alloca [18 x i8], align 2 > +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[A_SROA_0]], > ptr align 8 [[X:%.*]], i32 22, i1 false) > +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 2 [[B_SROA_0]], > ptr align 2 [[X]], i32 18, i1 false) > ; CHECK-NEXT: ret void > ; > ; DEBUGLOC-LABEL: @test3( > ; DEBUGLOC-NEXT: entry: > -; DEBUGLOC-NEXT: call void @llvm.dbg.value(metadata ptr undef, metadata > [[META44:![0-9]+]], metadata !DIExpression()), !dbg [[DBG47:![0-9]+]] > -; DEBUGLOC-NEXT: call void @llvm.dbg.value(metadata ptr undef, metadata > [[META45:![0-9]+]], metadata !DIExpression()), !dbg [[DBG48:![0-9]+]] > -; DEBUGLOC-NEXT: [[A_SROA_0_0_COPYLOAD:%.*]] = load <22 x i8>, ptr > [[X:%.*]], align 8, !dbg [[DBG49:![0-9]+]] > +; DEBUGLOC-NEXT: [[A_SROA_0:%.*]] = alloca [22 x i8], align 8, !dbg > [[DBG47:![0-9]+]] > +; DEBUGLOC-NEXT: call void @llvm.dbg.value(metadata ptr undef, metadata > [[META44:![0-9]+]], metadata !DIExpression()), !dbg [[DBG47]] > +; DEBUGLOC-NEXT: [[B_SROA_0:%.*]] = alloca [18 x i8], align 2, !dbg > [[DBG48:![0-9]+]] > +; DEBUGLOC-NEXT: call void @llvm.dbg.value(metadata ptr undef, metadata > [[META45:![0-9]+]], metadata !DIExpression()), !dbg [[DBG48]] > +; DEBUGLOC-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 8 > [[A_SROA_0]], ptr align 8 [[X:%.*]], i32 22, i1 false), !dbg [[DBG49:![0-9]+]] > ; DEBUGLOC-NEXT: call void @llvm.dbg.value(metadata ptr undef, metadata > [[META46:![0-9]+]], metadata !DIExpression()), !dbg [[DBG50:![0-9]+]] > -; DEBUGLOC-NEXT: [[B_SROA_0_6_COPYLOAD:%.*]] = load <18 x i8>, ptr [[X]], > align 2, !dbg [[DBG51:![0-9]+]] > +; DEBUGLOC-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 2 > [[B_SROA_0]], ptr align 2 [[X]], i32 18, i1 false), !dbg [[DBG51:![0-9]+]] > ; DEBUGLOC-NEXT: ret void, !dbg [[DBG52:![0-9]+]] > ; > > > diff --git a/llvm/test/Transforms/SROA/alloca-address-space.ll > b/llvm/test/Transforms/SROA/alloca-address-space.ll > index b06f269d806a5..d4f305c39c8fe 100644 > --- a/llvm/test/Transforms/SROA/alloca-address-space.ll > +++ b/llvm/test/Transforms/SROA/alloca-address-space.ll > @@ -10,8 +10,8 @@ declare void @llvm.memcpy.p1.p1.i32(ptr addrspace(1) > nocapture, ptr addrspace(1) > > define void @test_address_space_1_1(ptr addrspace(1) %a, ptr addrspace(1) > %b) { > ; CHECK-LABEL: @test_address_space_1_1( > -; CHECK-NEXT: [[AA_SROA_0_0_COPYLOAD:%.*]] = load <16 x i8>, ptr > addrspace(1) [[A:%.*]], align 2 > -; CHECK-NEXT: store <16 x i8> [[AA_SROA_0_0_COPYLOAD]], ptr addrspace(1) > [[B:%.*]], align 2 > +; CHECK-NEXT: [[AA_0_COPYLOAD:%.*]] = load <2 x i64>, ptr addrspace(1) > [[A:%.*]], align 2 > +; CHECK-NEXT: store <2 x i64> [[AA_0_COPYLOAD]], ptr addrspace(1) > [[B:%.*]], align 2 > ; CHECK-NEXT: ret void > ; > %aa = alloca <2 x i64>, align 16, addrspace(2) > @@ -22,8 +22,8 @@ define void @test_address_space_1_1(ptr addrspace(1) %a, > ptr addrspace(1) %b) { > > define void @test_address_space_1_0(ptr addrspace(1) %a, ptr addrspace(2) > %b) { > ; CHECK-LABEL: @test_address_space_1_0( > -; CHECK-NEXT: [[AA_SROA_0_0_COPYLOAD:%.*]] = load <16 x i8>, ptr > addrspace(1) [[A:%.*]], align 2 > -; CHECK-NEXT: store <16 x i8> [[AA_SROA_0_0_COPYLOAD]], ptr addrspace(2) > [[B:%.*]], align 2 > +; CHECK-NEXT: [[AA_0_COPYLOAD:%.*]] = load <2 x i64>, ptr addrspace(1) > [[A:%.*]], align 2 > +; CHECK-NEXT: store <2 x i64> [[AA_0_COPYLOAD]], ptr addrspace(2) > [[B:%.*]], align 2 > ; CHECK-NEXT: ret void > ; > %aa = alloca <2 x i64>, align 16, addrspace(2) > @@ -34,8 +34,8 @@ define void @test_address_space_1_0(ptr addrspace(1) %a, > ptr addrspace(2) %b) { > > define void @test_address_space_0_1(ptr addrspace(2) %a, ptr addrspace(1) > %b) { > ; CHECK-LABEL: @test_address_space_0_1( > -; CHECK-NEXT: [[AA_SROA_0_0_COPYLOAD:%.*]] = load <16 x i8>, ptr > addrspace(2) [[A:%.*]], align 2 > -; CHECK-NEXT: store <16 x i8> [[AA_SROA_0_0_COPYLOAD]], ptr addrspace(1) > [[B:%.*]], align 2 > +; CHECK-NEXT: [[AA_0_COPYLOAD:%.*]] = load <2 x i64>, ptr addrspace(2) > [[A:%.*]], align 2 > +; CHECK-NEXT: store <2 x i64> [[AA_0_COPYLOAD]], ptr addrspace(1) > [[B:%.*]], align 2 > ; CHECK-NEXT: ret void > ; > %aa = alloca <2 x i64>, align 16, addrspace(2) > > diff --git a/llvm/test/Transforms/SROA/basictest.ll > b/llvm/test/Transforms/SROA/basictest.ll > index a95d84dc913b5..5ac8ed8c6e6a3 100644 > --- a/llvm/test/Transforms/SROA/basictest.ll > +++ b/llvm/test/Transforms/SROA/basictest.ll > @@ -139,83 +139,100 @@ L2: > define void @test3(ptr %dst, ptr align 8 %src) { > ; CHECK-LABEL: @test3( > ; CHECK-NEXT: entry: > -; CHECK-NEXT: [[A_SROA_0_SROA_0_0_COPYLOAD:%.*]] = load <42 x i8>, ptr > [[SRC:%.*]], align 8, !tbaa [[TBAA0:![0-9]+]] > +; CHECK-NEXT: [[A_SROA_0:%.*]] = alloca [42 x i8], align 1 > +; CHECK-NEXT: [[A_SROA_3:%.*]] = alloca [99 x i8], align 1 > +; CHECK-NEXT: [[A_SROA_32:%.*]] = alloca [16 x i8], align 1 > +; CHECK-NEXT: [[A_SROA_15:%.*]] = alloca [42 x i8], align 1 > +; CHECK-NEXT: [[A_SROA_16:%.*]] = alloca [7 x i8], align 1 > +; CHECK-NEXT: [[A_SROA_235:%.*]] = alloca [7 x i8], align 1 > +; CHECK-NEXT: [[A_SROA_31:%.*]] = alloca [85 x i8], align 1 > +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[A_SROA_0]], > ptr align 8 [[SRC:%.*]], i32 42, i1 false), !tbaa [[TBAA0:![0-9]+]] > ; CHECK-NEXT: [[A_SROA_2_0_SRC_SROA_IDX:%.*]] = getelementptr inbounds > i8, ptr [[SRC]], i64 42 > ; CHECK-NEXT: [[A_SROA_2_0_COPYLOAD:%.*]] = load i8, ptr > [[A_SROA_2_0_SRC_SROA_IDX]], align 2, !tbaa [[TBAA0]] > ; CHECK-NEXT: [[A_SROA_3_0_SRC_SROA_IDX:%.*]] = getelementptr inbounds > i8, ptr [[SRC]], i64 43 > -; CHECK-NEXT: [[A_SROA_3_SROA_0_0_COPYLOAD:%.*]] = load <99 x i8>, ptr > [[A_SROA_3_0_SRC_SROA_IDX]], align 1, !tbaa [[TBAA0]] > +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[A_SROA_3]], > ptr align 1 [[A_SROA_3_0_SRC_SROA_IDX]], i32 99, i1 false), !tbaa [[TBAA0]] > ; CHECK-NEXT: [[A_SROA_32_0_SRC_SROA_IDX:%.*]] = getelementptr inbounds > i8, ptr [[SRC]], i64 142 > -; CHECK-NEXT: [[A_SROA_32_SROA_0_0_COPYLOAD:%.*]] = load <16 x i8>, ptr > [[A_SROA_32_0_SRC_SROA_IDX]], align 2, !tbaa [[TBAA0]] > +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[A_SROA_32]], > ptr align 2 [[A_SROA_32_0_SRC_SROA_IDX]], i32 16, i1 false), !tbaa [[TBAA0]] > ; CHECK-NEXT: [[A_SROA_15_0_SRC_SROA_IDX:%.*]] = getelementptr inbounds > i8, ptr [[SRC]], i64 158 > -; CHECK-NEXT: [[A_SROA_15_SROA_0_0_COPYLOAD:%.*]] = load <42 x i8>, ptr > [[A_SROA_15_0_SRC_SROA_IDX]], align 2, !tbaa [[TBAA0]] > +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[A_SROA_15]], > ptr align 2 [[A_SROA_15_0_SRC_SROA_IDX]], i32 42, i1 false), !tbaa [[TBAA0]] > ; CHECK-NEXT: [[A_SROA_16_0_SRC_SROA_IDX:%.*]] = getelementptr inbounds > i8, ptr [[SRC]], i64 200 > -; CHECK-NEXT: [[A_SROA_16_SROA_0_0_COPYLOAD:%.*]] = load <7 x i8>, ptr > [[A_SROA_16_0_SRC_SROA_IDX]], align 8, !tbaa [[TBAA0]] > +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[A_SROA_16]], > ptr align 8 [[A_SROA_16_0_SRC_SROA_IDX]], i32 7, i1 false), !tbaa [[TBAA0]] > ; CHECK-NEXT: [[A_SROA_23_0_SRC_SROA_IDX:%.*]] = getelementptr inbounds > i8, ptr [[SRC]], i64 207 > ; CHECK-NEXT: [[A_SROA_23_0_COPYLOAD:%.*]] = load i8, ptr > [[A_SROA_23_0_SRC_SROA_IDX]], align 1, !tbaa [[TBAA0]] > ; CHECK-NEXT: [[A_SROA_235_0_SRC_SROA_IDX:%.*]] = getelementptr inbounds > i8, ptr [[SRC]], i64 208 > -; CHECK-NEXT: [[A_SROA_235_SROA_0_0_COPYLOAD:%.*]] = load <7 x i8>, ptr > [[A_SROA_235_0_SRC_SROA_IDX]], align 8, !tbaa [[TBAA0]] > +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 > [[A_SROA_235]], ptr align 8 [[A_SROA_235_0_SRC_SROA_IDX]], i32 7, i1 false), > !tbaa [[TBAA0]] > ; CHECK-NEXT: [[A_SROA_31_0_SRC_SROA_IDX:%.*]] = getelementptr inbounds > i8, ptr [[SRC]], i64 215 > -; CHECK-NEXT: [[A_SROA_31_SROA_0_0_COPYLOAD:%.*]] = load <85 x i8>, ptr > [[A_SROA_31_0_SRC_SROA_IDX]], align 1, !tbaa [[TBAA0]] > -; CHECK-NEXT: [[A_SROA_32_SROA_0_0_VEC_INSERT:%.*]] = insertelement <16 x > i8> [[A_SROA_32_SROA_0_0_COPYLOAD]], i8 1, i32 0 > -; CHECK-NEXT: [[A_SROA_32_SROA_0_0_VECBLEND30:%.*]] = select <16 x i1> > <i1 true, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 > false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, > i1 false>, <16 x i8> <i8 extractelement (<2 x i8> bitcast (<1 x i16> <i16 1> > to <2 x i8>), i32 0), i8 extractelement (<2 x i8> bitcast (<1 x i16> <i16 1> > to <2 x i8>), i32 1), i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 > undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, > i8 undef>, <16 x i8> [[A_SROA_32_SROA_0_0_VEC_INSERT]] > -; CHECK-NEXT: [[A_SROA_32_SROA_0_0_VECBLEND28:%.*]] = select <16 x i1> > <i1 true, i1 true, i1 true, i1 true, i1 false, i1 false, i1 false, i1 false, > i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 > false>, <16 x i8> <i8 extractelement (<4 x i8> bitcast (<1 x i32> <i32 1> to > <4 x i8>), i32 0), i8 extractelement (<4 x i8> bitcast (<1 x i32> <i32 1> to > <4 x i8>), i32 1), i8 extractelement (<4 x i8> bitcast (<1 x i32> <i32 1> to > <4 x i8>), i32 2), i8 extractelement (<4 x i8> bitcast (<1 x i32> <i32 1> to > <4 x i8>), i32 3), i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 > undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, <16 x i8> > [[A_SROA_32_SROA_0_0_VECBLEND30]] > -; CHECK-NEXT: [[A_SROA_32_SROA_0_0_VECBLEND:%.*]] = select <16 x i1> <i1 > true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 > false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false>, > <16 x i8> <i8 extractelement (<8 x i8> bitcast (<1 x i64> <i64 1> to <8 x > i8>), i32 0), i8 extractelement (<8 x i8> bitcast (<1 x i64> <i64 1> to <8 x > i8>), i32 1), i8 extractelement (<8 x i8> bitcast (<1 x i64> <i64 1> to <8 x > i8>), i32 2), i8 extractelement (<8 x i8> bitcast (<1 x i64> <i64 1> to <8 x > i8>), i32 3), i8 extractelement (<8 x i8> bitcast (<1 x i64> <i64 1> to <8 x > i8>), i32 4), i8 extractelement (<8 x i8> bitcast (<1 x i64> <i64 1> to <8 x > i8>), i32 5), i8 extractelement (<8 x i8> bitcast (<1 x i64> <i64 1> to <8 x > i8>), i32 6), i8 extractelement (<8 x i8> bitcast (<1 x i64> <i64 1> to <8 x > i8>), i32 7), i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 > undef, i8 undef>, <16 x i8> [[A_SROA_32_SROA_0_0_VECBLEND28]] > -; CHECK-NEXT: [[A_SROA_32_SROA_0_1_VECBLEND:%.*]] = select <16 x i1> <i1 > false, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 > true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false>, > <16 x i8> <i8 undef, i8 extractelement (<8 x i8> bitcast (<1 x i64> <i64 2> > to <8 x i8>), i32 0), i8 extractelement (<8 x i8> bitcast (<1 x i64> <i64 2> > to <8 x i8>), i32 1), i8 extractelement (<8 x i8> bitcast (<1 x i64> <i64 2> > to <8 x i8>), i32 2), i8 extractelement (<8 x i8> bitcast (<1 x i64> <i64 2> > to <8 x i8>), i32 3), i8 extractelement (<8 x i8> bitcast (<1 x i64> <i64 2> > to <8 x i8>), i32 4), i8 extractelement (<8 x i8> bitcast (<1 x i64> <i64 2> > to <8 x i8>), i32 5), i8 extractelement (<8 x i8> bitcast (<1 x i64> <i64 2> > to <8 x i8>), i32 6), i8 extractelement (<8 x i8> bitcast (<1 x i64> <i64 2> > to <8 x i8>), i32 7), i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 > undef, i8 undef>, <16 x i8> [[A_SROA_32_SROA_0_0_VECBLEND]] > -; CHECK-NEXT: [[A_SROA_32_SROA_0_2_VECBLEND:%.*]] = select <16 x i1> <i1 > false, i1 false, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 > true, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false>, > <16 x i8> <i8 undef, i8 undef, i8 extractelement (<8 x i8> bitcast (<1 x i64> > <i64 3> to <8 x i8>), i32 0), i8 extractelement (<8 x i8> bitcast (<1 x i64> > <i64 3> to <8 x i8>), i32 1), i8 extractelement (<8 x i8> bitcast (<1 x i64> > <i64 3> to <8 x i8>), i32 2), i8 extractelement (<8 x i8> bitcast (<1 x i64> > <i64 3> to <8 x i8>), i32 3), i8 extractelement (<8 x i8> bitcast (<1 x i64> > <i64 3> to <8 x i8>), i32 4), i8 extractelement (<8 x i8> bitcast (<1 x i64> > <i64 3> to <8 x i8>), i32 5), i8 extractelement (<8 x i8> bitcast (<1 x i64> > <i64 3> to <8 x i8>), i32 6), i8 extractelement (<8 x i8> bitcast (<1 x i64> > <i64 3> to <8 x i8>), i32 7), i8 undef, i8 undef, i8 undef, i8 undef, i8 > undef, i8 undef>, <16 x i8> [[A_SROA_32_SROA_0_1_VECBLEND]] > -; CHECK-NEXT: [[A_SROA_32_SROA_0_3_VECBLEND:%.*]] = select <16 x i1> <i1 > false, i1 false, i1 false, i1 true, i1 true, i1 true, i1 true, i1 true, i1 > true, i1 true, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false>, > <16 x i8> <i8 undef, i8 undef, i8 undef, i8 extractelement (<8 x i8> bitcast > (<1 x i64> <i64 4> to <8 x i8>), i32 0), i8 extractelement (<8 x i8> bitcast > (<1 x i64> <i64 4> to <8 x i8>), i32 1), i8 extractelement (<8 x i8> bitcast > (<1 x i64> <i64 4> to <8 x i8>), i32 2), i8 extractelement (<8 x i8> bitcast > (<1 x i64> <i64 4> to <8 x i8>), i32 3), i8 extractelement (<8 x i8> bitcast > (<1 x i64> <i64 4> to <8 x i8>), i32 4), i8 extractelement (<8 x i8> bitcast > (<1 x i64> <i64 4> to <8 x i8>), i32 5), i8 extractelement (<8 x i8> bitcast > (<1 x i64> <i64 4> to <8 x i8>), i32 6), i8 extractelement (<8 x i8> bitcast > (<1 x i64> <i64 4> to <8 x i8>), i32 7), i8 undef, i8 undef, i8 undef, i8 > undef, i8 undef>, <16 x i8> [[A_SROA_32_SROA_0_2_VECBLEND]] > -; CHECK-NEXT: [[A_SROA_32_SROA_0_4_VECBLEND:%.*]] = select <16 x i1> <i1 > false, i1 false, i1 false, i1 false, i1 true, i1 true, i1 true, i1 true, i1 > true, i1 true, i1 true, i1 true, i1 false, i1 false, i1 false, i1 false>, <16 > x i8> <i8 undef, i8 undef, i8 undef, i8 undef, i8 extractelement (<8 x i8> > bitcast (<1 x i64> <i64 5> to <8 x i8>), i32 0), i8 extractelement (<8 x i8> > bitcast (<1 x i64> <i64 5> to <8 x i8>), i32 1), i8 extractelement (<8 x i8> > bitcast (<1 x i64> <i64 5> to <8 x i8>), i32 2), i8 extractelement (<8 x i8> > bitcast (<1 x i64> <i64 5> to <8 x i8>), i32 3), i8 extractelement (<8 x i8> > bitcast (<1 x i64> <i64 5> to <8 x i8>), i32 4), i8 extractelement (<8 x i8> > bitcast (<1 x i64> <i64 5> to <8 x i8>), i32 5), i8 extractelement (<8 x i8> > bitcast (<1 x i64> <i64 5> to <8 x i8>), i32 6), i8 extractelement (<8 x i8> > bitcast (<1 x i64> <i64 5> to <8 x i8>), i32 7), i8 undef, i8 undef, i8 > undef, i8 undef>, <16 x i8> [[A_SROA_32_SROA_0_3_VECBLEND]] > -; CHECK-NEXT: [[A_SROA_32_SROA_0_5_VECBLEND:%.*]] = select <16 x i1> <i1 > false, i1 false, i1 false, i1 false, i1 false, i1 true, i1 true, i1 true, i1 > true, i1 true, i1 true, i1 true, i1 true, i1 false, i1 false, i1 false>, <16 > x i8> <i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 extractelement > (<8 x i8> bitcast (<1 x i64> <i64 6> to <8 x i8>), i32 0), i8 extractelement > (<8 x i8> bitcast (<1 x i64> <i64 6> to <8 x i8>), i32 1), i8 extractelement > (<8 x i8> bitcast (<1 x i64> <i64 6> to <8 x i8>), i32 2), i8 extractelement > (<8 x i8> bitcast (<1 x i64> <i64 6> to <8 x i8>), i32 3), i8 extractelement > (<8 x i8> bitcast (<1 x i64> <i64 6> to <8 x i8>), i32 4), i8 extractelement > (<8 x i8> bitcast (<1 x i64> <i64 6> to <8 x i8>), i32 5), i8 extractelement > (<8 x i8> bitcast (<1 x i64> <i64 6> to <8 x i8>), i32 6), i8 extractelement > (<8 x i8> bitcast (<1 x i64> <i64 6> to <8 x i8>), i32 7), i8 undef, i8 > undef, i8 undef>, <16 x i8> [[A_SROA_32_SROA_0_4_VECBLEND]] > -; CHECK-NEXT: [[A_SROA_32_SROA_0_6_VECBLEND:%.*]] = select <16 x i1> <i1 > false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 true, i1 true, i1 > true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 false, i1 false>, <16 x > i8> <i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 > extractelement (<8 x i8> bitcast (<1 x i64> <i64 7> to <8 x i8>), i32 0), i8 > extractelement (<8 x i8> bitcast (<1 x i64> <i64 7> to <8 x i8>), i32 1), i8 > extractelement (<8 x i8> bitcast (<1 x i64> <i64 7> to <8 x i8>), i32 2), i8 > extractelement (<8 x i8> bitcast (<1 x i64> <i64 7> to <8 x i8>), i32 3), i8 > extractelement (<8 x i8> bitcast (<1 x i64> <i64 7> to <8 x i8>), i32 4), i8 > extractelement (<8 x i8> bitcast (<1 x i64> <i64 7> to <8 x i8>), i32 5), i8 > extractelement (<8 x i8> bitcast (<1 x i64> <i64 7> to <8 x i8>), i32 6), i8 > extractelement (<8 x i8> bitcast (<1 x i64> <i64 7> to <8 x i8>), i32 7), i8 > undef, i8 undef>, <16 x i8> [[A_SROA_32_SROA_0_5_VECBLEND]] > -; CHECK-NEXT: [[A_SROA_32_SROA_0_7_VECBLEND:%.*]] = select <16 x i1> <i1 > false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 true, > i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 false>, <16 > x i8> <i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, > i8 extractelement (<8 x i8> bitcast (<1 x i64> <i64 8> to <8 x i8>), i32 0), > i8 extractelement (<8 x i8> bitcast (<1 x i64> <i64 8> to <8 x i8>), i32 1), > i8 extractelement (<8 x i8> bitcast (<1 x i64> <i64 8> to <8 x i8>), i32 2), > i8 extractelement (<8 x i8> bitcast (<1 x i64> <i64 8> to <8 x i8>), i32 3), > i8 extractelement (<8 x i8> bitcast (<1 x i64> <i64 8> to <8 x i8>), i32 4), > i8 extractelement (<8 x i8> bitcast (<1 x i64> <i64 8> to <8 x i8>), i32 5), > i8 extractelement (<8 x i8> bitcast (<1 x i64> <i64 8> to <8 x i8>), i32 6), > i8 extractelement (<8 x i8> bitcast (<1 x i64> <i64 8> to <8 x i8>), i32 7), > i8 undef>, <16 x i8> [[A_SROA_32_SROA_0_6_VECBLEND]] > -; CHECK-NEXT: [[A_SROA_32_SROA_0_8_VECBLEND:%.*]] = select <16 x i1> <i1 > false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, > i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 > x i8> <i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, > i8 undef, i8 extractelement (<8 x i8> bitcast (<1 x i64> <i64 9> to <8 x > i8>), i32 0), i8 extractelement (<8 x i8> bitcast (<1 x i64> <i64 9> to <8 x > i8>), i32 1), i8 extractelement (<8 x i8> bitcast (<1 x i64> <i64 9> to <8 x > i8>), i32 2), i8 extractelement (<8 x i8> bitcast (<1 x i64> <i64 9> to <8 x > i8>), i32 3), i8 extractelement (<8 x i8> bitcast (<1 x i64> <i64 9> to <8 x > i8>), i32 4), i8 extractelement (<8 x i8> bitcast (<1 x i64> <i64 9> to <8 x > i8>), i32 5), i8 extractelement (<8 x i8> bitcast (<1 x i64> <i64 9> to <8 x > i8>), i32 6), i8 extractelement (<8 x i8> bitcast (<1 x i64> <i64 9> to <8 x > i8>), i32 7)>, <16 x i8> [[A_SROA_32_SROA_0_7_VECBLEND]] > -; CHECK-NEXT: [[A_SROA_16_SROA_0_0_VEC_INSERT:%.*]] = insertelement <7 x > i8> [[A_SROA_16_SROA_0_0_COPYLOAD]], i8 1, i32 0 > -; CHECK-NEXT: [[A_SROA_16_SROA_0_0_VECBLEND20:%.*]] = select <7 x i1> <i1 > true, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false>, <7 x i8> > <i8 extractelement (<2 x i8> bitcast (<1 x i16> <i16 1> to <2 x i8>), i32 0), > i8 extractelement (<2 x i8> bitcast (<1 x i16> <i16 1> to <2 x i8>), i32 1), > i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, <7 x i8> > [[A_SROA_16_SROA_0_0_VEC_INSERT]] > -; CHECK-NEXT: [[A_SROA_16_SROA_0_0_VECBLEND:%.*]] = select <7 x i1> <i1 > true, i1 true, i1 true, i1 true, i1 false, i1 false, i1 false>, <7 x i8> <i8 > extractelement (<4 x i8> bitcast (<1 x i32> <i32 1> to <4 x i8>), i32 0), i8 > extractelement (<4 x i8> bitcast (<1 x i32> <i32 1> to <4 x i8>), i32 1), i8 > extractelement (<4 x i8> bitcast (<1 x i32> <i32 1> to <4 x i8>), i32 2), i8 > extractelement (<4 x i8> bitcast (<1 x i32> <i32 1> to <4 x i8>), i32 3), i8 > undef, i8 undef, i8 undef>, <7 x i8> [[A_SROA_16_SROA_0_0_VECBLEND20]] > -; CHECK-NEXT: [[A_SROA_16_SROA_0_1_VECBLEND:%.*]] = select <7 x i1> <i1 > false, i1 true, i1 true, i1 true, i1 true, i1 false, i1 false>, <7 x i8> <i8 > undef, i8 extractelement (<4 x i8> bitcast (<1 x i32> <i32 2> to <4 x i8>), > i32 0), i8 extractelement (<4 x i8> bitcast (<1 x i32> <i32 2> to <4 x i8>), > i32 1), i8 extractelement (<4 x i8> bitcast (<1 x i32> <i32 2> to <4 x i8>), > i32 2), i8 extractelement (<4 x i8> bitcast (<1 x i32> <i32 2> to <4 x i8>), > i32 3), i8 undef, i8 undef>, <7 x i8> [[A_SROA_16_SROA_0_0_VECBLEND]] > -; CHECK-NEXT: [[A_SROA_16_SROA_0_2_VECBLEND:%.*]] = select <7 x i1> <i1 > false, i1 false, i1 true, i1 true, i1 true, i1 true, i1 false>, <7 x i8> <i8 > undef, i8 undef, i8 extractelement (<4 x i8> bitcast (<1 x i32> <i32 3> to <4 > x i8>), i32 0), i8 extractelement (<4 x i8> bitcast (<1 x i32> <i32 3> to <4 > x i8>), i32 1), i8 extractelement (<4 x i8> bitcast (<1 x i32> <i32 3> to <4 > x i8>), i32 2), i8 extractelement (<4 x i8> bitcast (<1 x i32> <i32 3> to <4 > x i8>), i32 3), i8 undef>, <7 x i8> [[A_SROA_16_SROA_0_1_VECBLEND]] > -; CHECK-NEXT: [[A_SROA_16_SROA_0_3_VECBLEND:%.*]] = select <7 x i1> <i1 > false, i1 false, i1 false, i1 true, i1 true, i1 true, i1 true>, <7 x i8> <i8 > undef, i8 undef, i8 undef, i8 extractelement (<4 x i8> bitcast (<1 x i32> > <i32 4> to <4 x i8>), i32 0), i8 extractelement (<4 x i8> bitcast (<1 x i32> > <i32 4> to <4 x i8>), i32 1), i8 extractelement (<4 x i8> bitcast (<1 x i32> > <i32 4> to <4 x i8>), i32 2), i8 extractelement (<4 x i8> bitcast (<1 x i32> > <i32 4> to <4 x i8>), i32 3)>, <7 x i8> [[A_SROA_16_SROA_0_2_VECBLEND]] > -; CHECK-NEXT: [[A_SROA_235_SROA_0_0_VECBLEND:%.*]] = select <7 x i1> <i1 > true, i1 true, i1 true, i1 true, i1 false, i1 false, i1 false>, <7 x i8> <i8 > extractelement (<4 x i8> bitcast (<1 x i32> <i32 1> to <4 x i8>), i32 0), i8 > extractelement (<4 x i8> bitcast (<1 x i32> <i32 1> to <4 x i8>), i32 1), i8 > extractelement (<4 x i8> bitcast (<1 x i32> <i32 1> to <4 x i8>), i32 2), i8 > extractelement (<4 x i8> bitcast (<1 x i32> <i32 1> to <4 x i8>), i32 3), i8 > undef, i8 undef, i8 undef>, <7 x i8> [[A_SROA_235_SROA_0_0_COPYLOAD]] > -; CHECK-NEXT: [[A_SROA_235_SROA_0_1_VEC_INSERT:%.*]] = insertelement <7 x > i8> [[A_SROA_235_SROA_0_0_VECBLEND]], i8 1, i32 1 > -; CHECK-NEXT: [[A_SROA_235_SROA_0_1_VECBLEND15:%.*]] = select <7 x i1> > <i1 false, i1 true, i1 true, i1 false, i1 false, i1 false, i1 false>, <7 x > i8> <i8 undef, i8 extractelement (<2 x i8> bitcast (<1 x i16> <i16 1> to <2 x > i8>), i32 0), i8 extractelement (<2 x i8> bitcast (<1 x i16> <i16 1> to <2 x > i8>), i32 1), i8 undef, i8 undef, i8 undef, i8 undef>, <7 x i8> > [[A_SROA_235_SROA_0_1_VEC_INSERT]] > -; CHECK-NEXT: [[A_SROA_235_SROA_0_1_VECBLEND:%.*]] = select <7 x i1> <i1 > false, i1 true, i1 true, i1 true, i1 true, i1 false, i1 false>, <7 x i8> <i8 > undef, i8 extractelement (<4 x i8> bitcast (<1 x i32> <i32 1> to <4 x i8>), > i32 0), i8 extractelement (<4 x i8> bitcast (<1 x i32> <i32 1> to <4 x i8>), > i32 1), i8 extractelement (<4 x i8> bitcast (<1 x i32> <i32 1> to <4 x i8>), > i32 2), i8 extractelement (<4 x i8> bitcast (<1 x i32> <i32 1> to <4 x i8>), > i32 3), i8 undef, i8 undef>, <7 x i8> [[A_SROA_235_SROA_0_1_VECBLEND15]] > -; CHECK-NEXT: [[A_SROA_235_SROA_0_2_VECBLEND:%.*]] = select <7 x i1> <i1 > false, i1 false, i1 true, i1 true, i1 true, i1 true, i1 false>, <7 x i8> <i8 > undef, i8 undef, i8 extractelement (<4 x i8> bitcast (<1 x i32> <i32 3> to <4 > x i8>), i32 0), i8 extractelement (<4 x i8> bitcast (<1 x i32> <i32 3> to <4 > x i8>), i32 1), i8 extractelement (<4 x i8> bitcast (<1 x i32> <i32 3> to <4 > x i8>), i32 2), i8 extractelement (<4 x i8> bitcast (<1 x i32> <i32 3> to <4 > x i8>), i32 3), i8 undef>, <7 x i8> [[A_SROA_235_SROA_0_1_VECBLEND]] > -; CHECK-NEXT: [[A_SROA_235_SROA_0_3_VECBLEND:%.*]] = select <7 x i1> <i1 > false, i1 false, i1 false, i1 true, i1 true, i1 true, i1 true>, <7 x i8> <i8 > undef, i8 undef, i8 undef, i8 extractelement (<4 x i8> bitcast (<1 x i32> > <i32 4> to <4 x i8>), i32 0), i8 extractelement (<4 x i8> bitcast (<1 x i32> > <i32 4> to <4 x i8>), i32 1), i8 extractelement (<4 x i8> bitcast (<1 x i32> > <i32 4> to <4 x i8>), i32 2), i8 extractelement (<4 x i8> bitcast (<1 x i32> > <i32 4> to <4 x i8>), i32 3)>, <7 x i8> [[A_SROA_235_SROA_0_2_VECBLEND]] > -; CHECK-NEXT: [[A_SROA_15_SROA_0_39_COPYLOAD:%.*]] = load <3 x i8>, ptr > [[SRC]], align 1, !tbaa [[TBAA3:![0-9]+]] > -; CHECK-NEXT: [[A_SROA_15_SROA_0_39_VEC_EXPAND:%.*]] = shufflevector <3 x > i8> [[A_SROA_15_SROA_0_39_COPYLOAD]], <3 x i8> poison, <42 x i32> <i32 undef, > i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, > i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, > i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, > i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, > i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, > i32 undef, i32 undef, i32 undef, i32 0, i32 1, i32 2> > -; CHECK-NEXT: [[A_SROA_15_SROA_0_39_VECBLEND:%.*]] = select <42 x i1> <i1 > false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, > i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 > false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, > i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 > false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, > i1 false, i1 true, i1 true, i1 true>, <42 x i8> > [[A_SROA_15_SROA_0_39_VEC_EXPAND]], <42 x i8> [[A_SROA_15_SROA_0_0_COPYLOAD]] > +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[A_SROA_31]], > ptr align 1 [[A_SROA_31_0_SRC_SROA_IDX]], i32 85, i1 false), !tbaa [[TBAA0]] > +; CHECK-NEXT: store i8 1, ptr [[A_SROA_32]], align 1, !tbaa > [[TBAA3:![0-9]+]] > +; CHECK-NEXT: store i16 1, ptr [[A_SROA_32]], align 1, !tbaa > [[TBAA5:![0-9]+]] > +; CHECK-NEXT: store i32 1, ptr [[A_SROA_32]], align 1, !tbaa > [[TBAA7:![0-9]+]] > +; CHECK-NEXT: store i64 1, ptr [[A_SROA_32]], align 1, !tbaa > [[TBAA9:![0-9]+]] > +; CHECK-NEXT: [[A_SROA_32_1_OVERLAP_2_I8_SROA_IDX:%.*]] = getelementptr > inbounds i8, ptr [[A_SROA_32]], i64 1 > +; CHECK-NEXT: store i64 2, ptr [[A_SROA_32_1_OVERLAP_2_I8_SROA_IDX]], > align 1, !tbaa [[TBAA11:![0-9]+]] > +; CHECK-NEXT: [[A_SROA_32_2_OVERLAP_3_I8_SROA_IDX:%.*]] = getelementptr > inbounds i8, ptr [[A_SROA_32]], i64 2 > +; CHECK-NEXT: store i64 3, ptr [[A_SROA_32_2_OVERLAP_3_I8_SROA_IDX]], > align 1, !tbaa [[TBAA13:![0-9]+]] > +; CHECK-NEXT: [[A_SROA_32_3_OVERLAP_4_I8_SROA_IDX:%.*]] = getelementptr > inbounds i8, ptr [[A_SROA_32]], i64 3 > +; CHECK-NEXT: store i64 4, ptr [[A_SROA_32_3_OVERLAP_4_I8_SROA_IDX]], > align 1, !tbaa [[TBAA15:![0-9]+]] > +; CHECK-NEXT: [[A_SROA_32_4_OVERLAP_5_I8_SROA_IDX:%.*]] = getelementptr > inbounds i8, ptr [[A_SROA_32]], i64 4 > +; CHECK-NEXT: store i64 5, ptr [[A_SROA_32_4_OVERLAP_5_I8_SROA_IDX]], > align 1, !tbaa [[TBAA17:![0-9]+]] > +; CHECK-NEXT: [[A_SROA_32_5_OVERLAP_6_I8_SROA_IDX:%.*]] = getelementptr > inbounds i8, ptr [[A_SROA_32]], i64 5 > +; CHECK-NEXT: store i64 6, ptr [[A_SROA_32_5_OVERLAP_6_I8_SROA_IDX]], > align 1, !tbaa [[TBAA19:![0-9]+]] > +; CHECK-NEXT: [[A_SROA_32_6_OVERLAP_7_I8_SROA_IDX:%.*]] = getelementptr > inbounds i8, ptr [[A_SROA_32]], i64 6 > +; CHECK-NEXT: store i64 7, ptr [[A_SROA_32_6_OVERLAP_7_I8_SROA_IDX]], > align 1, !tbaa [[TBAA21:![0-9]+]] > +; CHECK-NEXT: [[A_SROA_32_7_OVERLAP_8_I8_SROA_IDX:%.*]] = getelementptr > inbounds i8, ptr [[A_SROA_32]], i64 7 > +; CHECK-NEXT: store i64 8, ptr [[A_SROA_32_7_OVERLAP_8_I8_SROA_IDX]], > align 1, !tbaa [[TBAA23:![0-9]+]] > +; CHECK-NEXT: [[A_SROA_32_8_OVERLAP_9_I8_SROA_IDX:%.*]] = getelementptr > inbounds i8, ptr [[A_SROA_32]], i64 8 > +; CHECK-NEXT: store i64 9, ptr [[A_SROA_32_8_OVERLAP_9_I8_SROA_IDX]], > align 1, !tbaa [[TBAA25:![0-9]+]] > +; CHECK-NEXT: store i8 1, ptr [[A_SROA_16]], align 1, !tbaa > [[TBAA27:![0-9]+]] > +; CHECK-NEXT: store i16 1, ptr [[A_SROA_16]], align 1, !tbaa > [[TBAA29:![0-9]+]] > +; CHECK-NEXT: store i32 1, ptr [[A_SROA_16]], align 1, !tbaa > [[TBAA31:![0-9]+]] > +; CHECK-NEXT: [[A_SROA_16_1_OVERLAP2_1_1_I8_SROA_IDX:%.*]] = > getelementptr inbounds i8, ptr [[A_SROA_16]], i64 1 > +; CHECK-NEXT: store i32 2, ptr [[A_SROA_16_1_OVERLAP2_1_1_I8_SROA_IDX]], > align 1, !tbaa [[TBAA33:![0-9]+]] > +; CHECK-NEXT: [[A_SROA_16_2_OVERLAP2_1_2_I8_SROA_IDX:%.*]] = > getelementptr inbounds i8, ptr [[A_SROA_16]], i64 2 > +; CHECK-NEXT: store i32 3, ptr [[A_SROA_16_2_OVERLAP2_1_2_I8_SROA_IDX]], > align 1, !tbaa [[TBAA35:![0-9]+]] > +; CHECK-NEXT: [[A_SROA_16_3_OVERLAP2_1_3_I8_SROA_IDX:%.*]] = > getelementptr inbounds i8, ptr [[A_SROA_16]], i64 3 > +; CHECK-NEXT: store i32 4, ptr [[A_SROA_16_3_OVERLAP2_1_3_I8_SROA_IDX]], > align 1, !tbaa [[TBAA37:![0-9]+]] > +; CHECK-NEXT: store i32 1, ptr [[A_SROA_235]], align 1, !tbaa > [[TBAA39:![0-9]+]] > +; CHECK-NEXT: [[A_SROA_235_1_OVERLAP2_2_1_I8_SROA_IDX11:%.*]] = > getelementptr inbounds i8, ptr [[A_SROA_235]], i64 1 > +; CHECK-NEXT: store i8 1, ptr > [[A_SROA_235_1_OVERLAP2_2_1_I8_SROA_IDX11]], align 1, !tbaa [[TBAA41:![0-9]+]] > +; CHECK-NEXT: [[A_SROA_235_1_OVERLAP2_2_1_I8_SROA_IDX10:%.*]] = > getelementptr inbounds i8, ptr [[A_SROA_235]], i64 1 > +; CHECK-NEXT: store i16 1, ptr > [[A_SROA_235_1_OVERLAP2_2_1_I8_SROA_IDX10]], align 1, !tbaa [[TBAA43:![0-9]+]] > +; CHECK-NEXT: [[A_SROA_235_1_OVERLAP2_2_1_I8_SROA_IDX:%.*]] = > getelementptr inbounds i8, ptr [[A_SROA_235]], i64 1 > +; CHECK-NEXT: store i32 1, ptr [[A_SROA_235_1_OVERLAP2_2_1_I8_SROA_IDX]], > align 1, !tbaa [[TBAA45:![0-9]+]] > +; CHECK-NEXT: [[A_SROA_235_2_OVERLAP2_2_2_I8_SROA_IDX:%.*]] = > getelementptr inbounds i8, ptr [[A_SROA_235]], i64 2 > +; CHECK-NEXT: store i32 3, ptr [[A_SROA_235_2_OVERLAP2_2_2_I8_SROA_IDX]], > align 1, !tbaa [[TBAA47:![0-9]+]] > +; CHECK-NEXT: [[A_SROA_235_3_OVERLAP2_2_3_I8_SROA_IDX:%.*]] = > getelementptr inbounds i8, ptr [[A_SROA_235]], i64 3 > +; CHECK-NEXT: store i32 4, ptr [[A_SROA_235_3_OVERLAP2_2_3_I8_SROA_IDX]], > align 1, !tbaa [[TBAA49:![0-9]+]] > +; CHECK-NEXT: [[A_SROA_15_197_OVERLAP2_PREFIX_SROA_IDX:%.*]] = > getelementptr inbounds i8, ptr [[A_SROA_15]], i64 39 > +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 > [[A_SROA_15_197_OVERLAP2_PREFIX_SROA_IDX]], ptr align 1 [[SRC]], i32 3, i1 > false), !tbaa [[TBAA51:![0-9]+]] > ; CHECK-NEXT: [[A_SROA_16_197_SRC_SROA_IDX:%.*]] = getelementptr inbounds > i8, ptr [[SRC]], i64 3 > -; CHECK-NEXT: [[A_SROA_16_SROA_0_0_COPYLOAD23:%.*]] = load <5 x i8>, ptr > [[A_SROA_16_197_SRC_SROA_IDX]], align 1, !tbaa [[TBAA3]] > -; CHECK-NEXT: [[A_SROA_16_SROA_0_0_VEC_EXPAND:%.*]] = shufflevector <5 x > i8> [[A_SROA_16_SROA_0_0_COPYLOAD23]], <5 x i8> poison, <7 x i32> <i32 0, i32 > 1, i32 2, i32 3, i32 4, i32 undef, i32 undef> > -; CHECK-NEXT: [[A_SROA_16_SROA_0_0_VECBLEND24:%.*]] = select <7 x i1> <i1 > true, i1 true, i1 true, i1 true, i1 true, i1 false, i1 false>, <7 x i8> > [[A_SROA_16_SROA_0_0_VEC_EXPAND]], <7 x i8> [[A_SROA_16_SROA_0_3_VECBLEND]] > -; CHECK-NEXT: [[A_SROA_16_SROA_0_2_VECBLEND25:%.*]] = select <7 x i1> <i1 > false, i1 false, i1 true, i1 true, i1 true, i1 true, i1 true>, <7 x i8> <i8 > undef, i8 undef, i8 42, i8 42, i8 42, i8 42, i8 42>, <7 x i8> > [[A_SROA_16_SROA_0_0_VECBLEND24]] > -; CHECK-NEXT: [[A_SROA_235_SROA_0_0_VECBLEND13:%.*]] = select <7 x i1> > <i1 true, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false>, <7 x > i8> <i8 42, i8 42, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, <7 x > i8> [[A_SROA_235_SROA_0_3_VECBLEND]] > -; CHECK-NEXT: [[A_SROA_235_SROA_0_1_COPYLOAD:%.*]] = load <5 x i8>, ptr > [[SRC]], align 1, !tbaa [[TBAA5:![0-9]+]] > -; CHECK-NEXT: [[A_SROA_235_SROA_0_1_VEC_EXPAND:%.*]] = shufflevector <5 x > i8> [[A_SROA_235_SROA_0_1_COPYLOAD]], <5 x i8> poison, <7 x i32> <i32 undef, > i32 0, i32 1, i32 2, i32 3, i32 4, i32 undef> > -; CHECK-NEXT: [[A_SROA_235_SROA_0_1_VECBLEND17:%.*]] = select <7 x i1> > <i1 false, i1 true, i1 true, i1 true, i1 true, i1 true, i1 false>, <7 x i8> > [[A_SROA_235_SROA_0_1_VEC_EXPAND]], <7 x i8> > [[A_SROA_235_SROA_0_0_VECBLEND13]] > -; CHECK-NEXT: [[A_SROA_235_SROA_0_2_COPYLOAD:%.*]] = load <5 x i8>, ptr > [[SRC]], align 1, !tbaa [[TBAA7:![0-9]+]] > -; CHECK-NEXT: [[A_SROA_235_SROA_0_2_VEC_EXPAND:%.*]] = shufflevector <5 x > i8> [[A_SROA_235_SROA_0_2_COPYLOAD]], <5 x i8> poison, <7 x i32> <i32 undef, > i32 undef, i32 0, i32 1, i32 2, i32 3, i32 4> > -; CHECK-NEXT: [[A_SROA_235_SROA_0_2_VECBLEND18:%.*]] = select <7 x i1> > <i1 false, i1 false, i1 true, i1 true, i1 true, i1 true, i1 true>, <7 x i8> > [[A_SROA_235_SROA_0_2_VEC_EXPAND]], <7 x i8> > [[A_SROA_235_SROA_0_1_VECBLEND17]] > +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[A_SROA_16]], > ptr align 1 [[A_SROA_16_197_SRC_SROA_IDX]], i32 5, i1 false), !tbaa [[TBAA51]] > +; CHECK-NEXT: [[A_SROA_16_2_OVERLAP2_1_2_I8_SROA_IDX12:%.*]] = > getelementptr inbounds i8, ptr [[A_SROA_16]], i64 2 > +; CHECK-NEXT: call void @llvm.memset.p0.i32(ptr align 1 > [[A_SROA_16_2_OVERLAP2_1_2_I8_SROA_IDX12]], i8 42, i32 5, i1 false), !tbaa > [[TBAA53:![0-9]+]] > +; CHECK-NEXT: call void @llvm.memset.p0.i32(ptr align 1 [[A_SROA_235]], > i8 42, i32 2, i1 false), !tbaa [[TBAA53]] > +; CHECK-NEXT: [[A_SROA_235_209_OVERLAP2_2_1_I8_SROA_IDX8:%.*]] = > getelementptr inbounds i8, ptr [[A_SROA_235]], i64 1 > +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 > [[A_SROA_235_209_OVERLAP2_2_1_I8_SROA_IDX8]], ptr align 1 [[SRC]], i32 5, i1 > false), !tbaa [[TBAA55:![0-9]+]] > +; CHECK-NEXT: [[A_SROA_235_210_OVERLAP2_2_2_I8_SROA_IDX9:%.*]] = > getelementptr inbounds i8, ptr [[A_SROA_235]], i64 2 > +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 > [[A_SROA_235_210_OVERLAP2_2_2_I8_SROA_IDX9]], ptr align 1 [[SRC]], i32 5, i1 > false), !tbaa [[TBAA57:![0-9]+]] > ; CHECK-NEXT: [[A_SROA_31_210_SRC_SROA_IDX:%.*]] = getelementptr inbounds > i8, ptr [[SRC]], i64 5 > -; CHECK-NEXT: [[A_SROA_31_SROA_0_0_COPYLOAD11:%.*]] = load <3 x i8>, ptr > [[A_SROA_31_210_SRC_SROA_IDX]], align 1, !tbaa [[TBAA7]] > -; CHECK-NEXT: [[A_SROA_31_SROA_0_0_VEC_EXPAND:%.*]] = shufflevector <3 x > i8> [[A_SROA_31_SROA_0_0_COPYLOAD11]], <3 x i8> poison, <85 x i32> <i32 0, > i32 1, i32 2, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 > undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 > undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 > undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 > undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 > undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 > undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 > undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 > undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 > undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 > undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 > undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> > -; CHECK-NEXT: [[A_SROA_31_SROA_0_0_VECBLEND:%.*]] = select <85 x i1> <i1 > true, i1 true, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 > false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, > i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 > false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, > i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 > false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, > i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 > false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, > i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 > false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, > i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 > false, i1 false>, <85 x i8> [[A_SROA_31_SROA_0_0_VEC_EXPAND]], <85 x i8> > [[A_SROA_31_SROA_0_0_COPYLOAD]] > -; CHECK-NEXT: store <42 x i8> [[A_SROA_0_SROA_0_0_COPYLOAD]], ptr > [[DST:%.*]], align 1, !tbaa [[TBAA9:![0-9]+]] > +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[A_SROA_31]], > ptr align 1 [[A_SROA_31_210_SRC_SROA_IDX]], i32 3, i1 false), !tbaa [[TBAA57]] > +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[DST:%.*]], > ptr align 1 [[A_SROA_0]], i32 42, i1 false), !tbaa [[TBAA59:![0-9]+]] > ; CHECK-NEXT: [[A_SROA_2_0_DST_SROA_IDX:%.*]] = getelementptr inbounds > i8, ptr [[DST]], i64 42 > -; CHECK-NEXT: store i8 0, ptr [[A_SROA_2_0_DST_SROA_IDX]], align 1, !tbaa > [[TBAA9]] > +; CHECK-NEXT: store i8 0, ptr [[A_SROA_2_0_DST_SROA_IDX]], align 1, !tbaa > [[TBAA59]] > ; CHECK-NEXT: [[A_SROA_3_0_DST_SROA_IDX:%.*]] = getelementptr inbounds > i8, ptr [[DST]], i64 43 > -; CHECK-NEXT: store <99 x i8> [[A_SROA_3_SROA_0_0_COPYLOAD]], ptr > [[A_SROA_3_0_DST_SROA_IDX]], align 1, !tbaa [[TBAA9]] > +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 > [[A_SROA_3_0_DST_SROA_IDX]], ptr align 1 [[A_SROA_3]], i32 99, i1 false), > !tbaa [[TBAA59]] > ; CHECK-NEXT: [[A_SROA_32_0_DST_SROA_IDX:%.*]] = getelementptr inbounds > i8, ptr [[DST]], i64 142 > -; CHECK-NEXT: store <16 x i8> [[A_SROA_32_SROA_0_8_VECBLEND]], ptr > [[A_SROA_32_0_DST_SROA_IDX]], align 1, !tbaa [[TBAA9]] > +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 > [[A_SROA_32_0_DST_SROA_IDX]], ptr align 1 [[A_SROA_32]], i32 16, i1 false), > !tbaa [[TBAA59]] > ; CHECK-NEXT: [[A_SROA_15_0_DST_SROA_IDX:%.*]] = getelementptr inbounds > i8, ptr [[DST]], i64 158 > -; CHECK-NEXT: store <42 x i8> [[A_SROA_15_SROA_0_39_VECBLEND]], ptr > [[A_SROA_15_0_DST_SROA_IDX]], align 1, !tbaa [[TBAA9]] > +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 > [[A_SROA_15_0_DST_SROA_IDX]], ptr align 1 [[A_SROA_15]], i32 42, i1 false), > !tbaa [[TBAA59]] > ; CHECK-NEXT: [[A_SROA_16_0_DST_SROA_IDX:%.*]] = getelementptr inbounds > i8, ptr [[DST]], i64 200 > -; CHECK-NEXT: store <7 x i8> [[A_SROA_16_SROA_0_2_VECBLEND25]], ptr > [[A_SROA_16_0_DST_SROA_IDX]], align 1, !tbaa [[TBAA9]] > +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 > [[A_SROA_16_0_DST_SROA_IDX]], ptr align 1 [[A_SROA_16]], i32 7, i1 false), > !tbaa [[TBAA59]] > ; CHECK-NEXT: [[A_SROA_23_0_DST_SROA_IDX:%.*]] = getelementptr inbounds > i8, ptr [[DST]], i64 207 > -; CHECK-NEXT: store i8 42, ptr [[A_SROA_23_0_DST_SROA_IDX]], align 1, > !tbaa [[TBAA9]] > +; CHECK-NEXT: store i8 42, ptr [[A_SROA_23_0_DST_SROA_IDX]], align 1, > !tbaa [[TBAA59]] > ; CHECK-NEXT: [[A_SROA_235_0_DST_SROA_IDX:%.*]] = getelementptr inbounds > i8, ptr [[DST]], i64 208 > -; CHECK-NEXT: store <7 x i8> [[A_SROA_235_SROA_0_2_VECBLEND18]], ptr > [[A_SROA_235_0_DST_SROA_IDX]], align 1, !tbaa [[TBAA9]] > +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 > [[A_SROA_235_0_DST_SROA_IDX]], ptr align 1 [[A_SROA_235]], i32 7, i1 false), > !tbaa [[TBAA59]] > ; CHECK-NEXT: [[A_SROA_31_0_DST_SROA_IDX:%.*]] = getelementptr inbounds > i8, ptr [[DST]], i64 215 > -; CHECK-NEXT: store <85 x i8> [[A_SROA_31_SROA_0_0_VECBLEND]], ptr > [[A_SROA_31_0_DST_SROA_IDX]], align 1, !tbaa [[TBAA9]] > +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 > [[A_SROA_31_0_DST_SROA_IDX]], ptr align 1 [[A_SROA_31]], i32 85, i1 false), > !tbaa [[TBAA59]] > ; CHECK-NEXT: ret void > ; > > @@ -298,29 +315,60 @@ entry: > define void @test4(ptr %dst, ptr %src) { > ; CHECK-LABEL: @test4( > ; CHECK-NEXT: entry: > -; CHECK-NEXT: [[A_SROA_0_SROA_0_0_COPYLOAD:%.*]] = load <20 x i8>, ptr > [[SRC:%.*]], align 1, !tbaa [[TBAA0]] > +; CHECK-NEXT: [[A_SROA_0:%.*]] = alloca [20 x i8], align 1 > +; CHECK-NEXT: [[A_SROA_2_SROA_4:%.*]] = alloca [7 x i8], align 1 > +; CHECK-NEXT: [[A_SROA_3:%.*]] = alloca [10 x i8], align 1 > +; CHECK-NEXT: [[A_SROA_31_SROA_5:%.*]] = alloca [7 x i8], align 1 > +; CHECK-NEXT: [[A_SROA_6_SROA_4:%.*]] = alloca [7 x i8], align 1 > +; CHECK-NEXT: [[A_SROA_7:%.*]] = alloca [40 x i8], align 1 > +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[A_SROA_0]], > ptr align 1 [[SRC:%.*]], i32 20, i1 false), !tbaa [[TBAA0]] > ; CHECK-NEXT: [[A_SROA_2_0_SRC_SROA_IDX:%.*]] = getelementptr inbounds > i8, ptr [[SRC]], i64 20 > -; CHECK-NEXT: [[A_SROA_2_SROA_0_0_COPYLOAD:%.*]] = load <10 x i8>, ptr > [[A_SROA_2_0_SRC_SROA_IDX]], align 1, !tbaa [[TBAA0]] > +; CHECK-NEXT: [[A_SROA_2_SROA_0_0_COPYLOAD:%.*]] = load i16, ptr > [[A_SROA_2_0_SRC_SROA_IDX]], align 1, !tbaa [[TBAA0]] > +; CHECK-NEXT: [[A_SROA_2_SROA_3_0_A_SROA_2_0_SRC_SROA_IDX_SROA_IDX:%.*]] > = getelementptr inbounds i8, ptr [[A_SROA_2_0_SRC_SROA_IDX]], i64 2 > +; CHECK-NEXT: [[A_SROA_2_SROA_3_0_COPYLOAD:%.*]] = load i8, ptr > [[A_SROA_2_SROA_3_0_A_SROA_2_0_SRC_SROA_IDX_SROA_IDX]], align 1, !tbaa > [[TBAA0]] > +; CHECK-NEXT: [[A_SROA_2_SROA_4_0_A_SROA_2_0_SRC_SROA_IDX_SROA_IDX:%.*]] > = getelementptr inbounds i8, ptr [[A_SROA_2_0_SRC_SROA_IDX]], i64 3 > +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 > [[A_SROA_2_SROA_4]], ptr align 1 > [[A_SROA_2_SROA_4_0_A_SROA_2_0_SRC_SROA_IDX_SROA_IDX]], i32 7, i1 false), > !tbaa [[TBAA0]] > ; CHECK-NEXT: [[A_SROA_3_0_SRC_SROA_IDX:%.*]] = getelementptr inbounds > i8, ptr [[SRC]], i64 30 > -; CHECK-NEXT: [[A_SROA_3_SROA_0_0_COPYLOAD:%.*]] = load <10 x i8>, ptr > [[A_SROA_3_0_SRC_SROA_IDX]], align 1, !tbaa [[TBAA0]] > +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[A_SROA_3]], > ptr align 1 [[A_SROA_3_0_SRC_SROA_IDX]], i32 10, i1 false), !tbaa [[TBAA0]] > ; CHECK-NEXT: [[A_SROA_31_0_SRC_SROA_IDX:%.*]] = getelementptr inbounds > i8, ptr [[SRC]], i64 40 > -; CHECK-NEXT: [[A_SROA_31_SROA_0_0_COPYLOAD:%.*]] = load <10 x i8>, ptr > [[A_SROA_31_0_SRC_SROA_IDX]], align 1, !tbaa [[TBAA0]] > +; CHECK-NEXT: [[A_SROA_31_SROA_0_0_COPYLOAD:%.*]] = load i16, ptr > [[A_SROA_31_0_SRC_SROA_IDX]], align 1, !tbaa [[TBAA0]] > +; CHECK-NEXT: > [[A_SROA_31_SROA_4_0_A_SROA_31_0_SRC_SROA_IDX_SROA_IDX:%.*]] = getelementptr > inbounds i8, ptr [[A_SROA_31_0_SRC_SROA_IDX]], i64 2 > +; CHECK-NEXT: [[A_SROA_31_SROA_4_0_COPYLOAD:%.*]] = load i8, ptr > [[A_SROA_31_SROA_4_0_A_SROA_31_0_SRC_SROA_IDX_SROA_IDX]], align 1, !tbaa > [[TBAA0]] > +; CHECK-NEXT: > [[A_SROA_31_SROA_5_0_A_SROA_31_0_SRC_SROA_IDX_SROA_IDX:%.*]] = getelementptr > inbounds i8, ptr [[A_SROA_31_0_SRC_SROA_IDX]], i64 3 > +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 > [[A_SROA_31_SROA_5]], ptr align 1 > [[A_SROA_31_SROA_5_0_A_SROA_31_0_SRC_SROA_IDX_SROA_IDX]], i32 7, i1 false), > !tbaa [[TBAA0]] > ; CHECK-NEXT: [[A_SROA_6_0_SRC_SROA_IDX:%.*]] = getelementptr inbounds > i8, ptr [[SRC]], i64 50 > -; CHECK-NEXT: [[A_SROA_6_SROA_0_0_COPYLOAD:%.*]] = load <10 x i8>, ptr > [[A_SROA_6_0_SRC_SROA_IDX]], align 1, !tbaa [[TBAA0]] > +; CHECK-NEXT: [[A_SROA_6_SROA_0_0_COPYLOAD:%.*]] = load i16, ptr > [[A_SROA_6_0_SRC_SROA_IDX]], align 1, !tbaa [[TBAA0]] > +; CHECK-NEXT: [[A_SROA_6_SROA_3_0_A_SROA_6_0_SRC_SROA_IDX_SROA_IDX:%.*]] > = getelementptr inbounds i8, ptr [[A_SROA_6_0_SRC_SROA_IDX]], i64 2 > +; CHECK-NEXT: [[A_SROA_6_SROA_3_0_COPYLOAD:%.*]] = load i8, ptr > [[A_SROA_6_SROA_3_0_A_SROA_6_0_SRC_SROA_IDX_SROA_IDX]], align 1, !tbaa > [[TBAA0]] > +; CHECK-NEXT: [[A_SROA_6_SROA_4_0_A_SROA_6_0_SRC_SROA_IDX_SROA_IDX:%.*]] > = getelementptr inbounds i8, ptr [[A_SROA_6_0_SRC_SROA_IDX]], i64 3 > +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 > [[A_SROA_6_SROA_4]], ptr align 1 > [[A_SROA_6_SROA_4_0_A_SROA_6_0_SRC_SROA_IDX_SROA_IDX]], i32 7, i1 false), > !tbaa [[TBAA0]] > ; CHECK-NEXT: [[A_SROA_7_0_SRC_SROA_IDX:%.*]] = getelementptr inbounds > i8, ptr [[SRC]], i64 60 > -; CHECK-NEXT: [[A_SROA_7_SROA_0_0_COPYLOAD:%.*]] = load <40 x i8>, ptr > [[A_SROA_7_0_SRC_SROA_IDX]], align 1, !tbaa [[TBAA0]] > -; CHECK-NEXT: [[A_SROA_31_SROA_0_2_VEC_INSERT:%.*]] = insertelement <10 x > i8> [[A_SROA_2_SROA_0_0_COPYLOAD]], i8 0, i32 2 > -; CHECK-NEXT: store <20 x i8> [[A_SROA_0_SROA_0_0_COPYLOAD]], ptr > [[DST:%.*]], align 1, !tbaa [[TBAA11:![0-9]+]] > +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[A_SROA_7]], > ptr align 1 [[A_SROA_7_0_SRC_SROA_IDX]], i32 40, i1 false), !tbaa [[TBAA0]] > +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 > [[A_SROA_31_SROA_5]], ptr align 1 [[A_SROA_2_SROA_4]], i32 7, i1 false), > !tbaa [[TBAA3]] > +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 > [[A_SROA_31_SROA_5]], ptr align 1 [[A_SROA_6_SROA_4]], i32 7, i1 false), > !tbaa [[TBAA5]] > +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[DST:%.*]], > ptr align 1 [[A_SROA_0]], i32 20, i1 false), !tbaa [[TBAA7]] > ; CHECK-NEXT: [[A_SROA_2_0_DST_SROA_IDX:%.*]] = getelementptr inbounds > i8, ptr [[DST]], i64 20 > -; CHECK-NEXT: store <10 x i8> [[A_SROA_2_SROA_0_0_COPYLOAD]], ptr > [[A_SROA_2_0_DST_SROA_IDX]], align 1, !tbaa [[TBAA11]] > +; CHECK-NEXT: store i16 [[A_SROA_2_SROA_0_0_COPYLOAD]], ptr > [[A_SROA_2_0_DST_SROA_IDX]], align 1, !tbaa [[TBAA7]] > +; CHECK-NEXT: [[A_SROA_2_SROA_3_0_A_SROA_2_0_DST_SROA_IDX_SROA_IDX:%.*]] > = getelementptr inbounds i8, ptr [[A_SROA_2_0_DST_SROA_IDX]], i64 2 > +; CHECK-NEXT: store i8 [[A_SROA_2_SROA_3_0_COPYLOAD]], ptr > [[A_SROA_2_SROA_3_0_A_SROA_2_0_DST_SROA_IDX_SROA_IDX]], align 1, !tbaa > [[TBAA7]] > +; CHECK-NEXT: [[A_SROA_2_SROA_4_0_A_SROA_2_0_DST_SROA_IDX_SROA_IDX:%.*]] > = getelementptr inbounds i8, ptr [[A_SROA_2_0_DST_SROA_IDX]], i64 3 > +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 > [[A_SROA_2_SROA_4_0_A_SROA_2_0_DST_SROA_IDX_SROA_IDX]], ptr align 1 > [[A_SROA_2_SROA_4]], i32 7, i1 false), !tbaa [[TBAA7]] > ; CHECK-NEXT: [[A_SROA_3_0_DST_SROA_IDX:%.*]] = getelementptr inbounds > i8, ptr [[DST]], i64 30 > -; CHECK-NEXT: store <10 x i8> [[A_SROA_3_SROA_0_0_COPYLOAD]], ptr > [[A_SROA_3_0_DST_SROA_IDX]], align 1, !tbaa [[TBAA11]] > +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 > [[A_SROA_3_0_DST_SROA_IDX]], ptr align 1 [[A_SROA_3]], i32 10, i1 false), > !tbaa [[TBAA7]] > ; CHECK-NEXT: [[A_SROA_31_0_DST_SROA_IDX:%.*]] = getelementptr inbounds > i8, ptr [[DST]], i64 40 > -; CHECK-NEXT: store <10 x i8> [[A_SROA_6_SROA_0_0_COPYLOAD]], ptr > [[A_SROA_31_0_DST_SROA_IDX]], align 1, !tbaa [[TBAA11]] > +; CHECK-NEXT: store i16 [[A_SROA_6_SROA_0_0_COPYLOAD]], ptr > [[A_SROA_31_0_DST_SROA_IDX]], align 1, !tbaa [[TBAA7]] > +; CHECK-NEXT: > [[A_SROA_31_SROA_4_0_A_SROA_31_0_DST_SROA_IDX_SROA_IDX:%.*]] = getelementptr > inbounds i8, ptr [[A_SROA_31_0_DST_SROA_IDX]], i64 2 > +; CHECK-NEXT: store i8 [[A_SROA_6_SROA_3_0_COPYLOAD]], ptr > [[A_SROA_31_SROA_4_0_A_SROA_31_0_DST_SROA_IDX_SROA_IDX]], align 1, !tbaa > [[TBAA7]] > +; CHECK-NEXT: > [[A_SROA_31_SROA_5_0_A_SROA_31_0_DST_SROA_IDX_SROA_IDX:%.*]] = getelementptr > inbounds i8, ptr [[A_SROA_31_0_DST_SROA_IDX]], i64 3 > +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 > [[A_SROA_31_SROA_5_0_A_SROA_31_0_DST_SROA_IDX_SROA_IDX]], ptr align 1 > [[A_SROA_31_SROA_5]], i32 7, i1 false), !tbaa [[TBAA7]] > ; CHECK-NEXT: [[A_SROA_6_0_DST_SROA_IDX:%.*]] = getelementptr inbounds > i8, ptr [[DST]], i64 50 > -; CHECK-NEXT: store <10 x i8> [[A_SROA_6_SROA_0_0_COPYLOAD]], ptr > [[A_SROA_6_0_DST_SROA_IDX]], align 1, !tbaa [[TBAA11]] > +; CHECK-NEXT: store i16 [[A_SROA_6_SROA_0_0_COPYLOAD]], ptr > [[A_SROA_6_0_DST_SROA_IDX]], align 1, !tbaa [[TBAA7]] > +; CHECK-NEXT: [[A_SROA_6_SROA_3_0_A_SROA_6_0_DST_SROA_IDX_SROA_IDX:%.*]] > = getelementptr inbounds i8, ptr [[A_SROA_6_0_DST_SROA_IDX]], i64 2 > +; CHECK-NEXT: store i8 [[A_SROA_6_SROA_3_0_COPYLOAD]], ptr > [[A_SROA_6_SROA_3_0_A_SROA_6_0_DST_SROA_IDX_SROA_IDX]], align 1, !tbaa > [[TBAA7]] > +; CHECK-NEXT: [[A_SROA_6_SROA_4_0_A_SROA_6_0_DST_SROA_IDX_SROA_IDX:%.*]] > = getelementptr inbounds i8, ptr [[A_SROA_6_0_DST_SROA_IDX]], i64 3 > +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 > [[A_SROA_6_SROA_4_0_A_SROA_6_0_DST_SROA_IDX_SROA_IDX]], ptr align 1 > [[A_SROA_6_SROA_4]], i32 7, i1 false), !tbaa [[TBAA7]] > ; CHECK-NEXT: [[A_SROA_7_0_DST_SROA_IDX:%.*]] = getelementptr inbounds > i8, ptr [[DST]], i64 60 > -; CHECK-NEXT: store <40 x i8> [[A_SROA_7_SROA_0_0_COPYLOAD]], ptr > [[A_SROA_7_0_DST_SROA_IDX]], align 1, !tbaa [[TBAA11]] > +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 > [[A_SROA_7_0_DST_SROA_IDX]], ptr align 1 [[A_SROA_7]], i32 40, i1 false), > !tbaa [[TBAA7]] > ; CHECK-NEXT: ret void > ; > > @@ -407,8 +455,8 @@ define void @test7(ptr %src, ptr %dst) { > ; CHECK-NEXT: [[A_SROA_0:%.*]] = alloca i32, align 4 > ; CHECK-NEXT: [[A_SROA_0_0_COPYLOAD:%.*]] = load volatile i32, ptr > [[SRC:%.*]], align 1, !tbaa [[TBAA0]] > ; CHECK-NEXT: store volatile i32 [[A_SROA_0_0_COPYLOAD]], ptr > [[A_SROA_0]], align 4, !tbaa [[TBAA0]] > -; CHECK-NEXT: [[A_SROA_0_0_A_SROA_0_0_COPYLOAD1:%.*]] = load volatile > i32, ptr [[A_SROA_0]], align 4, !tbaa [[TBAA13:![0-9]+]] > -; CHECK-NEXT: store volatile i32 [[A_SROA_0_0_A_SROA_0_0_COPYLOAD1]], ptr > [[DST:%.*]], align 1, !tbaa [[TBAA13]] > +; CHECK-NEXT: [[A_SROA_0_0_A_SROA_0_0_COPYLOAD1:%.*]] = load volatile > i32, ptr [[A_SROA_0]], align 4, !tbaa [[TBAA3]] > +; CHECK-NEXT: store volatile i32 [[A_SROA_0_0_A_SROA_0_0_COPYLOAD1]], ptr > [[DST:%.*]], align 1, !tbaa [[TBAA3]] > ; CHECK-NEXT: ret void > ; > > @@ -428,9 +476,9 @@ define %S2 @test8(ptr %arg) { > ; CHECK-NEXT: entry: > ; CHECK-NEXT: [[S2_NEXT_PTR:%.*]] = getelementptr [[S2:%.*]], ptr > [[ARG:%.*]], i64 0, i32 1 > ; CHECK-NEXT: [[S2_NEXT:%.*]] = load ptr, ptr [[S2_NEXT_PTR]], align 8, > !tbaa [[TBAA0]] > -; CHECK-NEXT: [[S2_NEXT_S1:%.*]] = load ptr, ptr [[S2_NEXT]], align 8, > !tbaa [[TBAA13]] > +; CHECK-NEXT: [[S2_NEXT_S1:%.*]] = load ptr, ptr [[S2_NEXT]], align 8, > !tbaa [[TBAA3]] > ; CHECK-NEXT: [[S2_NEXT_NEXT_PTR:%.*]] = getelementptr [[S2]], ptr > [[S2_NEXT]], i64 0, i32 1 > -; CHECK-NEXT: [[S2_NEXT_NEXT:%.*]] = load ptr, ptr [[S2_NEXT_NEXT_PTR]], > align 8, !tbaa [[TBAA11]] > +; CHECK-NEXT: [[S2_NEXT_NEXT:%.*]] = load ptr, ptr [[S2_NEXT_NEXT_PTR]], > align 8, !tbaa [[TBAA7]] > ; CHECK-NEXT: [[RESULT1:%.*]] = insertvalue [[S2]] poison, ptr > [[S2_NEXT_S1]], 0 > ; CHECK-NEXT: [[RESULT2:%.*]] = insertvalue [[S2]] [[RESULT1]], ptr > [[S2_NEXT_NEXT]], 1 > ; CHECK-NEXT: ret [[S2]] [[RESULT2]] > @@ -677,7 +725,7 @@ define void @test16(ptr %src, ptr %dst) { > ; CHECK-LABEL: @test16( > ; CHECK-NEXT: entry: > ; CHECK-NEXT: [[A_SROA_0_0_COPYLOAD:%.*]] = load i24, ptr [[SRC:%.*]], > align 1, !tbaa [[TBAA0]] > -; CHECK-NEXT: store i24 0, ptr [[DST:%.*]], align 1, !tbaa > [[TBAA15:![0-9]+]] > +; CHECK-NEXT: store i24 0, ptr [[DST:%.*]], align 1, !tbaa [[TBAA5]] > ; CHECK-NEXT: ret void > ; > > @@ -696,7 +744,7 @@ define void @test17(ptr %src, ptr %dst) { > ; CHECK-NEXT: entry: > ; CHECK-NEXT: [[A:%.*]] = alloca [3 x i8], align 1 > ; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr [[A]], ptr > [[SRC:%.*]], i32 4, i1 true), !tbaa [[TBAA0]] > -; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr [[DST:%.*]], ptr > [[A]], i32 4, i1 true), !tbaa [[TBAA13]] > +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr [[DST:%.*]], ptr > [[A]], i32 4, i1 true), !tbaa [[TBAA3]] > ; CHECK-NEXT: ret void > ; > > @@ -717,12 +765,12 @@ define void @test18(ptr %src, ptr %dst, i32 %size) { > ; CHECK-NEXT: [[A_SROA_0_0_COPYLOAD:%.*]] = load i32, ptr [[SRC:%.*]], > align 1, !tbaa [[TBAA0]] > ; CHECK-NEXT: [[A_SROA_3_0_SRC_SROA_IDX:%.*]] = getelementptr inbounds > i8, ptr [[SRC]], i64 4 > ; CHECK-NEXT: [[A_SROA_3_0_COPYLOAD:%.*]] = load i32, ptr > [[A_SROA_3_0_SRC_SROA_IDX]], align 1, !tbaa [[TBAA0]] > -; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[A_SROA_33]], > ptr [[SRC]], i32 [[SIZE:%.*]], i1 false), !tbaa [[TBAA13]] > -; CHECK-NEXT: call void @llvm.memset.p0.i32(ptr align 1 [[A_SROA_33]], i8 > 42, i32 [[SIZE]], i1 false), !tbaa [[TBAA15]] > -; CHECK-NEXT: store i32 42, ptr [[DST:%.*]], align 1, !tbaa > [[TBAA17:![0-9]+]] > +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[A_SROA_33]], > ptr [[SRC]], i32 [[SIZE:%.*]], i1 false), !tbaa [[TBAA3]] > +; CHECK-NEXT: call void @llvm.memset.p0.i32(ptr align 1 [[A_SROA_33]], i8 > 42, i32 [[SIZE]], i1 false), !tbaa [[TBAA5]] > +; CHECK-NEXT: store i32 42, ptr [[DST:%.*]], align 1, !tbaa [[TBAA9]] > ; CHECK-NEXT: [[A_SROA_3_0_DST_SROA_IDX:%.*]] = getelementptr inbounds > i8, ptr [[DST]], i64 4 > -; CHECK-NEXT: store i32 [[A_SROA_3_0_COPYLOAD]], ptr > [[A_SROA_3_0_DST_SROA_IDX]], align 1, !tbaa [[TBAA17]] > -; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr [[DST]], ptr align 1 > [[A_SROA_33]], i32 [[SIZE]], i1 false), !tbaa [[TBAA19:![0-9]+]] > +; CHECK-NEXT: store i32 [[A_SROA_3_0_COPYLOAD]], ptr > [[A_SROA_3_0_DST_SROA_IDX]], align 1, !tbaa [[TBAA9]] > +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr [[DST]], ptr align 1 > [[A_SROA_33]], i32 [[SIZE]], i1 false), !tbaa [[TBAA11]] > ; CHECK-NEXT: ret void > ; > > @@ -957,7 +1005,8 @@ define void @PR14034(ptr %ptr, ptr %ptr2) { > ; thing is to handle empty structs gracefully. > ; CHECK-LABEL: @PR14034( > ; CHECK-NEXT: entry: > -; CHECK-NEXT: store <12 x i8> undef, ptr [[PTR2:%.*]], align 1 > +; CHECK-NEXT: [[A_SROA_0:%.*]] = alloca [12 x i8], align 8 > +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[PTR2:%.*]], > ptr align 8 [[A_SROA_0]], i32 12, i1 false) > ; CHECK-NEXT: ret void > ; > > @@ -1498,8 +1547,8 @@ define void @test24(ptr %src, ptr %dst) { > ; CHECK-NEXT: [[A:%.*]] = alloca i64, align 16 > ; CHECK-NEXT: [[A_0_COPYLOAD:%.*]] = load volatile i64, ptr [[SRC:%.*]], > align 1, !tbaa [[TBAA0]] > ; CHECK-NEXT: store volatile i64 [[A_0_COPYLOAD]], ptr [[A]], align 16, > !tbaa [[TBAA0]] > -; CHECK-NEXT: [[A_0_COPYLOAD1:%.*]] = load volatile i64, ptr [[A]], align > 16, !tbaa [[TBAA13]] > -; CHECK-NEXT: store volatile i64 [[A_0_COPYLOAD1]], ptr [[DST:%.*]], > align 1, !tbaa [[TBAA13]] > +; CHECK-NEXT: [[A_0_COPYLOAD1:%.*]] = load volatile i64, ptr [[A]], align > 16, !tbaa [[TBAA3]] > +; CHECK-NEXT: store volatile i64 [[A_0_COPYLOAD1]], ptr [[DST:%.*]], > align 1, !tbaa [[TBAA3]] > ; CHECK-NEXT: ret void > ; > > > diff --git a/llvm/test/Transforms/SROA/pointer-offset-size.ll > b/llvm/test/Transforms/SROA/pointer-offset-size.ll > index bf3c63c1ae7a3..76b52098a7e62 100644 > --- a/llvm/test/Transforms/SROA/pointer-offset-size.ll > +++ b/llvm/test/Transforms/SROA/pointer-offset-size.ll > @@ -8,7 +8,8 @@ target datalayout = "e-p:64:64:64:32" > define i16 @test(ptr %ts2.i) { > ; CHECK-LABEL: @test( > ; CHECK-NEXT: entry: > -; CHECK-NEXT: store <3 x i8> undef, ptr [[TS2_I:%.*]], align 1 > +; CHECK-NEXT: [[S_SROA_0:%.*]] = alloca [3 x i8], align 8 > +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[TS2_I:%.*]], > ptr align 8 [[S_SROA_0]], i32 3, i1 false) > ; CHECK-NEXT: [[TMP0:%.*]] = load i16, ptr [[TS2_I]], align 2 > ; CHECK-NEXT: ret i16 [[TMP0]] > ; > > diff --git a/llvm/test/Transforms/SROA/scalable-vectors.ll > b/llvm/test/Transforms/SROA/scalable-vectors.ll > index bf9742e570084..d96f4dba868b3 100644 > --- a/llvm/test/Transforms/SROA/scalable-vectors.ll > +++ b/llvm/test/Transforms/SROA/scalable-vectors.ll > @@ -67,7 +67,7 @@ define <vscale x 4 x i32> @cast_alloca_to_svint32_t(<vscale > x 4 x i32> %type.coe > define <vscale x 4 x i32> @cast_alloca_from_svint32_t() { > ; CHECK-LABEL: @cast_alloca_from_svint32_t( > ; CHECK-NEXT: [[RETVAL_COERCE:%.*]] = alloca <vscale x 4 x i32>, align 16 > -; CHECK-NEXT: store <64 x i8> undef, ptr [[RETVAL_COERCE]], align 16 > +; CHECK-NEXT: store <16 x i32> undef, ptr [[RETVAL_COERCE]], align 16 > ; CHECK-NEXT: [[TMP1:%.*]] = load <vscale x 4 x i32>, ptr > [[RETVAL_COERCE]], align 16 > ; CHECK-NEXT: ret <vscale x 4 x i32> [[TMP1]] > ; > > diff --git a/llvm/test/Transforms/SROA/slice-width.ll > b/llvm/test/Transforms/SROA/slice-width.ll > index 2aa54a104153f..7d2aeaaff57bc 100644 > --- a/llvm/test/Transforms/SROA/slice-width.ll > +++ b/llvm/test/Transforms/SROA/slice-width.ll > @@ -46,7 +46,8 @@ load_i1: > > define void @memcpy_fp80_padding() { > ; CHECK-LABEL: @memcpy_fp80_padding( > -; CHECK-NEXT: [[X_SROA_0_SROA_0_0_COPYLOAD:%.*]] = load <16 x i8>, ptr > @foo_copy_source, align 16 > +; CHECK-NEXT: [[X_SROA_0:%.*]] = alloca x86_fp80, align 16 > +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 16 [[X_SROA_0]], > ptr align 16 @foo_copy_source, i32 16, i1 false) > ; CHECK-NEXT: [[X_SROA_1_0_COPYLOAD:%.*]] = load i64, ptr getelementptr > inbounds (i8, ptr @foo_copy_source, i64 16), align 16 > ; CHECK-NEXT: [[X_SROA_2_0_COPYLOAD:%.*]] = load i64, ptr getelementptr > inbounds (i8, ptr @foo_copy_source, i64 24), align 8 > ; CHECK-NEXT: store i64 [[X_SROA_1_0_COPYLOAD]], ptr @i64_sink, align 4 > @@ -66,6 +67,8 @@ define void @memcpy_fp80_padding() { > > define void @memset_fp80_padding() { > ; CHECK-LABEL: @memset_fp80_padding( > +; CHECK-NEXT: [[X_SROA_0:%.*]] = alloca x86_fp80, align 16 > +; CHECK-NEXT: call void @llvm.memset.p0.i32(ptr align 16 [[X_SROA_0]], i8 > -1, i32 16, i1 false) > ; CHECK-NEXT: store i64 -1, ptr @i64_sink, align 4 > ; CHECK-NEXT: ret void > ; > @@ -133,7 +136,8 @@ define void @PR50888() { > > define void @PR50910() { > ; CHECK-LABEL: @PR50910( > -; CHECK-NEXT: [[T1_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i8> > undef, i8 0, i32 0 > +; CHECK-NEXT: [[T1:%.*]] = alloca i8, i64 1, align 8 > +; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[T1]], i8 0, i64 > 1, i1 false) > ; CHECK-NEXT: ret void > ; > %t1 = alloca i8, i64 1, align 8 > > diff --git a/llvm/test/Transforms/SROA/sroa-common-type-fail-promotion.ll > b/llvm/test/Transforms/SROA/sroa-common-type-fail-promotion.ll > index c4e68ff0799ea..00cbe56929c02 100644 > --- a/llvm/test/Transforms/SROA/sroa-common-type-fail-promotion.ll > +++ b/llvm/test/Transforms/SROA/sroa-common-type-fail-promotion.ll > @@ -43,15 +43,11 @@ define amdgpu_kernel void @test_memset() #0 { > ; CHECK-NEXT: entry: > ; CHECK-NEXT: [[DATA:%.*]] = load <4 x float>, ptr undef, align 16 > ; CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[DATA]] to <8 x half> > -; CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x half> [[TMP0]] to <8 x i16> > ; CHECK-NEXT: br label [[BB:%.*]] > ; CHECK: bb: > -; CHECK-NEXT: [[B_BLOCKWISE_COPY_SROA_0_SROA_0_0_VEC_EXTRACT:%.*]] = > extractelement <8 x i16> [[TMP1]], i32 0 > -; CHECK-NEXT: [[TMP2:%.*]] = bitcast i16 > [[B_BLOCKWISE_COPY_SROA_0_SROA_0_0_VEC_EXTRACT]] to half > -; CHECK-NEXT: [[B_BLOCKWISE_COPY_SROA_0_SROA_0_2_VEC_EXTRACT:%.*]] = > extractelement <8 x i16> [[TMP1]], i32 1 > -; CHECK-NEXT: [[TMP3:%.*]] = bitcast i16 > [[B_BLOCKWISE_COPY_SROA_0_SROA_0_2_VEC_EXTRACT]] to half > -; CHECK-NEXT: [[B_BLOCKWISE_COPY_SROA_0_SROA_0_4_VEC_EXTRACT:%.*]] = > extractelement <8 x i16> [[TMP1]], i32 2 > -; CHECK-NEXT: [[TMP4:%.*]] = bitcast i16 > [[B_BLOCKWISE_COPY_SROA_0_SROA_0_4_VEC_EXTRACT]] to half > +; CHECK-NEXT: [[B_BLOCKWISE_COPY_SROA_0_0_VEC_EXTRACT:%.*]] = > extractelement <8 x half> [[TMP0]], i32 0 > +; CHECK-NEXT: [[B_BLOCKWISE_COPY_SROA_0_2_VEC_EXTRACT:%.*]] = > extractelement <8 x half> [[TMP0]], i32 1 > +; CHECK-NEXT: [[B_BLOCKWISE_COPY_SROA_0_4_VEC_EXTRACT:%.*]] = > extractelement <8 x half> [[TMP0]], i32 2 > ; CHECK-NEXT: ret void > ; > entry: > @@ -239,23 +235,25 @@ bb: > define amdgpu_kernel void @test_half_array() #0 { > ; CHECK-LABEL: @test_half_array( > ; CHECK-NEXT: entry: > +; CHECK-NEXT: [[B_BLOCKWISE_COPY_SROA_0:%.*]] = alloca float, align 16 > +; CHECK-NEXT: [[B_BLOCKWISE_COPY_SROA_4:%.*]] = alloca float, align 4 > +; CHECK-NEXT: call void @llvm.memset.p0.i32(ptr align 16 > [[B_BLOCKWISE_COPY_SROA_0]], i8 0, i32 4, i1 false) > +; CHECK-NEXT: call void @llvm.memset.p0.i32(ptr align 4 > [[B_BLOCKWISE_COPY_SROA_4]], i8 0, i32 4, i1 false) > ; CHECK-NEXT: [[TMP0:%.*]] = bitcast float undef to i32 > ; CHECK-NEXT: [[TMP1:%.*]] = bitcast float undef to i32 > ; CHECK-NEXT: [[DATA:%.*]] = load [4 x float], ptr undef, align 4 > ; CHECK-NEXT: [[DATA_FCA_0_EXTRACT:%.*]] = extractvalue [4 x float] > [[DATA]], 0 > -; CHECK-NEXT: [[TMP2:%.*]] = bitcast float [[DATA_FCA_0_EXTRACT]] to <4 x > i8> > +; CHECK-NEXT: store float [[DATA_FCA_0_EXTRACT]], ptr > [[B_BLOCKWISE_COPY_SROA_0]], align 16 > ; CHECK-NEXT: [[DATA_FCA_1_EXTRACT:%.*]] = extractvalue [4 x float] > [[DATA]], 1 > -; CHECK-NEXT: [[TMP3:%.*]] = bitcast float [[DATA_FCA_1_EXTRACT]] to <4 x > i8> > +; CHECK-NEXT: store float [[DATA_FCA_1_EXTRACT]], ptr > [[B_BLOCKWISE_COPY_SROA_4]], align 4 > ; CHECK-NEXT: [[DATA_FCA_2_EXTRACT:%.*]] = extractvalue [4 x float] > [[DATA]], 2 > ; CHECK-NEXT: [[DATA_FCA_3_EXTRACT:%.*]] = extractvalue [4 x float] > [[DATA]], 3 > ; CHECK-NEXT: br label [[BB:%.*]] > ; CHECK: bb: > -; CHECK-NEXT: [[B_BLOCKWISE_COPY_SROA_0_SROA_0_0_VEC_EXTRACT:%.*]] = > shufflevector <4 x i8> [[TMP2]], <4 x i8> poison, <2 x i32> <i32 0, i32 1> > -; CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i8> > [[B_BLOCKWISE_COPY_SROA_0_SROA_0_0_VEC_EXTRACT]] to half > -; CHECK-NEXT: [[B_BLOCKWISE_COPY_SROA_0_SROA_0_2_VEC_EXTRACT:%.*]] = > shufflevector <4 x i8> [[TMP2]], <4 x i8> poison, <2 x i32> <i32 2, i32 3> > -; CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i8> > [[B_BLOCKWISE_COPY_SROA_0_SROA_0_2_VEC_EXTRACT]] to half > -; CHECK-NEXT: [[B_BLOCKWISE_COPY_SROA_4_SROA_0_0_VEC_EXTRACT:%.*]] = > shufflevector <4 x i8> [[TMP3]], <4 x i8> poison, <2 x i32> <i32 0, i32 1> > -; CHECK-NEXT: [[TMP6:%.*]] = bitcast <2 x i8> > [[B_BLOCKWISE_COPY_SROA_4_SROA_0_0_VEC_EXTRACT]] to half > +; CHECK-NEXT: > [[B_BLOCKWISE_COPY_SROA_0_0_B_BLOCKWISE_COPY_SROA_0_0_LOAD1:%.*]] = load > half, ptr [[B_BLOCKWISE_COPY_SROA_0]], align 16 > +; CHECK-NEXT: [[B_BLOCKWISE_COPY_SROA_0_2_PTR2_SROA_IDX1:%.*]] = > getelementptr inbounds i8, ptr [[B_BLOCKWISE_COPY_SROA_0]], i64 2 > +; CHECK-NEXT: > [[B_BLOCKWISE_COPY_SROA_0_2_B_BLOCKWISE_COPY_SROA_0_2_LOAD2:%.*]] = load > half, ptr [[B_BLOCKWISE_COPY_SROA_0_2_PTR2_SROA_IDX1]], align 2 > +; CHECK-NEXT: > [[B_BLOCKWISE_COPY_SROA_4_0_B_BLOCKWISE_COPY_SROA_4_4_LOAD3:%.*]] = load > half, ptr [[B_BLOCKWISE_COPY_SROA_4]], align 4 > ; CHECK-NEXT: ret void > ; > entry: > @@ -277,17 +275,15 @@ bb: > define amdgpu_kernel void @test_array_vector() #0 { > ; CHECK-LABEL: @test_array_vector( > ; CHECK-NEXT: entry: > +; CHECK-NEXT: [[B_BLOCKWISE_COPY_SROA_5:%.*]] = alloca <8 x half>, align > 16 > +; CHECK-NEXT: call void @llvm.memset.p0.i32(ptr align 16 > [[B_BLOCKWISE_COPY_SROA_5]], i8 0, i32 16, i1 false) > ; CHECK-NEXT: [[DATA:%.*]] = load <4 x float>, ptr undef, align 16 > ; CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[DATA]] to <8 x half> > -; CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x half> [[TMP0]] to <8 x i16> > ; CHECK-NEXT: br label [[BB:%.*]] > ; CHECK: bb: > -; CHECK-NEXT: [[B_BLOCKWISE_COPY_SROA_0_SROA_0_0_VEC_EXTRACT:%.*]] = > extractelement <8 x i16> [[TMP1]], i32 0 > -; CHECK-NEXT: [[TMP2:%.*]] = bitcast i16 > [[B_BLOCKWISE_COPY_SROA_0_SROA_0_0_VEC_EXTRACT]] to half > -; CHECK-NEXT: [[B_BLOCKWISE_COPY_SROA_0_SROA_0_2_VEC_EXTRACT:%.*]] = > extractelement <8 x i16> [[TMP1]], i32 1 > -; CHECK-NEXT: [[TMP3:%.*]] = bitcast i16 > [[B_BLOCKWISE_COPY_SROA_0_SROA_0_2_VEC_EXTRACT]] to half > -; CHECK-NEXT: [[B_BLOCKWISE_COPY_SROA_0_SROA_0_4_VEC_EXTRACT:%.*]] = > extractelement <8 x i16> [[TMP1]], i32 2 > -; CHECK-NEXT: [[TMP4:%.*]] = bitcast i16 > [[B_BLOCKWISE_COPY_SROA_0_SROA_0_4_VEC_EXTRACT]] to half > +; CHECK-NEXT: [[B_BLOCKWISE_COPY_SROA_0_0_VEC_EXTRACT:%.*]] = > extractelement <8 x half> [[TMP0]], i32 0 > +; CHECK-NEXT: [[B_BLOCKWISE_COPY_SROA_0_2_VEC_EXTRACT:%.*]] = > extractelement <8 x half> [[TMP0]], i32 1 > +; CHECK-NEXT: [[B_BLOCKWISE_COPY_SROA_0_4_VEC_EXTRACT:%.*]] = > extractelement <8 x half> [[TMP0]], i32 2 > ; CHECK-NEXT: ret void > ; > entry: > @@ -309,17 +305,15 @@ bb: > define amdgpu_kernel void @test_array_vector2() #0 { > ; CHECK-LABEL: @test_array_vector2( > ; CHECK-NEXT: entry: > +; CHECK-NEXT: [[B_BLOCKWISE_COPY_SROA_5:%.*]] = alloca <8 x half>, align > 16 > +; CHECK-NEXT: call void @llvm.memset.p0.i32(ptr align 16 > [[B_BLOCKWISE_COPY_SROA_5]], i8 0, i32 16, i1 false) > ; CHECK-NEXT: [[DATA:%.*]] = load <4 x float>, ptr undef, align 16 > ; CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[DATA]] to <8 x half> > -; CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x half> [[TMP0]] to <8 x i16> > ; CHECK-NEXT: br label [[BB:%.*]] > ; CHECK: bb: > -; CHECK-NEXT: [[B_BLOCKWISE_COPY_SROA_0_SROA_0_0_VEC_EXTRACT:%.*]] = > extractelement <8 x i16> [[TMP1]], i32 0 > -; CHECK-NEXT: [[TMP2:%.*]] = bitcast i16 > [[B_BLOCKWISE_COPY_SROA_0_SROA_0_0_VEC_EXTRACT]] to half > -; CHECK-NEXT: [[B_BLOCKWISE_COPY_SROA_0_SROA_0_2_VEC_EXTRACT:%.*]] = > extractelement <8 x i16> [[TMP1]], i32 1 > -; CHECK-NEXT: [[TMP3:%.*]] = bitcast i16 > [[B_BLOCKWISE_COPY_SROA_0_SROA_0_2_VEC_EXTRACT]] to half > -; CHECK-NEXT: [[B_BLOCKWISE_COPY_SROA_0_SROA_0_4_VEC_EXTRACT:%.*]] = > extractelement <8 x i16> [[TMP1]], i32 2 > -; CHECK-NEXT: [[TMP4:%.*]] = bitcast i16 > [[B_BLOCKWISE_COPY_SROA_0_SROA_0_4_VEC_EXTRACT]] to half > +; CHECK-NEXT: [[B_BLOCKWISE_COPY_SROA_0_0_VEC_EXTRACT:%.*]] = > extractelement <8 x half> [[TMP0]], i32 0 > +; CHECK-NEXT: [[B_BLOCKWISE_COPY_SROA_0_2_VEC_EXTRACT:%.*]] = > extractelement <8 x half> [[TMP0]], i32 1 > +; CHECK-NEXT: [[B_BLOCKWISE_COPY_SROA_0_4_VEC_EXTRACT:%.*]] = > extractelement <8 x half> [[TMP0]], i32 2 > ; CHECK-NEXT: ret void > ; > entry: > @@ -341,32 +335,38 @@ bb: > define amdgpu_kernel void @test_array_vector_no_vector_common_type() #0 { > ; CHECK-LABEL: @test_array_vector_no_vector_common_type( > ; CHECK-NEXT: entry: > +; CHECK-NEXT: [[B_BLOCKWISE_COPY_SROA_0:%.*]] = alloca float, align 16 > +; CHECK-NEXT: [[B_BLOCKWISE_COPY_SROA_4:%.*]] = alloca float, align 4 > +; CHECK-NEXT: [[B_BLOCKWISE_COPY_SROA_7:%.*]] = alloca float, align 8 > +; CHECK-NEXT: [[B_BLOCKWISE_COPY_SROA_10:%.*]] = alloca float, align 4 > +; CHECK-NEXT: [[B_BLOCKWISE_COPY_SROA_13:%.*]] = alloca <8 x half>, align > 16 > +; CHECK-NEXT: call void @llvm.memset.p0.i32(ptr align 16 > [[B_BLOCKWISE_COPY_SROA_0]], i8 0, i32 4, i1 false) > +; CHECK-NEXT: call void @llvm.memset.p0.i32(ptr align 4 > [[B_BLOCKWISE_COPY_SROA_4]], i8 0, i32 4, i1 false) > +; CHECK-NEXT: call void @llvm.memset.p0.i32(ptr align 8 > [[B_BLOCKWISE_COPY_SROA_7]], i8 0, i32 4, i1 false) > +; CHECK-NEXT: call void @llvm.memset.p0.i32(ptr align 4 > [[B_BLOCKWISE_COPY_SROA_10]], i8 0, i32 4, i1 false) > +; CHECK-NEXT: call void @llvm.memset.p0.i32(ptr align 16 > [[B_BLOCKWISE_COPY_SROA_13]], i8 0, i32 16, i1 false) > ; CHECK-NEXT: [[DATA1:%.*]] = load float, ptr undef, align 4 > ; CHECK-NEXT: [[DATA2:%.*]] = load float, ptr undef, align 4 > ; CHECK-NEXT: [[DATA3:%.*]] = load float, ptr undef, align 4 > ; CHECK-NEXT: [[DATA4:%.*]] = load float, ptr undef, align 4 > -; CHECK-NEXT: [[TMP0:%.*]] = bitcast float [[DATA1]] to <4 x i8> > -; CHECK-NEXT: [[TMP1:%.*]] = bitcast float [[DATA2]] to <4 x i8> > -; CHECK-NEXT: [[TMP2:%.*]] = bitcast float [[DATA3]] to <4 x i8> > -; CHECK-NEXT: [[TMP3:%.*]] = bitcast float [[DATA4]] to <4 x i8> > +; CHECK-NEXT: store float [[DATA1]], ptr [[B_BLOCKWISE_COPY_SROA_0]], > align 16 > +; CHECK-NEXT: store float [[DATA2]], ptr [[B_BLOCKWISE_COPY_SROA_4]], > align 4 > +; CHECK-NEXT: store float [[DATA3]], ptr [[B_BLOCKWISE_COPY_SROA_7]], > align 8 > +; CHECK-NEXT: store float [[DATA4]], ptr [[B_BLOCKWISE_COPY_SROA_10]], > align 4 > ; CHECK-NEXT: br label [[BB:%.*]] > ; CHECK: bb: > -; CHECK-NEXT: [[B_BLOCKWISE_COPY_SROA_0_SROA_0_0_VEC_EXTRACT:%.*]] = > shufflevector <4 x i8> [[TMP0]], <4 x i8> poison, <2 x i32> <i32 0, i32 1> > -; CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i8> > [[B_BLOCKWISE_COPY_SROA_0_SROA_0_0_VEC_EXTRACT]] to half > -; CHECK-NEXT: [[B_BLOCKWISE_COPY_SROA_0_SROA_0_2_VEC_EXTRACT:%.*]] = > shufflevector <4 x i8> [[TMP0]], <4 x i8> poison, <2 x i32> <i32 2, i32 3> > -; CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i8> > [[B_BLOCKWISE_COPY_SROA_0_SROA_0_2_VEC_EXTRACT]] to half > -; CHECK-NEXT: [[B_BLOCKWISE_COPY_SROA_4_SROA_0_0_VEC_EXTRACT:%.*]] = > shufflevector <4 x i8> [[TMP1]], <4 x i8> poison, <2 x i32> <i32 0, i32 1> > -; CHECK-NEXT: [[TMP6:%.*]] = bitcast <2 x i8> > [[B_BLOCKWISE_COPY_SROA_4_SROA_0_0_VEC_EXTRACT]] to half > -; CHECK-NEXT: [[B_BLOCKWISE_COPY_SROA_4_SROA_0_2_VEC_EXTRACT:%.*]] = > shufflevector <4 x i8> [[TMP1]], <4 x i8> poison, <2 x i32> <i32 2, i32 3> > -; CHECK-NEXT: [[TMP7:%.*]] = bitcast <2 x i8> > [[B_BLOCKWISE_COPY_SROA_4_SROA_0_2_VEC_EXTRACT]] to half > -; CHECK-NEXT: [[B_BLOCKWISE_COPY_SROA_7_SROA_0_0_VEC_EXTRACT:%.*]] = > shufflevector <4 x i8> [[TMP2]], <4 x i8> poison, <2 x i32> <i32 0, i32 1> > -; CHECK-NEXT: [[TMP8:%.*]] = bitcast <2 x i8> > [[B_BLOCKWISE_COPY_SROA_7_SROA_0_0_VEC_EXTRACT]] to half > -; CHECK-NEXT: [[B_BLOCKWISE_COPY_SROA_7_SROA_0_2_VEC_EXTRACT:%.*]] = > shufflevector <4 x i8> [[TMP2]], <4 x i8> poison, <2 x i32> <i32 2, i32 3> > -; CHECK-NEXT: [[TMP9:%.*]] = bitcast <2 x i8> > [[B_BLOCKWISE_COPY_SROA_7_SROA_0_2_VEC_EXTRACT]] to half > -; CHECK-NEXT: [[B_BLOCKWISE_COPY_SROA_10_SROA_0_0_VEC_EXTRACT:%.*]] = > shufflevector <4 x i8> [[TMP3]], <4 x i8> poison, <2 x i32> <i32 0, i32 1> > -; CHECK-NEXT: [[TMP10:%.*]] = bitcast <2 x i8> > [[B_BLOCKWISE_COPY_SROA_10_SROA_0_0_VEC_EXTRACT]] to half > -; CHECK-NEXT: [[B_BLOCKWISE_COPY_SROA_10_SROA_0_2_VEC_EXTRACT:%.*]] = > shufflevector <4 x i8> [[TMP3]], <4 x i8> poison, <2 x i32> <i32 2, i32 3> > -; CHECK-NEXT: [[TMP11:%.*]] = bitcast <2 x i8> > [[B_BLOCKWISE_COPY_SROA_10_SROA_0_2_VEC_EXTRACT]] to half > +; CHECK-NEXT: > [[B_BLOCKWISE_COPY_SROA_0_0_B_BLOCKWISE_COPY_SROA_0_0_LOAD1:%.*]] = load > half, ptr [[B_BLOCKWISE_COPY_SROA_0]], align 16 > +; CHECK-NEXT: [[B_BLOCKWISE_COPY_SROA_0_2_PTR2_SROA_IDX1:%.*]] = > getelementptr inbounds i8, ptr [[B_BLOCKWISE_COPY_SROA_0]], i64 2 > +; CHECK-NEXT: > [[B_BLOCKWISE_COPY_SROA_0_2_B_BLOCKWISE_COPY_SROA_0_2_LOAD2:%.*]] = load > half, ptr [[B_BLOCKWISE_COPY_SROA_0_2_PTR2_SROA_IDX1]], align 2 > +; CHECK-NEXT: > [[B_BLOCKWISE_COPY_SROA_4_0_B_BLOCKWISE_COPY_SROA_4_4_LOAD3:%.*]] = load > half, ptr [[B_BLOCKWISE_COPY_SROA_4]], align 4 > +; CHECK-NEXT: [[B_BLOCKWISE_COPY_SROA_4_2_PTR4_SROA_IDX:%.*]] = > getelementptr inbounds i8, ptr [[B_BLOCKWISE_COPY_SROA_4]], i64 2 > +; CHECK-NEXT: > [[B_BLOCKWISE_COPY_SROA_4_2_B_BLOCKWISE_COPY_SROA_4_6_LOAD4:%.*]] = load > half, ptr [[B_BLOCKWISE_COPY_SROA_4_2_PTR4_SROA_IDX]], align 2 > +; CHECK-NEXT: > [[B_BLOCKWISE_COPY_SROA_7_0_B_BLOCKWISE_COPY_SROA_7_8_LOAD5:%.*]] = load > half, ptr [[B_BLOCKWISE_COPY_SROA_7]], align 8 > +; CHECK-NEXT: [[B_BLOCKWISE_COPY_SROA_7_2_PTR6_SROA_IDX:%.*]] = > getelementptr inbounds i8, ptr [[B_BLOCKWISE_COPY_SROA_7]], i64 2 > +; CHECK-NEXT: > [[B_BLOCKWISE_COPY_SROA_7_2_B_BLOCKWISE_COPY_SROA_7_10_LOAD6:%.*]] = load > half, ptr [[B_BLOCKWISE_COPY_SROA_7_2_PTR6_SROA_IDX]], align 2 > +; CHECK-NEXT: > [[B_BLOCKWISE_COPY_SROA_10_0_B_BLOCKWISE_COPY_SROA_10_12_LOAD7:%.*]] = load > half, ptr [[B_BLOCKWISE_COPY_SROA_10]], align 4 > +; CHECK-NEXT: [[B_BLOCKWISE_COPY_SROA_10_2_PTR8_SROA_IDX:%.*]] = > getelementptr inbounds i8, ptr [[B_BLOCKWISE_COPY_SROA_10]], i64 2 > +; CHECK-NEXT: > [[B_BLOCKWISE_COPY_SROA_10_2_B_BLOCKWISE_COPY_SROA_10_14_LOAD8:%.*]] = load > half, ptr [[B_BLOCKWISE_COPY_SROA_10_2_PTR8_SROA_IDX]], align 2 > ; CHECK-NEXT: ret void > ; > entry: > > diff --git a/llvm/test/Transforms/SROA/tbaa-struct.ll > b/llvm/test/Transforms/SROA/tbaa-struct.ll > index 3d55b72e91a60..3e9332c5b11c0 100644 > --- a/llvm/test/Transforms/SROA/tbaa-struct.ll > +++ b/llvm/test/Transforms/SROA/tbaa-struct.ll > @@ -10,8 +10,7 @@ declare <2 x float> @foo(ptr %0) > define void @bar(ptr %y2) { > ; CHECK-LABEL: @bar( > ; CHECK-NEXT: [[X14:%.*]] = call <2 x float> @foo(ptr [[Y2:%.*]]) > -; CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x float> [[X14]] to <2 x i32> > -; CHECK-NEXT: store <2 x i32> [[TMP1]], ptr [[Y2]], align 4, !tbaa.struct > !0 > +; CHECK-NEXT: store <2 x float> [[X14]], ptr [[Y2]], align 4, > !tbaa.struct !0 > ; CHECK-NEXT: ret void > ; > %x7 = alloca %vector > > diff --git a/llvm/test/Transforms/SROA/tbaa-struct2.ll > b/llvm/test/Transforms/SROA/tbaa-struct2.ll > index e7d5f4e74de52..1c81fc6163bbc 100644 > --- a/llvm/test/Transforms/SROA/tbaa-struct2.ll > +++ b/llvm/test/Transforms/SROA/tbaa-struct2.ll > @@ -9,11 +9,12 @@ declare double @subcall(double %g, i32 %m) > > define double @bar(ptr %wishart) { > ; CHECK-LABEL: @bar( > +; CHECK-NEXT: [[TMP_SROA_3:%.*]] = alloca [4 x i8], align 4 > ; CHECK-NEXT: [[TMP_SROA_0_0_COPYLOAD:%.*]] = load double, ptr > [[WISHART:%.*]], align 8, !tbaa.struct !0 > ; CHECK-NEXT: [[TMP_SROA_2_0_WISHART_SROA_IDX:%.*]] = getelementptr > inbounds i8, ptr [[WISHART]], i64 8 > ; CHECK-NEXT: [[TMP_SROA_2_0_COPYLOAD:%.*]] = load i32, ptr > [[TMP_SROA_2_0_WISHART_SROA_IDX]], align 8, !tbaa.struct !7 > ; CHECK-NEXT: [[TMP_SROA_3_0_WISHART_SROA_IDX:%.*]] = getelementptr > inbounds i8, ptr [[WISHART]], i64 12 > -; CHECK-NEXT: [[TMP_SROA_3_SROA_0_0_COPYLOAD:%.*]] = load <4 x i8>, ptr > [[TMP_SROA_3_0_WISHART_SROA_IDX]], align 4, !tbaa.struct !8 > +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 > [[TMP_SROA_3]], ptr align 4 [[TMP_SROA_3_0_WISHART_SROA_IDX]], i64 4, i1 > false), !tbaa.struct !8 > ; CHECK-NEXT: [[CALL:%.*]] = call double @subcall(double > [[TMP_SROA_0_0_COPYLOAD]], i32 [[TMP_SROA_2_0_COPYLOAD]]) > ; CHECK-NEXT: ret double [[CALL]] > ; > > diff --git a/llvm/test/Transforms/SROA/vector-promotion.ll > b/llvm/test/Transforms/SROA/vector-promotion.ll > index 569dd05918332..bdf50ea39af33 100644 > --- a/llvm/test/Transforms/SROA/vector-promotion.ll > +++ b/llvm/test/Transforms/SROA/vector-promotion.ll > @@ -567,9 +567,9 @@ define <4 x float> @test12(<4 x i32> %val) { > > define void @swap-8bytes(ptr %x, ptr %y) { > ; CHECK-LABEL: @swap-8bytes( > -; CHECK-NEXT: [[TMP_SROA_0_0_COPYLOAD:%.*]] = load <8 x i8>, ptr > [[X:%.*]], align 1 > +; CHECK-NEXT: [[TMP_SROA_0_0_COPYLOAD:%.*]] = load i64, ptr [[X:%.*]], > align 1 > ; CHECK-NEXT: tail call void @llvm.memcpy.p0.p0.i64(ptr [[X]], ptr > [[Y:%.*]], i64 8, i1 false) > -; CHECK-NEXT: store <8 x i8> [[TMP_SROA_0_0_COPYLOAD]], ptr [[Y]], align 1 > +; CHECK-NEXT: store i64 [[TMP_SROA_0_0_COPYLOAD]], ptr [[Y]], align 1 > ; CHECK-NEXT: ret void > ; > %tmp = alloca [2 x i32] > @@ -581,9 +581,10 @@ define void @swap-8bytes(ptr %x, ptr %y) { > > define void @swap-7bytes(ptr %x, ptr %y) { > ; CHECK-LABEL: @swap-7bytes( > -; CHECK-NEXT: [[TMP_SROA_0_0_COPYLOAD:%.*]] = load <7 x i8>, ptr > [[X:%.*]], align 1 > +; CHECK-NEXT: [[TMP:%.*]] = alloca [7 x i8], align 1 > +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr [[TMP]], ptr > [[X:%.*]], i64 7, i1 false) > ; CHECK-NEXT: tail call void @llvm.memcpy.p0.p0.i64(ptr [[X]], ptr > [[Y:%.*]], i64 7, i1 false) > -; CHECK-NEXT: store <7 x i8> [[TMP_SROA_0_0_COPYLOAD]], ptr [[Y]], align 1 > +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr [[Y]], ptr [[TMP]], > i64 7, i1 false) > ; CHECK-NEXT: ret void > ; > %tmp = alloca [7 x i8] > @@ -595,9 +596,10 @@ define void @swap-7bytes(ptr %x, ptr %y) { > > define void @swap-16bytes(ptr %x, ptr %y) { > ; CHECK-LABEL: @swap-16bytes( > -; CHECK-NEXT: [[TMP_SROA_0_0_COPYLOAD:%.*]] = load <16 x i8>, ptr > [[X:%.*]], align 1 > +; CHECK-NEXT: [[TMP:%.*]] = alloca [2 x i64], align 8 > +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr [[TMP]], ptr > [[X:%.*]], i64 16, i1 false) > ; CHECK-NEXT: tail call void @llvm.memcpy.p0.p0.i64(ptr [[X]], ptr > [[Y:%.*]], i64 16, i1 false) > -; CHECK-NEXT: store <16 x i8> [[TMP_SROA_0_0_COPYLOAD]], ptr [[Y]], align > 1 > +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr [[Y]], ptr [[TMP]], > i64 16, i1 false) > ; CHECK-NEXT: ret void > ; > %tmp = alloca [2 x i64] > @@ -609,9 +611,10 @@ define void @swap-16bytes(ptr %x, ptr %y) { > > define void @swap-15bytes(ptr %x, ptr %y) { > ; CHECK-LABEL: @swap-15bytes( > -; CHECK-NEXT: [[TMP_SROA_0_0_COPYLOAD:%.*]] = load <15 x i8>, ptr > [[X:%.*]], align 1 > +; CHECK-NEXT: [[TMP:%.*]] = alloca [15 x i8], align 1 > +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr [[TMP]], ptr > [[X:%.*]], i64 15, i1 false) > ; CHECK-NEXT: tail call void @llvm.memcpy.p0.p0.i64(ptr [[X]], ptr > [[Y:%.*]], i64 15, i1 false) > -; CHECK-NEXT: store <15 x i8> [[TMP_SROA_0_0_COPYLOAD]], ptr [[Y]], align > 1 > +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr [[Y]], ptr [[TMP]], > i64 15, i1 false) > ; CHECK-NEXT: ret void > ; > %tmp = alloca [15 x i8] > > > > _______________________________________________ > cfe-commits mailing list > cfe-commits@lists.llvm.org > https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits