simeon updated this revision to Diff 547763.
simeon added a comment.
Herald added subscribers: llvm-commits, kmitropoulou, ChuanqiXu, pengfei, asbirlea, haicheng, hiraditya, jvesely.
Herald added a project: LLVM.
The patch now includes the changes needed in the optimization passes that we observed to be most negatively affected by the introduction of the intrinsic, primarily InstCombine and SROA. It is not comprehensive, however: we also observed missed optimization opportunities in other passes, such as GlobalOpt and MergedLoadStoreMotionPass. A large number of hand-written unit tests still need to be updated, but since they are very sensitive to even the smallest change in the definition of the intrinsic, I am postponing that work until we have some initial approval.

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D152275/new/

https://reviews.llvm.org/D152275

Files:
  clang/lib/CodeGen/CGExpr.cpp
  clang/test/CodeGen/2005-01-02-ConstantInits.c
  clang/test/CodeGen/X86/va-arg-sse.c
  clang/test/CodeGen/builtin-align-array.c
  clang/test/CodeGenCXX/amdgcn-automatic-variable.cpp
  llvm/include/llvm/Analysis/PtrUseVisitor.h
  llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
  llvm/include/llvm/IR/InstVisitor.h
  llvm/include/llvm/IR/IntrinsicInst.h
  llvm/include/llvm/IR/Intrinsics.td
  llvm/lib/Analysis/AliasSetTracker.cpp
  llvm/lib/Analysis/BasicAliasAnalysis.cpp
  llvm/lib/Analysis/ConstantFolding.cpp
  llvm/lib/Analysis/InlineCost.cpp
  llvm/lib/Analysis/MemoryDependenceAnalysis.cpp
  llvm/lib/Analysis/MemoryLocation.cpp
  llvm/lib/Analysis/MemorySSA.cpp
  llvm/lib/Analysis/ObjCARCInstKind.cpp
  llvm/lib/Analysis/ValueTracking.cpp
  llvm/lib/CodeGen/CodeGenPrepare.cpp
  llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
  llvm/lib/IR/Value.cpp
  llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
  llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
  llvm/lib/Transforms/Scalar/SROA.cpp
  llvm/lib/Transforms/Utils/Local.cpp
  llvm/test/Transforms/InstCombine/gep-mem-reg-decl.ll
  llvm/test/Transforms/SROA/mem-reg-decl.ll
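For reviewers who want the shape of the change up front: under this patch, Clang no longer emits a single GEP for a constant-array subscript, but wraps the decayed array pointer in the intrinsic and indexes off its result. Every pass that pattern-matches pointer def-use chains now sees an intervening call, which is why the diff touches so many analyses. A minimal sketch distilled from the updated clang tests (value names are illustrative):

  ; before this patch: bar(&Arr[49]) for "int Arr[100]"
  %arrayidx = getelementptr inbounds [100 x i32], ptr @Arr, i64 0, i64 49
  ; with this patch: the pointer is declared to stay inside the 400-byte array
  %arrayidx.bounded = call ptr @llvm.memory.region.decl.p0(ptr @Arr, i64 0, i64 400)
  %arrayidx = getelementptr inbounds i32, ptr %arrayidx.bounded, i64 49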
Index: llvm/test/Transforms/SROA/mem-reg-decl.ll
===================================================================
--- /dev/null
+++ llvm/test/Transforms/SROA/mem-reg-decl.ll
@@ -0,0 +1,42 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -passes='sroa<preserve-cfg>' -S | FileCheck %s --check-prefixes=CHECK,CHECK-PRESERVE-CFG
+; RUN: opt < %s -passes='sroa<modify-cfg>' -S | FileCheck %s --check-prefixes=CHECK,CHECK-MODIFY-CFG
+
+declare ptr @llvm.memory.region.decl.p0(ptr readnone, i64, i64)
+
+; ensure that SROA can "see through" the intrinsic call
+define i32 @test1() {
+; CHECK-LABEL: @test1(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: ret i32 1
+;
+entry:
+  %s = alloca { i32 }, align 8
+  store i32 1, ptr %s, align 8
+  %arrayidx.bounded = call ptr @llvm.memory.region.decl.p0(ptr %s, i64 0, i64 8)
+  %0 = load i32, ptr %arrayidx.bounded, align 8
+  ret i32 %0
+}
+
+; variation of the above test: the allocas are never stored to, so after
+; SROA both loads fold to undef
+define i32 @test2() {
+; CHECK-LABEL: @test2(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 undef, undef
+; CHECK-NEXT: ret i32 [[ADD]]
+;
+entry:
+  %s = alloca [1024 x i32], align 4
+  %t = alloca [1024 x i32], align 4
+  %arrayidx.bounded = call ptr @llvm.memory.region.decl.p0(ptr %s, i64 0, i64 4096)
+  %0 = load i32, ptr %arrayidx.bounded, align 4
+  %arrayidx.bounded1 = call ptr @llvm.memory.region.decl.p0(ptr %t, i64 0, i64 4096)
+  %arrayidx2 = getelementptr inbounds i32, ptr %arrayidx.bounded1, i32 1
+  %1 = load i32, ptr %arrayidx2, align 4
+  %add = add nsw i32 %0, %1
+  ret i32 %add
+}
+
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; CHECK-MODIFY-CFG: {{.*}}
+; CHECK-PRESERVE-CFG: {{.*}}
Index: llvm/test/Transforms/InstCombine/gep-mem-reg-decl.ll
===================================================================
--- /dev/null
+++ llvm/test/Transforms/InstCombine/gep-mem-reg-decl.ll
@@ -0,0 +1,56 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -S < %s -passes=instcombine | FileCheck %s
+
+%struct.S = type { [1024 x i32], [1024 x i32] }
+
+declare ptr @llvm.memory.region.decl.p0(ptr readnone, i64, i64)
+
+; test that a GEP of a GEP can be combined in the presence of
+; intermediate intrinsic calls
+define i32 @test_gep_of_gep(ptr noundef %s, i64 %i) {
+; CHECK-LABEL: @test_gep_of_gep(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[ARRAYIDX21:%.*]] = getelementptr inbounds [[STRUCT_S:%.*]], ptr [[S:%.*]], i64 0, i32 1, i64 [[I:%.*]]
+; CHECK-NEXT: [[ARRAYIDX_BOUNDED:%.*]] = call ptr @llvm.memory.region.decl.p0(ptr nonnull [[ARRAYIDX21]], i64 0, i64 4096)
+; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX_BOUNDED]], align 4
+; CHECK-NEXT: ret i32 [[TMP0]]
+;
+entry:
+  %arrayidx.bounded = call ptr @llvm.memory.region.decl.p0(ptr %s, i64 0, i64 4096)
+  %B = getelementptr inbounds %struct.S, ptr %s, i32 0, i32 1
+  %arrayidx.bounded1 = call ptr @llvm.memory.region.decl.p0(ptr %B, i64 0, i64 4096)
+  %arrayidx2 = getelementptr inbounds [1024 x i32], ptr %arrayidx.bounded1, i64 0, i64 %i
+  %0 = load i32, ptr %arrayidx2, align 4
+  ret i32 %0
+}
+
+; ensure that InstructionCombining.cpp:isAllocSiteRemovable()
+; does not think that pointers may escape through the intrinsic
+define i32 @test_erase_alloc_site(i32 %i) {
+; CHECK-LABEL: @test_erase_alloc_site(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: ret i32 0
+;
+entry:
+  %arr = alloca [1000 x i32], align 8
+  %arrayidx.bounded = call ptr @llvm.memory.region.decl.p0(ptr %arr, i64 0, i64 8000)
+  %arrayidx1 = getelementptr inbounds i32, ptr %arrayidx.bounded, i32 %i
+  store i32 1, ptr %arrayidx1, align 8
+  ret i32 0
+}
+
+; ensure that we can constant-fold a call to the intrinsic,
+; thereby allowing isAllocSiteRemovable() to properly recognize
+; a redundant alloca
+define i32 @test_constant_fold_intrinsic() {
+; CHECK-LABEL: @test_constant_fold_intrinsic(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: ret i32 1
+;
+entry:
+  %s = alloca { i32 }, align 8
+  store i32 1, ptr %s, align 8
+  %arrayidx.bounded = call ptr @llvm.memory.region.decl.p0(ptr %s, i64 0, i64 8)
+  %0 = load i32, ptr %arrayidx.bounded, align 8
+  ret i32 %0
+}
Index: llvm/lib/Transforms/Utils/Local.cpp
===================================================================
--- llvm/lib/Transforms/Utils/Local.cpp
+++ llvm/lib/Transforms/Utils/Local.cpp
@@ -467,6 +467,9 @@
         II->getIntrinsicID() == Intrinsic::launder_invariant_group)
       return true;
 
+    if (II->getIntrinsicID() == Intrinsic::memory_region_decl)
+      return true;
+
     if (II->isLifetimeStartOrEnd()) {
       auto *Arg = II->getArgOperand(1);
       // Lifetime intrinsics are dead when their right-hand is undef.
Index: llvm/lib/Transforms/Scalar/SROA.cpp
===================================================================
--- llvm/lib/Transforms/Scalar/SROA.cpp
+++ llvm/lib/Transforms/Scalar/SROA.cpp
@@ -928,6 +928,13 @@
     return Base::visitAddrSpaceCastInst(ASC);
   }
 
+  void visitMemRegDeclInst(MemRegDeclInst &I) {
+    if (I.use_empty())
+      return markAsDead(I);
+
+    return Base::visitMemRegDeclInst(I);
+  }
+
   void visitGetElementPtrInst(GetElementPtrInst &GEPI) {
     if (GEPI.use_empty())
       return markAsDead(GEPI);
@@ -3748,6 +3755,11 @@
     return false;
   }
 
+  bool visitMemRegDeclInst(MemRegDeclInst &I) {
+    enqueueUsers(I);
+    return false;
+  }
+
  // Fold gep (select cond, ptr1, ptr2) => select cond, gep(ptr1), gep(ptr2)
  bool foldGEPSelect(GetElementPtrInst &GEPI) {
    if (!GEPI.hasAllConstantIndices())
Index: llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
===================================================================
--- llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
+++ llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
@@ -770,6 +770,7 @@
       case Intrinsic::invariant_end:
       case Intrinsic::launder_invariant_group:
       case Intrinsic::assume:
+      case Intrinsic::memory_region_decl:
        return true;
      case Intrinsic::dbg_declare:
      case Intrinsic::dbg_label:
Index: llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
===================================================================
--- llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -2231,6 +2231,23 @@
     if (Instruction *I = visitGEPOfGEP(GEP, Src))
       return I;
 
+  if (auto *SrcIntrCall = dyn_cast<CallBase>(PtrOp); SrcIntrCall &&
+      SrcIntrCall->getIntrinsicID() == Intrinsic::memory_region_decl)
+    if (auto *Src = dyn_cast<GEPOperator>(SrcIntrCall->getArgOperand(0)))
+      if (Instruction *I = visitGEPOfGEP(GEP, Src)) {
+        I->insertInto(GEP.getParent(), GEP.getParent()->getFirstInsertionPt());
+        llvm::Instruction *Call = Builder.CreateCall(
+            Intrinsic::getDeclaration(
+                GEP.getModule(),
+                Intrinsic::memory_region_decl,
+                { SrcIntrCall->getType() }),
+            {I,
+             SrcIntrCall->getArgOperand(1),
+             SrcIntrCall->getArgOperand(2)},
+            "arrayidx.bounded");
+        return replaceInstUsesWith(GEP, Call);
+      }
+
   // Skip if GEP source element type is scalable. The type alloc size is
   // unknown at compile-time.
   if (GEP.getNumIndices() == 1 && !IsGEPSrcEleScalable) {
@@ -2393,6 +2410,7 @@
         continue;
       case Intrinsic::launder_invariant_group:
      case Intrinsic::strip_invariant_group:
+      case Intrinsic::memory_region_decl:
        Users.emplace_back(I);
        Worklist.push_back(I);
        continue;
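To make the new fold above concrete, here is the before/after IR it is intended to produce, mirroring test_gep_of_gep (a sketch, not additional test content): visitGEPOfGEP() combines the two GEPs, and the combined GEP is then re-wrapped in a fresh memory.region.decl call so the bounds annotation survives the fold.

  ; before
  %b = call ptr @llvm.memory.region.decl.p0(ptr %s, i64 0, i64 4096)
  %B = getelementptr inbounds %struct.S, ptr %s, i32 0, i32 1
  %b1 = call ptr @llvm.memory.region.decl.p0(ptr %B, i64 0, i64 4096)
  %idx = getelementptr inbounds [1024 x i32], ptr %b1, i64 0, i64 %i

  ; after
  %idx2 = getelementptr inbounds %struct.S, ptr %s, i64 0, i32 1, i64 %i
  %b2 = call ptr @llvm.memory.region.decl.p0(ptr nonnull %idx2, i64 0, i64 4096)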
Index: llvm/lib/IR/Value.cpp
===================================================================
--- llvm/lib/IR/Value.cpp
+++ llvm/lib/IR/Value.cpp
@@ -675,6 +675,10 @@
           V = Call->getArgOperand(0);
           continue;
         }
+        if (Call->getIntrinsicID() == Intrinsic::memory_region_decl) {
+          V = Call->getArgOperand(0);
+          continue;
+        }
       }
       return V;
     }
Index: llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
===================================================================
--- llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
+++ llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
@@ -1373,6 +1373,7 @@
 
   case Intrinsic::launder_invariant_group:
   case Intrinsic::strip_invariant_group:
+  case Intrinsic::memory_region_decl:
   case Intrinsic::expect: {
     Register ResultReg = getRegForValue(II->getArgOperand(0));
     if (!ResultReg)
Index: llvm/lib/CodeGen/CodeGenPrepare.cpp
===================================================================
--- llvm/lib/CodeGen/CodeGenPrepare.cpp
+++ llvm/lib/CodeGen/CodeGenPrepare.cpp
@@ -2391,7 +2391,8 @@
   }
   case Intrinsic::launder_invariant_group:
-  case Intrinsic::strip_invariant_group: {
+  case Intrinsic::strip_invariant_group:
+  case Intrinsic::memory_region_decl: {
     Value *ArgVal = II->getArgOperand(0);
     auto it = LargeOffsetGEPMap.find(II);
     if (it != LargeOffsetGEPMap.end()) {
Index: llvm/lib/Analysis/ValueTracking.cpp
===================================================================
--- llvm/lib/Analysis/ValueTracking.cpp
+++ llvm/lib/Analysis/ValueTracking.cpp
@@ -5772,6 +5772,7 @@
     switch (Call->getIntrinsicID()) {
     case Intrinsic::launder_invariant_group:
     case Intrinsic::strip_invariant_group:
+    case Intrinsic::memory_region_decl:
    case Intrinsic::aarch64_irg:
    case Intrinsic::aarch64_tagp:
    // The amdgcn_make_buffer_rsrc function does not alter the address of the
Index: llvm/lib/Analysis/ObjCARCInstKind.cpp
===================================================================
--- llvm/lib/Analysis/ObjCARCInstKind.cpp
+++ llvm/lib/Analysis/ObjCARCInstKind.cpp
@@ -181,6 +181,7 @@
     case Intrinsic::lifetime_end:
     case Intrinsic::invariant_start:
     case Intrinsic::invariant_end:
+    case Intrinsic::memory_region_decl:
    // Don't let dbg info affect our results.
    case Intrinsic::dbg_declare:
    case Intrinsic::dbg_value:
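The Value.cpp and ValueTracking.cpp hunks above make pointer stripping treat the intrinsic like launder/strip_invariant_group: the returned pointer aliases its first argument, so the walk continues through the call. A hedged sketch of the intended effect (illustrative IR, not from the patch):

  %a = alloca [100 x i32], align 4
  %p = call ptr @llvm.memory.region.decl.p0(ptr %a, i64 0, i64 400)
  %q = getelementptr inbounds i32, ptr %p, i64 3
  ; stripPointerCasts()/getUnderlyingObject() on %q now reach %a, so the
  ; alloca is still recognized as the underlying object of the access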
Index: llvm/lib/Analysis/MemorySSA.cpp
===================================================================
--- llvm/lib/Analysis/MemorySSA.cpp
+++ llvm/lib/Analysis/MemorySSA.cpp
@@ -294,6 +294,7 @@
     switch (II->getIntrinsicID()) {
     case Intrinsic::invariant_start:
     case Intrinsic::invariant_end:
+    case Intrinsic::memory_region_decl:
    case Intrinsic::assume:
    case Intrinsic::experimental_noalias_scope_decl:
    case Intrinsic::pseudoprobe:
Index: llvm/lib/Analysis/MemoryLocation.cpp
===================================================================
--- llvm/lib/Analysis/MemoryLocation.cpp
+++ llvm/lib/Analysis/MemoryLocation.cpp
@@ -194,6 +194,15 @@
             cast<ConstantInt>(II->getArgOperand(0))->getZExtValue()),
         AATags);
 
+  case Intrinsic::memory_region_decl:
+    assert(ArgIdx == 0 && "Invalid argument index");
+    return MemoryLocation(
+        Arg,
+        LocationSize::precise(
+            cast<ConstantInt>(II->getArgOperand(2))->getZExtValue() -
+            cast<ConstantInt>(II->getArgOperand(1))->getZExtValue()),
+        AATags);
+
   case Intrinsic::masked_load:
     assert(ArgIdx == 0 && "Invalid argument index");
     return MemoryLocation(
Index: llvm/lib/Analysis/MemoryDependenceAnalysis.cpp
===================================================================
--- llvm/lib/Analysis/MemoryDependenceAnalysis.cpp
+++ llvm/lib/Analysis/MemoryDependenceAnalysis.cpp
@@ -160,6 +160,9 @@
     // These intrinsics don't really modify the memory, but returning Mod
     // will allow them to be handled conservatively.
     return ModRefInfo::Mod;
+  case Intrinsic::memory_region_decl:
+    Loc = MemoryLocation::getForArgument(II, 0, TLI);
+    return ModRefInfo::Ref;
  case Intrinsic::masked_load:
    Loc = MemoryLocation::getForArgument(II, 0, TLI);
    return ModRefInfo::Ref;
Index: llvm/lib/Analysis/InlineCost.cpp
===================================================================
--- llvm/lib/Analysis/InlineCost.cpp
+++ llvm/lib/Analysis/InlineCost.cpp
@@ -2245,6 +2245,7 @@
     return false;
   case Intrinsic::launder_invariant_group:
   case Intrinsic::strip_invariant_group:
+  case Intrinsic::memory_region_decl:
    if (auto *SROAArg = getSROAArgForValueOrNull(II->getOperand(0)))
      SROAArgValues[II] = SROAArg;
    return true;
Index: llvm/lib/Analysis/ConstantFolding.cpp
===================================================================
--- llvm/lib/Analysis/ConstantFolding.cpp
+++ llvm/lib/Analysis/ConstantFolding.cpp
@@ -1500,6 +1500,7 @@
   case Intrinsic::fshr:
   case Intrinsic::launder_invariant_group:
   case Intrinsic::strip_invariant_group:
+  case Intrinsic::memory_region_decl:
  case Intrinsic::masked_load:
  case Intrinsic::get_active_lane_mask:
  case Intrinsic::abs:
Index: llvm/lib/Analysis/BasicAliasAnalysis.cpp
===================================================================
--- llvm/lib/Analysis/BasicAliasAnalysis.cpp
+++ llvm/lib/Analysis/BasicAliasAnalysis.cpp
@@ -1506,6 +1506,30 @@
       if (isEscapeSource(O2) &&
           AAQI.CI->isNotCapturedBeforeOrAt(O1, cast<Instruction>(O2)))
         return AliasResult::NoAlias;
+
+    // If an underlying value is a call to a memory region declaration
+    // intrinsic, extract the GEP and infer upper bounds on the MemoryLocation
+    // size using the end offset of the region.
+    auto *CB1 = dyn_cast<CallBase>(O1), *CB2 = dyn_cast<CallBase>(O2);
+    bool FoundMemRegDecl = false;
+    if (CB1 && CB1->getIntrinsicID() == Intrinsic::memory_region_decl) {
+      FoundMemRegDecl = true;
+      V1 = CB1->getArgOperand(0);
+      if (auto *End1 = dyn_cast<ConstantInt>(CB1->getArgOperand(2)))
+        if (auto End1Val = End1->getZExtValue(); End1Val > 0 &&
+            V1Size.hasValue() && End1Val < V1Size.getValue())
+          V1Size = LocationSize::upperBound(End1Val);
+    }
+    if (CB2 && CB2->getIntrinsicID() == Intrinsic::memory_region_decl) {
+      FoundMemRegDecl = true;
+      V2 = CB2->getArgOperand(0);
+      if (auto *End2 = dyn_cast<ConstantInt>(CB2->getArgOperand(2)))
+        if (auto End2Val = End2->getZExtValue(); End2Val > 0 &&
+            V2Size.hasValue() && End2Val < V2Size.getValue())
+          V2Size = LocationSize::upperBound(End2Val);
+    }
+    if (FoundMemRegDecl)
+      return aliasCheck(V1, V1Size, V2, V2Size, AAQI, CtxI);
   }
 
   // If the size of one access is larger than the entire object on the other
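As a sanity check on the intent of the BasicAA change, this is the kind of query it is meant to improve (a hedged sketch, not a test from this patch): each declared end offset clamps the corresponding location size to an upper bound, after which the recursive aliasCheck() on the underlying GEPs can reason about the two halves of the array.

  %arr = alloca [1024 x i8]
  %lo = call ptr @llvm.memory.region.decl.p0(ptr %arr, i64 0, i64 512)
  %hi.gep = getelementptr inbounds i8, ptr %arr, i64 512
  %hi = call ptr @llvm.memory.region.decl.p0(ptr %hi.gep, i64 0, i64 512)
  ; accesses through %lo and %hi whose reported size would exceed 512 bytes
  ; are clamped to that bound, so [0, 512) and [512, 1024) can be separated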
Index: llvm/lib/Analysis/AliasSetTracker.cpp
===================================================================
--- llvm/lib/Analysis/AliasSetTracker.cpp
+++ llvm/lib/Analysis/AliasSetTracker.cpp
@@ -412,6 +412,7 @@
     case Intrinsic::experimental_noalias_scope_decl:
     case Intrinsic::sideeffect:
     case Intrinsic::pseudoprobe:
+    case Intrinsic::memory_region_decl:
      return;
    }
  }
Index: llvm/include/llvm/IR/Intrinsics.td
===================================================================
--- llvm/include/llvm/IR/Intrinsics.td
+++ llvm/include/llvm/IR/Intrinsics.td
@@ -1544,6 +1544,14 @@
                                     [LLVMMatchType<0>],
                                     [IntrSpeculatable, IntrNoMem, IntrWillReturn]>;
 
+// Declares that the returned pointer (the first argument),
+// and any pointer that is (transitively) def-use based on that pointer,
+// points into the memory region [ptr+begin_offset, ptr+end_offset),
+// or is poison otherwise.
+def int_memory_region_decl : DefaultAttrsIntrinsic<[llvm_anyptr_ty],
+    [LLVMMatchType<0> /*ptr*/, llvm_i64_ty /*begin_offset*/, llvm_i64_ty /*end_offset*/],
+    [IntrNoMem, IntrSpeculatable, ReadNone<ArgIndex<0>>]>;
+
 //===------------------------ Stackmap Intrinsics -------------------------===//
 //
 def int_experimental_stackmap : DefaultAttrsIntrinsic<[],
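Reading the definition above, the contract can be restated with a small IR sketch (illustrative only, not part of the patch):

  %p = call ptr @llvm.memory.region.decl.p0(ptr %base, i64 0, i64 400)
  ; %q is derived from %p and points to byte offset 396, inside [0, 400),
  ; so this load is well-defined
  %q = getelementptr inbounds i32, ptr %p, i64 99
  %v = load i32, ptr %q, align 4
  ; a pointer derived from %p at byte offset 400 falls outside the declared
  ; region, so it is poison and loading through it would be undefined
  %r = getelementptr i32, ptr %p, i64 100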
Index: llvm/include/llvm/IR/IntrinsicInst.h
===================================================================
--- llvm/include/llvm/IR/IntrinsicInst.h
+++ llvm/include/llvm/IR/IntrinsicInst.h
@@ -23,6 +23,7 @@
 #ifndef LLVM_IR_INTRINSICINST_H
 #define LLVM_IR_INTRINSICINST_H
 
+#include "llvm/IR/Intrinsics.h"
 #include "llvm/IR/Constants.h"
 #include "llvm/IR/DebugInfoMetadata.h"
 #include "llvm/IR/DerivedTypes.h"
@@ -100,6 +101,7 @@
     case Intrinsic::invariant_end:
     case Intrinsic::lifetime_start:
     case Intrinsic::lifetime_end:
+    case Intrinsic::memory_region_decl:
    case Intrinsic::experimental_noalias_scope_decl:
    case Intrinsic::objectsize:
    case Intrinsic::ptr_annotation:
@@ -1387,6 +1389,18 @@
   Value *getSrc() const { return const_cast<Value *>(getArgOperand(1)); }
 };
 
+class MemRegDeclInst : public IntrinsicInst {
+public:
+  static bool classof(const IntrinsicInst *I) {
+    return I->getIntrinsicID() == Intrinsic::memory_region_decl;
+  }
+  static bool classof(const Value *V) {
+    return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V));
+  }
+
+  Value *getPtr() const { return const_cast<Value *>(getArgOperand(0)); }
+};
+
 /// A base class for all instrprof intrinsics.
 class InstrProfInstBase : public IntrinsicInst {
 public:
Index: llvm/include/llvm/IR/InstVisitor.h
===================================================================
--- llvm/include/llvm/IR/InstVisitor.h
+++ llvm/include/llvm/IR/InstVisitor.h
@@ -10,6 +10,7 @@
 #ifndef LLVM_IR_INSTVISITOR_H
 #define LLVM_IR_INSTVISITOR_H
 
+#include "llvm/IR/Intrinsics.h"
 #include "llvm/IR/Function.h"
 #include "llvm/IR/Instructions.h"
 #include "llvm/IR/IntrinsicInst.h"
@@ -216,6 +217,7 @@
   RetTy visitVAStartInst(VAStartInst &I) { DELEGATE(IntrinsicInst); }
   RetTy visitVAEndInst(VAEndInst &I) { DELEGATE(IntrinsicInst); }
   RetTy visitVACopyInst(VACopyInst &I) { DELEGATE(IntrinsicInst); }
+  RetTy visitMemRegDeclInst(MemRegDeclInst &I) { DELEGATE(IntrinsicInst); }
   RetTy visitIntrinsicInst(IntrinsicInst &I) { DELEGATE(CallInst); }
   RetTy visitCallInst(CallInst &I) { DELEGATE(CallBase); }
   RetTy visitInvokeInst(InvokeInst &I) { DELEGATE(CallBase); }
@@ -298,6 +300,8 @@
     case Intrinsic::vastart: DELEGATE(VAStartInst);
     case Intrinsic::vaend: DELEGATE(VAEndInst);
     case Intrinsic::vacopy: DELEGATE(VACopyInst);
+    case Intrinsic::memory_region_decl:
+      DELEGATE(MemRegDeclInst);
    case Intrinsic::not_intrinsic: break;
    }
  }
Index: llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
===================================================================
--- llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
+++ llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
@@ -683,6 +683,7 @@
     case Intrinsic::invariant_end:
     case Intrinsic::launder_invariant_group:
     case Intrinsic::strip_invariant_group:
+    case Intrinsic::memory_region_decl:
    case Intrinsic::is_constant:
    case Intrinsic::lifetime_start:
    case Intrinsic::lifetime_end:
Index: llvm/include/llvm/Analysis/PtrUseVisitor.h
===================================================================
--- llvm/include/llvm/Analysis/PtrUseVisitor.h
+++ llvm/include/llvm/Analysis/PtrUseVisitor.h
@@ -285,6 +285,10 @@
     }
   }
 
+  void visitMemRegDeclInst(MemRegDeclInst &I) {
+    enqueueUsers(I);
+  }
+
   // Generically, arguments to calls and invokes escape the pointer to some
   // other function. Mark that.
   void visitCallBase(CallBase &CB) {
Index: clang/test/CodeGenCXX/amdgcn-automatic-variable.cpp
===================================================================
--- clang/test/CodeGenCXX/amdgcn-automatic-variable.cpp
+++ clang/test/CodeGenCXX/amdgcn-automatic-variable.cpp
@@ -31,7 +31,9 @@
 // CHECK-NEXT: store i32 1, ptr [[LV1_ASCAST]], align 4
 // CHECK-NEXT: store i32 2, ptr [[LV2_ASCAST]], align 4
 // CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [100 x i32], ptr [[LA_ASCAST]], i64 0, i64 0
-// CHECK-NEXT: store i32 3, ptr [[ARRAYIDX]], align 4
+// CHECK-NEXT: [[ARRAYIDX_BOUNDED:%.*]] = call ptr @llvm.memory.region.decl.p0(ptr [[ARRAYIDX]], i64 0, i64 400)
+// CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX_BOUNDED]], i64 0
+// CHECK-NEXT: store i32 3, ptr [[ARRAYIDX1]], align 4
 // CHECK-NEXT: store ptr [[LV1_ASCAST]], ptr [[LP1_ASCAST]], align 8
 // CHECK-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [100 x i32], ptr [[LA_ASCAST]], i64 0, i64 0
 // CHECK-NEXT: store ptr [[ARRAYDECAY]], ptr [[LP2_ASCAST]], align 8
@@ -64,7 +66,25 @@
 class A {
   int x;
 public:
+// CHECK-LABEL: @_ZN1AC1Ev(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-NEXT: [[THIS_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[THIS_ADDR]] to ptr
+// CHECK-NEXT: store ptr [[THIS:%.*]], ptr [[THIS_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR_ASCAST]], align 8
+// CHECK-NEXT: call void @_ZN1AC2Ev(ptr noundef nonnull align 4 dereferenceable(4) [[THIS1]])
+// CHECK-NEXT: ret void
+//
   A():x(0) {}
+// CHECK-LABEL: @_ZN1AD1Ev(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-NEXT: [[THIS_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[THIS_ADDR]] to ptr
+// CHECK-NEXT: store ptr [[THIS:%.*]], ptr [[THIS_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR_ASCAST]], align 8
+// CHECK-NEXT: call void @_ZN1AD2Ev(ptr noundef nonnull align 4 dereferenceable(4) [[THIS1]]) #[[ATTR4:[0-9]+]]
+// CHECK-NEXT: ret void
+//
   ~A() { destroy(x); }
Index: clang/test/CodeGen/builtin-align-array.c
===================================================================
--- clang/test/CodeGen/builtin-align-array.c
+++ clang/test/CodeGen/builtin-align-array.c
@@ -7,23 +7,29 @@
 // CHECK-LABEL: @test_array(
 // CHECK-NEXT: entry:
 // CHECK-NEXT: [[BUF:%.*]] = alloca [1024 x i8], align 16
-// CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1024 x i8], ptr [[BUF]], i64 0, i64 44
-// CHECK-NEXT: [[INTPTR:%.*]] = ptrtoint ptr [[ARRAYIDX]] to i64
+// CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1024 x i8], ptr [[BUF]], i64 0, i64 0
+// CHECK-NEXT: [[ARRAYIDX_BOUNDED:%.*]] = call ptr @llvm.memory.region.decl.p0(ptr [[ARRAYIDX]], i64 0, i64 1024)
+// CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[ARRAYIDX_BOUNDED]], i64 44
+// CHECK-NEXT: [[INTPTR:%.*]] = ptrtoint ptr [[ARRAYIDX1]] to i64
 // CHECK-NEXT: [[ALIGNED_INTPTR:%.*]] = and i64 [[INTPTR]], -16
 // CHECK-NEXT: [[DIFF:%.*]] = sub i64 [[ALIGNED_INTPTR]], [[INTPTR]]
-// CHECK-NEXT: [[ALIGNED_RESULT:%.*]] = getelementptr inbounds i8, ptr [[ARRAYIDX]], i64 [[DIFF]]
+// CHECK-NEXT: [[ALIGNED_RESULT:%.*]] = getelementptr inbounds i8, ptr [[ARRAYIDX1]], i64 [[DIFF]]
 // CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[ALIGNED_RESULT]], i64 16) ]
 // CHECK-NEXT: [[CALL:%.*]] = call i32 @func(ptr noundef [[ALIGNED_RESULT]])
-// CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [1024 x i8], ptr [[BUF]], i64 0, i64 22
-// CHECK-NEXT: [[INTPTR2:%.*]] = ptrtoint ptr [[ARRAYIDX1]] to i64
-// CHECK-NEXT: [[OVER_BOUNDARY:%.*]] = add i64 [[INTPTR2]], 31
-// CHECK-NEXT: [[ALIGNED_INTPTR4:%.*]] = and i64 [[OVER_BOUNDARY]], -32
-// CHECK-NEXT: [[DIFF5:%.*]] = sub i64 [[ALIGNED_INTPTR4]], [[INTPTR2]]
-// CHECK-NEXT: [[ALIGNED_RESULT6:%.*]] = getelementptr inbounds i8, ptr [[ARRAYIDX1]], i64 [[DIFF5]]
-// CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[ALIGNED_RESULT6]], i64 32) ]
-// CHECK-NEXT: [[CALL7:%.*]] = call i32 @func(ptr noundef [[ALIGNED_RESULT6]])
-// CHECK-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [1024 x i8], ptr [[BUF]], i64 0, i64 16
-// CHECK-NEXT: [[SRC_ADDR:%.*]] = ptrtoint ptr [[ARRAYIDX8]] to i64
+// CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds [1024 x i8], ptr [[BUF]], i64 0, i64 0
+// CHECK-NEXT: [[ARRAYIDX_BOUNDED3:%.*]] = call ptr @llvm.memory.region.decl.p0(ptr [[ARRAYIDX2]], i64 0, i64 1024)
+// CHECK-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i8, ptr [[ARRAYIDX_BOUNDED3]], i64 22
+// CHECK-NEXT: [[INTPTR5:%.*]] = ptrtoint ptr [[ARRAYIDX4]] to i64
+// CHECK-NEXT: [[OVER_BOUNDARY:%.*]] = add i64 [[INTPTR5]], 31
+// CHECK-NEXT: [[ALIGNED_INTPTR7:%.*]] = and i64 [[OVER_BOUNDARY]], -32
+// CHECK-NEXT: [[DIFF8:%.*]] = sub i64 [[ALIGNED_INTPTR7]], [[INTPTR5]]
+// CHECK-NEXT: [[ALIGNED_RESULT9:%.*]] = getelementptr inbounds i8, ptr [[ARRAYIDX4]], i64 [[DIFF8]]
+// CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[ALIGNED_RESULT9]], i64 32) ]
+// CHECK-NEXT: [[CALL10:%.*]] = call i32 @func(ptr noundef [[ALIGNED_RESULT9]])
+// CHECK-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [1024 x i8], ptr [[BUF]], i64 0, i64 0
+// CHECK-NEXT: [[ARRAYIDX_BOUNDED12:%.*]] = call ptr @llvm.memory.region.decl.p0(ptr [[ARRAYIDX11]], i64 0, i64 1024)
+// CHECK-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds i8, ptr [[ARRAYIDX_BOUNDED12]], i64 16
+// CHECK-NEXT: [[SRC_ADDR:%.*]] = ptrtoint ptr [[ARRAYIDX13]] to i64
 // CHECK-NEXT: [[SET_BITS:%.*]] = and i64 [[SRC_ADDR]], 63
 // CHECK-NEXT: [[IS_ALIGNED:%.*]] = icmp eq i64 [[SET_BITS]], 0
 // CHECK-NEXT: [[CONV:%.*]] = zext i1 [[IS_ALIGNED]] to i32
@@ -39,21 +45,25 @@
 // CHECK-LABEL: @test_array_should_not_mask(
 // CHECK-NEXT: entry:
 // CHECK-NEXT: [[BUF:%.*]] = alloca [1024 x i8], align 32
-// CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1024 x i8], ptr [[BUF]], i64 0, i64 64
-// CHECK-NEXT: [[INTPTR:%.*]] = ptrtoint ptr [[ARRAYIDX]] to i64
+// CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1024 x i8], ptr [[BUF]], i64 0, i64 0
+// CHECK-NEXT: [[ARRAYIDX_BOUNDED:%.*]] = call ptr @llvm.memory.region.decl.p0(ptr [[ARRAYIDX]], i64 0, i64 1024)
+// CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[ARRAYIDX_BOUNDED]], i64 64
+// CHECK-NEXT: [[INTPTR:%.*]] = ptrtoint ptr [[ARRAYIDX1]] to i64
 // CHECK-NEXT: [[ALIGNED_INTPTR:%.*]] = and i64 [[INTPTR]], -16
 // CHECK-NEXT: [[DIFF:%.*]] = sub i64 [[ALIGNED_INTPTR]], [[INTPTR]]
-// CHECK-NEXT: [[ALIGNED_RESULT:%.*]] = getelementptr inbounds i8, ptr [[ARRAYIDX]], i64 [[DIFF]]
+// CHECK-NEXT: [[ALIGNED_RESULT:%.*]] = getelementptr inbounds i8, ptr [[ARRAYIDX1]], i64 [[DIFF]]
 // CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[ALIGNED_RESULT]], i64 16) ]
 // CHECK-NEXT: [[CALL:%.*]] = call i32 @func(ptr noundef [[ALIGNED_RESULT]])
-// CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [1024 x i8], ptr [[BUF]], i64 0, i64 32
-// CHECK-NEXT: [[INTPTR2:%.*]] = ptrtoint ptr [[ARRAYIDX1]] to i64
-// CHECK-NEXT: [[OVER_BOUNDARY:%.*]] = add i64 [[INTPTR2]], 31
-// CHECK-NEXT: [[ALIGNED_INTPTR4:%.*]] = and i64 [[OVER_BOUNDARY]], -32
-// CHECK-NEXT: [[DIFF5:%.*]] = sub i64 [[ALIGNED_INTPTR4]], [[INTPTR2]]
-// CHECK-NEXT: [[ALIGNED_RESULT6:%.*]] = getelementptr inbounds i8, ptr [[ARRAYIDX1]], i64 [[DIFF5]]
-// CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[ALIGNED_RESULT6]], i64 32) ]
-// CHECK-NEXT: [[CALL7:%.*]] = call i32 @func(ptr noundef [[ALIGNED_RESULT6]])
+// CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds [1024 x i8], ptr [[BUF]], i64 0, i64 0
+// CHECK-NEXT: [[ARRAYIDX_BOUNDED3:%.*]] = call ptr @llvm.memory.region.decl.p0(ptr [[ARRAYIDX2]], i64 0, i64 1024)
+// CHECK-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i8, ptr [[ARRAYIDX_BOUNDED3]], i64 32
+// CHECK-NEXT: [[INTPTR5:%.*]] = ptrtoint ptr [[ARRAYIDX4]] to i64
+// CHECK-NEXT: [[OVER_BOUNDARY:%.*]] = add i64 [[INTPTR5]], 31
+// CHECK-NEXT: [[ALIGNED_INTPTR7:%.*]] = and i64 [[OVER_BOUNDARY]], -32
+// CHECK-NEXT: [[DIFF8:%.*]] = sub i64 [[ALIGNED_INTPTR7]], [[INTPTR5]]
+// CHECK-NEXT: [[ALIGNED_RESULT9:%.*]] = getelementptr inbounds i8, ptr [[ARRAYIDX4]], i64 [[DIFF8]]
+// CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[ALIGNED_RESULT9]], i64 32) ]
+// CHECK-NEXT: [[CALL10:%.*]] = call i32 @func(ptr noundef [[ALIGNED_RESULT9]])
 // CHECK-NEXT: ret i32 1
 //
 int test_array_should_not_mask(void) {
Index: clang/test/CodeGen/X86/va-arg-sse.c
===================================================================
--- clang/test/CodeGen/X86/va-arg-sse.c
+++ clang/test/CodeGen/X86/va-arg-sse.c
@@ -22,28 +22,30 @@
 // CHECK-NEXT: store i32 0, ptr [[K]], align 4
 // CHECK-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [1 x %struct.__va_list_tag], ptr [[AP]], i64 0, i64 0
 // CHECK-NEXT: call void @llvm.va_start(ptr [[ARRAYDECAY]])
-// CHECK-NEXT: store ptr getelementptr inbounds ([5 x %struct.S], ptr @a, i64 0, i64 2), ptr [[P]], align 8
-// CHECK-NEXT: [[ARRAYDECAY2:%.*]] = getelementptr inbounds [1 x %struct.__va_list_tag], ptr [[AP]], i64 0, i64 0
-// CHECK-NEXT: [[FP_OFFSET_P:%.*]] = getelementptr inbounds [[STRUCT___VA_LIST_TAG:%.*]], ptr [[ARRAYDECAY2]], i32 0, i32 1
+// CHECK-NEXT: [[ARRAYIDX_BOUNDED:%.*]] = call ptr @llvm.memory.region.decl.p0(ptr @a, i64 0, i64 60)
+// CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYIDX_BOUNDED]], i64 2
+// CHECK-NEXT: store ptr [[ARRAYIDX]], ptr [[P]], align 8
+// CHECK-NEXT: [[ARRAYDECAY1:%.*]] = getelementptr inbounds [1 x %struct.__va_list_tag], ptr [[AP]], i64 0, i64 0
+// CHECK-NEXT: [[FP_OFFSET_P:%.*]] = getelementptr inbounds [[STRUCT___VA_LIST_TAG:%.*]], ptr [[ARRAYDECAY1]], i32 0, i32 1
 // CHECK-NEXT: [[FP_OFFSET:%.*]] = load i32, ptr [[FP_OFFSET_P]], align 4
 // CHECK-NEXT: [[FITS_IN_FP:%.*]] = icmp ule i32 [[FP_OFFSET]], 144
 // CHECK-NEXT: br i1 [[FITS_IN_FP]], label [[VAARG_IN_REG:%.*]], label [[VAARG_IN_MEM:%.*]]
 // CHECK: vaarg.in_reg:
-// CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT___VA_LIST_TAG]], ptr [[ARRAYDECAY2]], i32 0, i32 3
+// CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT___VA_LIST_TAG]], ptr [[ARRAYDECAY1]], i32 0, i32 3
 // CHECK-NEXT: [[REG_SAVE_AREA:%.*]] = load ptr, ptr [[TMP0]], align 16
 // CHECK-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[REG_SAVE_AREA]], i32 [[FP_OFFSET]]
 // CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 16
-// CHECK-NEXT: [[TMP5:%.*]] = load <2 x float>, ptr [[TMP1]], align 16
-// CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds { <2 x float>, float }, ptr [[TMP]], i32 0, i32 0
-// CHECK-NEXT: store <2 x float> [[TMP5]], ptr [[TMP6]], align 4
-// CHECK-NEXT: [[TMP8:%.*]] = load float, ptr [[TMP2]], align 16
-// CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds { <2 x float>, float }, ptr [[TMP]], i32 0, i32 1
-// CHECK-NEXT: store float [[TMP8]], ptr [[TMP9]], align 4
-// CHECK-NEXT: [[TMP11:%.*]] = add i32 [[FP_OFFSET]], 32
-// CHECK-NEXT: store i32 [[TMP11]], ptr [[FP_OFFSET_P]], align 4
+// CHECK-NEXT: [[TMP3:%.*]] = load <2 x float>, ptr [[TMP1]], align 16
+// CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds { <2 x float>, float }, ptr [[TMP]], i32 0, i32 0
+// CHECK-NEXT: store <2 x float> [[TMP3]], ptr [[TMP4]], align 4
+// CHECK-NEXT: [[TMP5:%.*]] = load float, ptr [[TMP2]], align 16
+// CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds { <2 x float>, float }, ptr [[TMP]], i32 0, i32 1
+// CHECK-NEXT: store float [[TMP5]], ptr [[TMP6]], align 4
+// CHECK-NEXT: [[TMP7:%.*]] = add i32 [[FP_OFFSET]], 32
+// CHECK-NEXT: store i32 [[TMP7]], ptr [[FP_OFFSET_P]], align 4
 // CHECK-NEXT: br label [[VAARG_END:%.*]]
 // CHECK: vaarg.in_mem:
-// CHECK-NEXT: [[OVERFLOW_ARG_AREA_P:%.*]] = getelementptr inbounds [[STRUCT___VA_LIST_TAG]], ptr [[ARRAYDECAY2]], i32 0, i32 2
+// CHECK-NEXT: [[OVERFLOW_ARG_AREA_P:%.*]] = getelementptr inbounds [[STRUCT___VA_LIST_TAG]], ptr [[ARRAYDECAY1]], i32 0, i32 2
 // CHECK-NEXT: [[OVERFLOW_ARG_AREA:%.*]] = load ptr, ptr [[OVERFLOW_ARG_AREA_P]], align 8
 // CHECK-NEXT: [[OVERFLOW_ARG_AREA_NEXT:%.*]] = getelementptr i8, ptr [[OVERFLOW_ARG_AREA]], i32 16
 // CHECK-NEXT: store ptr [[OVERFLOW_ARG_AREA_NEXT]], ptr [[OVERFLOW_ARG_AREA_P]], align 8
@@ -51,20 +53,24 @@
 // CHECK: vaarg.end:
 // CHECK-NEXT: [[VAARG_ADDR:%.*]] = phi ptr [ [[TMP]], [[VAARG_IN_REG]] ], [ [[OVERFLOW_ARG_AREA]], [[VAARG_IN_MEM]] ]
 // CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARG]], ptr align 4 [[VAARG_ADDR]], i64 12, i1 false)
-// CHECK-NEXT: [[ARRAYDECAY3:%.*]] = getelementptr inbounds [1 x %struct.__va_list_tag], ptr [[AP]], i64 0, i64 0
-// CHECK-NEXT: call void @llvm.va_end(ptr [[ARRAYDECAY3]])
-// CHECK-NEXT: [[TMP15:%.*]] = load ptr, ptr [[P]], align 8
-// CHECK-NEXT: [[TOBOOL:%.*]] = icmp ne ptr [[TMP15]], null
+// CHECK-NEXT: [[ARRAYDECAY2:%.*]] = getelementptr inbounds [1 x %struct.__va_list_tag], ptr [[AP]], i64 0, i64 0
+// CHECK-NEXT: call void @llvm.va_end(ptr [[ARRAYDECAY2]])
+// CHECK-NEXT: [[TMP8:%.*]] = load ptr, ptr [[P]], align 8
+// CHECK-NEXT: [[TOBOOL:%.*]] = icmp ne ptr [[TMP8]], null
 // CHECK-NEXT: br i1 [[TOBOOL]], label [[LAND_LHS_TRUE:%.*]], label [[IF_END:%.*]]
 // CHECK: land.lhs.true:
-// CHECK-NEXT: [[TMP16:%.*]] = load ptr, ptr [[P]], align 8
-// CHECK-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[TMP16]], i32 0, i32 0
-// CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x float], ptr [[A]], i64 0, i64 2
-// CHECK-NEXT: [[TMP17:%.*]] = load float, ptr [[ARRAYIDX]], align 4
-// CHECK-NEXT: [[A5:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARG]], i32 0, i32 0
-// CHECK-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [3 x float], ptr [[A5]], i64 0, i64 2
-// CHECK-NEXT: [[TMP18:%.*]] = load float, ptr [[ARRAYIDX6]], align 4
-// CHECK-NEXT: [[CMP:%.*]] = fcmp une float [[TMP17]], [[TMP18]]
+// CHECK-NEXT: [[TMP9:%.*]] = load ptr, ptr [[P]], align 8
+// CHECK-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[TMP9]], i32 0, i32 0
+// CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds [3 x float], ptr [[A]], i64 0, i64 0
+// CHECK-NEXT: [[ARRAYIDX_BOUNDED4:%.*]] = call ptr @llvm.memory.region.decl.p0(ptr [[ARRAYIDX3]], i64 0, i64 12)
+// CHECK-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds float, ptr [[ARRAYIDX_BOUNDED4]], i64 2
+// CHECK-NEXT: [[TMP10:%.*]] = load float, ptr [[ARRAYIDX5]], align 4
+// CHECK-NEXT: [[A6:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARG]], i32 0, i32 0
+// CHECK-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [3 x float], ptr [[A6]], i64 0, i64 0
+// CHECK-NEXT: [[ARRAYIDX_BOUNDED8:%.*]] = call ptr @llvm.memory.region.decl.p0(ptr [[ARRAYIDX7]], i64 0, i64 12)
+// CHECK-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds float, ptr [[ARRAYIDX_BOUNDED8]], i64 2
+// CHECK-NEXT: [[TMP11:%.*]] = load float, ptr [[ARRAYIDX9]], align 4
+// CHECK-NEXT: [[CMP:%.*]] = fcmp une float [[TMP10]], [[TMP11]]
 // CHECK-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END]]
 // CHECK: if.then:
 // CHECK-NEXT: store i32 0, ptr [[RETVAL]], align 4
@@ -73,8 +79,8 @@
 // CHECK-NEXT: store i32 1, ptr [[RETVAL]], align 4
 // CHECK-NEXT: br label [[RETURN]]
 // CHECK: return:
-// CHECK-NEXT: [[TMP19:%.*]] = load i32, ptr [[RETVAL]], align 4
-// CHECK-NEXT: ret i32 [[TMP19]]
+// CHECK-NEXT: [[TMP12:%.*]] = load i32, ptr [[RETVAL]], align 4
+// CHECK-NEXT: ret i32 [[TMP12]]
 //
 int check (int z, ...) {
Index: clang/test/CodeGen/2005-01-02-ConstantInits.c
===================================================================
--- clang/test/CodeGen/2005-01-02-ConstantInits.c
+++ clang/test/CodeGen/2005-01-02-ConstantInits.c
@@ -7,13 +7,6 @@
 struct X { int a[2]; };
 extern int bar();
-//.
-// CHECK: @test.i23 = internal global i32 4, align 4
-// CHECK: @i = global i32 4, align 4
-// CHECK: @Arr = global [100 x i32] zeroinitializer, align 16
-// CHECK: @foo2.X = internal global ptr getelementptr (i8, ptr @Arr, i64 196), align 8
-// CHECK: @foo2.i23 = internal global i32 0, align 4
-//.
 // CHECK-LABEL: define {{[^@]+}}@test
 // CHECK-SAME: () #[[ATTR0:[0-9]+]] {
 // CHECK-NEXT: entry:
@@ -34,12 +27,15 @@
 // CHECK-NEXT: entry:
 // CHECK-NEXT: [[I_ADDR:%.*]] = alloca i32, align 4
 // CHECK-NEXT: store i32 [[I]], ptr [[I_ADDR]], align 4
-// CHECK-NEXT: [[CALL:%.*]] = call i32 (ptr, ...) @bar(ptr noundef getelementptr inbounds ([100 x i32], ptr @Arr, i64 0, i64 49))
+// CHECK-NEXT: [[ARRAYIDX_BOUNDED:%.*]] = call ptr @llvm.memory.region.decl.p0(ptr @Arr, i64 0, i64 400)
+// CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX_BOUNDED]], i64 49
+// CHECK-NEXT: [[CALL:%.*]] = call i32 (ptr, ...) @bar(ptr noundef [[ARRAYIDX]])
 // CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I_ADDR]], align 4
 // CHECK-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP0]] to i64
-// CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [100 x i32], ptr @Arr, i64 0, i64 [[IDXPROM]]
-// CHECK-NEXT: [[CALL1:%.*]] = call i32 (ptr, ...) @bar(ptr noundef [[ARRAYIDX]])
-// CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[CALL]], [[CALL1]]
+// CHECK-NEXT: [[ARRAYIDX_BOUNDED1:%.*]] = call ptr @llvm.memory.region.decl.p0(ptr @Arr, i64 0, i64 400)
+// CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX_BOUNDED1]], i64 [[IDXPROM]]
+// CHECK-NEXT: [[CALL3:%.*]] = call i32 (ptr, ...) @bar(ptr noundef [[ARRAYIDX2]])
+// CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[CALL]], [[CALL3]]
 // CHECK-NEXT: ret i32 [[ADD]]
 //
 int foo(int i) { return bar(&Arr[49])+bar(&Arr[i]); }
Index: clang/lib/CodeGen/CGExpr.cpp
===================================================================
--- clang/lib/CodeGen/CGExpr.cpp
+++ clang/lib/CodeGen/CGExpr.cpp
@@ -3919,10 +3919,44 @@
     // Propagate the alignment from the array itself to the result.
     QualType arrayType = Array->getType();
-    Addr = emitArraySubscriptGEP(
-        *this, ArrayLV.getAddress(*this), {CGM.getSize(CharUnits::Zero()), Idx},
-        E->getType(), !getLangOpts().isSignedOverflowDefined(), SignedIndices,
-        E->getExprLoc(), &arrayType, E->getBase());
+
+    Address ArrayLVAddr = ArrayLV.getAddress(*this);
+
+    if (!getLangOpts().isSignedOverflowDefined() &&
+        // ISO/IEC 9899:TC3, 6.5.6.8
+        (getLangOpts().C99 || getLangOpts().CPlusPlus) &&
+        getContext().getAsConstantArrayType(arrayType)) {
+      auto *CAT = getContext().getAsConstantArrayType(arrayType);
+      uint64_t BoundedRegionSize = CAT->getSize().getZExtValue() *
+          getContext().getTypeSize(CAT->getElementType()) / 8;
+
+      Address BeginOff = emitArraySubscriptGEP(
+          *this, ArrayLVAddr,
+          {CGM.getSize(CharUnits::Zero()), CGM.getSize(CharUnits::Zero())},
+          E->getType(), !getLangOpts().isSignedOverflowDefined(), SignedIndices,
+          E->getExprLoc(), &arrayType, E->getBase());
+
+      llvm::Function *F = CGM.getIntrinsic(llvm::Intrinsic::memory_region_decl,
+                                           BeginOff.getPointer()->getType());
+      llvm::Value *Call = Builder.CreateCall(F,
+          {BeginOff.getPointer(),
+           llvm::ConstantInt::get(Int64Ty, 0),
+           llvm::ConstantInt::get(Int64Ty, BoundedRegionSize)},
+          "arrayidx.bounded");
+      Address RetAddr(Call, BeginOff.getElementType(),
+                      ArrayLVAddr.getAlignment());
+
+      Addr = emitArraySubscriptGEP(
+          *this, RetAddr, {Idx},
+          E->getType(), !getLangOpts().isSignedOverflowDefined(), SignedIndices,
+          E->getExprLoc(), &arrayType, E->getBase());
+    } else {
+      Addr = emitArraySubscriptGEP(
+          *this, ArrayLVAddr, {CGM.getSize(CharUnits::Zero()), Idx},
+          E->getType(), !getLangOpts().isSignedOverflowDefined(), SignedIndices,
+          E->getExprLoc(), &arrayType, E->getBase());
+    }
+
     EltBaseInfo = ArrayLV.getBaseInfo();
     EltTBAAInfo = CGM.getTBAAInfoForSubobject(ArrayLV, E->getType());
   } else {
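Tying the codegen change to the tests: the guard above fires only when signed overflow is undefined, the language is C99 or C++ (per the cited ISO/IEC 9899 6.5.6 rule), and the base has constant array type; BoundedRegionSize is then the array's total byte size. A hedged sketch of the resulting mapping (distilled from the updated 2005-01-02-ConstantInits.c checks):

  ; C source: int Arr[100]; ... bar(&Arr[i]);
  ; BoundedRegionSize = 100 elements * 32 bits / 8 = 400 bytes
  %arrayidx.bounded = call ptr @llvm.memory.region.decl.p0(ptr @Arr, i64 0, i64 400)
  %arrayidx = getelementptr inbounds i32, ptr %arrayidx.bounded, i64 %idxprom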