Author: David Green Date: 2021-01-08T09:50:10Z New Revision: 72fb5ba079019c2108d676526b5285b228795e48
URL: https://github.com/llvm/llvm-project/commit/72fb5ba079019c2108d676526b5285b228795e48 DIFF: https://github.com/llvm/llvm-project/commit/72fb5ba079019c2108d676526b5285b228795e48.diff LOG: [LV] Don't sink into replication regions The new test case here contains a first-order recurrence and an instruction that is replicated. The first-order recurrence forces an instruction to be sunk _into_, as opposed to after, the replication region. That causes several things to go wrong, including registering vector instructions multiple times and failing to create dominance relations correctly. Instead, we should be sinking to after the replication region, which is what this patch makes sure happens. Differential Revision: https://reviews.llvm.org/D93629 Added: Modified: llvm/lib/Transforms/Vectorize/LoopVectorize.cpp llvm/lib/Transforms/Vectorize/VPlan.cpp llvm/lib/Transforms/Vectorize/VPlan.h llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll llvm/unittests/Transforms/Vectorize/VPlanTest.cpp Removed: ################################################################################ diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 1518b757186d..0b58ad1ff2ea 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -8507,6 +8507,18 @@ VPlanPtr LoopVectorizationPlanner::buildVPlanWithVPRecipes( for (auto &Entry : SinkAfter) { VPRecipeBase *Sink = RecipeBuilder.getRecipe(Entry.first); VPRecipeBase *Target = RecipeBuilder.getRecipe(Entry.second); + // If the target is in a replication region, make sure to move Sink to the + // block after it, not into the replication region itself. 
+ if (auto *Region = + dyn_cast_or_null<VPRegionBlock>(Target->getParent()->getParent())) { + if (Region->isReplicator()) { + assert(Region->getNumSuccessors() == 1 && "Expected SESE region!"); + VPBasicBlock *NextBlock = + cast<VPBasicBlock>(Region->getSuccessors().front()); + Sink->moveBefore(*NextBlock, NextBlock->getFirstNonPhi()); + continue; + } + } Sink->moveAfter(Target); } diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp index c6e44d11e7b3..bca6d73dc44b 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp @@ -433,6 +433,14 @@ void VPRecipeBase::moveAfter(VPRecipeBase *InsertPos) { insertAfter(InsertPos); } +void VPRecipeBase::moveBefore(VPBasicBlock &BB, + iplist<VPRecipeBase>::iterator I) { + assert(I == BB.end() || I->getParent() == &BB); + removeFromParent(); + Parent = &BB; + BB.getRecipeList().insert(I, this); +} + void VPInstruction::generateInstruction(VPTransformState &State, unsigned Part) { IRBuilder<> &Builder = State.Builder; diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h index dcc7d3db9b97..1926c9255a58 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -664,6 +664,11 @@ class VPRecipeBase : public ilist_node_with_parent<VPRecipeBase, VPBasicBlock>, /// the VPBasicBlock that MovePos lives in, right after MovePos. void moveAfter(VPRecipeBase *MovePos); + /// Unlink this recipe and insert into BB before I. + /// + /// \pre I is a valid iterator into BB. + void moveBefore(VPBasicBlock &BB, iplist<VPRecipeBase>::iterator I); + /// This method unlinks 'this' from the containing basic block, but does not /// delete it. 
void removeFromParent(); diff --git a/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll b/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll index 242402d25666..ce2d2adcce99 100644 --- a/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll +++ b/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll @@ -645,3 +645,235 @@ for.cond: for.end: ret void } + +define i32 @sink_into_replication_region(i32 %y) { +; CHECK-LABEL: @sink_into_replication_region( +; CHECK-NEXT: bb: +; CHECK-NEXT: [[TMP0:%.*]] = icmp sgt i32 [[Y:%.*]], 1 +; CHECK-NEXT: [[TMP1:%.*]] = select i1 [[TMP0]], i32 [[Y]], i32 1 +; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK: vector.ph: +; CHECK-NEXT: [[N_RND_UP:%.*]] = add nuw i32 [[TMP1]], 3 +; CHECK-NEXT: [[N_VEC:%.*]] = and i32 [[N_RND_UP]], -4 +; CHECK-NEXT: [[TRIP_COUNT_MINUS_1:%.*]] = add nsw i32 [[TMP1]], -1 +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[TRIP_COUNT_MINUS_1]], i32 0 +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] +; CHECK: vector.body: +; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_UDIV_CONTINUE9:%.*]] ] +; CHECK-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i32> [ <i32 poison, i32 poison, i32 poison, i32 0>, [[VECTOR_PH]] ], [ [[TMP21:%.*]], [[PRED_UDIV_CONTINUE9]] ] +; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP23:%.*]], [[PRED_UDIV_CONTINUE9]] ] +; CHECK-NEXT: [[OFFSET_IDX:%.*]] = sub i32 [[Y]], [[INDEX]] +; CHECK-NEXT: [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <4 x i32> poison, i32 [[INDEX]], i32 0 +; CHECK-NEXT: [[BROADCAST_SPLAT3:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT2]], <4 x i32> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[VEC_IV:%.*]] = or <4 x i32> [[BROADCAST_SPLAT3]], <i32 
0, i32 1, i32 2, i32 3> +; CHECK-NEXT: [[TMP2:%.*]] = icmp ule <4 x i32> [[VEC_IV]], [[BROADCAST_SPLAT]] +; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x i1> [[TMP2]], i32 0 +; CHECK-NEXT: br i1 [[TMP3]], label [[PRED_UDIV_IF:%.*]], label [[PRED_UDIV_CONTINUE:%.*]] +; CHECK: pred.udiv.if: +; CHECK-NEXT: [[TMP4:%.*]] = udiv i32 219220132, [[OFFSET_IDX]] +; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x i32> poison, i32 [[TMP4]], i32 0 +; CHECK-NEXT: br label [[PRED_UDIV_CONTINUE]] +; CHECK: pred.udiv.continue: +; CHECK-NEXT: [[TMP6:%.*]] = phi <4 x i32> [ poison, [[VECTOR_BODY]] ], [ [[TMP5]], [[PRED_UDIV_IF]] ] +; CHECK-NEXT: [[TMP7:%.*]] = extractelement <4 x i1> [[TMP2]], i32 1 +; CHECK-NEXT: br i1 [[TMP7]], label [[PRED_UDIV_IF4:%.*]], label [[PRED_UDIV_CONTINUE5:%.*]] +; CHECK: pred.udiv.if4: +; CHECK-NEXT: [[TMP8:%.*]] = add i32 [[OFFSET_IDX]], -1 +; CHECK-NEXT: [[TMP9:%.*]] = udiv i32 219220132, [[TMP8]] +; CHECK-NEXT: [[TMP10:%.*]] = insertelement <4 x i32> [[TMP6]], i32 [[TMP9]], i32 1 +; CHECK-NEXT: br label [[PRED_UDIV_CONTINUE5]] +; CHECK: pred.udiv.continue5: +; CHECK-NEXT: [[TMP11:%.*]] = phi <4 x i32> [ [[TMP6]], [[PRED_UDIV_CONTINUE]] ], [ [[TMP10]], [[PRED_UDIV_IF4]] ] +; CHECK-NEXT: [[TMP12:%.*]] = extractelement <4 x i1> [[TMP2]], i32 2 +; CHECK-NEXT: br i1 [[TMP12]], label [[PRED_UDIV_IF6:%.*]], label [[PRED_UDIV_CONTINUE7:%.*]] +; CHECK: pred.udiv.if6: +; CHECK-NEXT: [[TMP13:%.*]] = add i32 [[OFFSET_IDX]], -2 +; CHECK-NEXT: [[TMP14:%.*]] = udiv i32 219220132, [[TMP13]] +; CHECK-NEXT: [[TMP15:%.*]] = insertelement <4 x i32> [[TMP11]], i32 [[TMP14]], i32 2 +; CHECK-NEXT: br label [[PRED_UDIV_CONTINUE7]] +; CHECK: pred.udiv.continue7: +; CHECK-NEXT: [[TMP16:%.*]] = phi <4 x i32> [ [[TMP11]], [[PRED_UDIV_CONTINUE5]] ], [ [[TMP15]], [[PRED_UDIV_IF6]] ] +; CHECK-NEXT: [[TMP17:%.*]] = extractelement <4 x i1> [[TMP2]], i32 3 +; CHECK-NEXT: br i1 [[TMP17]], label [[PRED_UDIV_IF8:%.*]], label [[PRED_UDIV_CONTINUE9]] +; CHECK: pred.udiv.if8: +; CHECK-NEXT: 
[[TMP18:%.*]] = add i32 [[OFFSET_IDX]], -3 +; CHECK-NEXT: [[TMP19:%.*]] = udiv i32 219220132, [[TMP18]] +; CHECK-NEXT: [[TMP20:%.*]] = insertelement <4 x i32> [[TMP16]], i32 [[TMP19]], i32 3 +; CHECK-NEXT: br label [[PRED_UDIV_CONTINUE9]] +; CHECK: pred.udiv.continue9: +; CHECK-NEXT: [[TMP21]] = phi <4 x i32> [ [[TMP16]], [[PRED_UDIV_CONTINUE7]] ], [ [[TMP20]], [[PRED_UDIV_IF8]] ] +; CHECK-NEXT: [[TMP22:%.*]] = shufflevector <4 x i32> [[VECTOR_RECUR]], <4 x i32> [[TMP21]], <4 x i32> <i32 3, i32 4, i32 5, i32 6> +; CHECK-NEXT: [[TMP23]] = add <4 x i32> [[VEC_PHI1]], [[TMP22]] +; CHECK-NEXT: [[TMP24:%.*]] = select <4 x i1> [[TMP2]], <4 x i32> [[TMP23]], <4 x i32> [[VEC_PHI1]] +; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4 +; CHECK-NEXT: [[TMP25:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP25]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !prof !45, [[LOOP46:!llvm.loop !.*]] +; CHECK: middle.block: +; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x i32> [[TMP24]], <4 x i32> poison, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef> +; CHECK-NEXT: [[BIN_RDX:%.*]] = add <4 x i32> [[TMP24]], [[RDX_SHUF]] +; CHECK-NEXT: [[RDX_SHUF10:%.*]] = shufflevector <4 x i32> [[BIN_RDX]], <4 x i32> poison, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef> +; CHECK-NEXT: [[BIN_RDX11:%.*]] = add <4 x i32> [[BIN_RDX]], [[RDX_SHUF10]] +; CHECK-NEXT: [[TMP26:%.*]] = extractelement <4 x i32> [[BIN_RDX11]], i32 0 +; CHECK-NEXT: br i1 true, label [[BB1:%.*]], label [[SCALAR_PH]] +; CHECK: scalar.ph: +; CHECK-NEXT: br label [[BB2:%.*]] +; CHECK: bb1: +; CHECK-NEXT: [[TMP:%.*]] = phi i32 [ undef, [[BB2]] ], [ [[TMP26]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: ret i32 [[TMP]] +; CHECK: bb2: +; CHECK-NEXT: br i1 undef, label [[BB1]], label [[BB2]], !prof !47, [[LOOP48:!llvm.loop !.*]] +; +bb: + br label %bb2 + + bb1: ; preds = %bb2 + %tmp = phi i32 [ %tmp6, %bb2 ] + ret i32 %tmp + + bb2: ; preds = %bb2, %bb + %tmp3 = phi i32 [ %tmp8, %bb2 ], [ %y, %bb ] + %tmp4 = 
phi i32 [ %tmp7, %bb2 ], [ 0, %bb ] + %tmp5 = phi i32 [ %tmp6, %bb2 ], [ 0, %bb ] + %tmp6 = add i32 %tmp5, %tmp4 + %tmp7 = udiv i32 219220132, %tmp3 + %tmp8 = add nsw i32 %tmp3, -1 + %tmp9 = icmp slt i32 %tmp3, 2 + br i1 %tmp9, label %bb1, label %bb2, !prof !2 +} + +define i32 @sink_into_replication_region_multiple(i32 *%x, i32 %y) { +; CHECK-LABEL: @sink_into_replication_region_multiple( +; CHECK-NEXT: bb: +; CHECK-NEXT: [[TMP0:%.*]] = icmp sgt i32 [[Y:%.*]], 1 +; CHECK-NEXT: [[TMP1:%.*]] = select i1 [[TMP0]], i32 [[Y]], i32 1 +; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK: vector.ph: +; CHECK-NEXT: [[N_RND_UP:%.*]] = add nuw i32 [[TMP1]], 3 +; CHECK-NEXT: [[N_VEC:%.*]] = and i32 [[N_RND_UP]], -4 +; CHECK-NEXT: [[TRIP_COUNT_MINUS_1:%.*]] = add nsw i32 [[TMP1]], -1 +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[TRIP_COUNT_MINUS_1]], i32 0 +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] +; CHECK: vector.body: +; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE16:%.*]] ] +; CHECK-NEXT: [[VEC_IND2:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT3:%.*]], [[PRED_STORE_CONTINUE16]] ] +; CHECK-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i32> [ <i32 poison, i32 poison, i32 poison, i32 0>, [[VECTOR_PH]] ], [ [[TMP21:%.*]], [[PRED_STORE_CONTINUE16]] ] +; CHECK-NEXT: [[VEC_PHI4:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP23:%.*]], [[PRED_STORE_CONTINUE16]] ] +; CHECK-NEXT: [[OFFSET_IDX:%.*]] = sub i32 [[Y]], [[INDEX]] +; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[OFFSET_IDX]], -1 +; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[OFFSET_IDX]], -2 +; CHECK-NEXT: [[TMP4:%.*]] = add i32 [[OFFSET_IDX]], -3 +; CHECK-NEXT: [[TMP5:%.*]] = icmp ule <4 x i32> [[VEC_IND2]], 
[[BROADCAST_SPLAT]] +; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x i1> [[TMP5]], i32 0 +; CHECK-NEXT: br i1 [[TMP6]], label [[PRED_UDIV_IF:%.*]], label [[PRED_UDIV_CONTINUE:%.*]] +; CHECK: pred.udiv.if: +; CHECK-NEXT: [[TMP7:%.*]] = udiv i32 219220132, [[OFFSET_IDX]] +; CHECK-NEXT: [[TMP8:%.*]] = insertelement <4 x i32> poison, i32 [[TMP7]], i32 0 +; CHECK-NEXT: br label [[PRED_UDIV_CONTINUE]] +; CHECK: pred.udiv.continue: +; CHECK-NEXT: [[TMP9:%.*]] = phi <4 x i32> [ poison, [[VECTOR_BODY]] ], [ [[TMP8]], [[PRED_UDIV_IF]] ] +; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x i1> [[TMP5]], i32 1 +; CHECK-NEXT: br i1 [[TMP10]], label [[PRED_UDIV_IF5:%.*]], label [[PRED_UDIV_CONTINUE6:%.*]] +; CHECK: pred.udiv.if5: +; CHECK-NEXT: [[TMP11:%.*]] = udiv i32 219220132, [[TMP2]] +; CHECK-NEXT: [[TMP12:%.*]] = insertelement <4 x i32> [[TMP9]], i32 [[TMP11]], i32 1 +; CHECK-NEXT: br label [[PRED_UDIV_CONTINUE6]] +; CHECK: pred.udiv.continue6: +; CHECK-NEXT: [[TMP13:%.*]] = phi <4 x i32> [ [[TMP9]], [[PRED_UDIV_CONTINUE]] ], [ [[TMP12]], [[PRED_UDIV_IF5]] ] +; CHECK-NEXT: [[TMP14:%.*]] = extractelement <4 x i1> [[TMP5]], i32 2 +; CHECK-NEXT: br i1 [[TMP14]], label [[PRED_UDIV_IF7:%.*]], label [[PRED_UDIV_CONTINUE8:%.*]] +; CHECK: pred.udiv.if7: +; CHECK-NEXT: [[TMP15:%.*]] = udiv i32 219220132, [[TMP3]] +; CHECK-NEXT: [[TMP16:%.*]] = insertelement <4 x i32> [[TMP13]], i32 [[TMP15]], i32 2 +; CHECK-NEXT: br label [[PRED_UDIV_CONTINUE8]] +; CHECK: pred.udiv.continue8: +; CHECK-NEXT: [[TMP17:%.*]] = phi <4 x i32> [ [[TMP13]], [[PRED_UDIV_CONTINUE6]] ], [ [[TMP16]], [[PRED_UDIV_IF7]] ] +; CHECK-NEXT: [[TMP18:%.*]] = extractelement <4 x i1> [[TMP5]], i32 3 +; CHECK-NEXT: br i1 [[TMP18]], label [[PRED_UDIV_IF9:%.*]], label [[PRED_UDIV_CONTINUE10:%.*]] +; CHECK: pred.udiv.if9: +; CHECK-NEXT: [[TMP19:%.*]] = udiv i32 219220132, [[TMP4]] +; CHECK-NEXT: [[TMP20:%.*]] = insertelement <4 x i32> [[TMP17]], i32 [[TMP19]], i32 3 +; CHECK-NEXT: br label [[PRED_UDIV_CONTINUE10]] +; CHECK: 
pred.udiv.continue10: +; CHECK-NEXT: [[TMP21]] = phi <4 x i32> [ [[TMP17]], [[PRED_UDIV_CONTINUE8]] ], [ [[TMP20]], [[PRED_UDIV_IF9]] ] +; CHECK-NEXT: [[TMP22:%.*]] = shufflevector <4 x i32> [[VECTOR_RECUR]], <4 x i32> [[TMP21]], <4 x i32> <i32 3, i32 4, i32 5, i32 6> +; CHECK-NEXT: [[TMP23]] = add <4 x i32> [[VEC_PHI4]], [[TMP22]] +; CHECK-NEXT: [[TMP24:%.*]] = extractelement <4 x i1> [[TMP5]], i32 0 +; CHECK-NEXT: br i1 [[TMP24]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]] +; CHECK: pred.store.if: +; CHECK-NEXT: [[TMP25:%.*]] = sext i32 [[INDEX]] to i64 +; CHECK-NEXT: [[TMP26:%.*]] = getelementptr inbounds i32, i32* [[X:%.*]], i64 [[TMP25]] +; CHECK-NEXT: store i32 [[OFFSET_IDX]], i32* [[TMP26]], align 4 +; CHECK-NEXT: br label [[PRED_STORE_CONTINUE]] +; CHECK: pred.store.continue: +; CHECK-NEXT: [[TMP27:%.*]] = extractelement <4 x i1> [[TMP5]], i32 1 +; CHECK-NEXT: br i1 [[TMP27]], label [[PRED_STORE_IF11:%.*]], label [[PRED_STORE_CONTINUE12:%.*]] +; CHECK: pred.store.if11: +; CHECK-NEXT: [[TMP28:%.*]] = or i32 [[INDEX]], 1 +; CHECK-NEXT: [[TMP29:%.*]] = sext i32 [[TMP28]] to i64 +; CHECK-NEXT: [[TMP30:%.*]] = getelementptr inbounds i32, i32* [[X]], i64 [[TMP29]] +; CHECK-NEXT: store i32 [[TMP2]], i32* [[TMP30]], align 4 +; CHECK-NEXT: br label [[PRED_STORE_CONTINUE12]] +; CHECK: pred.store.continue12: +; CHECK-NEXT: [[TMP31:%.*]] = extractelement <4 x i1> [[TMP5]], i32 2 +; CHECK-NEXT: br i1 [[TMP31]], label [[PRED_STORE_IF13:%.*]], label [[PRED_STORE_CONTINUE14:%.*]] +; CHECK: pred.store.if13: +; CHECK-NEXT: [[TMP32:%.*]] = or i32 [[INDEX]], 2 +; CHECK-NEXT: [[TMP33:%.*]] = sext i32 [[TMP32]] to i64 +; CHECK-NEXT: [[TMP34:%.*]] = getelementptr inbounds i32, i32* [[X]], i64 [[TMP33]] +; CHECK-NEXT: store i32 [[TMP3]], i32* [[TMP34]], align 4 +; CHECK-NEXT: br label [[PRED_STORE_CONTINUE14]] +; CHECK: pred.store.continue14: +; CHECK-NEXT: [[TMP35:%.*]] = extractelement <4 x i1> [[TMP5]], i32 3 +; CHECK-NEXT: br i1 [[TMP35]], label 
[[PRED_STORE_IF15:%.*]], label [[PRED_STORE_CONTINUE16]] +; CHECK: pred.store.if15: +; CHECK-NEXT: [[TMP36:%.*]] = or i32 [[INDEX]], 3 +; CHECK-NEXT: [[TMP37:%.*]] = sext i32 [[TMP36]] to i64 +; CHECK-NEXT: [[TMP38:%.*]] = getelementptr inbounds i32, i32* [[X]], i64 [[TMP37]] +; CHECK-NEXT: store i32 [[TMP4]], i32* [[TMP38]], align 4 +; CHECK-NEXT: br label [[PRED_STORE_CONTINUE16]] +; CHECK: pred.store.continue16: +; CHECK-NEXT: [[TMP39:%.*]] = select <4 x i1> [[TMP5]], <4 x i32> [[TMP23]], <4 x i32> [[VEC_PHI4]] +; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4 +; CHECK-NEXT: [[VEC_IND_NEXT3]] = add <4 x i32> [[VEC_IND2]], <i32 4, i32 4, i32 4, i32 4> +; CHECK-NEXT: [[TMP40:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP40]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !prof !45, [[LOOP49:!llvm.loop !.*]] +; CHECK: middle.block: +; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x i32> [[TMP39]], <4 x i32> poison, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef> +; CHECK-NEXT: [[BIN_RDX:%.*]] = add <4 x i32> [[TMP39]], [[RDX_SHUF]] +; CHECK-NEXT: [[RDX_SHUF17:%.*]] = shufflevector <4 x i32> [[BIN_RDX]], <4 x i32> poison, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef> +; CHECK-NEXT: [[BIN_RDX18:%.*]] = add <4 x i32> [[BIN_RDX]], [[RDX_SHUF17]] +; CHECK-NEXT: [[TMP41:%.*]] = extractelement <4 x i32> [[BIN_RDX18]], i32 0 +; CHECK-NEXT: br i1 true, label [[BB1:%.*]], label [[SCALAR_PH]] +; CHECK: scalar.ph: +; CHECK-NEXT: br label [[BB2:%.*]] +; CHECK: bb1: +; CHECK-NEXT: [[TMP:%.*]] = phi i32 [ undef, [[BB2]] ], [ [[TMP41]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: ret i32 [[TMP]] +; CHECK: bb2: +; CHECK-NEXT: br i1 undef, label [[BB1]], label [[BB2]], !prof !47, [[LOOP50:!llvm.loop !.*]] +; +bb: + br label %bb2 + + bb1: ; preds = %bb2 + %tmp = phi i32 [ %tmp6, %bb2 ] + ret i32 %tmp + + bb2: ; preds = %bb2, %bb + %tmp3 = phi i32 [ %tmp8, %bb2 ], [ %y, %bb ] + %iv = phi i32 [ %iv.next, %bb2 ], [ 0, %bb ] + %tmp4 = phi i32 [ %tmp7, %bb2 
], [ 0, %bb ] + %tmp5 = phi i32 [ %tmp6, %bb2 ], [ 0, %bb ] + %g = getelementptr inbounds i32, i32* %x, i32 %iv + %tmp6 = add i32 %tmp5, %tmp4 + %tmp7 = udiv i32 219220132, %tmp3 + store i32 %tmp3, i32* %g, align 4 + %tmp8 = add nsw i32 %tmp3, -1 + %iv.next = add nsw i32 %iv, 1 + %tmp9 = icmp slt i32 %tmp3, 2 + br i1 %tmp9, label %bb1, label %bb2, !prof !2 +} + +!2 = !{!"branch_weights", i32 1, i32 1} diff --git a/llvm/unittests/Transforms/Vectorize/VPlanTest.cpp b/llvm/unittests/Transforms/Vectorize/VPlanTest.cpp index c0230774bd90..8265f339d6af 100644 --- a/llvm/unittests/Transforms/Vectorize/VPlanTest.cpp +++ b/llvm/unittests/Transforms/Vectorize/VPlanTest.cpp @@ -88,6 +88,42 @@ TEST(VPInstructionTest, moveAfter) { EXPECT_EQ(I3->getParent(), I4->getParent()); } +TEST(VPInstructionTest, moveBefore) { + VPInstruction *I1 = new VPInstruction(0, {}); + VPInstruction *I2 = new VPInstruction(1, {}); + VPInstruction *I3 = new VPInstruction(2, {}); + + VPBasicBlock VPBB1; + VPBB1.appendRecipe(I1); + VPBB1.appendRecipe(I2); + VPBB1.appendRecipe(I3); + + I1->moveBefore(VPBB1, I3->getIterator()); + + CHECK_ITERATOR(VPBB1, I2, I1, I3); + + VPInstruction *I4 = new VPInstruction(4, {}); + VPInstruction *I5 = new VPInstruction(5, {}); + VPBasicBlock VPBB2; + VPBB2.appendRecipe(I4); + VPBB2.appendRecipe(I5); + + I3->moveBefore(VPBB2, I4->getIterator()); + + CHECK_ITERATOR(VPBB1, I2, I1); + CHECK_ITERATOR(VPBB2, I3, I4, I5); + EXPECT_EQ(I3->getParent(), I4->getParent()); + + VPBasicBlock VPBB3; + + I4->moveBefore(VPBB3, VPBB3.end()); + + CHECK_ITERATOR(VPBB1, I2, I1); + CHECK_ITERATOR(VPBB2, I3, I5); + CHECK_ITERATOR(VPBB3, I4); + EXPECT_EQ(&VPBB3, I4->getParent()); +} + TEST(VPInstructionTest, setOperand) { VPValue *VPV1 = new VPValue(); VPValue *VPV2 = new VPValue(); _______________________________________________ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits