Author: Vigneshwar Jayakumar Date: 2025-07-10T12:58:34-05:00 New Revision: 7a6584e9ce89777706404cba515e8d7299f49bb2
URL: https://github.com/llvm/llvm-project/commit/7a6584e9ce89777706404cba515e8d7299f49bb2 DIFF: https://github.com/llvm/llvm-project/commit/7a6584e9ce89777706404cba515e8d7299f49bb2.diff LOG: Revert "[StructurizeCFG] Hoist and simplify zero-cost incoming else phi value…" This reverts commit 8d3f497eb834a84b954241b8c4293f8387e75576. Added: Modified: llvm/lib/Transforms/Scalar/StructurizeCFG.cpp llvm/test/CodeGen/AMDGPU/memintrinsic-unroll.ll Removed: llvm/test/CodeGen/AMDGPU/structurize-hoist.ll llvm/test/Transforms/StructurizeCFG/hoist-zerocost.ll ################################################################################ diff --git a/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp b/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp index 5b2ca8c5915ff..a69d64956d6d9 100644 --- a/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp +++ b/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp @@ -19,7 +19,6 @@ #include "llvm/Analysis/RegionInfo.h" #include "llvm/Analysis/RegionIterator.h" #include "llvm/Analysis/RegionPass.h" -#include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Analysis/UniformityAnalysis.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/CFG.h" @@ -129,7 +128,6 @@ struct PredInfo { using BBPredicates = DenseMap<BasicBlock *, PredInfo>; using PredMap = DenseMap<BasicBlock *, BBPredicates>; using BB2BBMap = DenseMap<BasicBlock *, BasicBlock *>; -using Val2BBMap = DenseMap<Value *, BasicBlock *>; // A traits type that is intended to be used in graph algorithms. 
The graph // traits starts at an entry node, and traverses the RegionNodes that are in @@ -281,7 +279,7 @@ class StructurizeCFG { ConstantInt *BoolTrue; ConstantInt *BoolFalse; Value *BoolPoison; - const TargetTransformInfo *TTI; + Function *Func; Region *ParentRegion; @@ -303,12 +301,8 @@ class StructurizeCFG { PredMap LoopPreds; BranchVector LoopConds; - Val2BBMap HoistedValues; - RegionNode *PrevNode; - void hoistZeroCostElseBlockPhiValues(BasicBlock *ElseBB, BasicBlock *ThenBB); - void orderNodes(); void analyzeLoops(RegionNode *N); @@ -338,8 +332,6 @@ class StructurizeCFG { void simplifyAffectedPhis(); - void simplifyHoistedPhis(); - DebugLoc killTerminator(BasicBlock *BB); void changeExit(RegionNode *Node, BasicBlock *NewExit, @@ -367,7 +359,7 @@ class StructurizeCFG { public: void init(Region *R); - bool run(Region *R, DominatorTree *DT, const TargetTransformInfo *TTI); + bool run(Region *R, DominatorTree *DT); bool makeUniformRegion(Region *R, UniformityInfo &UA); }; @@ -393,11 +385,8 @@ class StructurizeCFGLegacyPass : public RegionPass { if (SCFG.makeUniformRegion(R, UA)) return false; } - Function *F = R->getEntry()->getParent(); - const TargetTransformInfo *TTI = - &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(*F); DominatorTree *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); - return SCFG.run(R, DT, TTI); + return SCFG.run(R, DT); } StringRef getPassName() const override { return "Structurize control flow"; } @@ -405,9 +394,7 @@ class StructurizeCFGLegacyPass : public RegionPass { void getAnalysisUsage(AnalysisUsage &AU) const override { if (SkipUniformRegions) AU.addRequired<UniformityInfoWrapperPass>(); - AU.addRequired<TargetTransformInfoWrapperPass>(); AU.addRequired<DominatorTreeWrapperPass>(); - AU.addRequired<TargetTransformInfoWrapperPass>(); AU.addPreserved<DominatorTreeWrapperPass>(); RegionPass::getAnalysisUsage(AU); @@ -416,34 +403,6 @@ class StructurizeCFGLegacyPass : public RegionPass { } // end anonymous namespace 
-/// Checks whether an instruction is zero cost instruction and checks if the -/// operands are from different BB. If so, this instruction can be coalesced -/// if its hoisted to predecessor block. So, this returns true. -static bool isHoistableInstruction(Instruction *I, BasicBlock *BB, - const TargetTransformInfo *TTI) { - if (I->getParent() != BB) - return false; - - // If the instruction is not a zero cost instruction, return false. - auto Cost = TTI->getInstructionCost(I, TargetTransformInfo::TCK_Latency); - InstructionCost::CostType CostVal = - Cost.isValid() - ? Cost.getValue() - : (InstructionCost::CostType)TargetTransformInfo::TCC_Expensive; - if (CostVal != 0) - return false; - - // Check if any operands are instructions defined in the same block. - for (auto &Op : I->operands()) { - if (auto *OpI = dyn_cast<Instruction>(Op)) { - if (OpI->getParent() == BB) - return false; - } - } - - return true; -} - char StructurizeCFGLegacyPass::ID = 0; INITIALIZE_PASS_BEGIN(StructurizeCFGLegacyPass, "structurizecfg", @@ -454,39 +413,6 @@ INITIALIZE_PASS_DEPENDENCY(RegionInfoPass) INITIALIZE_PASS_END(StructurizeCFGLegacyPass, "structurizecfg", "Structurize the CFG", false, false) -/// Structurization can introduce unnecessary VGPR copies due to register -/// coalescing interference. For example, if the Else block has a zero-cost -/// instruction and the Then block modifies the VGPR value, only one value is -/// live at a time in merge block before structurization. After structurization, -/// the coalescer may incorrectly treat the Then value as live in the Else block -/// (via the path Then → Flow → Else), leading to unnecessary VGPR copies. -/// -/// This function examines phi nodes whose incoming values are zero-cost -/// instructions in the Else block. It identifies such values that can be safely -/// hoisted and moves them to the nearest common dominator of Then and Else -/// blocks.
A follow-up function after setting PhiNodes assigns the hoisted -/// value to poison phi nodes along the if→flow edge, aiding register coalescing -/// and minimizing unnecessary live ranges. -void StructurizeCFG::hoistZeroCostElseBlockPhiValues(BasicBlock *ElseBB, - BasicBlock *ThenBB) { - - BasicBlock *ElseSucc = ElseBB->getSingleSuccessor(); - BasicBlock *CommonDominator = DT->findNearestCommonDominator(ElseBB, ThenBB); - - if (!ElseSucc || !CommonDominator) - return; - Instruction *Term = CommonDominator->getTerminator(); - for (PHINode &Phi : ElseSucc->phis()) { - Value *ElseVal = Phi.getIncomingValueForBlock(ElseBB); - auto *Inst = dyn_cast<Instruction>(ElseVal); - if (!Inst || !isHoistableInstruction(Inst, ElseBB, TTI)) - continue; - Inst->removeFromParent(); - Inst->insertInto(CommonDominator, Term->getIterator()); - HoistedValues[Inst] = CommonDominator; - } -} - /// Build up the general order of nodes, by performing a topological sort of the /// parent region's nodes, while ensuring that there is no outer cycle node /// between any two inner cycle nodes. 
@@ -609,7 +535,7 @@ void StructurizeCFG::gatherPredicates(RegionNode *N) { BasicBlock *Other = Term->getSuccessor(!i); if (Visited.count(Other) && !Loops.count(Other) && !Pred.count(Other) && !Pred.count(P)) { - hoistZeroCostElseBlockPhiValues(Succ, Other); + Pred[Other] = {BoolFalse, std::nullopt}; Pred[P] = {BoolTrue, std::nullopt}; continue; @@ -965,44 +891,6 @@ void StructurizeCFG::setPhiValues() { AffectedPhis.append(InsertedPhis.begin(), InsertedPhis.end()); } -/// Updates PHI nodes after hoisted zero cost instructions by replacing poison -/// entries on Flow nodes with the appropriate hoisted values -void StructurizeCFG::simplifyHoistedPhis() { - for (WeakVH VH : AffectedPhis) { - PHINode *Phi = dyn_cast_or_null<PHINode>(VH); - if (!Phi || Phi->getNumIncomingValues() != 2) - continue; - - for (int i = 0; i < 2; i++) { - Value *V = Phi->getIncomingValue(i); - auto BBIt = HoistedValues.find(V); - - if (BBIt == HoistedValues.end()) - continue; - - Value *OtherV = Phi->getIncomingValue(!i); - PHINode *OtherPhi = dyn_cast<PHINode>(OtherV); - if (!OtherPhi) - continue; - - int PoisonValBBIdx = -1; - for (size_t i = 0; i < OtherPhi->getNumIncomingValues(); i++) { - if (!isa<PoisonValue>(OtherPhi->getIncomingValue(i))) - continue; - PoisonValBBIdx = i; - break; - } - if (PoisonValBBIdx == -1 || - !DT->dominates(BBIt->second, - OtherPhi->getIncomingBlock(PoisonValBBIdx))) - continue; - - OtherPhi->setIncomingValue(PoisonValBBIdx, V); - Phi->setIncomingValue(i, OtherV); - } - } -} - void StructurizeCFG::simplifyAffectedPhis() { bool Changed; do { @@ -1395,13 +1283,12 @@ bool StructurizeCFG::makeUniformRegion(Region *R, UniformityInfo &UA) { } /// Run the transformation for each region found -bool StructurizeCFG::run(Region *R, DominatorTree *DT, - const TargetTransformInfo *TTI) { +bool StructurizeCFG::run(Region *R, DominatorTree *DT) { if (R->isTopLevelRegion()) return false; this->DT = DT; - this->TTI = TTI; + Func = R->getEntry()->getParent(); 
assert(hasOnlySimpleTerminator(*Func) && "Unsupported block terminator."); @@ -1413,7 +1300,6 @@ bool StructurizeCFG::run(Region *R, DominatorTree *DT, insertConditions(false); insertConditions(true); setPhiValues(); - simplifyHoistedPhis(); simplifyConditions(); simplifyAffectedPhis(); rebuildSSA(); @@ -1463,7 +1349,7 @@ PreservedAnalyses StructurizeCFGPass::run(Function &F, bool Changed = false; DominatorTree *DT = &AM.getResult<DominatorTreeAnalysis>(F); auto &RI = AM.getResult<RegionInfoAnalysis>(F); - TargetTransformInfo *TTI = &AM.getResult<TargetIRAnalysis>(F); + UniformityInfo *UI = nullptr; if (SkipUniformRegions) UI = &AM.getResult<UniformityInfoAnalysis>(F); @@ -1482,7 +1368,7 @@ PreservedAnalyses StructurizeCFGPass::run(Function &F, continue; } - Changed |= SCFG.run(R, DT, TTI); + Changed |= SCFG.run(R, DT); } if (!Changed) return PreservedAnalyses::all(); diff --git a/llvm/test/CodeGen/AMDGPU/memintrinsic-unroll.ll b/llvm/test/CodeGen/AMDGPU/memintrinsic-unroll.ll index be020457ce87d..9cc42ac448067 100644 --- a/llvm/test/CodeGen/AMDGPU/memintrinsic-unroll.ll +++ b/llvm/test/CodeGen/AMDGPU/memintrinsic-unroll.ll @@ -9851,8 +9851,8 @@ define void @memmove_p5_p5_sz2048(ptr addrspace(5) align 1 %dst, ptr addrspace(5 ; CHECK-NEXT: s_andn2_saveexec_b32 s6, s6 ; CHECK-NEXT: s_cbranch_execz .LBB8_6 ; CHECK-NEXT: ; %bb.4: ; %memmove_bwd_loop.preheader -; CHECK-NEXT: v_add_nc_u32_e32 v1, 0x700, v1 ; CHECK-NEXT: v_add_nc_u32_e32 v0, 0x700, v0 +; CHECK-NEXT: v_add_nc_u32_e32 v1, 0x700, v1 ; CHECK-NEXT: s_movk_i32 s4, 0xf800 ; CHECK-NEXT: s_mov_b32 s5, -1 ; CHECK-NEXT: .LBB8_5: ; %memmove_bwd_loop @@ -11167,8 +11167,8 @@ define void @memmove_p5_p5_sz2048(ptr addrspace(5) align 1 %dst, ptr addrspace(5 ; ALIGNED-NEXT: s_andn2_saveexec_b32 s6, s6 ; ALIGNED-NEXT: s_cbranch_execz .LBB8_6 ; ALIGNED-NEXT: ; %bb.4: ; %memmove_bwd_loop.preheader -; ALIGNED-NEXT: v_add_nc_u32_e32 v1, 0x700, v1 ; ALIGNED-NEXT: v_add_nc_u32_e32 v0, 0x700, v0 +; ALIGNED-NEXT: v_add_nc_u32_e32 
v1, 0x700, v1 ; ALIGNED-NEXT: s_movk_i32 s4, 0xf800 ; ALIGNED-NEXT: s_mov_b32 s5, -1 ; ALIGNED-NEXT: .LBB8_5: ; %memmove_bwd_loop @@ -12381,8 +12381,8 @@ define void @memmove_p5_p5_sz2048(ptr addrspace(5) align 1 %dst, ptr addrspace(5 ; UNROLL3-NEXT: buffer_load_dword v4, v1, s[0:3], 0 offen offset:2024 ; UNROLL3-NEXT: buffer_load_dword v5, v1, s[0:3], 0 offen offset:2020 ; UNROLL3-NEXT: buffer_load_dword v6, v1, s[0:3], 0 offen offset:2016 -; UNROLL3-NEXT: v_add_nc_u32_e32 v1, 0x7b0, v1 ; UNROLL3-NEXT: v_add_nc_u32_e32 v2, 0x7b0, v0 +; UNROLL3-NEXT: v_add_nc_u32_e32 v1, 0x7b0, v1 ; UNROLL3-NEXT: s_waitcnt vmcnt(3) ; UNROLL3-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen offset:2028 ; UNROLL3-NEXT: s_waitcnt vmcnt(2) diff --git a/llvm/test/CodeGen/AMDGPU/structurize-hoist.ll b/llvm/test/CodeGen/AMDGPU/structurize-hoist.ll deleted file mode 100644 index 42436a1b4c279..0000000000000 --- a/llvm/test/CodeGen/AMDGPU/structurize-hoist.ll +++ /dev/null @@ -1,180 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 -; RUN: llc -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefix=GFX900 %s - - -%pair = type { i32, i32 } - -define void @test_extractvalue_then_else(ptr %ptr, i1 %cond) { -; GFX900-LABEL: test_extractvalue_then_else: -; GFX900: ; %bb.0: ; %if -; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX900-NEXT: flat_load_dword v3, v[0:1] -; GFX900-NEXT: v_and_b32_e32 v2, 1, v2 -; GFX900-NEXT: v_cmp_ne_u32_e32 vcc, 1, v2 -; GFX900-NEXT: s_and_saveexec_b64 s[4:5], vcc -; GFX900-NEXT: s_xor_b64 s[4:5], exec, s[4:5] -; GFX900-NEXT: s_cbranch_execz .LBB0_2 -; GFX900-NEXT: ; %bb.1: ; %else -; GFX900-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX900-NEXT: v_add_u32_e32 v3, 1, v3 -; GFX900-NEXT: .LBB0_2: ; %Flow -; GFX900-NEXT: s_andn2_saveexec_b64 s[4:5], s[4:5] -; GFX900-NEXT: s_or_b64 exec, exec, s[4:5] -; GFX900-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX900-NEXT: flat_store_dword v[0:1], v3 -; 
GFX900-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX900-NEXT: s_setpc_b64 s[30:31] -if: - %load_then = load %pair, ptr %ptr - br i1 %cond, label %then, label %else - -then: - %a_then = extractvalue %pair %load_then, 0 - br label %merge - -else: - %a_else = extractvalue %pair %load_then, 0 - %sum_else = add i32 %a_else, 1 - br label %merge - -merge: - %phi = phi i32 [ %a_then, %then ], [ %sum_else, %else ] - store i32 %phi, ptr %ptr - ret void -} - -define void @test_extractvalue_else_then(ptr %ptr, i1 %cond) { -; GFX900-LABEL: test_extractvalue_else_then: -; GFX900: ; %bb.0: ; %if -; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX900-NEXT: flat_load_dword v3, v[0:1] -; GFX900-NEXT: v_and_b32_e32 v2, 1, v2 -; GFX900-NEXT: v_cmp_ne_u32_e32 vcc, 1, v2 -; GFX900-NEXT: s_and_saveexec_b64 s[4:5], vcc -; GFX900-NEXT: s_xor_b64 s[4:5], exec, s[4:5] -; GFX900-NEXT: s_andn2_saveexec_b64 s[4:5], s[4:5] -; GFX900-NEXT: s_cbranch_execz .LBB1_2 -; GFX900-NEXT: ; %bb.1: ; %else -; GFX900-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX900-NEXT: v_add_u32_e32 v3, 1, v3 -; GFX900-NEXT: .LBB1_2: ; %merge -; GFX900-NEXT: s_or_b64 exec, exec, s[4:5] -; GFX900-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX900-NEXT: flat_store_dword v[0:1], v3 -; GFX900-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX900-NEXT: s_setpc_b64 s[30:31] -if: - %load_then = load %pair, ptr %ptr - br i1 %cond, label %else, label %then - -else: - %a_else = extractvalue %pair %load_then, 0 - %sum_else = add i32 %a_else, 1 - br label %merge - -then: - %a_then = extractvalue %pair %load_then, 0 - br label %merge - -merge: - %phi = phi i32 [ %a_then, %then ], [ %sum_else, %else ] - store i32 %phi, ptr %ptr - ret void -} - -define amdgpu_kernel void @test_loop_with_if( ptr %ptr, i1 %cond) #0 { -; GFX900-LABEL: test_loop_with_if: -; GFX900: ; %bb.0: ; %entry -; GFX900-NEXT: s_load_dword s2, s[4:5], 0x2c -; GFX900-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX900-NEXT: v_mov_b32_e32 v5, 0 -; GFX900-NEXT: s_mov_b64 s[4:5], 0 
-; GFX900-NEXT: s_movk_i32 s10, 0xfe -; GFX900-NEXT: s_waitcnt lgkmcnt(0) -; GFX900-NEXT: s_bitcmp1_b32 s2, 0 -; GFX900-NEXT: s_cselect_b64 s[2:3], -1, 0 -; GFX900-NEXT: v_cndmask_b32_e64 v3, 0, 1, s[2:3] -; GFX900-NEXT: v_mov_b32_e32 v2, s1 -; GFX900-NEXT: s_xor_b64 s[2:3], s[2:3], -1 -; GFX900-NEXT: v_mov_b32_e32 v1, s0 -; GFX900-NEXT: v_cmp_ne_u32_e64 s[0:1], 1, v3 -; GFX900-NEXT: s_branch .LBB2_2 -; GFX900-NEXT: .LBB2_1: ; %latch -; GFX900-NEXT: ; in Loop: Header=BB2_2 Depth=1 -; GFX900-NEXT: s_or_b64 exec, exec, s[8:9] -; GFX900-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX900-NEXT: v_add_u32_e32 v5, 20, v3 -; GFX900-NEXT: v_cmp_lt_i32_e32 vcc, s10, v5 -; GFX900-NEXT: s_or_b64 s[4:5], vcc, s[4:5] -; GFX900-NEXT: flat_store_dword v[1:2], v3 -; GFX900-NEXT: s_andn2_b64 exec, exec, s[4:5] -; GFX900-NEXT: s_cbranch_execz .LBB2_8 -; GFX900-NEXT: .LBB2_2: ; %loop -; GFX900-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX900-NEXT: flat_load_dwordx2 v[3:4], v[1:2] -; GFX900-NEXT: s_and_b64 vcc, exec, s[0:1] -; GFX900-NEXT: s_mov_b64 s[8:9], s[2:3] -; GFX900-NEXT: s_mov_b64 s[6:7], 0 -; GFX900-NEXT: s_cbranch_vccnz .LBB2_4 -; GFX900-NEXT: ; %bb.3: ; %if -; GFX900-NEXT: ; in Loop: Header=BB2_2 Depth=1 -; GFX900-NEXT: v_cmp_gt_i32_e32 vcc, 11, v5 -; GFX900-NEXT: s_andn2_b64 s[8:9], s[2:3], exec -; GFX900-NEXT: s_and_b64 s[12:13], vcc, exec -; GFX900-NEXT: s_mov_b64 s[6:7], -1 -; GFX900-NEXT: s_or_b64 s[8:9], s[8:9], s[12:13] -; GFX900-NEXT: .LBB2_4: ; %Flow -; GFX900-NEXT: ; in Loop: Header=BB2_2 Depth=1 -; GFX900-NEXT: s_and_saveexec_b64 s[12:13], s[8:9] -; GFX900-NEXT: s_xor_b64 s[8:9], exec, s[12:13] -; GFX900-NEXT: s_cbranch_execz .LBB2_6 -; GFX900-NEXT: ; %bb.5: ; %else -; GFX900-NEXT: ; in Loop: Header=BB2_2 Depth=1 -; GFX900-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX900-NEXT: v_add_u32_e32 v3, v3, v4 -; GFX900-NEXT: s_andn2_b64 s[6:7], s[6:7], exec -; GFX900-NEXT: .LBB2_6: ; %Flow1 -; GFX900-NEXT: ; in Loop: Header=BB2_2 Depth=1 -; GFX900-NEXT: s_or_b64 exec, exec, 
s[8:9] -; GFX900-NEXT: s_and_saveexec_b64 s[8:9], s[6:7] -; GFX900-NEXT: s_cbranch_execz .LBB2_1 -; GFX900-NEXT: ; %bb.7: ; %then -; GFX900-NEXT: ; in Loop: Header=BB2_2 Depth=1 -; GFX900-NEXT: flat_store_dword v[1:2], v0 -; GFX900-NEXT: s_branch .LBB2_1 -; GFX900-NEXT: .LBB2_8: ; %end -; GFX900-NEXT: s_endpgm -entry: - %a = tail call i32 @llvm.amdgcn.workitem.id.x() - br label %loop - -loop: - %entry_phi = phi i32 [ 0, %entry ], [ %a15, %latch ] - %load = load %pair, ptr %ptr - br i1 %cond, label %if, label %else - -if: - %cmp = icmp sgt i32 %entry_phi, 10 - br i1 %cmp, label %then, label %else - -then: - %a_then = extractvalue %pair %load, 0 - store i32 %a, ptr %ptr, align 4 - br label %latch - -else: - %a2 = extractvalue %pair %load, 1 - %y = extractvalue %pair %load, 0 - %a_else = add i32 %y, %a2 - br label %latch - -latch: - %a_test = phi i32 [ %a_then, %then ], [ %a_else, %else ] - store i32 %a_test, ptr %ptr - %a15 = add nsw i32 %a_test, 20 - %a16 = icmp slt i32 %a15, 255 - br i1 %a16, label %loop, label %end - -end: - ret void -} diff --git a/llvm/test/Transforms/StructurizeCFG/hoist-zerocost.ll b/llvm/test/Transforms/StructurizeCFG/hoist-zerocost.ll deleted file mode 100644 index 10d4fa2be0a70..0000000000000 --- a/llvm/test/Transforms/StructurizeCFG/hoist-zerocost.ll +++ /dev/null @@ -1,161 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 -; RUN: opt -S -passes=structurizecfg < %s | FileCheck %s - - -%pair = type { i32, i32 } -define void @test_if_then_else(ptr %ptr, i1 %cond) { -; CHECK-LABEL: define void @test_if_then_else( -; CHECK-SAME: ptr [[PTR:%.*]], i1 [[COND:%.*]]) { -; CHECK-NEXT: [[ENTRY:.*]]: -; CHECK-NEXT: [[COND_INV:%.*]] = xor i1 [[COND]], true -; CHECK-NEXT: [[LOAD_THEN:%.*]] = load [[PAIR:%.*]], ptr [[PTR]], align 4 -; CHECK-NEXT: [[A_THEN:%.*]] = extractvalue [[PAIR]] [[LOAD_THEN]], 0 -; CHECK-NEXT: br i1 [[COND_INV]], label %[[ELSE:.*]], label %[[FLOW:.*]] -; CHECK: [[FLOW]]: -; 
CHECK-NEXT: [[TMP0:%.*]] = phi i32 [ [[SUM_ELSE:%.*]], %[[ELSE]] ], [ [[A_THEN]], %[[ENTRY]] ] -; CHECK-NEXT: [[TMP1:%.*]] = phi i1 [ false, %[[ELSE]] ], [ true, %[[ENTRY]] ] -; CHECK-NEXT: br i1 [[TMP1]], label %[[THEN:.*]], label %[[MERGE:.*]] -; CHECK: [[THEN]]: -; CHECK-NEXT: br label %[[MERGE]] -; CHECK: [[ELSE]]: -; CHECK-NEXT: [[A_ELSE:%.*]] = extractvalue [[PAIR]] [[LOAD_THEN]], 0 -; CHECK-NEXT: [[SUM_ELSE]] = add i32 [[A_ELSE]], 1 -; CHECK-NEXT: br label %[[FLOW]] -; CHECK: [[MERGE]]: -; CHECK-NEXT: store i32 [[TMP0]], ptr [[PTR]], align 4 -; CHECK-NEXT: ret void -; -entry: - %load_then = load %pair, ptr %ptr - br i1 %cond, label %then, label %else - -then: - %a_then = extractvalue %pair %load_then, 0 - br label %merge - -else: - %a_else = extractvalue %pair %load_then, 0 - %sum_else = add i32 %a_else, 1 - br label %merge - -merge: - %phi = phi i32 [ %a_then, %then ], [ %sum_else, %else ] - store i32 %phi, ptr %ptr - ret void -} - -define void @test_if_else_then(ptr %ptr, i1 %cond) { -; CHECK-LABEL: define void @test_if_else_then( -; CHECK-SAME: ptr [[PTR:%.*]], i1 [[COND:%.*]]) { -; CHECK-NEXT: [[ENTRY:.*]]: -; CHECK-NEXT: [[COND_INV:%.*]] = xor i1 [[COND]], true -; CHECK-NEXT: [[LOAD_THEN:%.*]] = load [[PAIR:%.*]], ptr [[PTR]], align 4 -; CHECK-NEXT: br i1 [[COND_INV]], label %[[THEN:.*]], label %[[FLOW:.*]] -; CHECK: [[THEN]]: -; CHECK-NEXT: [[A_THEN:%.*]] = extractvalue [[PAIR]] [[LOAD_THEN]], 0 -; CHECK-NEXT: br label %[[FLOW]] -; CHECK: [[FLOW]]: -; CHECK-NEXT: [[TMP0:%.*]] = phi i32 [ [[A_THEN]], %[[THEN]] ], [ poison, %[[ENTRY]] ] -; CHECK-NEXT: [[TMP1:%.*]] = phi i1 [ false, %[[THEN]] ], [ true, %[[ENTRY]] ] -; CHECK-NEXT: br i1 [[TMP1]], label %[[ELSE:.*]], label %[[MERGE:.*]] -; CHECK: [[ELSE]]: -; CHECK-NEXT: [[A_ELSE:%.*]] = extractvalue [[PAIR]] [[LOAD_THEN]], 0 -; CHECK-NEXT: [[SUM_ELSE:%.*]] = add i32 [[A_ELSE]], 1 -; CHECK-NEXT: br label %[[MERGE]] -; CHECK: [[MERGE]]: -; CHECK-NEXT: [[PHI:%.*]] = phi i32 [ [[TMP0]], %[[FLOW]] ], [ 
[[SUM_ELSE]], %[[ELSE]] ] -; CHECK-NEXT: store i32 [[PHI]], ptr [[PTR]], align 4 -; CHECK-NEXT: ret void -; -entry: - %load_then = load %pair, ptr %ptr - br i1 %cond, label %else, label %then - -then: - %a_then = extractvalue %pair %load_then, 0 - br label %merge - -else: - %a_else = extractvalue %pair %load_then, 0 - %sum_else = add i32 %a_else, 1 - br label %merge - -merge: - %phi = phi i32 [ %a_then, %then ], [ %sum_else, %else ] - store i32 %phi, ptr %ptr - ret void -} - -define amdgpu_kernel void @test_loop_with_if( ptr %ptr, i1 %cond) #0 { -; CHECK-LABEL: define amdgpu_kernel void @test_loop_with_if( -; CHECK-SAME: ptr [[PTR:%.*]], i1 [[COND:%.*]]) { -; CHECK-NEXT: [[ENTRY:.*]]: -; CHECK-NEXT: [[COND_INV:%.*]] = xor i1 [[COND]], true -; CHECK-NEXT: [[I:%.*]] = tail call i32 @llvm.amdgcn.workitem.id.x() -; CHECK-NEXT: br label %[[LOOP:.*]] -; CHECK: [[LOOP]]: -; CHECK-NEXT: [[I3:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[I15:%.*]], %[[LATCH:.*]] ] -; CHECK-NEXT: [[LOAD:%.*]] = load [[PAIR:%.*]], ptr [[PTR]], align 4 -; CHECK-NEXT: [[A_THEN:%.*]] = extractvalue [[PAIR]] [[LOAD]], 0 -; CHECK-NEXT: br i1 [[COND]], label %[[IF:.*]], label %[[FLOW:.*]] -; CHECK: [[IF]]: -; CHECK-NEXT: [[I9:%.*]] = icmp sle i32 [[I3]], 10 -; CHECK-NEXT: br label %[[FLOW]] -; CHECK: [[FLOW1:.*]]: -; CHECK-NEXT: [[TMP0:%.*]] = phi i32 [ [[Y:%.*]], %[[ELSE:.*]] ], [ [[A_THEN]], %[[FLOW]] ] -; CHECK-NEXT: [[TMP1:%.*]] = phi i1 [ false, %[[ELSE]] ], [ [[TMP2:%.*]], %[[FLOW]] ] -; CHECK-NEXT: br i1 [[TMP1]], label %[[THEN:.*]], label %[[LATCH]] -; CHECK: [[THEN]]: -; CHECK-NEXT: store i32 [[I]], ptr [[PTR]], align 4 -; CHECK-NEXT: br label %[[LATCH]] -; CHECK: [[FLOW]]: -; CHECK-NEXT: [[TMP2]] = phi i1 [ true, %[[IF]] ], [ false, %[[LOOP]] ] -; CHECK-NEXT: [[TMP3:%.*]] = phi i1 [ [[I9]], %[[IF]] ], [ [[COND_INV]], %[[LOOP]] ] -; CHECK-NEXT: br i1 [[TMP3]], label %[[ELSE]], label %[[FLOW1]] -; CHECK: [[ELSE]]: -; CHECK-NEXT: [[I2:%.*]] = extractvalue [[PAIR]] [[LOAD]], 1 -; CHECK-NEXT: 
[[A_ELSE:%.*]] = extractvalue [[PAIR]] [[LOAD]], 0 -; CHECK-NEXT: [[Y]] = add i32 [[A_ELSE]], [[I2]] -; CHECK-NEXT: br label %[[FLOW1]] -; CHECK: [[LATCH]]: -; CHECK-NEXT: store i32 [[TMP0]], ptr [[PTR]], align 4 -; CHECK-NEXT: [[I15]] = add nsw i32 [[TMP0]], 20 -; CHECK-NEXT: [[I16:%.*]] = icmp sge i32 [[I15]], 255 -; CHECK-NEXT: br i1 [[I16]], label %[[END:.*]], label %[[LOOP]] -; CHECK: [[END]]: -; CHECK-NEXT: ret void -; -entry: - %a = tail call i32 @llvm.amdgcn.workitem.id.x() - br label %loop - -loop: - %entry_phi = phi i32 [ 0, %entry ], [ %a15, %latch ] - %load = load %pair, ptr %ptr - br i1 %cond, label %if, label %else - -if: - %cmp = icmp sgt i32 %entry_phi, 10 - br i1 %cmp, label %then, label %else - -then: - %a_then = extractvalue %pair %load, 0 - store i32 %a, ptr %ptr, align 4 - br label %latch - -else: - %a2 = extractvalue %pair %load, 1 - %y = extractvalue %pair %load, 0 - %a_else = add i32 %y, %a2 - br label %latch - -latch: - %a_test = phi i32 [ %a_then, %then ], [ %a_else, %else ] - store i32 %a_test, ptr %ptr - %a15 = add nsw i32 %a_test, 20 - %a16 = icmp slt i32 %a15, 255 - br i1 %a16, label %loop, label %end - -end: - ret void -} _______________________________________________ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits