https://github.com/jdenny-ornl updated https://github.com/llvm/llvm-project/pull/159163
>From 5a9959313c0aebc1c707d19e30055cb925be7760 Mon Sep 17 00:00:00 2001 From: "Joel E. Denny" <jdenny.o...@gmail.com> Date: Tue, 16 Sep 2025 16:03:11 -0400 Subject: [PATCH 1/3] [LoopUnroll] Fix block frequencies for epilogue As another step in issue #135812, this patch fixes block frequencies for partial loop unrolling with an epilogue remainder loop. It does not fully handle the case when the epilogue loop itself is unrolled. That will be handled in the next patch. For the guard and latch of each of the unrolled loop and epilogue loop, this patch sets branch weights derived directly from the original loop latch branch weights. The total frequency of the original loop body, summed across all its occurrences in the unrolled loop and epilogue loop, is the same as in the original loop. This patch also sets `llvm.loop.estimated_trip_count` for the epilogue loop instead of relying on the epilogue's latch branch weights to imply it. This patch removes the XFAIL directives that PR #157754 added to the test suite. --- .../include/llvm/Transforms/Utils/LoopUtils.h | 32 ++++ .../llvm/Transforms/Utils/UnrollLoop.h | 4 +- llvm/lib/Transforms/Utils/LoopUnroll.cpp | 31 ++-- .../Transforms/Utils/LoopUnrollRuntime.cpp | 94 ++++++++-- llvm/lib/Transforms/Utils/LoopUtils.cpp | 48 ++++++ .../branch-weights-freq/unroll-epilog.ll | 160 ++++++++++++++++++ .../runtime-exit-phi-scev-invalidation.ll | 4 +- .../LoopUnroll/runtime-loop-branchweight.ll | 56 +++++- .../Transforms/LoopUnroll/runtime-loop.ll | 9 +- .../LoopUnroll/unroll-heuristics-pgo.ll | 64 +++++-- 10 files changed, 448 insertions(+), 54 deletions(-) create mode 100644 llvm/test/Transforms/LoopUnroll/branch-weights-freq/unroll-epilog.ll diff --git a/llvm/include/llvm/Transforms/Utils/LoopUtils.h b/llvm/include/llvm/Transforms/Utils/LoopUtils.h index c5dbb2bdd1dd8..71754b8f62a16 100644 --- a/llvm/include/llvm/Transforms/Utils/LoopUtils.h +++ b/llvm/include/llvm/Transforms/Utils/LoopUtils.h @@ -365,6 +365,38 @@ LLVM_ABI bool setLoopEstimatedTripCount( Loop *L, unsigned EstimatedTripCount, std::optional<unsigned> EstimatedLoopInvocationWeight = std::nullopt); +/// Based on branch weight metadata, return either: +/// - \c std::nullopt if the implementation is unable to handle the loop form +/// of \p L (e.g., \p L must have a latch block that controls the loop exit). +/// - Else, the estimated probability that, at the end of any iteration, the +/// latch of \p L will start another iteration. The result \c P is such that +/// `0 <= P <= 1`, and `1 - P` is the probability of exiting the loop. +std::optional<double> getLoopProbability(Loop *L); + +/// Set branch weight metadata for the latch of \p L to indicate that, at the +/// end of any iteration, its estimated probability of starting another +/// iteration is \p P. Return false if the implementation is unable to handle +/// the loop form of \p L (e.g., \p L must have a latch block that controls the +/// loop exit). Otherwise, return true. +bool setLoopProbability(Loop *L, double P); + +/// Based on branch weight metadata, return either: +/// - \c std::nullopt if the implementation cannot extract the probability +/// (e.g., \p B must have exactly two target labels, so it must be a +/// conditional branch). +/// - The probability \c P that control flows from \p B to its first target +/// label such that `1 - P` is the probability of control flowing to its +/// second target label, or vice-versa if \p ForFirstTarget is false. +std::optional<double> getBranchProbability(BranchInst *B, bool ForFirstTarget); + +/// Set branch weight metadata for \p B to indicate that \p P and `1 - P` are +/// the probabilities of control flowing to its first and second target labels, +/// respectively, or vice-versa if \p ForFirstTarget is false. Return false if +/// the implementation cannot set the probability (e.g., \p B must have exactly +/// two target labels, so it must be a conditional branch). Otherwise, return +/// true. +bool setBranchProbability(BranchInst *B, double P, bool ForFirstTarget); + /// Check inner loop (L) backedge count is known to be invariant on all /// iterations of its outer loop. If the loop has no parent, this is trivially /// true. diff --git a/llvm/include/llvm/Transforms/Utils/UnrollLoop.h b/llvm/include/llvm/Transforms/Utils/UnrollLoop.h index 871c13d972470..571a0af6fd0db 100644 --- a/llvm/include/llvm/Transforms/Utils/UnrollLoop.h +++ b/llvm/include/llvm/Transforms/Utils/UnrollLoop.h @@ -97,7 +97,9 @@ LLVM_ABI bool UnrollRuntimeLoopRemainder( LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT, AssumptionCache *AC, const TargetTransformInfo *TTI, bool PreserveLCSSA, unsigned SCEVExpansionBudget, bool RuntimeUnrollMultiExit, - Loop **ResultLoop = nullptr); + Loop **ResultLoop = nullptr, + std::optional<unsigned> OriginalTripCount = std::nullopt, + std::optional<double> OriginalLoopProb = std::nullopt); LLVM_ABI LoopUnrollResult UnrollAndJamLoop( Loop *L, unsigned Count, unsigned TripCount, unsigned TripMultiple, diff --git a/llvm/lib/Transforms/Utils/LoopUnroll.cpp b/llvm/lib/Transforms/Utils/LoopUnroll.cpp index 0192ce6a960ff..b40d81100a2bd 100644 --- a/llvm/lib/Transforms/Utils/LoopUnroll.cpp +++ b/llvm/lib/Transforms/Utils/LoopUnroll.cpp @@ -65,6 +65,7 @@ #include "llvm/Transforms/Utils/UnrollLoop.h" #include "llvm/Transforms/Utils/ValueMapper.h" #include <assert.h> +#include <cmath> #include <numeric> #include <type_traits> #include <vector> @@ -501,6 +502,7 @@ llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI, const bool MaxOrZero = SE->isBackedgeTakenCountMaxOrZero(L); std::optional<unsigned> OriginalTripCount = llvm::getLoopEstimatedTripCount(L); + std::optional<double> OriginalLoopProb = llvm::getLoopProbability(L); // Effectively "DCE" unrolled iterations that are beyond the max tripcount // and will never be executed. @@ -591,11 +593,11 @@ llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI, : isEpilogProfitable(L); if (ULO.Runtime && - !UnrollRuntimeLoopRemainder(L, ULO.Count, ULO.AllowExpensiveTripCount, - EpilogProfitability, ULO.UnrollRemainder, - ULO.ForgetAllSCEV, LI, SE, DT, AC, TTI, - PreserveLCSSA, ULO.SCEVExpansionBudget, - ULO.RuntimeUnrollMultiExit, RemainderLoop)) { + !UnrollRuntimeLoopRemainder( + L, ULO.Count, ULO.AllowExpensiveTripCount, EpilogProfitability, + ULO.UnrollRemainder, ULO.ForgetAllSCEV, LI, SE, DT, AC, TTI, + PreserveLCSSA, ULO.SCEVExpansionBudget, ULO.RuntimeUnrollMultiExit, + RemainderLoop, OriginalTripCount, OriginalLoopProb)) { if (ULO.Force) ULO.Runtime = false; else { @@ -1130,13 +1132,13 @@ llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI, LI->erase(L); // We shouldn't try to use `L` anymore. L = nullptr; - } else if (OriginalTripCount) { + } else { // Update metadata for the loop's branch weights and estimated trip count: // - If ULO.Runtime, UnrollRuntimeLoopRemainder sets the guard branch // weights, latch branch weights, and estimated trip count of the // remainder loop it creates. It also sets the branch weights for the // unrolled loop guard it creates. The branch weights for the unrolled - // loop latch are adjusted below. FIXME: Actually handle ULO.Runtime. + // loop latch are adjusted below. FIXME: Handle prologue loops. // - Otherwise, if unrolled loop iteration latches become unconditional, // branch weights are adjusted above. FIXME: Actually handle such // unconditional latches. @@ -1159,10 +1161,17 @@ llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI, // the unrolled loop as a whole without considering the branch weights for // each unrolled iteration's latch within it, we store the new trip count as // separate metadata. - unsigned NewTripCount = *OriginalTripCount / ULO.Count; - if (!ULO.Runtime && *OriginalTripCount % ULO.Count) - NewTripCount += 1; - setLoopEstimatedTripCount(L, NewTripCount); + if (OriginalLoopProb && ULO.Runtime && EpilogProfitability) { + // Where p is always the probability of executing at least 1 more + // iteration, the probability for at least n more iterations is p^n. + setLoopProbability(L, pow(*OriginalLoopProb, ULO.Count)); + } + if (OriginalTripCount) { + unsigned NewTripCount = *OriginalTripCount / ULO.Count; + if (!ULO.Runtime && *OriginalTripCount % ULO.Count) + NewTripCount += 1; + setLoopEstimatedTripCount(L, NewTripCount); + } } // LoopInfo should not be valid, confirm that. diff --git a/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp b/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp index 6312831cf0ee0..9ba93fcade2bd 100644 --- a/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp +++ b/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp @@ -40,6 +40,7 @@ #include "llvm/Transforms/Utils/LoopUtils.h" #include "llvm/Transforms/Utils/ScalarEvolutionExpander.h" #include "llvm/Transforms/Utils/UnrollLoop.h" +#include <cmath> using namespace llvm; @@ -195,6 +196,20 @@ static void ConnectProlog(Loop *L, Value *BECount, unsigned Count, } } +/// Assume, due to our position in the remainder loop or its guard, anywhere +/// from 0 to \p N more iterations can possibly execute. Among such cases in +/// the original loop (with loop probability \p OriginalLoopProb), what is the +/// probability of executing at least one more iteration? +static double probOfNextInRemainder(double OriginalLoopProb, unsigned N) { + // Each of these variables holds the original loop's probability that the + // number of iterations it will execute is some m in the specified range. + double ProbOne = OriginalLoopProb; // 1 <= m + double ProbTooMany = pow(ProbOne, N + 1); // N + 1 <= m + double ProbNotTooMany = 1 - ProbTooMany; // 0 <= m <= N + double ProbOneNotTooMany = ProbOne - ProbTooMany; // 1 <= m <= N + return ProbOneNotTooMany / ProbNotTooMany; +} + /// Connect the unrolling epilog code to the original loop. /// The unrolling epilog code contains code to execute the /// 'extra' iterations if the run-time trip count modulo the @@ -221,7 +236,8 @@ static void ConnectEpilog(Loop *L, Value *ModVal, BasicBlock *NewExit, BasicBlock *EpilogPreHeader, BasicBlock *NewPreHeader, ValueToValueMapTy &VMap, DominatorTree *DT, LoopInfo *LI, bool PreserveLCSSA, ScalarEvolution &SE, - unsigned Count, AssumptionCache &AC) { + unsigned Count, AssumptionCache &AC, + std::optional<double> OriginalLoopProb) { BasicBlock *Latch = L->getLoopLatch(); assert(Latch && "Loop must have a latch"); BasicBlock *EpilogLatch = cast<BasicBlock>(VMap[Latch]); @@ -332,12 +348,18 @@ static void ConnectEpilog(Loop *L, Value *ModVal, BasicBlock *NewExit, PreserveLCSSA); // Add the branch to the exit block (around the epilog loop) MDNode *BranchWeights = nullptr; - if (hasBranchWeightMD(*Latch->getTerminator())) { + if (!OriginalLoopProb && hasBranchWeightMD(*Latch->getTerminator())) { // Assume equal distribution in interval [0, Count). MDBuilder MDB(B.getContext()); BranchWeights = MDB.createBranchWeights(1, Count - 1); } - B.CreateCondBr(BrLoopExit, EpilogPreHeader, Exit, BranchWeights); + BranchInst *RemainderLoopGuard = + B.CreateCondBr(BrLoopExit, EpilogPreHeader, Exit, BranchWeights); + if (OriginalLoopProb) { + setBranchProbability(RemainderLoopGuard, + probOfNextInRemainder(*OriginalLoopProb, Count - 1), + /*ForFirstTarget=*/true); + } InsertPt->eraseFromParent(); if (DT) { auto *NewDom = DT->findNearestCommonDominator(Exit, NewExit); @@ -357,14 +379,15 @@ static void ConnectEpilog(Loop *L, Value *ModVal, BasicBlock *NewExit, /// The cloned blocks should be inserted between InsertTop and InsertBot. /// InsertTop should be new preheader, InsertBot new loop exit. /// Returns the new cloned loop that is created. -static Loop * -CloneLoopBlocks(Loop *L, Value *NewIter, const bool UseEpilogRemainder, - const bool UnrollRemainder, - BasicBlock *InsertTop, - BasicBlock *InsertBot, BasicBlock *Preheader, +static Loop *CloneLoopBlocks(Loop *L, Value *NewIter, + const bool UseEpilogRemainder, + const bool UnrollRemainder, BasicBlock *InsertTop, + BasicBlock *InsertBot, BasicBlock *Preheader, std::vector<BasicBlock *> &NewBlocks, LoopBlocksDFS &LoopBlocks, ValueToValueMapTy &VMap, - DominatorTree *DT, LoopInfo *LI, unsigned Count) { + DominatorTree *DT, LoopInfo *LI, unsigned Count, + std::optional<unsigned> OriginalTripCount, + std::optional<double> OriginalLoopProb) { StringRef suffix = UseEpilogRemainder ? "epil" : "prol"; BasicBlock *Header = L->getHeader(); BasicBlock *Latch = L->getLoopLatch(); @@ -419,7 +442,8 @@ CloneLoopBlocks(Loop *L, Value *NewIter, const bool UseEpilogRemainder, Builder.CreateAdd(NewIdx, One, NewIdx->getName() + ".next"); Value *IdxCmp = Builder.CreateICmpNE(IdxNext, NewIter, NewIdx->getName() + ".cmp"); MDNode *BranchWeights = nullptr; - if (hasBranchWeightMD(*LatchBR)) { + if (!(OriginalLoopProb && UseEpilogRemainder) && + hasBranchWeightMD(*LatchBR)) { uint32_t ExitWeight; uint32_t BackEdgeWeight; if (Count >= 3) { @@ -437,7 +461,25 @@ CloneLoopBlocks(Loop *L, Value *NewIter, const bool UseEpilogRemainder, MDBuilder MDB(Builder.getContext()); BranchWeights = MDB.createBranchWeights(BackEdgeWeight, ExitWeight); } - Builder.CreateCondBr(IdxCmp, FirstLoopBB, InsertBot, BranchWeights); + BranchInst *RemainderLoopLatch = + Builder.CreateCondBr(IdxCmp, FirstLoopBB, InsertBot, BranchWeights); + if (OriginalLoopProb && UseEpilogRemainder) { + // Compute the total frequency of the original loop body from the + // remainder iterations. Once we've reached them, the first of them + // always executes, so it's frequency and probability are 1. + double FreqRemIters = 1; + if (Count > 2) { + double ProbReaching = 1; + for (unsigned N = Count - 2; N >= 1; --N) { + ProbReaching *= probOfNextInRemainder(*OriginalLoopProb, N); + FreqRemIters += ProbReaching; + } + } + // Solve for the loop probability that would produce that frequency. + // Sum(i=0..inf)(Prob^i) = 1/(1-Prob) = FreqRemIters. + double Prob = 1 - 1 / FreqRemIters; + setBranchProbability(RemainderLoopLatch, Prob, /*ForFirstTarget=*/true); + } NewIdx->addIncoming(Zero, InsertTop); NewIdx->addIncoming(IdxNext, NewBB); LatchBR->eraseFromParent(); @@ -469,6 +511,8 @@ CloneLoopBlocks(Loop *L, Value *NewIter, const bool UseEpilogRemainder, std::optional<MDNode *> NewLoopID = makeFollowupLoopID( LoopID, {LLVMLoopUnrollFollowupAll, LLVMLoopUnrollFollowupRemainder}); + if (OriginalTripCount && UseEpilogRemainder) + setLoopEstimatedTripCount(NewLoop, *OriginalTripCount % Count); if (NewLoopID) { NewLoop->setLoopID(*NewLoopID); @@ -603,7 +647,8 @@ bool llvm::UnrollRuntimeLoopRemainder( LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT, AssumptionCache *AC, const TargetTransformInfo *TTI, bool PreserveLCSSA, unsigned SCEVExpansionBudget, bool RuntimeUnrollMultiExit, - Loop **ResultLoop) { + Loop **ResultLoop, std::optional<unsigned> OriginalTripCount, + std::optional<double> OriginalLoopProb) { LLVM_DEBUG(dbgs() << "Trying runtime unrolling on Loop: \n"); LLVM_DEBUG(L->dump()); LLVM_DEBUG(UseEpilogRemainder ? dbgs() << "Using epilog remainder.\n" @@ -823,12 +868,23 @@ bool llvm::UnrollRuntimeLoopRemainder( BasicBlock *UnrollingLoop = UseEpilogRemainder ? NewPreHeader : PrologExit; // Branch to either remainder (extra iterations) loop or unrolling loop. MDNode *BranchWeights = nullptr; - if (hasBranchWeightMD(*Latch->getTerminator())) { + if (!(OriginalLoopProb && UseEpilogRemainder) && + hasBranchWeightMD(*Latch->getTerminator())) { // Assume loop is nearly always entered. MDBuilder MDB(B.getContext()); BranchWeights = MDB.createBranchWeights(EpilogHeaderWeights); } - B.CreateCondBr(BranchVal, RemainderLoop, UnrollingLoop, BranchWeights); + BranchInst *UnrollingLoopGuard = + B.CreateCondBr(BranchVal, RemainderLoop, UnrollingLoop, BranchWeights); + if (OriginalLoopProb && UseEpilogRemainder) { + // The original loop's first iteration always happens. Compute the + // probability of the original loop executing Count-1 iterations after that + // to complete the first iteration of the unrolled loop. + double ProbOne = *OriginalLoopProb; + double ProbRest = pow(ProbOne, Count - 1); + setBranchProbability(UnrollingLoopGuard, ProbRest, + /*ForFirstTarget=*/false); + } PreHeaderBR->eraseFromParent(); if (DT) { if (UseEpilogRemainder) @@ -855,9 +911,10 @@ bool llvm::UnrollRuntimeLoopRemainder( // iterations. This function adds the appropriate CFG connections. BasicBlock *InsertBot = UseEpilogRemainder ? LatchExit : PrologExit; BasicBlock *InsertTop = UseEpilogRemainder ? EpilogPreHeader : PrologPreHeader; - Loop *remainderLoop = CloneLoopBlocks( - L, ModVal, UseEpilogRemainder, UnrollRemainder, InsertTop, InsertBot, - NewPreHeader, NewBlocks, LoopBlocks, VMap, DT, LI, Count); + Loop *remainderLoop = + CloneLoopBlocks(L, ModVal, UseEpilogRemainder, UnrollRemainder, InsertTop, + InsertBot, NewPreHeader, NewBlocks, LoopBlocks, VMap, DT, + LI, Count, OriginalTripCount, OriginalLoopProb); // Insert the cloned blocks into the function. F->splice(InsertBot->getIterator(), F, NewBlocks[0]->getIterator(), F->end()); @@ -956,7 +1013,8 @@ bool llvm::UnrollRuntimeLoopRemainder( // Connect the epilog code to the original loop and update the // PHI functions. ConnectEpilog(L, ModVal, NewExit, LatchExit, PreHeader, EpilogPreHeader, - NewPreHeader, VMap, DT, LI, PreserveLCSSA, *SE, Count, *AC); + NewPreHeader, VMap, DT, LI, PreserveLCSSA, *SE, Count, *AC, + OriginalLoopProb); // Update counter in loop for unrolling. // Use an incrementing IV. Pre-incr/post-incr is backedge/trip count. diff --git a/llvm/lib/Transforms/Utils/LoopUtils.cpp b/llvm/lib/Transforms/Utils/LoopUtils.cpp index 7b1a7ce6995f8..4c05774f7f48c 100644 --- a/llvm/lib/Transforms/Utils/LoopUtils.cpp +++ b/llvm/lib/Transforms/Utils/LoopUtils.cpp @@ -46,6 +46,7 @@ #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/ScalarEvolutionExpander.h" +#include <cmath> using namespace llvm; using namespace llvm::PatternMatch; @@ -972,6 +973,53 @@ bool llvm::setLoopEstimatedTripCount( return true; } +std::optional<double> llvm::getLoopProbability(Loop *L) { + BranchInst *LatchBranch = getExpectedExitLoopLatchBranch(L); + if (!LatchBranch) + return std::nullopt; + bool FirstTargetIsLoop = LatchBranch->getSuccessor(0) == L->getHeader(); + return getBranchProbability(LatchBranch, FirstTargetIsLoop); +} + +bool llvm::setLoopProbability(Loop *L, double P) { + BranchInst *LatchBranch = getExpectedExitLoopLatchBranch(L); + if (!LatchBranch) + return false; + bool FirstTargetIsLoop = LatchBranch->getSuccessor(0) == L->getHeader(); + return setBranchProbability(LatchBranch, P, FirstTargetIsLoop); +} + +std::optional<double> llvm::getBranchProbability(BranchInst *B, + bool ForFirstTarget) { + if (B->getNumSuccessors() != 2) + return std::nullopt; + uint64_t Weight0, Weight1; + if (!extractBranchWeights(*B, Weight0, Weight1)) + return std::nullopt; + if (!ForFirstTarget) + std::swap(Weight0, Weight1); + return double(Weight0) / (double(Weight0) + double(Weight1)); +} + +bool llvm::setBranchProbability(BranchInst *B, double P, bool ForFirstTarget) { + if (B->getNumSuccessors() != 2) + return false; + + // Sum should be some large number so that the weights accurately encode P, + // but it should not be so large that some branch weights will print as + // negative in LLVM IR as that makes LLVM tests harder to maintain. + const uint64_t Sum = 1000000000; + uint64_t Weight0 = round(P * Sum); + uint64_t Weight1 = round((1 - P) * Sum); + if (!ForFirstTarget) + std::swap(Weight0, Weight1); + + MDBuilder MDB(B->getContext()); + B->setMetadata(LLVMContext::MD_prof, + MDB.createBranchWeights(Weight0, Weight1)); + return true; +} + bool llvm::hasIterationCountInvariantInParent(Loop *InnerLoop, ScalarEvolution &SE) { Loop *OuterL = InnerLoop->getParentLoop(); diff --git a/llvm/test/Transforms/LoopUnroll/branch-weights-freq/unroll-epilog.ll b/llvm/test/Transforms/LoopUnroll/branch-weights-freq/unroll-epilog.ll new file mode 100644 index 0000000000000..c7c91c6f6b815 --- /dev/null +++ b/llvm/test/Transforms/LoopUnroll/branch-weights-freq/unroll-epilog.ll @@ -0,0 +1,160 @@ +; Test branch weight metadata, estimated trip count metadata, and block +; frequencies after loop unrolling with an epilogue. + +; ------------------------------------------------------------------------------ +; Define substitutions. +; +; Check original loop body frequency. +; DEFINE: %{bf-fc} = opt %s -S -passes='print<block-freq>' 2>&1 | \ +; DEFINE: FileCheck %s -check-prefixes +; +; Unroll loops and then check block frequency. The -implicit-check-not options +; make sure that no additional labels or @f calls show up. +; DEFINE: %{ur-bf} = opt %s -S -passes='loop-unroll,print<block-freq>' 2>&1 +; DEFINE: %{fc} = FileCheck %s \ +; DEFINE: -implicit-check-not='{{^( *- )?[^ ;]*:}}' \ +; DEFINE: -implicit-check-not='call void @f' -check-prefixes + +; ------------------------------------------------------------------------------ +; Check various interesting unroll count values relative to the original loop's +; estimated trip count of 11 (e.g., minimum and boundary values). +; +; RUN: %{bf-fc} ALL,ORIG +; RUN: %{ur-bf} -unroll-count=2 -unroll-runtime | %{fc} ALL,UR,UR2 +; RUN: %{ur-bf} -unroll-count=4 -unroll-runtime | %{fc} ALL,UR,UR4 +; RUN: %{ur-bf} -unroll-count=10 -unroll-runtime | %{fc} ALL,UR,UR10 +; RUN: %{ur-bf} -unroll-count=11 -unroll-runtime | %{fc} ALL,UR,UR11 +; RUN: %{ur-bf} -unroll-count=12 -unroll-runtime | %{fc} ALL,UR,UR12 + +; ------------------------------------------------------------------------------ +; Check the iteration frequencies, which, when each is multiplied by the number +; of original loop bodies that execute within it, should sum to almost exactly +; the original loop body frequency. +; +; ALL-LABEL: block-frequency-info: test +; +; ORIG: - [[ENTRY:.*]]: +; ORIG: - [[DO_BODY:.*]]: float = 11.0, +; ORIG: - [[DO_END:.*]]: +; +; UR: - [[ENTRY:.*]]: +; UR: - [[ENTRY_NEW:.*]]: +; UR2: - [[DO_BODY:.*]]: float = 5.2381, +; UR4: - [[DO_BODY:.*]]: float = 2.3702, +; UR10: - [[DO_BODY:.*]]: float = 0.6902, +; UR11: - [[DO_BODY:.*]]: float = 0.59359, +; UR12: - [[DO_BODY:.*]]: float = 0.5144, +; UR: - [[DO_END_UNR_LCSSA:.*]]: +; UR: - [[DO_BODY_EPIL_PREHEADER:.*]]: +; UR2: - [[DO_BODY_EPIL:.*]]: float = 0.52381, +; UR4: - [[DO_BODY_EPIL:.*]]: float = 1.5193, +; UR10: - [[DO_BODY_EPIL:.*]]: float = 4.098, +; UR11: - [[DO_BODY_EPIL:.*]]: float = 4.4705, +; UR12: - [[DO_BODY_EPIL:.*]]: float = 4.8272, +; UR4: - [[DO_END_EPILOG_LCSSA:.*]]: +; UR10: - [[DO_END_EPILOG_LCSSA:.*]]: +; UR11: - [[DO_END_EPILOG_LCSSA:.*]]: +; UR12: - [[DO_END_EPILOG_LCSSA:.*]]: +; UR: - [[DO_END:.*]]: + +; ------------------------------------------------------------------------------ +; Check the CFGs, including the number of original loop bodies that appear +; within each unrolled iteration. +; +; UR-LABEL: define void @test(i32 %{{.*}}) { +; UR: [[ENTRY]]: +; UR: br i1 %{{.*}}, label %[[DO_BODY_EPIL_PREHEADER]], label %[[ENTRY_NEW]], !prof ![[#PROF_UR_GUARD:]]{{$}} +; UR: [[ENTRY_NEW]]: +; UR: br label %[[DO_BODY]] +; UR: [[DO_BODY]]: +; UR2-COUNT-2: call void @f +; UR4-COUNT-4: call void @f +; UR10-COUNT-10: call void @f +; UR11-COUNT-11: call void @f +; UR12-COUNT-12: call void @f +; UR: br i1 %{{.*}}, label %[[DO_END_UNR_LCSSA]], label %[[DO_BODY]], !prof ![[#PROF_UR_LATCH:]], !llvm.loop ![[#LOOP_UR_LATCH:]]{{$}} +; UR: [[DO_END_UNR_LCSSA]]: +; UR: br i1 %{{.*}}, label %[[DO_BODY_EPIL_PREHEADER]], label %[[DO_END:.*]], !prof ![[#PROF_RM_GUARD:]]{{$}} +; UR: [[DO_BODY_EPIL_PREHEADER]]: +; UR: br label %[[DO_BODY_EPIL]] +; UR: [[DO_BODY_EPIL]]: +; UR: call void @f +; UR4: br i1 %{{.*}}, label %[[DO_BODY_EPIL]], label %[[DO_END_EPILOG_LCSSA]], !prof ![[#PROF_RM_LATCH:]], !llvm.loop ![[#LOOP_RM_LATCH:]]{{$}} +; UR10: br i1 %{{.*}}, label %[[DO_BODY_EPIL]], label %[[DO_END_EPILOG_LCSSA]], !prof ![[#PROF_RM_LATCH:]], !llvm.loop ![[#LOOP_RM_LATCH:]]{{$}} +; UR11: br i1 %{{.*}}, label %[[DO_BODY_EPIL]], label %[[DO_END_EPILOG_LCSSA]], !prof ![[#PROF_RM_LATCH:]], !llvm.loop ![[#LOOP_RM_LATCH:]]{{$}} +; UR12: br i1 %{{.*}}, label %[[DO_BODY_EPIL]], label %[[DO_END_EPILOG_LCSSA]], !prof ![[#PROF_RM_LATCH:]], !llvm.loop ![[#LOOP_RM_LATCH:]]{{$}} +; UR4: [[DO_END_EPILOG_LCSSA]]: +; UR10: [[DO_END_EPILOG_LCSSA]]: +; UR11: [[DO_END_EPILOG_LCSSA]]: +; UR12: [[DO_END_EPILOG_LCSSA]]: +; UR: br label %[[DO_END]] +; UR: [[DO_END]]: +; UR: ret void + +declare void @f(i32) + +define void @test(i32 %n) { +entry: + br label %do.body + +do.body: + %i = phi i32 [ 0, %entry ], [ %inc, %do.body ] + %inc = add i32 %i, 1 + call void @f(i32 %i) + %c = icmp sge i32 %inc, %n + br i1 %c, label %do.end, label %do.body, !prof !0 + +do.end: + ret void +} + +!0 = !{!"branch_weights", i32 1, i32 10} + +; ------------------------------------------------------------------------------ +; Check branch weight metadata and estimated trip count metadata. +; +; UR2: ![[#PROF_UR_GUARD]] = !{!"branch_weights", i32 9090909, i32 90909091} +; UR4: ![[#PROF_UR_GUARD]] = !{!"branch_weights", i32 24868520, i32 75131480} +; UR10: ![[#PROF_UR_GUARD]] = !{!"branch_weights", i32 57590238, i32 42409762} +; UR11: ![[#PROF_UR_GUARD]] = !{!"branch_weights", i32 61445671, i32 38554329} +; UR12: ![[#PROF_UR_GUARD]] = !{!"branch_weights", i32 64950610, i32 35049390} +; +; UR2: ![[#PROF_UR_LATCH]] = !{!"branch_weights", i32 17355372, i32 82644628} +; UR4: ![[#PROF_UR_LATCH]] = !{!"branch_weights", i32 31698654, i32 68301346} +; UR10: ![[#PROF_UR_LATCH]] = !{!"branch_weights", i32 61445671, i32 38554329} +; UR11: ![[#PROF_UR_LATCH]] = !{!"branch_weights", i32 64950610, i32 35049390} +; UR12: ![[#PROF_UR_LATCH]] = !{!"branch_weights", i32 68136918, i32 31863082} +; +; UR2: ![[#LOOP_UR_LATCH]] = distinct !{![[#LOOP_UR_LATCH]], ![[#LOOP_UR_TC:]], ![[#DISABLE:]]} +; UR4: ![[#LOOP_UR_LATCH]] = distinct !{![[#LOOP_UR_LATCH]], ![[#LOOP_UR_TC:]], ![[#DISABLE:]]} +; UR10: ![[#LOOP_UR_LATCH]] = distinct !{![[#LOOP_UR_LATCH]], ![[#LOOP_UR_TC:]], ![[#DISABLE:]]} +; UR11: ![[#LOOP_UR_LATCH]] = distinct !{![[#LOOP_UR_LATCH]], ![[#LOOP_UR_TC:]], ![[#DISABLE:]]} +; UR12: ![[#LOOP_UR_LATCH]] = distinct !{![[#LOOP_UR_LATCH]], ![[#LOOP_UR_TC:]], ![[#DISABLE:]]} +; +; UR2: ![[#LOOP_UR_TC]] = !{!"llvm.loop.estimated_trip_count", i32 5} +; UR4: ![[#LOOP_UR_TC]] = !{!"llvm.loop.estimated_trip_count", i32 2} +; UR10: ![[#LOOP_UR_TC]] = !{!"llvm.loop.estimated_trip_count", i32 1} +; UR11: ![[#LOOP_UR_TC]] = !{!"llvm.loop.estimated_trip_count", i32 1} +; UR12: ![[#LOOP_UR_TC]] = !{!"llvm.loop.estimated_trip_count", i32 0} +; UR: ![[#DISABLE]] = !{!"llvm.loop.unroll.disable"} +; +; UR2: ![[#PROF_RM_GUARD]] = !{!"branch_weights", i32 47619048, i32 52380952} +; UR4: ![[#PROF_RM_GUARD]] = !{!"branch_weights", i32 71320836, i32 28679164} +; UR10: ![[#PROF_RM_GUARD]] = !{!"branch_weights", i32 85204964, i32 14795036} +; UR11: ![[#PROF_RM_GUARD]] = !{!"branch_weights", i32 86003351, i32 13996649} +; UR12: ![[#PROF_RM_GUARD]] = !{!"branch_weights", i32 86657880, i32 13342120} +; +; UR4: ![[#PROF_RM_LATCH]] = !{!"branch_weights", i32 48361934, i32 51638066} +; UR10: ![[#PROF_RM_LATCH]] = !{!"branch_weights", i32 77129012, i32 22870988} +; UR11: ![[#PROF_RM_LATCH]] = !{!"branch_weights", i32 78838041, i32 21161959} +; UR12: ![[#PROF_RM_LATCH]] = !{!"branch_weights", i32 80252977, i32 19747023} + +; UR4: ![[#LOOP_RM_LATCH]] = distinct !{![[#LOOP_RM_LATCH]], ![[#LOOP_RM_TC:]], ![[#DISABLE:]]} +; UR10: ![[#LOOP_RM_LATCH]] = distinct !{![[#LOOP_RM_LATCH]], ![[#LOOP_UR_TC:]], ![[#DISABLE:]]} +; UR11: ![[#LOOP_RM_LATCH]] = distinct !{![[#LOOP_RM_LATCH]], ![[#LOOP_RM_TC:]], ![[#DISABLE:]]} +; UR12: ![[#LOOP_RM_LATCH]] = distinct !{![[#LOOP_RM_LATCH]], ![[#LOOP_RM_TC:]], ![[#DISABLE:]]} +; +; UR4: ![[#LOOP_RM_TC]] = !{!"llvm.loop.estimated_trip_count", i32 3} +; For UR10, llvm.loop.estimated_trip_count is the same for both loops. +; UR11: ![[#LOOP_RM_TC]] = !{!"llvm.loop.estimated_trip_count", i32 0} +; UR12: ![[#LOOP_RM_TC]] = !{!"llvm.loop.estimated_trip_count", i32 11} diff --git a/llvm/test/Transforms/LoopUnroll/runtime-exit-phi-scev-invalidation.ll b/llvm/test/Transforms/LoopUnroll/runtime-exit-phi-scev-invalidation.ll index 0c52b5a0edef8..047360178aa06 100644 --- a/llvm/test/Transforms/LoopUnroll/runtime-exit-phi-scev-invalidation.ll +++ b/llvm/test/Transforms/LoopUnroll/runtime-exit-phi-scev-invalidation.ll @@ -188,7 +188,7 @@ define void @pr56286(i64 %x, ptr %src, ptr %dst, ptr %ptr.src) !prof !0 { ; CHECK-NEXT: [[L_1_LCSSA_UNR:%.*]] = phi i32 [ poison, [[OUTER_HEADER]] ], [ [[L_1_LCSSA_UNR_PH]], [[INNER_1_HEADER_PROL_LOOPEXIT_UNR_LCSSA]] ] ; CHECK-NEXT: [[INNER_1_IV_UNR:%.*]] = phi i64 [ [[X]], [[OUTER_HEADER]] ], [ [[INNER_1_IV_UNR_PH]], [[INNER_1_HEADER_PROL_LOOPEXIT_UNR_LCSSA]] ] ; CHECK-NEXT: [[TMP4:%.*]] = icmp ult i64 [[TMP3]], 7 -; CHECK-NEXT: br i1 [[TMP4]], label [[OUTER_MIDDLE:%.*]], label [[OUTER_HEADER_NEW:%.*]], !prof [[PROF3]] +; CHECK-NEXT: br i1 [[TMP4]], label [[OUTER_MIDDLE:%.*]], label [[OUTER_HEADER_NEW:%.*]], !prof [[PROF6:![0-9]+]] ; CHECK: outer.header.new: ; CHECK-NEXT: br label [[INNER_1_HEADER:%.*]] ; CHECK: inner.1.header: @@ -232,7 +232,7 @@ define void @pr56286(i64 %x, ptr %src, ptr %dst, ptr %ptr.src) !prof !0 { ; CHECK-NEXT: store i32 [[L_1_7]], ptr [[DST]], align 8 ; CHECK-NEXT: [[INNER_1_IV_NEXT_7]] = add i64 [[INNER_1_IV]], 8 ; CHECK-NEXT: [[CMP_2_7:%.*]] = icmp sgt i64 [[INNER_1_IV_NEXT_6]], 0 -; CHECK-NEXT: br i1 [[CMP_2_7]], label [[OUTER_MIDDLE_UNR_LCSSA:%.*]], label [[INNER_1_HEADER]], !prof [[PROF6:![0-9]+]], !llvm.loop [[LOOP7:![0-9]+]] +; CHECK-NEXT: br i1 [[CMP_2_7]], label [[OUTER_MIDDLE_UNR_LCSSA:%.*]], label [[INNER_1_HEADER]], !prof [[PROF7:![0-9]+]], !llvm.loop [[LOOP8:![0-9]+]] ; CHECK: outer.middle.unr-lcssa: ; CHECK-NEXT: [[L_1_LCSSA_PH:%.*]] = phi i32 [ [[L_1_7]], [[INNER_1_LATCH_7]] ] ; CHECK-NEXT: br label [[OUTER_MIDDLE]] diff --git a/llvm/test/Transforms/LoopUnroll/runtime-loop-branchweight.ll b/llvm/test/Transforms/LoopUnroll/runtime-loop-branchweight.ll index 36c0497a002cf..489f7924f9054 100644 --- a/llvm/test/Transforms/LoopUnroll/runtime-loop-branchweight.ll +++ b/llvm/test/Transforms/LoopUnroll/runtime-loop-branchweight.ll @@ -1,14 +1,25 @@ ; RUN: opt < %s -S -passes=loop-unroll -unroll-runtime=true -unroll-count=4 | FileCheck %s -; XFAIL: * ;; Check that the remainder loop is properly assigned a branch weight for its latch branch. ; CHECK-LABEL: @test( -; CHECK-LABEL: for.body: -; CHECK: br i1 [[COND1:%.*]], label %for.end.loopexit.unr-lcssa, label %for.body, !prof ![[#PROF:]], !llvm.loop ![[#LOOP:]] -; CHECK-LABEL: for.body.epil: -; CHECK: br i1 [[COND2:%.*]], label %for.body.epil, label %for.end.loopexit.epilog-lcssa, !prof ![[#PROF2:]], !llvm.loop ![[#LOOP2:]] -; CHECK: ![[#PROF]] = !{!"branch_weights", i32 1, i32 2499} -; CHECK: ![[#PROF2]] = !{!"branch_weights", i32 1, i32 1} +; CHECK-LABEL: entry: +; CHECK: [[FOR_BODY_PREHEADER:.*]]: +; CHECK: br i1 %{{.*}}, label %[[FOR_BODY_EPIL_PREHEADER:.*]], label %[[FOR_BODY_PREHEADER_NEW:.*]], !prof ![[#PROF_UR_GUARD:]] +; CHECK: [[FOR_BODY_PREHEADER_NEW]]: +; CHECK: br label %for.body +; CHECK: for.body: +; CHECK: %add = add +; CHECK: %add.1 = add +; CHECK: %add.2 = add +; CHECK: %add.3 = add +; CHECK-NOT: %add.4 = add +; CHECK: br i1 %{{.*}}, label %[[FOR_END_LOOPEXIT_UNR_LCSSA:.*]], label %for.body, !prof ![[#PROF_UR_LATCH:]], !llvm.loop ![[#LOOP_UR_LATCH:]] +; CHECK: [[FOR_END_LOOPEXIT_UNR_LCSSA]]: +; CHECK: br i1 %{{.*}}, label %[[FOR_BODY_EPIL_PREHEADER]], label %[[FOR_END_LOOPEXIT:.*]], !prof ![[#PROF_RM_GUARD:]] +; CHECK: [[FOR_BODY_EPIL_PREHEADER]]: +; CHECK: br label %[[FOR_BODY_EPIL:.*]] +; CHECK: [[FOR_BODY_EPIL]]: +; CHECK: br i1 {{.*}}, label %[[FOR_BODY_EPIL]], label %[[FOR_END_LOOPEXIT_EPILOG_LCSSA:.*]], !prof ![[#PROF_RM_LATCH:]], !llvm.loop ![[#LOOP_RM_LATCH:]] define i3 @test(ptr %a, i3 %n) { entry: @@ -32,3 +43,34 @@ for.end: } !0 = !{!"branch_weights", i32 1, i32 9999} + +; Original loop probability: p = 9999/(1+9999) = 0.9999 +; Original estimated trip count: (1+9999)/1 = 10000 +; Unroll count: 4 + +; Probability of >=3 iterations after first: p^3 = 0.9970003 +; CHECK: ![[#PROF_UR_GUARD]] = !{!"branch_weights", i32 29997, i32 99970003} + +; Probability of >=4 more iterations: p^4 = 0.99960006 +; CHECK: ![[#PROF_UR_LATCH]] = !{!"branch_weights", i32 39994, i32 99960006} + +; 10000//4 = 2500 +; CHECK: ![[#LOOP_UR_LATCH]] = distinct !{![[#LOOP_UR_LATCH]], ![[#LOOP_UR_TC:]], ![[#DISABLE:]]} +; CHECK: ![[#LOOP_UR_TC]] = !{!"llvm.loop.estimated_trip_count", i32 2500} +; +; CHECK: ![[#DISABLE]] = !{!"llvm.loop.unroll.disable"} + +; Probability of 1 to 3 more of 3 more remainder iterations: +; (p-p^4)/(1-p^4) = 0.749962497 +; CHECK: ![[#PROF_RM_GUARD]] = !{!"branch_weights", i32 74996250, i32 25003750} + +; Frequency of first remainder iter: r1 = 1 +; Frequency of second remainder iter: r2 = r1*(p-p^3)/(1-p^3) = 0.666633331 +; Frequency of third remainder iter: r3 = r2*(p-p^2)/(1-p^2) = 0.333299999 +; Solve for loop probability that produces that frequency: f = 1/(1-p') => +; p' = 1-1/f = 1-1/(r1+r2+r3) = 0.499983332. +; CHECK: ![[#PROF_RM_LATCH]] = !{!"branch_weights", i32 49998333, i32 50001667} + +; 10000%4 = 0 +; CHECK: ![[#LOOP_RM_LATCH]] = distinct !{![[#LOOP_RM_LATCH]], ![[#LOOP_RM_TC:]], ![[#DISABLE:]]} +; CHECK: ![[#LOOP_RM_TC]] = !{!"llvm.loop.estimated_trip_count", i32 0} diff --git a/llvm/test/Transforms/LoopUnroll/runtime-loop.ll b/llvm/test/Transforms/LoopUnroll/runtime-loop.ll index 492de063573be..0bc2e6aa7810a 100644 --- a/llvm/test/Transforms/LoopUnroll/runtime-loop.ll +++ b/llvm/test/Transforms/LoopUnroll/runtime-loop.ll @@ -295,11 +295,12 @@ exit2.loopexit: ; COMMON-LABEL: {{^}}!0 = ; EPILOG: [[EPILOG_PROF_0]] = !{!"branch_weights", i32 1, i32 11} -; EPILOG: [[EPILOG_PROF_1]] = !{!"branch_weights", i32 1, i32 127} -; EPILOG: [[EPILOG_PROF_2]] = !{!"branch_weights", i32 1, i32 7} -; EPILOG: [[EPILOG_PROF_3]] = !{!"branch_weights", i32 3, i32 1} +; EPILOG: [[EPILOG_PROF_1]] = !{!"branch_weights", i32 15186332, i32 84813668} +; EPILOG: [[EPILOG_PROF_2]] = !{!"branch_weights", i32 86446668, i32 13553332} +; EPILOG: [[EPILOG_PROF_3]] = !{!"branch_weights", i32 74397846, i32 25602154} -; EPILOG: [[EPILOG_LOOP]] = distinct !{[[EPILOG_LOOP]], [[EPILOG_LOOP_1:![0-9]+]]} +; EPILOG: [[EPILOG_LOOP]] = distinct !{[[EPILOG_LOOP]], [[EPILOG_TC:![0-9]+]], [[EPILOG_LOOP_1:![0-9]+]]} +; EPILOG: [[EPILOG_TC]] = !{!"llvm.loop.estimated_trip_count", i32 3} ; EPILOG: [[EPILOG_LOOP_1]] = !{!"llvm.loop.unroll.disable"} ; PROLOG: [[PROLOG_PROF_0]] = !{!"branch_weights", i32 1, i32 11} diff --git a/llvm/test/Transforms/LoopUnroll/unroll-heuristics-pgo.ll b/llvm/test/Transforms/LoopUnroll/unroll-heuristics-pgo.ll index d59a0298b3106..c9e51ea4cebbe 100644 --- a/llvm/test/Transforms/LoopUnroll/unroll-heuristics-pgo.ll +++ b/llvm/test/Transforms/LoopUnroll/unroll-heuristics-pgo.ll @@ -1,17 +1,29 @@ ; RUN: opt < %s -S -passes=loop-unroll -unroll-runtime -unroll-threshold=40 -unroll-max-percent-threshold-boost=100 | FileCheck %s -; XFAIL: * @known_constant = internal unnamed_addr constant [9 x i32] [i32 0, i32 -1, i32 0, i32 -1, i32 5, i32 -1, i32 0, i32 -1, i32 0], align 16 ; CHECK-LABEL: @bar_prof -; CHECK: loop: -; CHECK: %mul = mul -; CHECK: %mul.1 = mul -; CHECK: %mul.2 = mul -; CHECK: %mul.3 = mul -; CHECK: br i1 %niter.ncmp.7, label %loop.end.unr-lcssa, label %loop, !prof [[PROF0:![0-9]+]] -; CHECK: loop.epil: -; CHECK: br i1 %epil.iter.cmp, label %loop.epil, label %loop.end.epilog-lcssa, !prof [[PROF1:![0-9]+]], !llvm.loop {{![0-9]+}} +; CHECK: entry: +; CHECK: br i1 %{{.*}}, label %[[LOOP_EPIL_PREHEADER:.*]], label %[[ENTRY_NEW:.*]], !prof ![[#PROF_UR_GUARD:]] +; CHECK: [[ENTRY_NEW]]: +; CHECK: br label %loop +; CHECK: loop: +; CHECK: %mul = mul +; CHECK: %mul.1 = mul +; CHECK: %mul.2 = mul +; CHECK: %mul.3 = mul +; CHECK: %mul.4 = mul +; CHECK: %mul.5 = mul +; CHECK: %mul.6 = mul +; CHECK: %mul.7 = mul +; CHECK-NOT: %mul.8 = mul +; CHECK: br i1 %{{.*}}, label %[[LOOP_END_UNR_LCSSA:.*]], label %loop, !prof ![[#PROF_UR_LATCH:]], !llvm.loop ![[#LOOP_UR_LATCH:]] +; CHECK: [[LOOP_END_UNR_LCSSA]]: +; CHECK: br i1 %{{.*}}, label %[[LOOP_EPIL_PREHEADER]], label %loop.end, !prof ![[#PROF_RM_GUARD:]] +; CHECK: [[LOOP_EPIL_PREHEADER]]: +; CHECK: br label %[[LOOP_EPIL:.*]] +; CHECK: [[LOOP_EPIL]]: +; CHECK: br i1 %{{.*}}, label %[[LOOP_EPIL]], label %[[LOOP_END_EPILOG_LCSSA:.*]], !prof ![[#PROF_RM_LATCH:]], !llvm.loop ![[#LOOP_RM_LATCH:]] define i32 @bar_prof(ptr noalias nocapture readonly %src, i64 %c) !prof !1 { entry: br label %loop @@ -61,5 +73,35 @@ loop.end: !1 = !{!"function_entry_count", i64 1} !2 = !{!"branch_weights", i32 1, i32 1000} -; CHECK: [[PROF0]] = !{!"branch_weights", i32 1, i32 124} -; CHECK: [[PROF1]] = !{!"branch_weights", i32 3, i32 1} +; Original loop probability: p = 1000/(1+1000) = 0.999000999 +; Original estimated trip count: (1+1000)/1 = 1001 +; Unroll count: 8 + +; Probability of >=7 iterations after first: p^7 = 0.993027916 +; CHECK: ![[#PROF_UR_GUARD]] = !{!"branch_weights", i32 697208, i32 99302792} + +; Probability of >=8 more iterations: p^8 = 0.99203588 +; CHECK: ![[#PROF_UR_LATCH]] = !{!"branch_weights", i32 796412, i32 99203588} + +; 1001//8 = 125 +; CHECK: ![[#LOOP_UR_LATCH]] = distinct !{![[#LOOP_UR_LATCH]], ![[#LOOP_UR_TC:]]} +; CHECK: ![[#LOOP_UR_TC]] = !{!"llvm.loop.estimated_trip_count", i32 125} + +; Probability of 1 to 7 more of 7 more remainder iterations: +; (p-p^8)/(1-p^8) = 0.874562282 +; CHECK: ![[#PROF_RM_GUARD]] = !{!"branch_weights", i32 87456228, i32 12543772} + +; Frequency of first remainder iter: r1 = 1 +; Frequency of second remainder iter: r2 = r1*(p-p^7)/(1-p^7) = 0.856714143 +; Frequency of third remainder iter: r3 = r2*(p-p^6)/(1-p^6) = 0.713571429 +; Frequency of fourth remainder iter: r4 = r2*(p-p^5)/(1-p^5) = 0.570571715 +; Frequency of fifth remainder iter: r5 = r2*(p-p^4)/(1-p^4) = 0.427714858 +; Frequency of sixth remainder iter: r6 = r2*(p-p^3)/(1-p^3) = 0.285000715 +; Frequency of seventh remainder iter: r7 = r2*(p-p^2)/(1-p^2) = 0.142429143 +; Solve for loop probability that produces that frequency: f = 1/(1-p') => +; p' = 1-1/f = 1-1/(r1+r2+r3+r4+r5+r6+r7) = 0.749749875. +; CHECK: ![[#PROF_RM_LATCH]] = !{!"branch_weights", i32 74974988, i32 25025012} + +; Remainder estimated trip count: 1001%8 = 1 +; CHECK: ![[#LOOP_RM_LATCH]] = distinct !{![[#LOOP_RM_LATCH]], ![[#LOOP_RM_TC:]], ![[#DISABLE:]]} +; CHECK: ![[#LOOP_RM_TC]] = !{!"llvm.loop.estimated_trip_count", i32 1} >From a95ebd1f900b5acf1b80fc13e2475204e7fb83bc Mon Sep 17 00:00:00 2001 From: "Joel E. Denny" <jdenny.o...@gmail.com> Date: Tue, 16 Sep 2025 16:56:41 -0400 Subject: [PATCH 2/3] Revert accidental change from right before pushing --- llvm/lib/Transforms/Utils/LoopUtils.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/lib/Transforms/Utils/LoopUtils.cpp b/llvm/lib/Transforms/Utils/LoopUtils.cpp index 4c05774f7f48c..0b8a345cc5c73 100644 --- a/llvm/lib/Transforms/Utils/LoopUtils.cpp +++ b/llvm/lib/Transforms/Utils/LoopUtils.cpp @@ -1008,7 +1008,7 @@ bool llvm::setBranchProbability(BranchInst *B, double P, bool ForFirstTarget) { // Sum should be some large number so that the weights accurately encode P, // but it should not be so large that some branch weights will print as // negative in LLVM IR as that makes LLVM tests harder to maintain. - const uint64_t Sum = 1000000000; + const uint64_t Sum = 100000000; uint64_t Weight0 = round(P * Sum); uint64_t Weight1 = round((1 - P) * Sum); if (!ForFirstTarget) >From d85294e4392a8e17a81525450e227c9700f86732 Mon Sep 17 00:00:00 2001 From: "Joel E. Denny" <jdenny.o...@gmail.com> Date: Thu, 25 Sep 2025 14:30:02 -0400 Subject: [PATCH 3/3] Use BranchProbability --- llvm/include/llvm/Support/BranchProbability.h | 3 ++ .../include/llvm/Transforms/Utils/LoopUtils.h | 20 ++++---- .../llvm/Transforms/Utils/UnrollLoop.h | 2 +- llvm/lib/Support/BranchProbability.cpp | 7 +++ llvm/lib/Transforms/Utils/LoopUnroll.cpp | 6 +-- .../Transforms/Utils/LoopUnrollRuntime.cpp | 50 +++++++++++-------- llvm/lib/Transforms/Utils/LoopUtils.cpp | 36 ++++++------- .../branch-weights-freq/unroll-epilog.ll | 38 +++++++------- .../LoopUnroll/runtime-loop-branchweight.ll | 19 ++++--- .../Transforms/LoopUnroll/runtime-loop.ll | 6 +-- .../LoopUnroll/unroll-heuristics-pgo.ll | 19 ++++--- 11 files changed, 113 insertions(+), 93 deletions(-) diff --git a/llvm/include/llvm/Support/BranchProbability.h b/llvm/include/llvm/Support/BranchProbability.h index 42fe225709ef8..b15d6e1707afa 100644 --- a/llvm/include/llvm/Support/BranchProbability.h +++ b/llvm/include/llvm/Support/BranchProbability.h @@ -97,6 +97,9 @@ class BranchProbability { /// \return \c Num divided by \c this. LLVM_ABI uint64_t scaleByInverse(uint64_t Num) const; + /// Compute pow(Probability, N). + BranchProbability pow(unsigned N) const; + BranchProbability &operator+=(BranchProbability RHS) { assert(N != UnknownN && RHS.N != UnknownN && "Unknown probability cannot participate in arithmetics."); diff --git a/llvm/include/llvm/Transforms/Utils/LoopUtils.h b/llvm/include/llvm/Transforms/Utils/LoopUtils.h index dff54ae83897c..8e3c18f21993a 100644 --- a/llvm/include/llvm/Transforms/Utils/LoopUtils.h +++ b/llvm/include/llvm/Transforms/Utils/LoopUtils.h @@ -366,28 +366,29 @@ LLVM_ABI bool setLoopEstimatedTripCount( std::optional<unsigned> EstimatedLoopInvocationWeight = std::nullopt); /// Based on branch weight metadata, return either: -/// - \c std::nullopt if the implementation is unable to handle the loop form -/// of \p L (e.g., \p L must have a latch block that controls the loop exit). +/// - An unknown probability if the implementation is unable to handle the loop +/// form of \p L (e.g., \p L must have a latch block that controls the loop +/// exit). /// - Else, the estimated probability that, at the end of any iteration, the /// latch of \p L will start another iteration. The result \c P is such that /// `0 <= P <= 1`, and `1 - P` is the probability of exiting the loop. -std::optional<double> getLoopProbability(Loop *L); +BranchProbability getLoopProbability(Loop *L); /// Set branch weight metadata for the latch of \p L to indicate that, at the /// end of any iteration, its estimated probability of starting another /// iteration is \p P. Return false if the implementation is unable to handle /// the loop form of \p L (e.g., \p L must have a latch block that controls the /// loop exit). Otherwise, return true. -bool setLoopProbability(Loop *L, double P); +bool setLoopProbability(Loop *L, BranchProbability P); /// Based on branch weight metadata, return either: -/// - \c std::nullopt if the implementation cannot extract the probability -/// (e.g., \p B must have exactly two target labels, so it must be a -/// conditional branch). +/// - An unknown probability if the implementation cannot extract the +/// probability (e.g., \p B must have exactly two target labels, so it must be +/// a conditional branch). /// - The probability \c P that control flows from \p B to its first target /// label such that `1 - P` is the probability of control flowing to its /// second target label, or vice-versa if \p ForFirstTarget is false. -std::optional<double> getBranchProbability(BranchInst *B, bool ForFirstTarget); +BranchProbability getBranchProbability(BranchInst *B, bool ForFirstTarget); /// Set branch weight metadata for \p B to indicate that \p P and `1 - P` are /// the probabilities of control flowing to its first and second target labels, @@ -395,7 +396,8 @@ std::optional<double> getBranchProbability(BranchInst *B, bool ForFirstTarget); /// the implementation cannot set the probability (e.g., \p B must have exactly /// two target labels, so it must be a conditional branch). Otherwise, return /// true. -bool setBranchProbability(BranchInst *B, double P, bool ForFirstTarget); +bool setBranchProbability(BranchInst *B, BranchProbability P, + bool ForFirstTarget); /// Check inner loop (L) backedge count is known to be invariant on all /// iterations of its outer loop. If the loop has no parent, this is trivially diff --git a/llvm/include/llvm/Transforms/Utils/UnrollLoop.h b/llvm/include/llvm/Transforms/Utils/UnrollLoop.h index 571a0af6fd0db..a3efc43c62dc3 100644 --- a/llvm/include/llvm/Transforms/Utils/UnrollLoop.h +++ b/llvm/include/llvm/Transforms/Utils/UnrollLoop.h @@ -99,7 +99,7 @@ LLVM_ABI bool UnrollRuntimeLoopRemainder( unsigned SCEVExpansionBudget, bool RuntimeUnrollMultiExit, Loop **ResultLoop = nullptr, std::optional<unsigned> OriginalTripCount = std::nullopt, - std::optional<double> OriginalLoopProb = std::nullopt); + BranchProbability OriginalLoopProb = BranchProbability::getUnknown()); LLVM_ABI LoopUnrollResult UnrollAndJamLoop( Loop *L, unsigned Count, unsigned TripCount, unsigned TripMultiple, diff --git a/llvm/lib/Support/BranchProbability.cpp b/llvm/lib/Support/BranchProbability.cpp index e3763449d16cb..ea42f34b58645 100644 --- a/llvm/lib/Support/BranchProbability.cpp +++ b/llvm/lib/Support/BranchProbability.cpp @@ -111,3 +111,10 @@ uint64_t BranchProbability::scale(uint64_t Num) const { uint64_t BranchProbability::scaleByInverse(uint64_t Num) const { return ::scale<0>(Num, D, N); } + +BranchProbability BranchProbability::pow(unsigned N) const { + BranchProbability Res = BranchProbability::getOne(); + for (unsigned I = 0; I < N; ++I) + Res *= *this; + return Res; +} diff --git a/llvm/lib/Transforms/Utils/LoopUnroll.cpp b/llvm/lib/Transforms/Utils/LoopUnroll.cpp index b40d81100a2bd..2ffbe30208df9 100644 --- a/llvm/lib/Transforms/Utils/LoopUnroll.cpp +++ b/llvm/lib/Transforms/Utils/LoopUnroll.cpp @@ -502,7 +502,7 @@ llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI, const bool MaxOrZero = SE->isBackedgeTakenCountMaxOrZero(L); std::optional<unsigned> OriginalTripCount = llvm::getLoopEstimatedTripCount(L); - std::optional<double> OriginalLoopProb = llvm::getLoopProbability(L); + BranchProbability OriginalLoopProb = llvm::getLoopProbability(L); // Effectively "DCE" unrolled iterations that are beyond the max tripcount // and will never be executed. @@ -1161,10 +1161,10 @@ llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI, // the unrolled loop as a whole without considering the branch weights for // each unrolled iteration's latch within it, we store the new trip count as // separate metadata. - if (OriginalLoopProb && ULO.Runtime && EpilogProfitability) { + if (!OriginalLoopProb.isUnknown() && ULO.Runtime && EpilogProfitability) { // Where p is always the probability of executing at least 1 more // iteration, the probability for at least n more iterations is p^n. - setLoopProbability(L, pow(*OriginalLoopProb, ULO.Count)); + setLoopProbability(L, OriginalLoopProb.pow(ULO.Count)); } if (OriginalTripCount) { unsigned NewTripCount = *OriginalTripCount / ULO.Count; diff --git a/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp b/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp index 9ba93fcade2bd..a788a58624ec8 100644 --- a/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp +++ b/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp @@ -200,13 +200,14 @@ static void ConnectProlog(Loop *L, Value *BECount, unsigned Count, /// from 0 to \p N more iterations can possibly execute. Among such cases in /// the original loop (with loop probability \p OriginalLoopProb), what is the /// probability of executing at least one more iteration? -static double probOfNextInRemainder(double OriginalLoopProb, unsigned N) { +static BranchProbability +probOfNextInRemainder(BranchProbability OriginalLoopProb, unsigned N) { // Each of these variables holds the original loop's probability that the // number of iterations it will execute is some m in the specified range. - double ProbOne = OriginalLoopProb; // 1 <= m - double ProbTooMany = pow(ProbOne, N + 1); // N + 1 <= m - double ProbNotTooMany = 1 - ProbTooMany; // 0 <= m <= N - double ProbOneNotTooMany = ProbOne - ProbTooMany; // 1 <= m <= N + BranchProbability ProbOne = OriginalLoopProb; // 1 <= m + BranchProbability ProbTooMany = ProbOne.pow(N + 1); // N + 1 <= m + BranchProbability ProbNotTooMany = ProbTooMany.getCompl(); // 0 <= m <= N + BranchProbability ProbOneNotTooMany = ProbOne - ProbTooMany; // 1 <= m <= N return ProbOneNotTooMany / ProbNotTooMany; } @@ -237,7 +238,7 @@ static void ConnectEpilog(Loop *L, Value *ModVal, BasicBlock *NewExit, ValueToValueMapTy &VMap, DominatorTree *DT, LoopInfo *LI, bool PreserveLCSSA, ScalarEvolution &SE, unsigned Count, AssumptionCache &AC, - std::optional<double> OriginalLoopProb) { + BranchProbability OriginalLoopProb) { BasicBlock *Latch = L->getLoopLatch(); assert(Latch && "Loop must have a latch"); BasicBlock *EpilogLatch = cast<BasicBlock>(VMap[Latch]); @@ -348,16 +349,17 @@ static void ConnectEpilog(Loop *L, Value *ModVal, BasicBlock *NewExit, PreserveLCSSA); // Add the branch to the exit block (around the epilog loop) MDNode *BranchWeights = nullptr; - if (!OriginalLoopProb && hasBranchWeightMD(*Latch->getTerminator())) { + if (OriginalLoopProb.isUnknown() && + hasBranchWeightMD(*Latch->getTerminator())) { // Assume equal distribution in interval [0, Count). MDBuilder MDB(B.getContext()); BranchWeights = MDB.createBranchWeights(1, Count - 1); } BranchInst *RemainderLoopGuard = B.CreateCondBr(BrLoopExit, EpilogPreHeader, Exit, BranchWeights); - if (OriginalLoopProb) { + if (!OriginalLoopProb.isUnknown()) { setBranchProbability(RemainderLoopGuard, - probOfNextInRemainder(*OriginalLoopProb, Count - 1), + probOfNextInRemainder(OriginalLoopProb, Count - 1), /*ForFirstTarget=*/true); } InsertPt->eraseFromParent(); @@ -387,7 +389,7 @@ static Loop *CloneLoopBlocks(Loop *L, Value *NewIter, LoopBlocksDFS &LoopBlocks, ValueToValueMapTy &VMap, DominatorTree *DT, LoopInfo *LI, unsigned Count, std::optional<unsigned> OriginalTripCount, - std::optional<double> OriginalLoopProb) { + BranchProbability OriginalLoopProb) { StringRef suffix = UseEpilogRemainder ? "epil" : "prol"; BasicBlock *Header = L->getHeader(); BasicBlock *Latch = L->getLoopLatch(); @@ -442,7 +444,7 @@ static Loop *CloneLoopBlocks(Loop *L, Value *NewIter, Builder.CreateAdd(NewIdx, One, NewIdx->getName() + ".next"); Value *IdxCmp = Builder.CreateICmpNE(IdxNext, NewIter, NewIdx->getName() + ".cmp"); MDNode *BranchWeights = nullptr; - if (!(OriginalLoopProb && UseEpilogRemainder) && + if ((OriginalLoopProb.isUnknown() || !UseEpilogRemainder) && hasBranchWeightMD(*LatchBR)) { uint32_t ExitWeight; uint32_t BackEdgeWeight; @@ -463,21 +465,25 @@ static Loop *CloneLoopBlocks(Loop *L, Value *NewIter, } BranchInst *RemainderLoopLatch = Builder.CreateCondBr(IdxCmp, FirstLoopBB, InsertBot, BranchWeights); - if (OriginalLoopProb && UseEpilogRemainder) { + if (!OriginalLoopProb.isUnknown() && UseEpilogRemainder) { // Compute the total frequency of the original loop body from the // remainder iterations. Once we've reached them, the first of them - // always executes, so it's frequency and probability are 1. + // always executes, so its frequency and probability are 1. double FreqRemIters = 1; if (Count > 2) { - double ProbReaching = 1; + BranchProbability ProbReaching = BranchProbability::getOne(); for (unsigned N = Count - 2; N >= 1; --N) { - ProbReaching *= probOfNextInRemainder(*OriginalLoopProb, N); - FreqRemIters += ProbReaching; + ProbReaching *= probOfNextInRemainder(OriginalLoopProb, N); + FreqRemIters += double(ProbReaching.getNumerator()) / + ProbReaching.getDenominator(); } } // Solve for the loop probability that would produce that frequency. // Sum(i=0..inf)(Prob^i) = 1/(1-Prob) = FreqRemIters. - double Prob = 1 - 1 / FreqRemIters; + double ProbDouble = 1 - 1 / FreqRemIters; + BranchProbability Prob = BranchProbability::getBranchProbability( + std::round(ProbDouble * BranchProbability::getDenominator()), + BranchProbability::getDenominator()); setBranchProbability(RemainderLoopLatch, Prob, /*ForFirstTarget=*/true); } NewIdx->addIncoming(Zero, InsertTop); @@ -648,7 +654,7 @@ bool llvm::UnrollRuntimeLoopRemainder( const TargetTransformInfo *TTI, bool PreserveLCSSA, unsigned SCEVExpansionBudget, bool RuntimeUnrollMultiExit, Loop **ResultLoop, std::optional<unsigned> OriginalTripCount, - std::optional<double> OriginalLoopProb) { + BranchProbability OriginalLoopProb) { LLVM_DEBUG(dbgs() << "Trying runtime unrolling on Loop: \n"); LLVM_DEBUG(L->dump()); LLVM_DEBUG(UseEpilogRemainder ? dbgs() << "Using epilog remainder.\n" @@ -868,7 +874,7 @@ bool llvm::UnrollRuntimeLoopRemainder( BasicBlock *UnrollingLoop = UseEpilogRemainder ? NewPreHeader : PrologExit; // Branch to either remainder (extra iterations) loop or unrolling loop. MDNode *BranchWeights = nullptr; - if (!(OriginalLoopProb && UseEpilogRemainder) && + if ((OriginalLoopProb.isUnknown() || !UseEpilogRemainder) && hasBranchWeightMD(*Latch->getTerminator())) { // Assume loop is nearly always entered. MDBuilder MDB(B.getContext()); @@ -876,12 +882,12 @@ bool llvm::UnrollRuntimeLoopRemainder( } BranchInst *UnrollingLoopGuard = B.CreateCondBr(BranchVal, RemainderLoop, UnrollingLoop, BranchWeights); - if (OriginalLoopProb && UseEpilogRemainder) { + if (!OriginalLoopProb.isUnknown() && UseEpilogRemainder) { // The original loop's first iteration always happens. Compute the // probability of the original loop executing Count-1 iterations after that // to complete the first iteration of the unrolled loop. - double ProbOne = *OriginalLoopProb; - double ProbRest = pow(ProbOne, Count - 1); + BranchProbability ProbOne = OriginalLoopProb; + BranchProbability ProbRest = ProbOne.pow(Count - 1); setBranchProbability(UnrollingLoopGuard, ProbRest, /*ForFirstTarget=*/false); } diff --git a/llvm/lib/Transforms/Utils/LoopUtils.cpp b/llvm/lib/Transforms/Utils/LoopUtils.cpp index a798c32230fb2..0b15f93f73e38 100644 --- a/llvm/lib/Transforms/Utils/LoopUtils.cpp +++ b/llvm/lib/Transforms/Utils/LoopUtils.cpp @@ -973,15 +973,15 @@ bool llvm::setLoopEstimatedTripCount( return true; } -std::optional<double> llvm::getLoopProbability(Loop *L) { +BranchProbability llvm::getLoopProbability(Loop *L) { BranchInst *LatchBranch = getExpectedExitLoopLatchBranch(L); if (!LatchBranch) - return std::nullopt; + return BranchProbability::getUnknown(); bool FirstTargetIsLoop = LatchBranch->getSuccessor(0) == L->getHeader(); return getBranchProbability(LatchBranch, FirstTargetIsLoop); } -bool llvm::setLoopProbability(Loop *L, double P) { +bool llvm::setLoopProbability(Loop *L, BranchProbability P) { BranchInst *LatchBranch = getExpectedExitLoopLatchBranch(L); if (!LatchBranch) return false; @@ -989,34 +989,30 @@ bool llvm::setLoopProbability(Loop *L, double P) { return setBranchProbability(LatchBranch, P, FirstTargetIsLoop); } -std::optional<double> llvm::getBranchProbability(BranchInst *B, - bool ForFirstTarget) { +BranchProbability llvm::getBranchProbability(BranchInst *B, + bool ForFirstTarget) { if (B->getNumSuccessors() != 2) - return std::nullopt; + return BranchProbability::getUnknown(); uint64_t Weight0, Weight1; if (!extractBranchWeights(*B, Weight0, Weight1)) - return std::nullopt; + return BranchProbability::getUnknown(); if (!ForFirstTarget) std::swap(Weight0, Weight1); - return double(Weight0) / (double(Weight0) + double(Weight1)); + return BranchProbability::getBranchProbability(Weight0, Weight0 + Weight1); } -bool llvm::setBranchProbability(BranchInst *B, double P, bool ForFirstTarget) { +bool llvm::setBranchProbability(BranchInst *B, BranchProbability P, + bool ForFirstTarget) { if (B->getNumSuccessors() != 2) return false; - - // Sum should be some large number so that the weights accurately encode P, - // but it should not be so large that some branch weights will print as - // negative in LLVM IR as that makes LLVM tests harder to maintain. - const uint64_t Sum = 100000000; - uint64_t Weight0 = round(P * Sum); - uint64_t Weight1 = round((1 - P) * Sum); + BranchProbability Prob0 = P; + BranchProbability Prob1 = P.getCompl(); if (!ForFirstTarget) - std::swap(Weight0, Weight1); - + std::swap(Prob0, Prob1); MDBuilder MDB(B->getContext()); - B->setMetadata(LLVMContext::MD_prof, - MDB.createBranchWeights(Weight0, Weight1)); + B->setMetadata( + LLVMContext::MD_prof, + MDB.createBranchWeights(Prob0.getNumerator(), Prob1.getNumerator())); return true; } diff --git a/llvm/test/Transforms/LoopUnroll/branch-weights-freq/unroll-epilog.ll b/llvm/test/Transforms/LoopUnroll/branch-weights-freq/unroll-epilog.ll index c7c91c6f6b815..96b31d801c2f9 100644 --- a/llvm/test/Transforms/LoopUnroll/branch-weights-freq/unroll-epilog.ll +++ b/llvm/test/Transforms/LoopUnroll/branch-weights-freq/unroll-epilog.ll @@ -113,17 +113,17 @@ do.end: ; ------------------------------------------------------------------------------ ; Check branch weight metadata and estimated trip count metadata. ; -; UR2: ![[#PROF_UR_GUARD]] = !{!"branch_weights", i32 9090909, i32 90909091} -; UR4: ![[#PROF_UR_GUARD]] = !{!"branch_weights", i32 24868520, i32 75131480} -; UR10: ![[#PROF_UR_GUARD]] = !{!"branch_weights", i32 57590238, i32 42409762} -; UR11: ![[#PROF_UR_GUARD]] = !{!"branch_weights", i32 61445671, i32 38554329} -; UR12: ![[#PROF_UR_GUARD]] = !{!"branch_weights", i32 64950610, i32 35049390} +; UR2: ![[#PROF_UR_GUARD]] = !{!"branch_weights", i32 195225786, i32 1952257862} +; UR4: ![[#PROF_UR_GUARD]] = !{!"branch_weights", i32 534047398, i32 1613436250} +; UR10: ![[#PROF_UR_GUARD]] = !{!"branch_weights", i32 1236740947, i32 910742701} +; UR11: ![[#PROF_UR_GUARD]] = !{!"branch_weights", i32 1319535738, i32 827947910} +; UR12: ![[#PROF_UR_GUARD]] = !{!"branch_weights", i32 1394803730, i32 752679918} ; -; UR2: ![[#PROF_UR_LATCH]] = !{!"branch_weights", i32 17355372, i32 82644628} -; UR4: ![[#PROF_UR_LATCH]] = !{!"branch_weights", i32 31698654, i32 68301346} -; UR10: ![[#PROF_UR_LATCH]] = !{!"branch_weights", i32 61445671, i32 38554329} -; UR11: ![[#PROF_UR_LATCH]] = !{!"branch_weights", i32 64950610, i32 35049390} -; UR12: ![[#PROF_UR_LATCH]] = !{!"branch_weights", i32 68136918, i32 31863082} +; UR2: ![[#PROF_UR_LATCH]] = !{!"branch_weights", i32 372703773, i32 1774779875} +; UR4: ![[#PROF_UR_LATCH]] = !{!"branch_weights", i32 680723421, i32 1466760227} +; UR10: ![[#PROF_UR_LATCH]] = !{!"branch_weights", i32 1319535738, i32 827947910} +; UR11: ![[#PROF_UR_LATCH]] = !{!"branch_weights", i32 1394803730, i32 752679918} +; UR12: ![[#PROF_UR_LATCH]] = !{!"branch_weights", i32 1463229177, i32 684254471} ; ; UR2: ![[#LOOP_UR_LATCH]] = distinct !{![[#LOOP_UR_LATCH]], ![[#LOOP_UR_TC:]], ![[#DISABLE:]]} ; UR4: ![[#LOOP_UR_LATCH]] = distinct !{![[#LOOP_UR_LATCH]], ![[#LOOP_UR_TC:]], ![[#DISABLE:]]} @@ -138,16 +138,16 @@ do.end: ; UR12: ![[#LOOP_UR_TC]] = !{!"llvm.loop.estimated_trip_count", i32 0} ; UR: ![[#DISABLE]] = !{!"llvm.loop.unroll.disable"} ; -; UR2: ![[#PROF_RM_GUARD]] = !{!"branch_weights", i32 47619048, i32 52380952} -; UR4: ![[#PROF_RM_GUARD]] = !{!"branch_weights", i32 71320836, i32 28679164} -; UR10: ![[#PROF_RM_GUARD]] = !{!"branch_weights", i32 85204964, i32 14795036} -; UR11: ![[#PROF_RM_GUARD]] = !{!"branch_weights", i32 86003351, i32 13996649} -; UR12: ![[#PROF_RM_GUARD]] = !{!"branch_weights", i32 86657880, i32 13342120} +; UR2: ![[#PROF_RM_GUARD]] = !{!"branch_weights", i32 1022611260, i32 1124872388} +; UR4: ![[#PROF_RM_GUARD]] = !{!"branch_weights", i32 1531603292, i32 615880356} +; UR10: ![[#PROF_RM_GUARD]] = !{!"branch_weights", i32 1829762672, i32 317720976} +; UR11: ![[#PROF_RM_GUARD]] = !{!"branch_weights", i32 1846907894, i32 300575754} +; UR12: ![[#PROF_RM_GUARD]] = !{!"branch_weights", i32 1860963812, i32 286519836} ; -; UR4: ![[#PROF_RM_LATCH]] = !{!"branch_weights", i32 48361934, i32 51638066} -; UR10: ![[#PROF_RM_LATCH]] = !{!"branch_weights", i32 77129012, i32 22870988} -; UR11: ![[#PROF_RM_LATCH]] = !{!"branch_weights", i32 78838041, i32 21161959} -; UR12: ![[#PROF_RM_LATCH]] = !{!"branch_weights", i32 80252977, i32 19747023} +; UR4: ![[#PROF_RM_LATCH]] = !{!"branch_weights", i32 1038564635, i32 1108919013} +; UR10: ![[#PROF_RM_LATCH]] = !{!"branch_weights", i32 1656332913, i32 491150735} +; UR11: ![[#PROF_RM_LATCH]] = !{!"branch_weights", i32 1693034047, i32 454449601} +; UR12: ![[#PROF_RM_LATCH]] = !{!"branch_weights", i32 1723419551, i32 424064097} ; UR4: ![[#LOOP_RM_LATCH]] = distinct !{![[#LOOP_RM_LATCH]], ![[#LOOP_RM_TC:]], ![[#DISABLE:]]} ; UR10: ![[#LOOP_RM_LATCH]] = distinct !{![[#LOOP_RM_LATCH]], ![[#LOOP_UR_TC:]], ![[#DISABLE:]]} diff --git a/llvm/test/Transforms/LoopUnroll/runtime-loop-branchweight.ll b/llvm/test/Transforms/LoopUnroll/runtime-loop-branchweight.ll index 489f7924f9054..2f8f98d40e86f 100644 --- a/llvm/test/Transforms/LoopUnroll/runtime-loop-branchweight.ll +++ b/llvm/test/Transforms/LoopUnroll/runtime-loop-branchweight.ll @@ -48,11 +48,13 @@ for.end: ; Original estimated trip count: (1+9999)/1 = 10000 ; Unroll count: 4 -; Probability of >=3 iterations after first: p^3 = 0.9970003 -; CHECK: ![[#PROF_UR_GUARD]] = !{!"branch_weights", i32 29997, i32 99970003} +; Probability of >=3 iterations after first: p^3 = 0.9970003 =~ +; 2146839468 / (644180 + 2146839468). +; CHECK: ![[#PROF_UR_GUARD]] = !{!"branch_weights", i32 644180, i32 2146839468} -; Probability of >=4 more iterations: p^4 = 0.99960006 -; CHECK: ![[#PROF_UR_LATCH]] = !{!"branch_weights", i32 39994, i32 99960006} +; Probability of >=4 more iterations: p^4 = 0.99960006 =~ +; 2146624784 / (858864 + 2146624784). +; CHECK: ![[#PROF_UR_LATCH]] = !{!"branch_weights", i32 858864, i32 2146624784} ; 10000//4 = 2500 ; CHECK: ![[#LOOP_UR_LATCH]] = distinct !{![[#LOOP_UR_LATCH]], ![[#LOOP_UR_TC:]], ![[#DISABLE:]]} @@ -61,15 +63,16 @@ for.end: ; CHECK: ![[#DISABLE]] = !{!"llvm.loop.unroll.disable"} ; Probability of 1 to 3 more of 3 more remainder iterations: -; (p-p^4)/(1-p^4) = 0.749962497 -; CHECK: ![[#PROF_RM_GUARD]] = !{!"branch_weights", i32 74996250, i32 25003750} +; (p-p^4)/(1-p^4) = 0.749962497 =~ 1610532724 / (1610532724 + 536950924). +; CHECK: ![[#PROF_RM_GUARD]] = !{!"branch_weights", i32 1610532724, i32 536950924} ; Frequency of first remainder iter: r1 = 1 ; Frequency of second remainder iter: r2 = r1*(p-p^3)/(1-p^3) = 0.666633331 ; Frequency of third remainder iter: r3 = r2*(p-p^2)/(1-p^2) = 0.333299999 ; Solve for loop probability that produces that frequency: f = 1/(1-p') => -; p' = 1-1/f = 1-1/(r1+r2+r3) = 0.499983332. -; CHECK: ![[#PROF_RM_LATCH]] = !{!"branch_weights", i32 49998333, i32 50001667} +; p' = 1-1/f = 1-1/(r1+r2+r3) = 0.499983332 =~ +; 1073706403 / (1073706403 + 1073777245). +; CHECK: ![[#PROF_RM_LATCH]] = !{!"branch_weights", i32 1073706403, i32 1073777245} ; 10000%4 = 0 ; CHECK: ![[#LOOP_RM_LATCH]] = distinct !{![[#LOOP_RM_LATCH]], ![[#LOOP_RM_TC:]], ![[#DISABLE:]]} diff --git a/llvm/test/Transforms/LoopUnroll/runtime-loop.ll b/llvm/test/Transforms/LoopUnroll/runtime-loop.ll index 0bc2e6aa7810a..ec7aba432b484 100644 --- a/llvm/test/Transforms/LoopUnroll/runtime-loop.ll +++ b/llvm/test/Transforms/LoopUnroll/runtime-loop.ll @@ -295,9 +295,9 @@ exit2.loopexit: ; COMMON-LABEL: {{^}}!0 = ; EPILOG: [[EPILOG_PROF_0]] = !{!"branch_weights", i32 1, i32 11} -; EPILOG: [[EPILOG_PROF_1]] = !{!"branch_weights", i32 15186332, i32 84813668} -; EPILOG: [[EPILOG_PROF_2]] = !{!"branch_weights", i32 86446668, i32 13553332} -; EPILOG: [[EPILOG_PROF_3]] = !{!"branch_weights", i32 74397846, i32 25602154} +; EPILOG: [[EPILOG_PROF_1]] = !{!"branch_weights", i32 326124004, i32 1821359644} +; EPILOG: [[EPILOG_PROF_2]] = !{!"branch_weights", i32 1856428066, i32 291055582} +; EPILOG: [[EPILOG_PROF_3]] = !{!"branch_weights", i32 1597681585, i32 549802063} ; EPILOG: [[EPILOG_LOOP]] = distinct !{[[EPILOG_LOOP]], [[EPILOG_TC:![0-9]+]], [[EPILOG_LOOP_1:![0-9]+]]} ; EPILOG: [[EPILOG_TC]] = !{!"llvm.loop.estimated_trip_count", i32 3} diff --git a/llvm/test/Transforms/LoopUnroll/unroll-heuristics-pgo.ll b/llvm/test/Transforms/LoopUnroll/unroll-heuristics-pgo.ll index c9e51ea4cebbe..02f5bf932132e 100644 --- a/llvm/test/Transforms/LoopUnroll/unroll-heuristics-pgo.ll +++ b/llvm/test/Transforms/LoopUnroll/unroll-heuristics-pgo.ll @@ -77,19 +77,21 @@ loop.end: ; Original estimated trip count: (1+1000)/1 = 1001 ; Unroll count: 8 -; Probability of >=7 iterations after first: p^7 = 0.993027916 -; CHECK: ![[#PROF_UR_GUARD]] = !{!"branch_weights", i32 697208, i32 99302792} +; Probability of >=7 iterations after first: p^7 = 0.993027916 =~ +; 2132511214 / (14972434 + 2132511214). +; CHECK: ![[#PROF_UR_GUARD]] = !{!"branch_weights", i32 14972434, i32 2132511214} -; Probability of >=8 more iterations: p^8 = 0.99203588 -; CHECK: ![[#PROF_UR_LATCH]] = !{!"branch_weights", i32 796412, i32 99203588} +; Probability of >=8 more iterations: p^8 = 0.99203588 =~ +; 2130380833 / (17102815 + 2130380833). +; CHECK: ![[#PROF_UR_LATCH]] = !{!"branch_weights", i32 17102815, i32 2130380833} ; 1001//8 = 125 ; CHECK: ![[#LOOP_UR_LATCH]] = distinct !{![[#LOOP_UR_LATCH]], ![[#LOOP_UR_TC:]]} ; CHECK: ![[#LOOP_UR_TC]] = !{!"llvm.loop.estimated_trip_count", i32 125} ; Probability of 1 to 7 more of 7 more remainder iterations: -; (p-p^8)/(1-p^8) = 0.874562282 -; CHECK: ![[#PROF_RM_GUARD]] = !{!"branch_weights", i32 87456228, i32 12543772} +; (p-p^8)/(1-p^8) = 0.874562282 =~ 1878108210 / (1878108210 + 269375438). +; CHECK: ![[#PROF_RM_GUARD]] = !{!"branch_weights", i32 1878108210, i32 269375438} ; Frequency of first remainder iter: r1 = 1 ; Frequency of second remainder iter: r2 = r1*(p-p^7)/(1-p^7) = 0.856714143 @@ -99,8 +101,9 @@ loop.end: ; Frequency of sixth remainder iter: r6 = r2*(p-p^3)/(1-p^3) = 0.285000715 ; Frequency of seventh remainder iter: r7 = r2*(p-p^2)/(1-p^2) = 0.142429143 ; Solve for loop probability that produces that frequency: f = 1/(1-p') => -; p' = 1-1/f = 1-1/(r1+r2+r3+r4+r5+r6+r7) = 0.749749875. -; CHECK: ![[#PROF_RM_LATCH]] = !{!"branch_weights", i32 74974988, i32 25025012} +; p' = 1-1/f = 1-1/(r1+r2+r3+r4+r5+r6+r7) = 0.749749875 =~ +; 1610075606 / (1610075606 + 537408042). +; CHECK: ![[#PROF_RM_LATCH]] = !{!"branch_weights", i32 1610075606, i32 537408042} ; Remainder estimated trip count: 1001%8 = 1 ; CHECK: ![[#LOOP_RM_LATCH]] = distinct !{![[#LOOP_RM_LATCH]], ![[#LOOP_RM_TC:]], ![[#DISABLE:]]} _______________________________________________ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits