https://github.com/mtrofin updated https://github.com/llvm/llvm-project/pull/107329
>From 22e94e4f30c0b3f4c895e789961bff03db745980 Mon Sep 17 00:00:00 2001 From: Mircea Trofin <mtro...@google.com> Date: Tue, 3 Sep 2024 21:28:05 -0700 Subject: [PATCH] [ctx_prof] Flattened profile lowering pass --- llvm/include/llvm/ProfileData/ProfileCommon.h | 6 +- .../Instrumentation/PGOCtxProfFlattening.h | 25 ++ llvm/lib/Passes/PassBuilder.cpp | 1 + llvm/lib/Passes/PassBuilderPipelines.cpp | 1 + llvm/lib/Passes/PassRegistry.def | 1 + .../Transforms/Instrumentation/CMakeLists.txt | 1 + .../Instrumentation/PGOCtxProfFlattening.cpp | 350 ++++++++++++++++++ .../flatten-always-removes-instrumentation.ll | 12 + .../CtxProfAnalysis/flatten-and-annotate.ll | 112 ++++++ 9 files changed, 506 insertions(+), 3 deletions(-) create mode 100644 llvm/include/llvm/Transforms/Instrumentation/PGOCtxProfFlattening.h create mode 100644 llvm/lib/Transforms/Instrumentation/PGOCtxProfFlattening.cpp create mode 100644 llvm/test/Analysis/CtxProfAnalysis/flatten-always-removes-instrumentation.ll create mode 100644 llvm/test/Analysis/CtxProfAnalysis/flatten-and-annotate.ll diff --git a/llvm/include/llvm/ProfileData/ProfileCommon.h b/llvm/include/llvm/ProfileData/ProfileCommon.h index eaab59484c947a..edd8e1f644ad12 100644 --- a/llvm/include/llvm/ProfileData/ProfileCommon.h +++ b/llvm/include/llvm/ProfileData/ProfileCommon.h @@ -79,13 +79,13 @@ class ProfileSummaryBuilder { class InstrProfSummaryBuilder final : public ProfileSummaryBuilder { uint64_t MaxInternalBlockCount = 0; - inline void addEntryCount(uint64_t Count); - inline void addInternalCount(uint64_t Count); - public: InstrProfSummaryBuilder(std::vector<uint32_t> Cutoffs) : ProfileSummaryBuilder(std::move(Cutoffs)) {} + void addEntryCount(uint64_t Count); + void addInternalCount(uint64_t Count); + void addRecord(const InstrProfRecord &); std::unique_ptr<ProfileSummary> getSummary(); }; diff --git a/llvm/include/llvm/Transforms/Instrumentation/PGOCtxProfFlattening.h 
b/llvm/include/llvm/Transforms/Instrumentation/PGOCtxProfFlattening.h new file mode 100644 index 00000000000000..0eab3aaf6fcad3 --- /dev/null +++ b/llvm/include/llvm/Transforms/Instrumentation/PGOCtxProfFlattening.h @@ -0,0 +1,25 @@ +//===-- PGOCtxProfFlattening.h - Contextual Instr. Flattening ---*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file declares the PGOCtxProfFlatteningPass class. +// +//===----------------------------------------------------------------------===// +#ifndef LLVM_TRANSFORMS_INSTRUMENTATION_PGOCTXPROFFLATTENING_H +#define LLVM_TRANSFORMS_INSTRUMENTATION_PGOCTXPROFFLATTENING_H + +#include "llvm/IR/PassManager.h" +namespace llvm { + +class PGOCtxProfFlatteningPass + : public PassInfoMixin<PGOCtxProfFlatteningPass> { +public: + explicit PGOCtxProfFlatteningPass() = default; + PreservedAnalyses run(Module &M, ModuleAnalysisManager &MAM); +}; +} // namespace llvm +#endif diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp index a22abed8051a11..d87e64eff08966 100644 --- a/llvm/lib/Passes/PassBuilder.cpp +++ b/llvm/lib/Passes/PassBuilder.cpp @@ -198,6 +198,7 @@ #include "llvm/Transforms/Instrumentation/MemProfiler.h" #include "llvm/Transforms/Instrumentation/MemorySanitizer.h" #include "llvm/Transforms/Instrumentation/NumericalStabilitySanitizer.h" +#include "llvm/Transforms/Instrumentation/PGOCtxProfFlattening.h" #include "llvm/Transforms/Instrumentation/PGOCtxProfLowering.h" #include "llvm/Transforms/Instrumentation/PGOForceFunctionAttrs.h" #include "llvm/Transforms/Instrumentation/PGOInstrumentation.h" diff --git a/llvm/lib/Passes/PassBuilderPipelines.cpp b/llvm/lib/Passes/PassBuilderPipelines.cpp index 1fd7ef929c87d5..38297dc02b8be6 100644 --- 
a/llvm/lib/Passes/PassBuilderPipelines.cpp +++ b/llvm/lib/Passes/PassBuilderPipelines.cpp @@ -76,6 +76,7 @@ #include "llvm/Transforms/Instrumentation/InstrOrderFile.h" #include "llvm/Transforms/Instrumentation/InstrProfiling.h" #include "llvm/Transforms/Instrumentation/MemProfiler.h" +#include "llvm/Transforms/Instrumentation/PGOCtxProfFlattening.h" #include "llvm/Transforms/Instrumentation/PGOCtxProfLowering.h" #include "llvm/Transforms/Instrumentation/PGOForceFunctionAttrs.h" #include "llvm/Transforms/Instrumentation/PGOInstrumentation.h" diff --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def index d6067089c6b5c1..2b0624cb9874da 100644 --- a/llvm/lib/Passes/PassRegistry.def +++ b/llvm/lib/Passes/PassRegistry.def @@ -58,6 +58,7 @@ MODULE_PASS("coro-early", CoroEarlyPass()) MODULE_PASS("cross-dso-cfi", CrossDSOCFIPass()) MODULE_PASS("ctx-instr-gen", PGOInstrumentationGen(PGOInstrumentationType::CTXPROF)) +MODULE_PASS("ctx-prof-flatten", PGOCtxProfFlatteningPass()) MODULE_PASS("deadargelim", DeadArgumentEliminationPass()) MODULE_PASS("debugify", NewPMDebugifyPass()) MODULE_PASS("dfsan", DataFlowSanitizerPass()) diff --git a/llvm/lib/Transforms/Instrumentation/CMakeLists.txt b/llvm/lib/Transforms/Instrumentation/CMakeLists.txt index deab37801ff1df..d45b07447d09da 100644 --- a/llvm/lib/Transforms/Instrumentation/CMakeLists.txt +++ b/llvm/lib/Transforms/Instrumentation/CMakeLists.txt @@ -15,6 +15,7 @@ add_llvm_component_library(LLVMInstrumentation InstrProfiling.cpp KCFI.cpp LowerAllowCheckPass.cpp + PGOCtxProfFlattening.cpp PGOCtxProfLowering.cpp PGOForceFunctionAttrs.cpp PGOInstrumentation.cpp diff --git a/llvm/lib/Transforms/Instrumentation/PGOCtxProfFlattening.cpp b/llvm/lib/Transforms/Instrumentation/PGOCtxProfFlattening.cpp new file mode 100644 index 00000000000000..d4224135f771b7 --- /dev/null +++ b/llvm/lib/Transforms/Instrumentation/PGOCtxProfFlattening.cpp @@ -0,0 +1,350 @@ +//===- PGOCtxProfFlattening.cpp - Contextual Instr. 
Flattening ------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Flattens the contextual profile and lowers it to MD_prof. +// This should happen after all IPO (which is assumed to have maintained the +// contextual profile) happened. Flattening consists of summing the values at +// the same index of the counters belonging to all the contexts of a function. +// The lowering consists of materializing the counter values to function +// entrypoint counts and branch probabilities. +// +// This pass also removes contextual instrumentation, which has been kept around +// to facilitate its functionality. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Instrumentation/PGOCtxProfFlattening.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/ScopeExit.h" +#include "llvm/Analysis/CtxProfAnalysis.h" +#include "llvm/Analysis/OptimizationRemarkEmitter.h" +#include "llvm/Analysis/ProfileSummaryInfo.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/IR/Analysis.h" +#include "llvm/IR/CFG.h" +#include "llvm/IR/Dominators.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/PassManager.h" +#include "llvm/IR/ProfileSummary.h" +#include "llvm/ProfileData/ProfileCommon.h" +#include "llvm/Transforms/Instrumentation/PGOInstrumentation.h" +#include "llvm/Transforms/Scalar/DCE.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" + +using namespace llvm; + +namespace { + +class ProfileAnnotator final { + class BBInfo; + struct EdgeInfo { + BBInfo *const Src; + BBInfo *const Dest; + std::optional<uint64_t> Count; + + explicit EdgeInfo(BBInfo &Src, BBInfo &Dest) : Src(&Src), Dest(&Dest) {} + }; + + class BBInfo { + 
std::optional<uint64_t> Count; + SmallVector<EdgeInfo *> OutEdges; + SmallVector<EdgeInfo *> InEdges; + size_t UnknownCountOutEdges = 0; + size_t UnknownCountInEdges = 0; + + // Pass AssumeAllKnown when we try to propagate counts from edges to BBs - + // because all the edge counters must be known. + uint64_t getEdgeSum(const SmallVector<EdgeInfo *> &Edges, + bool AssumeAllKnown) const { + uint64_t Sum = 0; + for (const auto *E : Edges) + if (E) + Sum += AssumeAllKnown ? *E->Count : E->Count.value_or(0U); + return Sum; + } + + void computeCountFrom(const SmallVector<EdgeInfo *> &Edges) { + assert(!Count.has_value()); + Count = getEdgeSum(Edges, true); + } + + void setSingleUnknownEdgeCount(SmallVector<EdgeInfo *> &Edges) { + uint64_t KnownSum = getEdgeSum(Edges, false); + uint64_t EdgeVal = *Count > KnownSum ? *Count - KnownSum : 0U; + EdgeInfo *E = nullptr; + for (auto *I : Edges) + if (I && !I->Count.has_value()) { + E = I; +#ifdef NDEBUG + break; +#else + assert((!E || E == I) && + "Expected exactly one edge to have an unknown count, " + "found a second one"); + continue; +#endif + } + assert(E && "Expected exactly one edge to have an unknown count"); + assert(!E->Count.has_value()); + E->Count = EdgeVal; + assert(E->Src->UnknownCountOutEdges > 0); + assert(E->Dest->UnknownCountInEdges > 0); + --E->Src->UnknownCountOutEdges; + --E->Dest->UnknownCountInEdges; + } + + public: + BBInfo(size_t NumInEdges, size_t NumOutEdges, std::optional<uint64_t> Count) + : Count(Count) { + // For in edges, we just want to pre-allocate enough space, since we know + // it at this stage. For out edges, we will insert edges at the indices + // corresponding to positions in this BB's terminator instruction, so we + // construct a default (nullptr values)-initialized vector. A nullptr edge + // corresponds to those that are excluded (see shouldExcludeEdge). 
+ InEdges.reserve(NumInEdges); + OutEdges.resize(NumOutEdges); + } + + bool tryTakeCountFromKnownOutEdges(const BasicBlock &BB) { + if (!succ_empty(&BB) && !UnknownCountOutEdges) { + computeCountFrom(OutEdges); + return true; + } + return false; + } + + bool tryTakeCountFromKnownInEdges(const BasicBlock &BB) { + if (!BB.isEntryBlock() && !UnknownCountInEdges) { + computeCountFrom(InEdges); + return true; + } + return false; + } + + void addInEdge(EdgeInfo &Info) { + InEdges.push_back(&Info); + ++UnknownCountInEdges; + } + + // For the out edges, we care about the position we place them in, which is + // the position in terminator instruction's list (at construction). Later, + // we build branch_weights metadata with edge frequency values matching + // these positions. + void addOutEdge(size_t Index, EdgeInfo &Info) { + OutEdges[Index] = &Info; + ++UnknownCountOutEdges; + } + + bool hasCount() const { return Count.has_value(); } + + bool trySetSingleUnknownInEdgeCount() { + if (UnknownCountInEdges == 1) { + setSingleUnknownEdgeCount(InEdges); + return true; + } + return false; + } + + bool trySetSingleUnknownOutEdgeCount() { + if (UnknownCountOutEdges == 1) { + setSingleUnknownEdgeCount(OutEdges); + return true; + } + return false; + } + size_t getNumOutEdges() const { return OutEdges.size(); } + + uint64_t getEdgeCount(size_t Index) const { + if (auto *E = OutEdges[Index]) + return *E->Count; + return 0U; + } + }; + + Function &F; + const SmallVectorImpl<uint64_t> &Counters; + // To be accessed through getBBInfo() after construction. + std::map<const BasicBlock *, BBInfo> BBInfos; + std::vector<EdgeInfo> EdgeInfos; + InstrProfSummaryBuilder &PB; + + // This is an adaptation of PGOUseFunc::populateCounters. + // FIXME(mtrofin): look into factoring the code to share one implementation. 
+ void propagateCounterValues(const SmallVectorImpl<uint64_t> &Counters) { + bool KeepGoing = true; + while (KeepGoing) { + KeepGoing = false; + for (const auto &BB : reverse(F)) { + auto &Info = getBBInfo(BB); + if (!Info.hasCount()) + KeepGoing |= Info.tryTakeCountFromKnownOutEdges(BB) || + Info.tryTakeCountFromKnownInEdges(BB); + if (Info.hasCount()) { + KeepGoing |= Info.trySetSingleUnknownOutEdgeCount(); + KeepGoing |= Info.trySetSingleUnknownInEdgeCount(); + } + } + } + } + // The only criterion for exclusion is faux suspend -> exit edges in presplit + // coroutines. The API serves for readability, currently. + bool shouldExcludeEdge(const BasicBlock &Src, const BasicBlock &Dest) const { + return llvm::isPresplitCoroSuspendExitEdge(Src, Dest); + } + + BBInfo &getBBInfo(const BasicBlock &BB) { return BBInfos.find(&BB)->second; } + +public: + ProfileAnnotator(Function &F, const SmallVectorImpl<uint64_t> &Counters, + InstrProfSummaryBuilder &PB) + : F(F), Counters(Counters), PB(PB) { + assert(!F.isDeclaration()); + assert(!Counters.empty()); + size_t NrEdges = 0; + for (const auto &BB : F) { + std::optional<uint64_t> Count; + if (auto *Ins = CtxProfAnalysis::getBBInstrumentation( + const_cast<BasicBlock &>(BB))) { + auto Index = Ins->getIndex()->getZExtValue(); + assert(Index < Counters.size() && + "The index must be inside the counters vector by construction - " + "tripping this assertion indicates a bug in how the contextual " + "profile is managed by IPO transforms"); + Count = Counters[Ins->getIndex()->getZExtValue()]; + } + auto [It, Ins] = + BBInfos.insert({&BB, {pred_size(&BB), succ_size(&BB), Count}}); + (void)Ins; + assert(Ins && "We iterate through the function's BBs, no reason to " + "insert one more than once"); + NrEdges += llvm::count_if(successors(&BB), [&](const auto *Succ) { + return !shouldExcludeEdge(BB, *Succ); + }); + } + // Pre-allocate the vector, we want references to its contents to be stable. 
+ EdgeInfos.reserve(NrEdges); + for (const auto &BB : F) { + auto &Info = getBBInfo(BB); + for (auto I = 0U; I < BB.getTerminator()->getNumSuccessors(); ++I) { + const auto *Succ = BB.getTerminator()->getSuccessor(I); + if (!shouldExcludeEdge(BB, *Succ)) { + auto &EI = EdgeInfos.emplace_back(getBBInfo(BB), getBBInfo(*Succ)); + Info.addOutEdge(I, EI); + getBBInfo(*Succ).addInEdge(EI); + } + } + } + assert(EdgeInfos.capacity() == NrEdges && + "The capacity of EdgeInfos should have stayed unchanged it was " + "populated, because we need pointers to its contents to be stable"); + } + + /// Assign branch weights and function entry count. Also update the PSI + /// builder. + void assignProfileData() { + assert(!Counters.empty()); + propagateCounterValues(Counters); + F.setEntryCount(Counters[0]); + PB.addEntryCount(Counters[0]); + + for (auto &BB : F) { + if (succ_size(&BB) < 2) + continue; + auto *Term = BB.getTerminator(); + SmallVector<uint64_t, 2> EdgeCounts(Term->getNumSuccessors(), 0); + uint64_t MaxCount = 0; + const auto &BBInfo = getBBInfo(BB); + for (unsigned SuccIdx = 0, Size = BBInfo.getNumOutEdges(); SuccIdx < Size; + ++SuccIdx) { + uint64_t EdgeCount = BBInfo.getEdgeCount(SuccIdx); + if (EdgeCount > MaxCount) + MaxCount = EdgeCount; + EdgeCounts[SuccIdx] = EdgeCount; + PB.addInternalCount(EdgeCount); + } + + if (MaxCount == 0) + F.getContext().emitError( + "[ctx-prof] Encountered a BB with more than one successor, where " + "all outgoing edges have a 0 count. 
This occurs in non-exiting " + "functions (message pumps, usually) which are not supported in the " + "contextual profiling case"); + setProfMetadata(F.getParent(), Term, EdgeCounts, MaxCount); + } + } +}; + +bool areAllBBsReachable(const Function &F, FunctionAnalysisManager &FAM) { + auto &DT = FAM.getResult<DominatorTreeAnalysis>(const_cast<Function &>(F)); + return llvm::all_of( + F, [&](const BasicBlock &BB) { return DT.isReachableFromEntry(&BB); }); +} + +void clearColdFunctionProfile(Function &F) { + for (auto &BB : F) + BB.getTerminator()->setMetadata(LLVMContext::MD_prof, nullptr); + F.setEntryCount(0U); +} + +void removeInstrumentation(Function &F) { + for (auto &BB : F) + for (auto &I : llvm::make_early_inc_range(BB)) + if (isa<InstrProfCntrInstBase>(I)) + I.eraseFromParent(); +} + +} // namespace + +PreservedAnalyses PGOCtxProfFlatteningPass::run(Module &M, + ModuleAnalysisManager &MAM) { + // Ensure in all cases the instrumentation is removed: if this module had no + // roots, the contextual profile would evaluate to false, but there would + // still be instrumentation. + // Note: in such cases we leave as-is any other profile info (if present - + // e.g. synthetic weights, etc) because it wouldn't interfere with the + // contextual - based one (which would be in other modules) + auto OnExit = llvm::make_scope_exit([&]() { + for (auto &F : M) + removeInstrumentation(F); + }); + auto &CtxProf = MAM.getResult<CtxProfAnalysis>(M); + if (!CtxProf) + return PreservedAnalyses::all(); + + const auto FlattenedProfile = CtxProf.flatten(); + + InstrProfSummaryBuilder PB(ProfileSummaryBuilder::DefaultCutoffs); + for (auto &F : M) { + if (F.isDeclaration()) + continue; + + assert(areAllBBsReachable( + F, MAM.getResult<FunctionAnalysisManagerModuleProxy>(M) + .getManager()) && + "Function has unreacheable basic blocks. 
The expectation was that " + "DCE was run before."); + + const auto &FlatProfile = + FlattenedProfile.lookup(AssignGUIDPass::getGUID(F)); + // If this function didn't appear in the contextual profile, it's cold. + if (FlatProfile.empty()) + clearColdFunctionProfile(F); + else { + ProfileAnnotator S(F, FlatProfile, PB); + S.assignProfileData(); + } + } + + auto &PSI = MAM.getResult<ProfileSummaryAnalysis>(M); + + M.setProfileSummary(PB.getSummary()->getMD(M.getContext()), + ProfileSummary::Kind::PSK_Instr); + PSI.refresh(); + return PreservedAnalyses::none(); +} diff --git a/llvm/test/Analysis/CtxProfAnalysis/flatten-always-removes-instrumentation.ll b/llvm/test/Analysis/CtxProfAnalysis/flatten-always-removes-instrumentation.ll new file mode 100644 index 00000000000000..c1c9cfa5a4f471 --- /dev/null +++ b/llvm/test/Analysis/CtxProfAnalysis/flatten-always-removes-instrumentation.ll @@ -0,0 +1,12 @@ +; RUN: opt -passes=ctx-prof-flatten %s -S | FileCheck %s + +declare void @bar() + +define void @foo() { + call void @llvm.instrprof.increment(ptr @foo, i64 123, i32 1, i32 0) + call void @llvm.instrprof.callsite(ptr @foo, i64 123, i32 1, i32 0, ptr @bar) + call void @bar() + ret void +} + +; CHECK-NOT: call void @llvm.instrprof \ No newline at end of file diff --git a/llvm/test/Analysis/CtxProfAnalysis/flatten-and-annotate.ll b/llvm/test/Analysis/CtxProfAnalysis/flatten-and-annotate.ll new file mode 100644 index 00000000000000..b7950b26a3ef27 --- /dev/null +++ b/llvm/test/Analysis/CtxProfAnalysis/flatten-and-annotate.ll @@ -0,0 +1,112 @@ +; REQUIRES: x86_64-linux +; +; RUN: rm -rf %t +; RUN: split-file %s %t +; RUN: llvm-ctxprof-util fromJSON --input=%t/profile.json --output=%t/profile.ctxprofdata +; RUN: opt -module-summary -passes='thinlto-pre-link<O2>' -use-ctx-profile=%t/profile.ctxprofdata \ +; RUN: %t/example.ll -S -o %t/prelink.ll +; RUN: FileCheck --input-file %t/prelink.ll %s --check-prefix=PRELINK +; RUN: opt -passes='ctx-prof-flatten' 
-use-ctx-profile=%t/profile.ctxprofdata %t/prelink.ll -S | FileCheck %s +; +; +; Check that instrumentation occurs where expected: the "no" block for @foo and +; the "yes" block for @an_entrypoint - which explains the subsequent branch weights +; +; PRELINK-LABEL: @foo +; PRELINK-LABEL: yes: +; PRELINK-LABEL: no: +; PRELINK-NEXT: call void @llvm.instrprof.increment(ptr @foo, i64 [[#]], i32 2, i32 1) + +; PRELINK-LABEL: @an_entrypoint +; PRELINK-LABEL: yes: +; PRELINK-NEXT: call void @llvm.instrprof.increment(ptr @an_entrypoint, i64 [[#]], i32 2, i32 1) +; PRELINK-NOT: "ProfileSummary" + +; Check that the output has: +; - no instrumentation +; - the 2 functions have an entry count +; - each conditional branch has profile annotation +; +; CHECK-NOT: call void @llvm.instrprof +; +; make sure we have function entry counts, branch weights, and a profile summary. +; CHECK-LABEL: @foo +; CHECK-SAME: !prof ![[FOO_EP:[0-9]+]] +; CHECK: br i1 %t, label %yes, label %no, !prof ![[FOO_BW:[0-9]+]] +; CHECK-LABEL: @an_entrypoint +; CHECK-SAME: !prof ![[AN_ENTRYPOINT_EP:[0-9]+]] +; CHECK: br i1 %t, label %yes, label %common.ret, !prof ![[AN_ENTRYPOINT_BW:[0-9]+]] + + +; CHECK: ![[#]] = !{i32 1, !"ProfileSummary", !1} +; CHECK: ![[#]] = !{!"TotalCount", i64 480} +; CHECK: ![[#]] = !{!"MaxCount", i64 140} +; CHECK: ![[#]] = !{!"MaxInternalCount", i64 125} +; CHECK: ![[#]] = !{!"MaxFunctionCount", i64 140} +; CHECK: ![[#]] = !{!"NumCounts", i64 6} +; CHECK: ![[#]] = !{!"NumFunctions", i64 2} +; +; @foo will be called both unconditionally and conditionally, on the "yes" branch +; which has a count of 40. So 140 times. + +; CHECK: ![[FOO_EP]] = !{!"function_entry_count", i64 140} + +; foo's "no" branch is taken 10+5 times (from the 2 contexts belonging to foo). +; Which means its "yes" branch is taken 140 - 15 times. 
+ +; CHECK: ![[FOO_BW]] = !{!"branch_weights", i32 125, i32 15} +; CHECK: ![[AN_ENTRYPOINT_EP]] = !{!"function_entry_count", i64 100} +; CHECK: ![[AN_ENTRYPOINT_BW]] = !{!"branch_weights", i32 40, i32 60} + +;--- profile.json +[ + { + "Guid": 4909520559318251808, + "Counters": [100, 40], + "Callsites": [ + [ + { + "Guid": 11872291593386833696, + "Counters": [ 100, 5 ] + } + ], + [ + { + "Guid": 11872291593386833696, + "Counters": [ 40, 10 ] + } + ] + ] + } +] +;--- example.ll +declare void @bar() + +define void @foo(i32 %a, ptr %fct) #0 !guid !0 { + %t = icmp sgt i32 %a, 7 + br i1 %t, label %yes, label %no +yes: + call void %fct(i32 %a) + br label %exit +no: + call void @bar() + br label %exit +exit: + ret void +} + +define void @an_entrypoint(i32 %a) !guid !1 { + %t = icmp sgt i32 %a, 0 + call void @foo(i32 10, ptr null) + br i1 %t, label %yes, label %no + +yes: + call void @foo(i32 1, ptr null) + ret void +no: + ret void +} + +attributes #0 = { noinline } +!0 = !{ i64 11872291593386833696 } +!1 = !{i64 4909520559318251808} _______________________________________________ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits