https://github.com/minglotus-6 updated https://github.com/llvm/llvm-project/pull/83809
>From 9575b83ea40012ecbfbf301a24ec89de0726ffd4 Mon Sep 17 00:00:00 2001 From: mingmingl <mingmi...@google.com> Date: Mon, 4 Mar 2024 00:43:55 -0800 Subject: [PATCH] update profile for invoke instruction in caller and callee after inline --- llvm/include/llvm/IR/Instructions.h | 3 + llvm/lib/IR/Instructions.cpp | 12 ++ llvm/lib/Transforms/Utils/InlineFunction.cpp | 11 +- .../Inline/update_invoke_value_profile.ll | 185 ++++++++++++++++++ 4 files changed, 209 insertions(+), 2 deletions(-) create mode 100644 llvm/test/Transforms/Inline/update_invoke_value_profile.ll diff --git a/llvm/include/llvm/IR/Instructions.h b/llvm/include/llvm/IR/Instructions.h index bc357074e5cb21..1146b3fa3ae244 100644 --- a/llvm/include/llvm/IR/Instructions.h +++ b/llvm/include/llvm/IR/Instructions.h @@ -4360,6 +4360,9 @@ class InvokeInst : public CallBase { unsigned getNumSuccessors() const { return 2; } + /// Updates profile metadata by scaling it by \p S / \p T. + void updateProfWeight(uint64_t S, uint64_t T); + // Methods for support type inquiry through isa, cast, and dyn_cast: static bool classof(const Instruction *I) { return (I->getOpcode() == Instruction::Invoke); diff --git a/llvm/lib/IR/Instructions.cpp b/llvm/lib/IR/Instructions.cpp index 9ae71acd523c36..920ce67f118991 100644 --- a/llvm/lib/IR/Instructions.cpp +++ b/llvm/lib/IR/Instructions.cpp @@ -918,6 +918,18 @@ LandingPadInst *InvokeInst::getLandingPadInst() const { return cast<LandingPadInst>(getUnwindDest()->getFirstNonPHI()); } +void InvokeInst::updateProfWeight(uint64_t S, uint64_t T) { + if (T == 0) { + LLVM_DEBUG(dbgs() << "Attempting to update profile weights will result in " + "div by 0. Ignoring. Likely the function " + << getParent()->getParent()->getName() + << " has 0 entry count, and contains call instructions " + "with non-zero prof info."); + return; + } + scaleProfData(*this, S, T); +} + //===----------------------------------------------------------------------===// // CallBrInst Implementation //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Transforms/Utils/InlineFunction.cpp b/llvm/lib/Transforms/Utils/InlineFunction.cpp index f68fdb26f28173..75b0d0669e9228 100644 --- a/llvm/lib/Transforms/Utils/InlineFunction.cpp +++ b/llvm/lib/Transforms/Utils/InlineFunction.cpp @@ -1909,10 +1909,14 @@ void llvm::updateProfileCallee( // During inlining ? if (VMap) { uint64_t CloneEntryCount = PriorEntryCount - NewEntryCount; - for (auto Entry : *VMap) + for (auto Entry : *VMap) { if (isa<CallInst>(Entry.first)) if (auto *CI = dyn_cast_or_null<CallInst>(Entry.second)) CI->updateProfWeight(CloneEntryCount, PriorEntryCount); + if (isa<InvokeInst>(Entry.first)) + if (auto *II = dyn_cast_or_null<InvokeInst>(Entry.second)) + II->updateProfWeight(CloneEntryCount, PriorEntryCount); + } } if (EntryDelta) { @@ -1921,9 +1925,12 @@ void llvm::updateProfileCallee( for (BasicBlock &BB : *Callee) // No need to update the callsite if it is pruned during inlining. if (!VMap || VMap->count(&BB)) - for (Instruction &I : BB) + for (Instruction &I : BB) { if (CallInst *CI = dyn_cast<CallInst>(&I)) CI->updateProfWeight(NewEntryCount, PriorEntryCount); + if (InvokeInst *II = dyn_cast<InvokeInst>(&I)) + II->updateProfWeight(NewEntryCount, PriorEntryCount); + } } } diff --git a/llvm/test/Transforms/Inline/update_invoke_value_profile.ll b/llvm/test/Transforms/Inline/update_invoke_value_profile.ll new file mode 100644 index 00000000000000..ac5597a41fce61 --- /dev/null +++ b/llvm/test/Transforms/Inline/update_invoke_value_profile.ll @@ -0,0 +1,185 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4 + +; RUN: opt < %s -passes='require<profile-summary>,cgscc(inline)' -inline-threshold=1000 -S | FileCheck %s + +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +%class.Error = type { i32 } +@_ZTI5Error = external constant { ptr, ptr } + +define i32 @callee(ptr %b) personality ptr @__gxx_personality_v0 !prof !17 { +; CHECK-LABEL: define i32 @callee( +; CHECK-SAME: ptr [[B:%.*]]) personality ptr @__gxx_personality_v0 !prof [[PROF0:![0-9]+]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[E:%.*]] = alloca [[CLASS_ERROR:%.*]], align 8 +; CHECK-NEXT: [[VTABLE:%.*]] = load ptr, ptr [[B]], align 8 +; CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VTABLE]], align 8 +; CHECK-NEXT: [[CALL:%.*]] = invoke i32 [[TMP0]](ptr [[B]]) +; CHECK-NEXT: to label [[TRY_CONT:%.*]] unwind label [[LPAD:%.*]], !prof [[PROF1:![0-9]+]] +; CHECK: lpad: +; CHECK-NEXT: [[TMP1:%.*]] = landingpad { ptr, i32 } +; CHECK-NEXT: cleanup +; CHECK-NEXT: catch ptr @_ZTI5Error +; CHECK-NEXT: [[TMP2:%.*]] = extractvalue { ptr, i32 } [[TMP1]], 1 +; CHECK-NEXT: [[TMP3:%.*]] = tail call i32 @llvm.eh.typeid.for(ptr @_ZTI5Error) +; CHECK-NEXT: [[MATCHES:%.*]] = icmp eq i32 [[TMP2]], [[TMP3]] +; CHECK-NEXT: br i1 [[MATCHES]], label [[CATCH:%.*]], label [[EHCLEANUP:%.*]] +; CHECK: catch: +; CHECK-NEXT: [[TMP4:%.*]] = extractvalue { ptr, i32 } [[TMP1]], 0 +; CHECK-NEXT: [[TMP5:%.*]] = tail call ptr @__cxa_begin_catch(ptr [[TMP4]]) +; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +; CHECK-NEXT: store i32 [[TMP6]], ptr [[E]], align 4 +; CHECK-NEXT: [[CALL3:%.*]] = invoke i32 @_ZN5Error10error_codeEv(ptr [[E]]) +; CHECK-NEXT: to label [[INVOKE_CONT2:%.*]] unwind label [[LPAD1:%.*]] +; CHECK: invoke.cont2: +; CHECK-NEXT: br label [[TRY_CONT]] +; CHECK: try.cont: +; CHECK-NEXT: [[RET_0:%.*]] = phi i32 [ [[CALL3]], [[INVOKE_CONT2]] ], [ [[CALL]], [[ENTRY:%.*]] ] +; CHECK-NEXT: ret i32 [[RET_0]] +; CHECK: lpad1: +; CHECK-NEXT: [[TMP7:%.*]] = landingpad { ptr, i32 } +; CHECK-NEXT: cleanup +; CHECK-NEXT: invoke void @__cxa_end_catch() +; CHECK-NEXT: to label [[INVOKE_CONT4:%.*]] unwind label [[TERMINATE_LPAD:%.*]] +; CHECK: invoke.cont4: +; CHECK-NEXT: br label [[EHCLEANUP]] +; CHECK: ehcleanup: +; CHECK-NEXT: [[LPAD_VAL7_MERGED:%.*]] = phi { ptr, i32 } [ [[TMP7]], [[INVOKE_CONT4]] ], [ [[TMP1]], [[LPAD]] ] +; CHECK-NEXT: resume { ptr, i32 } [[LPAD_VAL7_MERGED]] +; CHECK: terminate.lpad: +; CHECK-NEXT: [[TMP8:%.*]] = landingpad { ptr, i32 } +; CHECK-NEXT: catch ptr null +; CHECK-NEXT: unreachable +; +entry: + %e = alloca %class.Error + %vtable = load ptr, ptr %b + %0 = load ptr, ptr %vtable + %call = invoke i32 %0(ptr %b) + to label %try.cont unwind label %lpad, !prof !15 + +lpad: + %1 = landingpad { ptr, i32 } + cleanup + catch ptr @_ZTI5Error + %2 = extractvalue { ptr, i32 } %1, 1 + %3 = tail call i32 @llvm.eh.typeid.for(ptr @_ZTI5Error) + %matches = icmp eq i32 %2, %3 + br i1 %matches, label %catch, label %ehcleanup + +catch: + %4 = extractvalue { ptr, i32 } %1, 0 + %5 = tail call ptr @__cxa_begin_catch(ptr %4) + %6 = load i32, ptr %5 + store i32 %6, ptr %e + %call3 = invoke i32 @_ZN5Error10error_codeEv(ptr %e) + to label %invoke.cont2 unwind label %lpad1 + +invoke.cont2: + br label %try.cont + +try.cont: + %ret.0 = phi i32 [ %call3, %invoke.cont2 ], [ %call, %entry ] + ret i32 %ret.0 + +lpad1: + %7 = landingpad { ptr, i32 } + cleanup + invoke void @__cxa_end_catch() + to label %invoke.cont4 unwind label %terminate.lpad + +invoke.cont4: + br label %ehcleanup + +ehcleanup: + %lpad.val7.merged = phi { ptr, i32 } [ %7, %invoke.cont4 ], [ %1, %lpad ] + resume { ptr, i32 } %lpad.val7.merged + +terminate.lpad: + %8 = landingpad { ptr, i32 } + catch ptr null + unreachable +} + +define i32 @caller(ptr %b) !prof !16 { +; CHECK-LABEL: define i32 @caller( +; CHECK-SAME: ptr [[B:%.*]]) personality ptr @__gxx_personality_v0 !prof [[PROF2:![0-9]+]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[E_I:%.*]] = alloca [[CLASS_ERROR:%.*]], align 8 +; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr [[E_I]]) +; CHECK-NEXT: [[VTABLE_I:%.*]] = load ptr, ptr [[B]], align 8 +; CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VTABLE_I]], align 8 +; CHECK-NEXT: [[CALL_I:%.*]] = invoke i32 [[TMP0]](ptr [[B]]) +; CHECK-NEXT: to label [[CALLEE_EXIT:%.*]] unwind label [[LPAD_I:%.*]], !prof [[PROF3:![0-9]+]] +; CHECK: lpad.i: +; CHECK-NEXT: [[TMP1:%.*]] = landingpad { ptr, i32 } +; CHECK-NEXT: cleanup +; CHECK-NEXT: catch ptr @_ZTI5Error +; CHECK-NEXT: [[TMP2:%.*]] = extractvalue { ptr, i32 } [[TMP1]], 1 +; CHECK-NEXT: [[TMP3:%.*]] = tail call i32 @llvm.eh.typeid.for(ptr @_ZTI5Error) +; CHECK-NEXT: [[MATCHES_I:%.*]] = icmp eq i32 [[TMP2]], [[TMP3]] +; CHECK-NEXT: br i1 [[MATCHES_I]], label [[CATCH_I:%.*]], label [[EHCLEANUP_I:%.*]] +; CHECK: catch.i: +; CHECK-NEXT: [[TMP4:%.*]] = extractvalue { ptr, i32 } [[TMP1]], 0 +; CHECK-NEXT: [[TMP5:%.*]] = tail call ptr @__cxa_begin_catch(ptr [[TMP4]]) +; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +; CHECK-NEXT: store i32 [[TMP6]], ptr [[E_I]], align 4 +; CHECK-NEXT: [[CALL3_I:%.*]] = invoke i32 @_ZN5Error10error_codeEv(ptr [[E_I]]) +; CHECK-NEXT: to label [[INVOKE_CONT2_I:%.*]] unwind label [[LPAD1_I:%.*]] +; CHECK: invoke.cont2.i: +; CHECK-NEXT: br label [[CALLEE_EXIT]] +; CHECK: lpad1.i: +; CHECK-NEXT: [[TMP7:%.*]] = landingpad { ptr, i32 } +; CHECK-NEXT: cleanup +; CHECK-NEXT: invoke void @__cxa_end_catch() +; CHECK-NEXT: to label [[INVOKE_CONT4_I:%.*]] unwind label [[TERMINATE_LPAD_I:%.*]] +; CHECK: invoke.cont4.i: +; CHECK-NEXT: br label [[EHCLEANUP_I]] +; CHECK: ehcleanup.i: +; CHECK-NEXT: [[LPAD_VAL7_MERGED_I:%.*]] = phi { ptr, i32 } [ [[TMP7]], [[INVOKE_CONT4_I]] ], [ [[TMP1]], [[LPAD_I]] ] +; CHECK-NEXT: resume { ptr, i32 } [[LPAD_VAL7_MERGED_I]] +; CHECK: terminate.lpad.i: +; CHECK-NEXT: [[TMP8:%.*]] = landingpad { ptr, i32 } +; CHECK-NEXT: catch ptr null +; CHECK-NEXT: unreachable +; CHECK: callee.exit: +; CHECK-NEXT: [[RET_0_I:%.*]] = phi i32 [ [[CALL3_I]], [[INVOKE_CONT2_I]] ], [ [[CALL_I]], [[ENTRY:%.*]] ] +; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr [[E_I]]) +; CHECK-NEXT: ret i32 [[RET_0_I]] +; +entry: + %call = tail call i32 @callee(ptr %b) + ret i32 %call +} + +declare i32 @__gxx_personality_v0(...) +declare i32 @llvm.eh.typeid.for(ptr) +declare ptr @__cxa_begin_catch(ptr) +declare i32 @_ZN5Error10error_codeEv(ptr) +declare void @__cxa_end_catch() + + +!1 = !{i32 1, !"ProfileSummary", !2} +!2 = !{!3, !4, !5, !6, !7, !8, !9, !10} +!3 = !{!"ProfileFormat", !"InstrProf"} +!4 = !{!"TotalCount", i64 10000} +!5 = !{!"MaxCount", i64 10} +!6 = !{!"MaxInternalCount", i64 1} +!7 = !{!"MaxFunctionCount", i64 1000} +!8 = !{!"NumCounts", i64 3} +!9 = !{!"NumFunctions", i64 3} +!10 = !{!"DetailedSummary", !11} +!11 = !{!12, !13, !14} +!12 = !{i32 10000, i64 100, i32 1} +!13 = !{i32 999000, i64 100, i32 1} +!14 = !{i32 999999, i64 1, i32 2} +!15 = !{!"VP", i32 0, i64 1500, i64 9261744921105590125, i64 1500} +!16 = !{!"function_entry_count", i64 1000} +!17 = !{!"function_entry_count", i64 1500} +;. +; CHECK: [[PROF0]] = !{!"function_entry_count", i64 500} +; CHECK: [[PROF1]] = !{!"VP", i32 0, i64 500, i64 -9184999152603961491, i64 500} +; CHECK: [[PROF2]] = !{!"function_entry_count", i64 1000} +; CHECK: [[PROF3]] = !{!"VP", i32 0, i64 1000, i64 -9184999152603961491, i64 1000} +;. _______________________________________________ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits