https://gcc.gnu.org/g:1da3c4d90e678af0fed89c5638c97a41e5e04547
commit r16-3581-g1da3c4d90e678af0fed89c5638c97a41e5e04547 Author: Jan Hubicka <hubi...@ucw.cz> Date: Thu Sep 4 17:23:20 2025 +0200 Fix scaling of auto-fdo profiles in inliner With auto-fdo it is possible that function bar with non-zero profile is inlined into foo with zero profile and foo is the only caller of it. In this case we currently scale bar to also have zero profile which makes it optimized for size. With normal profiles this does not happen, since basic blocks with non-zero count must have some way to be reached. This patch makes the inliner scale the caller in this case which mitigates the problem (to some degree). Bootstrapped/regtested x86_64-linux, plan to commit it shortly. gcc/ChangeLog: * ipa-inline-transform.cc (inline_call): If function with AFDO profile is inlined into function with GUESSED_GLOBAL0_AFDO or GUESSED_GLOBAL0_ADJUSTED, scale caller to AFDO profile. * profile-count.h (profile_count::apply_scale): If num is AFDO and den is not GUESSED, make result AFDO rather than GUESSED. Diff: --- gcc/ipa-inline-transform.cc | 34 ++++++++++++++++++++++++++++++++++ gcc/profile-count.h | 14 ++++++++++++-- 2 files changed, 46 insertions(+), 2 deletions(-) diff --git a/gcc/ipa-inline-transform.cc b/gcc/ipa-inline-transform.cc index 9d759d218b56..5c244bc17ac4 100644 --- a/gcc/ipa-inline-transform.cc +++ b/gcc/ipa-inline-transform.cc @@ -344,6 +344,40 @@ inline_call (struct cgraph_edge *e, bool update_original, to = e->caller; if (to->inlined_to) to = to->inlined_to; + + /* In case callee has AFDO profile but caller has GLOBAL0 we need + to re-scale it so it can have non-zero AFDO profile. 
*/ + if (callee->count.quality () == AFDO + && e->count.nonzero_p () + && (to->count.quality () == GUESSED_GLOBAL0_AFDO + || to->count.quality () == GUESSED_GLOBAL0_ADJUSTED)) + { + profile_count num = callee->count; + profile_count den = e->count; + profile_count::adjust_for_ipa_scaling (&num, &den); + if (dump_file) + { + fprintf (dump_file, "Rescalling profile of caller %s " + "to allow non-zero AFDO counts:", + to->dump_name ()); + den.dump (dump_file); + fprintf (dump_file, " -> "); + num.dump (dump_file); + fprintf (dump_file, "\n"); + } + to->apply_scale (num, den); + to->frequency = std::max (to->frequency, callee->frequency); + /* Do not update original, so possible additional calls of callee + are handled reasonably well. */ + update_original = false; + gcc_checking_assert (to->count.quality () == AFDO); + if (dump_file) + { + fprintf (dump_file, "Scaled profile of %s: ", to->dump_name ()); + to->count.dump (dump_file); + fprintf (dump_file, "\n"); + } + } if (to->thunk) { struct cgraph_node *target = to->callees->callee; diff --git a/gcc/profile-count.h b/gcc/profile-count.h index c893aec577c1..65c4596a2b0f 100644 --- a/gcc/profile-count.h +++ b/gcc/profile-count.h @@ -1212,8 +1212,18 @@ public: /* Be sure that ret is not local if num is global. Also ensure that ret is not global0 when num is global. */ if (num.ipa_p ()) - ret.m_quality = MAX (ret.m_quality, - num == num.ipa () ? GUESSED : num.m_quality); + { + /* This is common case of AFDO scaling when we upgrade + GLOBAL0_AFDO function to AFDO. Be sure that result + is AFDO and not GUESSED (which is unnecesarily low). */ + if (num.m_quality == AFDO + && (ret.m_quality != GUESSED + && ret.m_quality != GUESSED_LOCAL)) + ret.m_quality = AFDO; + else + ret.m_quality = MAX (ret.m_quality, + num == num.ipa () ? GUESSED : num.m_quality); + } return ret; }