Hi, while looking into Firefox inline dumps I noticed that my previous patch was not quite enough to get inline summaries right. The problem is that we may have speculative edges from profile feedback, and it also happens that profiles mismatch, which we should handle better. This patch fixes the problem in a more robust way.
Bootstrapped/regtested x86_64-linux and also tested with firefox, will commit it shortly. Honza * ipa-utils.c (ipa_merge_profiles): Fix updating of fnsummary; also handle rescaling of mismatched profiles. * ipa-fnsummary.c (analyze_function): Handle speculative edges. Index: ipa-utils.c =================================================================== --- ipa-utils.c (revision 267377) +++ ipa-utils.c (working copy) @@ -392,6 +392,7 @@ if (!src->definition || !dst->definition) return; + if (src->frequency < dst->frequency) src->frequency = dst->frequency; @@ -416,6 +417,8 @@ fprintf (symtab->dump_file, "Merging profiles of %s to %s\n", src->dump_name (), dst->dump_name ()); } + profile_count orig_count = dst->count; + if (dst->count.initialized_p () && dst->count.ipa () == dst->count) dst->count += src->count.ipa (); else @@ -644,10 +647,21 @@ if (!preserve_body) src->release_body (); /* Update summary. */ - symtab->call_cgraph_removal_hooks (dst); - symtab->call_cgraph_insertion_hooks (dst); + compute_fn_summary (dst, 0); } - /* TODO: if there is no match, we can scale up. */ + /* We can't update CFG profile, but we can scale IPA profile. CFG + will be scaled according to dst->count after IPA passes. 
*/ + else + { + profile_count to = dst->count; + profile_count::adjust_for_ipa_scaling (&to, &orig_count); + struct cgraph_edge *e; + + for (e = dst->callees; e; e = e->next_callee) + e->count = e->count.apply_scale (to, orig_count); + for (e = dst->indirect_calls; e; e = e->next_callee) + e->count = e->count.apply_scale (to, orig_count); + } src->decl = oldsrcdecl; } Index: ipa-fnsummary.c =================================================================== --- ipa-fnsummary.c (revision 267377) +++ ipa-fnsummary.c (working copy) @@ -2180,6 +2180,17 @@ es->call_stmt_time = this_time; es->loop_depth = bb_loop_depth (bb); edge_set_predicate (edge, &bb_predicate); + if (edge->speculative) + { + cgraph_edge *direct, *indirect; + ipa_ref *ref; + edge->speculative_call_info (direct, indirect, ref); + gcc_assert (direct == edge); + ipa_call_summary *es2 + = ipa_call_summaries->get_create (indirect); + ipa_call_summaries->duplicate (edge, indirect, + es, es2); + } } /* TODO: When conditional jump or swithc is known to be constant, but @@ -2491,7 +2502,8 @@ ipa_update_overall_fn_summary but because computation happens in different order the roundoff errors result in slight changes. */ ipa_update_overall_fn_summary (node); - gcc_assert (info->size == info->self_size); + /* In LTO mode we may have speculative edges set. */ + gcc_assert (in_lto_p || info->size == info->self_size); }