Hi, this patch makes profile scaling use the same logic in tree-inline and cgraphclones. This reduces roundoff errors and makes it easier to propagate more info. When compiling tramp3d with profile feedback, the number of mismatches after inlining goes down from 248 to 95.
Honza * cgraph.h (cgraph_edge::clone): Update prototype. * cgraphclones.c (cgraph_edge::clone): Update profile scaling. (cgraph_node::create_clone): Update. (cgraph_node::create_version_clone): Update. * tree-inline.c (copy_bb): Update. (expand_call_inline): Update. Index: cgraph.h =================================================================== --- cgraph.h (revision 249092) +++ cgraph.h (working copy) @@ -1649,7 +1649,7 @@ struct GTY((chain_next ("%h.next_caller" /* Create clone of edge in the node N represented by CALL_EXPR the callgraph. */ cgraph_edge * clone (cgraph_node *n, gcall *call_stmt, unsigned stmt_uid, - gcov_type count_scale, int freq_scale, + profile_count num, profile_count den, int freq_scale, bool update_original); /* Verify edge count and frequency. */ Index: cgraphclones.c =================================================================== --- cgraphclones.c (revision 249092) +++ cgraphclones.c (working copy) @@ -86,10 +86,13 @@ along with GCC; see the file COPYING3. cgraph_edge * cgraph_edge::clone (cgraph_node *n, gcall *call_stmt, unsigned stmt_uid, - gcov_type count_scale, int freq_scale, bool update_original) + profile_count num, profile_count den, + int freq_scale, bool update_original) { cgraph_edge *new_edge; - profile_count gcov_count = count.apply_scale (count_scale, REG_BR_PROB_BASE); + profile_count gcov_count + = (num == profile_count::zero () || den > 0) + ? count.apply_scale (num, den) : count; gcov_type freq; /* We do not want to ignore loop nest after frequency drops to 0. 
*/ @@ -116,7 +119,7 @@ cgraph_edge::clone (cgraph_node *n, gcal { new_edge = n->create_indirect_edge (call_stmt, indirect_info->ecf_flags, - count, freq, false); + gcov_count, freq, false); *new_edge->indirect_info = *indirect_info; } } @@ -428,7 +431,6 @@ cgraph_node::create_clone (tree new_decl { cgraph_node *new_node = symtab->create_empty (); cgraph_edge *e; - gcov_type count_scale; unsigned i; if (new_inlined_to) @@ -453,7 +455,6 @@ cgraph_node::create_clone (tree new_decl new_node->global = global; new_node->global.inlined_to = new_inlined_to; new_node->rtl = rtl; - new_node->count = count; new_node->frequency = frequency; new_node->tp_first_run = tp_first_run; new_node->tm_clone = tm_clone; @@ -475,18 +476,6 @@ cgraph_node::create_clone (tree new_decl else new_node->clone.combined_args_to_skip = args_to_skip; - if (count.initialized_p ()) - { - if (new_node->count > count) - count_scale = REG_BR_PROB_BASE; - else - count_scale = new_node->count.probability_in (count); - } - else - count_scale = 0; - if (update_original) - count -= prof_count; - FOR_EACH_VEC_ELT (redirect_callers, i, e) { /* Redirect calls to the old version node to point to its new @@ -500,12 +489,12 @@ cgraph_node::create_clone (tree new_decl new_node->expand_all_artificial_thunks (); for (e = callees;e; e=e->next_callee) - e->clone (new_node, e->call_stmt, e->lto_stmt_uid, count_scale, + e->clone (new_node, e->call_stmt, e->lto_stmt_uid, new_node->count, count, freq, update_original); for (e = indirect_calls; e; e = e->next_callee) e->clone (new_node, e->call_stmt, e->lto_stmt_uid, - count_scale, freq, update_original); + new_node->count, count, freq, update_original); new_node->clone_references (this); new_node->next_sibling_clone = clones; @@ -514,6 +503,9 @@ cgraph_node::create_clone (tree new_decl clones = new_node; new_node->clone_of = this; + if (update_original) + count -= prof_count; + if (call_duplication_hook) symtab->call_cgraph_duplication_hooks (this, new_node); @@ -911,14 
+903,14 @@ cgraph_node::create_version_clone (tree if (!bbs_to_copy || bitmap_bit_p (bbs_to_copy, gimple_bb (e->call_stmt)->index)) e->clone (new_version, e->call_stmt, - e->lto_stmt_uid, REG_BR_PROB_BASE, + e->lto_stmt_uid, count, count, CGRAPH_FREQ_BASE, true); for (e = indirect_calls; e; e=e->next_callee) if (!bbs_to_copy || bitmap_bit_p (bbs_to_copy, gimple_bb (e->call_stmt)->index)) e->clone (new_version, e->call_stmt, - e->lto_stmt_uid, REG_BR_PROB_BASE, + e->lto_stmt_uid, count, count, CGRAPH_FREQ_BASE, true); FOR_EACH_VEC_ELT (redirect_callers, i, e) Index: tree-inline.c =================================================================== --- tree-inline.c (revision 249092) +++ tree-inline.c (working copy) @@ -2009,7 +2009,9 @@ copy_bb (copy_body_data *id, basic_block struct cgraph_edge *old_edge = edge; edge = edge->clone (id->dst_node, call_stmt, gimple_uid (stmt), - REG_BR_PROB_BASE, CGRAPH_FREQ_BASE, + profile_count::one (), + profile_count::one (), + CGRAPH_FREQ_BASE, true); /* We could also just rescale the frequency, but doing so would introduce roundoff errors and make @@ -2028,7 +2030,9 @@ copy_bb (copy_body_data *id, basic_block old_edge->speculative_call_info (direct, indirect, ref); indirect = indirect->clone (id->dst_node, call_stmt, gimple_uid (stmt), - REG_BR_PROB_BASE, CGRAPH_FREQ_BASE, + profile_count::one (), + profile_count::one (), + CGRAPH_FREQ_BASE, true); if (old_edge->frequency + indirect->frequency) { @@ -4509,7 +4522,9 @@ expand_call_inline (basic_block bb, gimp cg_edge->remove (); edge = id->src_node->callees->clone (id->dst_node, call_stmt, gimple_uid (stmt), - REG_BR_PROB_BASE, CGRAPH_FREQ_BASE, + profile_count::one (), + profile_count::one (), + CGRAPH_FREQ_BASE, true); edge->frequency = freq; edge->count = count;