I like the approach in general -- in the past, indirect call promotion and function inlining heuristics have been disconnected, which can lead to either missed promotions or useless ones. This approach solves that problem.
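For thread readers who have not looked at gimple_ic: the promotion being costed here rewrites an indirect call into a compare-and-branch that guards a direct call. Roughly, at the source level (a minimal sketch; all names are made up):

  /* Before: a plain indirect call; the target is only known at run time.  */
  void dispatch (void (*fn) (int), int arg)
  {
    fn (arg);
  }

  /* After speculation: profiling identified hot_target as the common
     target, so the fast path becomes a direct call that later passes
     (inlining, CONST/PURE propagation, ipa-cp) can exploit.  */
  extern void hot_target (int);

  void dispatch_speculative (void (*fn) (int), int arg)
  {
    if (fn == hot_target)
      hot_target (arg);    /* direct call: inlinable and analyzable */
    else
      fn (arg);            /* fallback: the original indirect call */
  }

If nothing downstream exploits the fast path, the guard buys little over a modern indirect branch predictor, which is exactly what the new cost model captures.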
On Sun, Aug 11, 2013 at 4:11 PM, Jan Hubicka <hubi...@ucw.cz> wrote:
> Hi,
> this patch adds a simple cost model to indirect call speculation.  First, we
> do not turn calls into speculative calls when it seems a bad idea (i.e. the
> call is cold), and during inlining we remove speculations that do not seem
> beneficial.  On modern chips a speculative call sequence without inlining is
> not really going to fare better than an indirect call because of the
> indirect call predictor.  So we keep speculations only if the call was
> inlined, or if the callee is turned into a clone, or if CONST/PURE flags are
> propagated to them.
>
> We may want to add a target hook specifying whether the target has an
> indirect call predictor, but I am not sure how important this is in
> practice.

It might also be useful to introduce a parameter to control the behavior so
that people can experiment more easily; the capability of indirect branch
prediction varies a lot across targets.
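Concretely, something along these lines would do (a sketch only -- the
parameter name, default, and bounds are illustrative, not part of the patch):

  /* params.def: hypothetical knob for the speculation cost model.  */
  DEFPARAM (PARAM_INDIRECT_CALL_SPECULATION_PROBABILITY,
            "indirect-call-speculation-probability",
            "Minimum probability (in percent) of reaching the common target "
            "required to speculate an indirect call",
            50, 0, 100)

The cost model could then consult
PARAM_VALUE (PARAM_INDIRECT_CALL_SPECULATION_PROBABILITY) instead of the
hard-wired REG_BR_PROB_BASE / 2 test in the new ipa-profile code below, and
targets with strong indirect branch predictors could raise the default.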
I only noticed a couple of indentation problems.

thanks,

David

> To enable the cost model everywhere, the old unit-local transform code now
> does nothing but sanity checking and debug output dumping.
>
> /* Speculative call consists of three components:
> *************** cgraph_speculative_call_info (struct cgr
> *** 1107,1113 ****
>     if (e2->call_stmt)
>       {

indentation?

>         e = cgraph_edge (e->caller, e2->call_stmt);
> !       gcc_assert (!e->speculative && !e->indirect_unknown_callee);
>       }
>     else
>       for (e = e->caller->callees;
> --- 1110,1116 ----
>     if (e2->call_stmt)
>       {
>         e = cgraph_edge (e->caller, e2->call_stmt);
> !       gcc_assert (e->speculative && !e->indirect_unknown_callee);
>       }
>     else
>       for (e = e->caller->callees;
> *************** cgraph_redirect_edge_callee (struct cgra
> *** 1147,1153 ****
>      Remove the speculative call sequence and return the edge representing the call.
>      It is up to the caller to redirect the call as appropriate.  */
>
> ! static struct cgraph_edge *
>   cgraph_resolve_speculation (struct cgraph_edge *edge, tree callee_decl)
>   {
>     struct cgraph_edge *e2;
> --- 1150,1156 ----
>      Remove the speculative call sequence and return the edge representing the call.
>      It is up to the caller to redirect the call as appropriate.  */
>
> ! struct cgraph_edge *
>   cgraph_resolve_speculation (struct cgraph_edge *edge, tree callee_decl)
>   {
>     struct cgraph_edge *e2;
> *************** cgraph_resolve_speculation (struct cgrap
> *** 1159,1170 ****
>       {
>         if (dump_file)
>           {
> !           fprintf (dump_file, "Speculative indirect call %s/%i => %s/%i has "
> !                    "turned out to have contradicting known target ",
> !                    xstrdup (cgraph_node_name (edge->caller)), edge->caller->symbol.order,
> !                    xstrdup (cgraph_node_name (e2->callee)), e2->callee->symbol.order);
> !           print_generic_expr (dump_file, callee_decl, 0);
> !           fprintf (dump_file, "\n");
>           }
>       }
>     else
> --- 1162,1182 ----
>       {
>         if (dump_file)
>           {
> !           if (callee_decl)
> !             {
> !               fprintf (dump_file, "Speculative indirect call %s/%i => %s/%i has "
> !                        "turned out to have contradicting known target ",
> !                        xstrdup (cgraph_node_name (edge->caller)), edge->caller->symbol.order,
> !                        xstrdup (cgraph_node_name (e2->callee)), e2->callee->symbol.order);
> !               print_generic_expr (dump_file, callee_decl, 0);
> !               fprintf (dump_file, "\n");
> !             }
> !           else
> !             {
> !               fprintf (dump_file, "Removing speculative call %s/%i => %s/%i\n",
> !                        xstrdup (cgraph_node_name (edge->caller)), edge->caller->symbol.order,
> !                        xstrdup (cgraph_node_name (e2->callee)), e2->callee->symbol.order);
> !             }
>           }
>       }
>     else
> *************** cgraph_redirect_edge_call_stmt_to_callee
> *** 1264,1275 ****
>     cgraph_speculative_call_info (e, e, e2, ref);
>     if (gimple_call_fndecl (e->call_stmt))
>       e = cgraph_resolve_speculation (e, gimple_call_fndecl (e->call_stmt));
> !   else
>       {
>         if (dump_file)
> !         fprintf (dump_file, "Expanding speculative call of %s/%i -> %s/%i\n",
>                    xstrdup (cgraph_node_name (e->caller)), e->caller->symbol.order,
>                    xstrdup (cgraph_node_name (e->callee)), e->callee->symbol.order);
>         gcc_assert (e2->speculative);
>         push_cfun (DECL_STRUCT_FUNCTION (e->caller->symbol.decl));
>         new_stmt = gimple_ic (e->call_stmt, cgraph (ref->referred),
> --- 1276,1299 ----
>     cgraph_speculative_call_info (e, e, e2, ref);
>     if (gimple_call_fndecl (e->call_stmt))
>       e = cgraph_resolve_speculation (e, gimple_call_fndecl (e->call_stmt));
> !   if (!gimple_check_call_matching_types (e->call_stmt, e->callee->symbol.decl, true))
>       {
> +       e = cgraph_resolve_speculation (e, NULL);
>         if (dump_file)
> !         fprintf (dump_file, "Not expanding speculative call of %s/%i -> %s/%i\n"
>                    "Type mismatch.\n",
>                    xstrdup (cgraph_node_name (e->caller)), e->caller->symbol.order,
>                    xstrdup (cgraph_node_name (e->callee)), e->callee->symbol.order);
> +     }
> +   else
> +     {
> +       if (dump_file)
> +         fprintf (dump_file, "Expanding speculative call of %s/%i -> %s/%i count:"
> +                  HOST_WIDEST_INT_PRINT_DEC"\n",
> +                  xstrdup (cgraph_node_name (e->caller)), e->caller->symbol.order,
> +                  xstrdup (cgraph_node_name (e->callee)), e->callee->symbol.order,
> +                  (HOST_WIDEST_INT) e->count);
>         gcc_assert (e2->speculative);
>         push_cfun (DECL_STRUCT_FUNCTION (e->caller->symbol.decl));
>         new_stmt = gimple_ic (e->call_stmt, cgraph (ref->referred),
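One more note while I am at it: the new gimple_check_call_matching_types
guard is important. The situation it protects against is profile data
identifying a common target that is only reachable through an incompatible
pointer cast, e.g. (hand-written illustration, not from the patch):

  int take_long (long x) { return (int) x; }

  int call_it (int (*fn) (int), int v)
  {
    return fn (v);   /* profiling records take_long as the common target */
  }

  int main (void)
  {
    /* Calling through the cast pointer is undefined behavior in C, but it
       happens in the wild: the call-site type int (*) (int) disagrees with
       the target's actual type int (long), so expanding the speculation
       into a direct call would be invalid; it must be resolved instead.  */
    return call_it ((int (*) (int)) take_long, 0);
  }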
> Index: cgraph.h
> ===================================================================
> *** cgraph.h	(revision 201640)
> --- cgraph.h	(working copy)
> *************** bool cgraph_propagate_frequency (struct
> *** 726,732 ****
>   struct cgraph_node * cgraph_function_node (struct cgraph_node *,
>                                              enum availability *avail = NULL);
>   bool cgraph_get_body (struct cgraph_node *node);
> ! void
>   cgraph_turn_edge_to_speculative (struct cgraph_edge *,
>                                    struct cgraph_node *,
>                                    gcov_type, int);
> --- 726,732 ----
>   struct cgraph_node * cgraph_function_node (struct cgraph_node *,
>                                              enum availability *avail = NULL);
>   bool cgraph_get_body (struct cgraph_node *node);
> ! struct cgraph_edge *
>   cgraph_turn_edge_to_speculative (struct cgraph_edge *,
>                                    struct cgraph_node *,
>                                    gcov_type, int);
> *************** struct cgraph_node *cgraph_function_vers
> *** 783,788 ****
> --- 783,789 ----
>                                     basic_block, const char *);
>   void tree_function_versioning (tree, tree, vec<ipa_replace_map_p, va_gc> *,
>                                  bool, bitmap, bool, bitmap, basic_block);
> + struct cgraph_edge *cgraph_resolve_speculation (struct cgraph_edge *, tree);
>
>   /* In cgraphbuild.c */
>   unsigned int rebuild_cgraph_edges (void);
> *************** symtab_real_symbol_p (symtab_node node)
> *** 1398,1401 ****
> --- 1399,1414 ----
>     return false;
>     return true;
>   }
> +
> + /* Return true if NODE can be discarded by the linker from the binary.  */
> +
> + static inline bool
> + symtab_can_be_discarded (symtab_node node)
> + {
> +   return (DECL_EXTERNAL (node->symbol.decl)
> +           || (DECL_ONE_ONLY (node->symbol.decl)
> +               && node->symbol.resolution != LDPR_PREVAILING_DEF
> +               && node->symbol.resolution != LDPR_PREVAILING_DEF_IRONLY
> +               && node->symbol.resolution != LDPR_PREVAILING_DEF_IRONLY_EXP));
> + }
>   #endif  /* GCC_CGRAPH_H */
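symtab_can_be_discarded is a nice predicate to have in cgraph.h. For readers
wondering when it fires outside of C++ COMDATs: any definition the linker may
replace or drop qualifies, and a weak definition shows the same hazard in
plain C (my own two-file example, not from the patch):

  /* a.c: this is the body the compiler sees, and the body profiling in
     this unit attributes the indirect calls to...  */
  __attribute__ ((weak)) int hook (int x) { return x + 1; }

  /* b.c: ...but a strong definition elsewhere prevails at link time, so
     a.c's copy may be discarded and its body must not be speculated on.  */
  int hook (int x) { return x * 2; }

This is also why the ipa-profile changes below skip discardable targets
entirely and redirect merely overwritable ones to a nonoverwritable local
alias before speculating.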
> Index: value-prof.c
> ===================================================================
> *** value-prof.c	(revision 201640)
> --- value-prof.c	(working copy)
> *************** gimple_ic_transform (gimple_stmt_iterato
> *** 1431,1438 ****
>     gimple stmt = gsi_stmt (*gsi);
>     histogram_value histogram;
>     gcov_type val, count, all, bb_all;
> -   gcov_type prob;
> -   gimple modify;
>     struct cgraph_node *direct_call;
>
>     if (gimple_code (stmt) != GIMPLE_CALL)
> --- 1431,1436 ----
> *************** gimple_ic_transform (gimple_stmt_iterato
> *** 1452,1463 ****
>     count = histogram->hvalue.counters [1];
>     all = histogram->hvalue.counters [2];
>
> -   if (4 * count <= 3 * all)
> -     {
> -       gimple_remove_histogram_value (cfun, stmt, histogram);
> -       return false;
> -     }
> -
>     bb_all = gimple_bb (stmt)->count;
>     /* The order of CHECK_COUNTER calls is important -
>        since check_counter can correct the third parameter
> --- 1450,1455 ----
> *************** gimple_ic_transform (gimple_stmt_iterato
> *** 1469,1478 ****
>         return false;
>       }
>
> !   if (all > 0)
> !     prob = GCOV_COMPUTE_SCALE (count, all);
> !   else
> !     prob = 0;
>     direct_call = find_func_by_profile_id ((int)val);
>
>     if (direct_call == NULL)
> --- 1461,1469 ----
>         return false;
>       }
>
> !   if (4 * count <= 3 * all)
> !     return false;
> !
>     direct_call = find_func_by_profile_id ((int)val);
>
>     if (direct_call == NULL)
> *************** gimple_ic_transform (gimple_stmt_iterato
> *** 1488,1499 ****
>       }
>         return false;
>       }
> -   gimple_remove_histogram_value (cfun, stmt, histogram);
>
>     if (!check_ic_target (stmt, direct_call))
> !     return false;
> !
> !   modify = gimple_ic (stmt, direct_call, prob, count, all);
>
>     if (dump_file)
>       {
> --- 1479,1499 ----
>       }
>         return false;
>       }
>
>     if (!check_ic_target (stmt, direct_call))
> !     {
> !       if (dump_file)
> !         {
> !           fprintf (dump_file, "Indirect call -> direct call ");
> !           print_generic_expr (dump_file, gimple_call_fn (stmt), TDF_SLIM);
> !           fprintf (dump_file, "=> ");
> !           print_generic_expr (dump_file, direct_call->symbol.decl, TDF_SLIM);
> !           fprintf (dump_file, " transformation skipped because of type mismatch");
> !           print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
> !         }
> !       gimple_remove_histogram_value (cfun, stmt, histogram);
> !       return false;
> !     }
>
>     if (dump_file)
>       {
> *************** gimple_ic_transform (gimple_stmt_iterato
> *** 1501,1510 ****
>       print_generic_expr (dump_file, gimple_call_fn (stmt), TDF_SLIM);
>       fprintf (dump_file, "=> ");
>       print_generic_expr (dump_file, direct_call->symbol.decl, TDF_SLIM);
> !     fprintf (dump_file, " transformation on insn ");
>       print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
> -     fprintf (dump_file, " to ");
> -     print_gimple_stmt (dump_file, modify, 0, TDF_SLIM);
>       fprintf (dump_file, "hist->count "HOST_WIDEST_INT_PRINT_DEC
>               " hist->all "HOST_WIDEST_INT_PRINT_DEC"\n", count, all);
>     }
> --- 1501,1508 ----
>       print_generic_expr (dump_file, gimple_call_fn (stmt), TDF_SLIM);
>       fprintf (dump_file, "=> ");
>       print_generic_expr (dump_file, direct_call->symbol.decl, TDF_SLIM);
> !     fprintf (dump_file, " transformation on insn postponed to ipa-profile");
>       print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
>       fprintf (dump_file, "hist->count "HOST_WIDEST_INT_PRINT_DEC
>               " hist->all "HOST_WIDEST_INT_PRINT_DEC"\n", count, all);
>     }
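The relocated guard above might deserve a comment in the code: it encodes a
75% hotness threshold in integer arithmetic. My reading (helper and typedef
are stand-ins for illustration, not GCC code):

  typedef long long gcov_type;   /* stand-in for GCC's configured type */

  /* Promote only when the common target receives more than 3/4 of the
     profiled calls: 4 * count <= 3 * all is count / all <= 0.75 without
     division.  With all = 1000, count = 750 is rejected and count = 751
     is accepted.  */
  static int
  common_target_hot_enough_p (gcov_type count, gcov_type all)
  {
    return 4 * count > 3 * all;
  }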
> Index: ipa-inline-transform.c
> ===================================================================
> *** ipa-inline-transform.c	(revision 201640)
> --- ipa-inline-transform.c	(working copy)
> *************** along with GCC; see the file COPYING3.
> *** 46,51 ****
> --- 46,52 ----
>
>   int ncalls_inlined;
>   int nfunctions_inlined;
> + bool speculation_removed;
>
>   /* Scale frequency of NODE edges by FREQ_SCALE.  */
>
> *************** clone_inlined_nodes (struct cgraph_edge
> *** 134,139 ****
> --- 135,141 ----
>                      bool update_original, int *overall_size)
>   {
>     struct cgraph_node *inlining_into;
> +   struct cgraph_edge *next;
>
>     if (e->caller->global.inlined_to)
>       inlining_into = e->caller->global.inlined_to;
> *************** clone_inlined_nodes (struct cgraph_edge
> *** 186,194 ****
>     e->callee->global.inlined_to = inlining_into;
>
>     /* Recursively clone all bodies.  */
> !   for (e = e->callee->callees; e; e = e->next_callee)
> !     if (!e->inline_failed)
> !       clone_inlined_nodes (e, duplicate, update_original, overall_size);
>   }
>
>
> --- 188,204 ----
>     e->callee->global.inlined_to = inlining_into;
>
>     /* Recursively clone all bodies.  */
> !   for (e = e->callee->callees; e; e = next)
> !     {
> !       next = e->next_callee;
> !       if (!e->inline_failed)
> !         clone_inlined_nodes (e, duplicate, update_original, overall_size);
> !       if (e->speculative && !speculation_useful_p (e, true))
> !         {
> !           cgraph_resolve_speculation (e, NULL);
> !           speculation_removed = true;
> !         }
> !     }
>   }
>
>
> *************** inline_call (struct cgraph_edge *e, bool
> *** 218,223 ****
> --- 228,234 ----
>     bool predicated = inline_edge_summary (e)->predicate != NULL;
>   #endif
>
> +   speculation_removed = false;
>     /* Don't inline inlined edges.  */
>     gcc_assert (e->inline_failed);
>     /* Don't even think of inlining inline clone.  */
> *************** inline_call (struct cgraph_edge *e, bool
> *** 267,272 ****
> --- 278,284 ----
>        error due to INLINE_SIZE_SCALE roundoff errors.  */
>     gcc_assert (!update_overall_summary || !overall_size || new_edges_found
>                 || abs (estimated_growth - (new_size - old_size)) <= 1
> +               || speculation_removed
>                 /* FIXME: a hack.  Edges with false predicate are accounted
>                    wrong, we should remove them from callgraph.  */
>                 || predicated);
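The new NEXT bookkeeping in clone_inlined_nodes is needed because
cgraph_resolve_speculation can remove the current edge from the callee list.
For the record, this is the usual save-the-successor pattern; a
self-contained miniature (stand-in types, not GCC code):

  #include <stdlib.h>

  struct edge { struct edge *next_callee; int speculative; };

  /* Remove speculative edges from the list headed at *HEAD.  The
     successor is fetched before the body runs, so unlinking and freeing
     the current element is safe.  */
  static void
  prune_speculations (struct edge **head)
  {
    struct edge *e, *next, **slot = head;
    for (e = *head; e; e = next)
      {
        next = e->next_callee;
        if (e->speculative)
          {
            *slot = next;
            free (e);
          }
        else
          slot = &e->next_callee;
      }
  }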
> Index: ipa-inline.c
> ===================================================================
> *** ipa-inline.c	(revision 201640)
> --- ipa-inline.c	(working copy)
> *************** report_inline_failed_reason (struct cgra
> *** 229,238 ****
>      We check whether inlining is possible at all and whether
>      caller growth limits allow doing so.
>
> !    if REPORT is true, output reason to the dump file.  */
>
>   static bool
> ! can_inline_edge_p (struct cgraph_edge *e, bool report)
>   {
>     bool inlinable = true;
>     enum availability avail;
> --- 229,241 ----
>      We check whether inlining is possible at all and whether
>      caller growth limits allow doing so.
>
> !    if REPORT is true, output reason to the dump file.
> !
> !    if DISREGARD_LIMITS is true, ignore size limits.  */
>
>   static bool
> ! can_inline_edge_p (struct cgraph_edge *e, bool report,
> !                    bool disregard_limits = false)
>   {
>     bool inlinable = true;
>     enum availability avail;
> *************** can_inline_edge_p (struct cgraph_edge *e
> *** 309,314 ****
> --- 312,318 ----
>       }
>     /* Check if caller growth allows the inlining.  */
>     else if (!DECL_DISREGARD_INLINE_LIMITS (callee->symbol.decl)
> +            && !disregard_limits
>             && !lookup_attribute ("flatten",
>                                   DECL_ATTRIBUTES
>                                     (e->caller->global.inlined_to
> *************** heap_edge_removal_hook (struct cgraph_ed
> *** 1400,1405 ****
> --- 1404,1482 ----
>       }
>   }
>
> + /* Return true if speculation of edge E seems useful.
> +    If ANTICIPATE_INLINING is true, be conservative and hope that E
> +    may get inlined.  */
> +
> + bool
> + speculation_useful_p (struct cgraph_edge *e, bool anticipate_inlining)
> + {
> +   enum availability avail;
> +   struct cgraph_node *target = cgraph_function_or_thunk_node (e->callee, &avail);
> +   struct cgraph_edge *direct, *indirect;
> +   struct ipa_ref *ref;
> +
> +   gcc_assert (e->speculative && !e->indirect_unknown_callee);
> +
> +   if (!cgraph_maybe_hot_edge_p (e))
> +     return false;
> +
> +   /* See if IP optimizations found something potentially useful about the
> +      function.  For now we look only for CONST/PURE flags.  Almost everything
> +      else we propagate is useless.  */
> +   if (avail >= AVAIL_AVAILABLE)
> +     {
> +       int ecf_flags = flags_from_decl_or_type (target->symbol.decl);
> +       if (ecf_flags & ECF_CONST)
> +         {
> +           cgraph_speculative_call_info (e, direct, indirect, ref);
> +           if (!(indirect->indirect_info->ecf_flags & ECF_CONST))
> +             return true;
> +         }
> +       else if (ecf_flags & ECF_PURE)
> +         {
> +           cgraph_speculative_call_info (e, direct, indirect, ref);
> +           if (!(indirect->indirect_info->ecf_flags & ECF_PURE))
> +             return true;
> +         }
> +     }
> +   /* If we did not manage to inline the function nor redirect
> +      to an ipa-cp clone (these are seen by having the local flag set),
> +      it is probably pointless to inline it unless the hardware is missing
> +      an indirect call predictor.  */
> +   if (!anticipate_inlining && e->inline_failed && !target->local.local)
> +     return false;
> +   /* For overwritable targets there is not much to do.  */
> +   if (e->inline_failed && !can_inline_edge_p (e, false, true))
> +     return false;
> +   /* OK, speculation seems interesting.  */
> +   return true;
> + }
> +
> + /* We know that EDGE is not going to be inlined.
> +    See if we can remove speculation.  */
> +
> + static void
> + resolve_noninline_speculation (fibheap_t edge_heap, struct cgraph_edge *edge)
> + {
> +   if (edge->speculative && !speculation_useful_p (edge, false))
> +     {
> +       struct cgraph_node *node = edge->caller;
> +       struct cgraph_node *where = node->global.inlined_to
> +                                   ? node->global.inlined_to : node;
> +       bitmap updated_nodes = BITMAP_ALLOC (NULL);
> +
> +       cgraph_resolve_speculation (edge, NULL);
> +       reset_node_growth_cache (where);
> +       reset_edge_caches (where);
> +       inline_update_overall_summary (where);
> +       update_caller_keys (edge_heap, where,
> +                           updated_nodes, NULL);
> +       reset_node_growth_cache (where);
> +       BITMAP_FREE (updated_nodes);
> +     }
> + }
> +
>   /* We use greedy algorithm for inlining of small functions:
>      All inline candidates are put into prioritized heap ordered in
>      increasing badness.
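The CONST/PURE special case in speculation_useful_p is what makes uninlined
speculation still profitable: once the fast path is a direct call to a const
function, redundant calls can be eliminated. A small example of the payoff
(hand-written, not generated by the patch):

  extern int hot_target (int) __attribute__ ((const));

  int
  twice (int (*fn) (int), int x)
  {
    if (fn == hot_target)
      /* Both calls are known to be free of side effects and to depend
         only on X, so the optimizer can evaluate hot_target (x) once.  */
      return hot_target (x) + hot_target (x);
    return fn (x) + fn (x);   /* opaque indirect calls: no CSE */
  }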
> *************** inline_small_functions (void)
> *** 1478,1491 ****
>     /* Populate the heap with all edges we might inline.  */
>
>     FOR_EACH_DEFINED_FUNCTION (node)
> !     if (!node->global.inlined_to)
> !       {
> !         if (dump_file)
> !           fprintf (dump_file, "Enqueueing calls of %s/%i.\n",
> !                    cgraph_node_name (node), node->symbol.order);
>
> !         for (edge = node->callers; edge; edge = edge->next_caller)
>             if (edge->inline_failed
>                 && can_inline_edge_p (edge, true)
>                 && want_inline_small_function_p (edge, true)
>                 && edge->inline_failed)
> --- 1555,1573 ----
>     /* Populate the heap with all edges we might inline.  */
>
>     FOR_EACH_DEFINED_FUNCTION (node)
> !     {
> !       bool update = false;
> !       struct cgraph_edge *next;
>
> !       if (dump_file)
> !         fprintf (dump_file, "Enqueueing calls in %s/%i.\n",
> !                  cgraph_node_name (node), node->symbol.order);
> !
> !       for (edge = node->callees; edge; edge = next)
> !         {
> !           next = edge->next_callee;
>             if (edge->inline_failed
> +               && !edge->aux
>                 && can_inline_edge_p (edge, true)
>                 && want_inline_small_function_p (edge, true)
>                 && edge->inline_failed)
> *************** inline_small_functions (void)
> *** 1493,1499 ****
>               gcc_assert (!edge->aux);
>               update_edge_key (edge_heap, edge);
>             }
> !       }
>
>     gcc_assert (in_lto_p
>                 || !max_count
> --- 1575,1598 ----
>               gcc_assert (!edge->aux);
>               update_edge_key (edge_heap, edge);
>             }
> !           if (edge->speculative && !speculation_useful_p (edge, edge->aux != NULL))
> !             {
> !               cgraph_resolve_speculation (edge, NULL);
> !               update = true;
> !             }
> !         }
> !       if (update)
> !         {
> !           struct cgraph_node *where = node->global.inlined_to
> !                                       ? node->global.inlined_to : node;
> !           inline_update_overall_summary (where);
> !           reset_node_growth_cache (where);
> !           reset_edge_caches (where);
> !           update_caller_keys (edge_heap, where,
> !                               updated_nodes, NULL);
> !           bitmap_clear (updated_nodes);
> !         }
> !     }
>
>     gcc_assert (in_lto_p
>                 || !max_count
> *************** inline_small_functions (void)
> *** 1534,1540 ****
>         }
>
>         if (!can_inline_edge_p (edge, true))
> !         continue;
>
>         callee = cgraph_function_or_thunk_node (edge->callee, NULL);
>         growth = estimate_edge_growth (edge);
> --- 1633,1642 ----
>         }
>
>         if (!can_inline_edge_p (edge, true))
> !         {
> !           resolve_noninline_speculation (edge_heap, edge);
> !           continue;
> !         }
>
>         callee = cgraph_function_or_thunk_node (edge->callee, NULL);
>         growth = estimate_edge_growth (edge);
> *************** inline_small_functions (void)
> *** 1568,1578 ****
>         {
>           edge->inline_failed = CIF_INLINE_UNIT_GROWTH_LIMIT;
>           report_inline_failed_reason (edge);
>           continue;
>         }
>
>         if (!want_inline_small_function_p (edge, true))
> !         continue;
>
>         /* Heuristics for inlining small functions works poorly for
>            recursive calls where the effect is similar to loop unrolling.
> --- 1670,1684 ----
>         {
>           edge->inline_failed = CIF_INLINE_UNIT_GROWTH_LIMIT;
>           report_inline_failed_reason (edge);
> +         resolve_noninline_speculation (edge_heap, edge);
>           continue;
>         }
>
>         if (!want_inline_small_function_p (edge, true))
> !         {
> !           resolve_noninline_speculation (edge_heap, edge);
> !           continue;
> !         }
>
>         /* Heuristics for inlining small functions works poorly for
>            recursive calls where the effect is similar to loop unrolling.
> *************** inline_small_functions (void)
> *** 1588,1593 ****
> --- 1694,1700 ----
>                            ? &new_indirect_edges : NULL))
>             {
>               edge->inline_failed = CIF_RECURSIVE_INLINING;
> +             resolve_noninline_speculation (edge_heap, edge);
>               continue;
>             }
>           reset_edge_caches (where);
> *************** inline_small_functions (void)
> *** 1596,1601 ****
> --- 1703,1709 ----
>           if (flag_indirect_inlining)
>             add_new_edges_to_heap (edge_heap, new_indirect_edges);
>           update_callee_keys (edge_heap, where, updated_nodes);
> +         bitmap_clear (updated_nodes);
>         }
>         else
>         {
> *************** inline_small_functions (void)
> *** 1621,1626 ****
> --- 1729,1735 ----
>           edge->inline_failed
>             = (DECL_DISREGARD_INLINE_LIMITS (edge->callee->symbol.decl)
>                ? CIF_RECURSIVE_INLINING : CIF_UNSPECIFIED);
> +         resolve_noninline_speculation (edge_heap, edge);
>           continue;
>         }
>         else if (depth && dump_file)
> *************** ipa_inline (void)
> *** 1773,1778 ****
> --- 1882,1888 ----
>     struct cgraph_node **order =
>       XCNEWVEC (struct cgraph_node *, cgraph_n_nodes);
>     int i;
> +   int cold;
>
>     if (in_lto_p && optimize)
>       ipa_update_after_lto_read ();
> *************** ipa_inline (void)
> *** 1820,1885 ****
>        code size will shrink because the out-of-line copy is eliminated.
>        We do this regardless on the callee size as long as function growth limits
>        are met.  */
> !   if (flag_inline_functions_called_once)
> !     {
> !       int cold;
> !       if (dump_file)
> !         fprintf (dump_file,
> !                  "\nDeciding on functions to be inlined into all callers:\n");
>
> !       /* Inlining one function called once has good chance of preventing
> !          inlining other function into the same callee.  Ideally we should
> !          work in priority order, but probably inlining hot functions first
> !          is good cut without the extra pain of maintaining the queue.
> !
> !          ??? this is not really fitting the bill perfectly: inlining function
> !          into callee often leads to better optimization of callee due to
> !          increased context for optimization.
> !          For example if main() function calls a function that outputs help
> !          and then function that does the main optimization, we should inline
> !          the second with priority even if both calls are cold by themselves.
> !
> !          We probably want to implement new predicate replacing our use of
> !          maybe_hot_edge interpreted as maybe_hot_edge || callee is known
> !          to be hot.  */
> !       for (cold = 0; cold <= 1; cold ++)
>         {
> !           FOR_EACH_DEFINED_FUNCTION (node)
>             {
> !               if (want_inline_function_to_all_callers_p (node, cold))
>                 {
> !                   int num_calls = 0;
> !                   struct cgraph_edge *e;
> !                   for (e = node->callers; e; e = e->next_caller)
> !                     num_calls++;
> !                   while (node->callers && !node->global.inlined_to)
> !                     {
> !                       struct cgraph_node *caller = node->callers->caller;
>
> !                       if (dump_file)
> !                         {
> !                           fprintf (dump_file,
> !                                    "\nInlining %s size %i.\n",
> !                                    cgraph_node_name (node),
> !                                    inline_summary (node)->size);
> !                           fprintf (dump_file,
> !                                    " Called once from %s %i insns.\n",
> !                                    cgraph_node_name (node->callers->caller),
> !                                    inline_summary (node->callers->caller)->size);
> !                         }
>
> !                       inline_call (node->callers, true, NULL, NULL, true);
>                         if (dump_file)
> !                         fprintf (dump_file,
> !                                  " Inlined into %s which now has %i size\n",
> !                                  cgraph_node_name (caller),
> !                                  inline_summary (caller)->size);
> !                       if (!num_calls--)
> !                         {
> !                           if (dump_file)
> !                             fprintf (dump_file, "New calls found; giving up.\n");
> !                           break;
> !                         }
>                     }
>                 }
>             }
> --- 1930,2012 ----
>        code size will shrink because the out-of-line copy is eliminated.
>        We do this regardless on the callee size as long as function growth limits
>        are met.  */
> !   if (dump_file)
> !     fprintf (dump_file,
> !              "\nDeciding on functions to be inlined into all callers and "
> !              "removing useless speculations:\n");
>
> !   /* Inlining one function called once has good chance of preventing
> !      inlining other function into the same callee.  Ideally we should
> !      work in priority order, but probably inlining hot functions first
> !      is good cut without the extra pain of maintaining the queue.
> !
> !      ??? this is not really fitting the bill perfectly: inlining function
> !      into callee often leads to better optimization of callee due to
> !      increased context for optimization.
> !      For example if main() function calls a function that outputs help
> !      and then function that does the main optimization, we should inline
> !      the second with priority even if both calls are cold by themselves.
> !
> !      We probably want to implement new predicate replacing our use of
> !      maybe_hot_edge interpreted as maybe_hot_edge || callee is known
> !      to be hot.  */
> !   for (cold = 0; cold <= 1; cold ++)
> !     {
> !       FOR_EACH_DEFINED_FUNCTION (node)
>         {
> !           struct cgraph_edge *edge, *next;
> !           bool update = false;
> !
> !           for (edge = node->callees; edge; edge = next)
>             {
> !               next = edge->next_callee;
> !               if (edge->speculative && !speculation_useful_p (edge, false))
>                 {
> !                   cgraph_resolve_speculation (edge, NULL);
> !                   update = true;
> !                 }
> !             }
> !           if (update)
> !             {
> !               struct cgraph_node *where = node->global.inlined_to
> !                                           ? node->global.inlined_to : node;
> !               reset_node_growth_cache (where);
> !               reset_edge_caches (where);
> !               inline_update_overall_summary (where);
> !             }
> !           if (flag_inline_functions_called_once
> !               && want_inline_function_to_all_callers_p (node, cold))
> !             {
> !               int num_calls = 0;
> !               struct cgraph_edge *e;
> !               for (e = node->callers; e; e = e->next_caller)
> !                 num_calls++;
> !               while (node->callers && !node->global.inlined_to)
> !                 {
> !                   struct cgraph_node *caller = node->callers->caller;
>
> !                   if (dump_file)
> !                     {
> !                       fprintf (dump_file,
> !                                "\nInlining %s size %i.\n",
> !                                cgraph_node_name (node),
> !                                inline_summary (node)->size);
> !                       fprintf (dump_file,
> !                                " Called once from %s %i insns.\n",
> !                                cgraph_node_name (node->callers->caller),
> !                                inline_summary (node->callers->caller)->size);
> !                     }
>
> !                   inline_call (node->callers, true, NULL, NULL, true);
> !                   if (dump_file)
> !                     fprintf (dump_file,
> !                              " Inlined into %s which now has %i size\n",
> !                              cgraph_node_name (caller),
> !                              inline_summary (caller)->size);
> !                   if (!num_calls--)
> !                     {
>                       if (dump_file)
> !                         fprintf (dump_file, "New calls found; giving up.\n");
> !                       break;
>                     }
>                 }
>             }
"\nDeciding on functions to be inlined into all callers and > removing useless speculations:\n"); > > ! /* Inlining one function called once has good chance of preventing > ! inlining other function into the same callee. Ideally we should > ! work in priority order, but probably inlining hot functions first > ! is good cut without the extra pain of maintaining the queue. > ! > ! ??? this is not really fitting the bill perfectly: inlining function > ! into callee often leads to better optimization of callee due to > ! increased context for optimization. > ! For example if main() function calls a function that outputs help > ! and then function that does the main optmization, we should inline > ! the second with priority even if both calls are cold by themselves. > ! > ! We probably want to implement new predicate replacing our use of > ! maybe_hot_edge interpreted as maybe_hot_edge || callee is known > ! to be hot. */ > ! for (cold = 0; cold <= 1; cold ++) > ! { > ! FOR_EACH_DEFINED_FUNCTION (node) > { > ! struct cgraph_edge *edge, *next; > ! bool update=false; > ! > ! for (edge = node->callees; edge; edge = next) > { > ! next = edge->next_callee; > ! if (edge->speculative && !speculation_useful_p (edge, false)) > { > ! cgraph_resolve_speculation (edge, NULL); > ! update = true; > ! } > ! } > ! if (update) > ! { > ! struct cgraph_node *where = node->global.inlined_to > ! ? node->global.inlined_to : node; > ! reset_node_growth_cache (where); > ! reset_edge_caches (where); > ! inline_update_overall_summary (where); > ! } > ! if (flag_inline_functions_called_once > ! && want_inline_function_to_all_callers_p (node, cold)) > ! { > ! int num_calls = 0; > ! struct cgraph_edge *e; > ! for (e = node->callers; e; e = e->next_caller) > ! num_calls++; > ! while (node->callers && !node->global.inlined_to) > ! { > ! struct cgraph_node *caller = node->callers->caller; > > ! if (dump_file) > ! { > ! fprintf (dump_file, > ! "\nInlining %s size %i.\n", > ! cgraph_node_name (node), > ! inline_summary (node)->size); > ! fprintf (dump_file, > ! " Called once from %s %i insns.\n", > ! cgraph_node_name (node->callers->caller), > ! inline_summary (node->callers->caller)->size); > ! } > > ! inline_call (node->callers, true, NULL, NULL, true); > ! if (dump_file) > ! fprintf (dump_file, > ! " Inlined into %s which now has %i size\n", > ! cgraph_node_name (caller), > ! inline_summary (caller)->size); > ! if (!num_calls--) > ! { > if (dump_file) > ! fprintf (dump_file, "New calls found; giving up.\n"); > ! break; > } > } > } > Index: ipa-inline.h > =================================================================== > *** ipa-inline.h (revision 201640) > --- ipa-inline.h (working copy) > *************** inline_hints do_estimate_edge_hints (str > *** 226,231 **** > --- 226,232 ---- > void initialize_growth_caches (void); > void free_growth_caches (void); > void compute_inline_parameters (struct cgraph_node *, bool); > + bool speculation_useful_p (struct cgraph_edge *e, bool anticipate_inlining); > > /* In ipa-inline-transform.c */ > bool inline_call (struct cgraph_edge *, bool, vec<cgraph_edge_p> *, int *, > bool); > Index: ipa.c > =================================================================== > *** ipa.c (revision 201640) > --- ipa.c (working copy) > *************** bool > *** 768,778 **** > can_replace_by_local_alias (symtab_node node) > { > return (symtab_node_availability (node) > AVAIL_OVERWRITABLE > ! && !DECL_EXTERNAL (node->symbol.decl) > ! && (!DECL_ONE_ONLY (node->symbol.decl) > ! 
> Index: ipa.c
> ===================================================================
> *** ipa.c	(revision 201640)
> --- ipa.c	(working copy)
> *************** bool
> *** 768,778 ****
>   can_replace_by_local_alias (symtab_node node)
>   {
>     return (symtab_node_availability (node) > AVAIL_OVERWRITABLE
> !           && !DECL_EXTERNAL (node->symbol.decl)
> !           && (!DECL_ONE_ONLY (node->symbol.decl)
> !               || node->symbol.resolution == LDPR_PREVAILING_DEF
> !               || node->symbol.resolution == LDPR_PREVAILING_DEF_IRONLY
> !               || node->symbol.resolution == LDPR_PREVAILING_DEF_IRONLY_EXP));
>   }
>
>   /* Mark visibility of all functions.
> --- 768,774 ----
>   can_replace_by_local_alias (symtab_node node)
>   {
>     return (symtab_node_availability (node) > AVAIL_OVERWRITABLE
> !           && !symtab_can_be_discarded (node));
>   }
>
>   /* Mark visibility of all functions.
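Coming back to my parameter suggestion: the 50% cutoff in the ipa_profile
hunk below is exactly the constant I would make tunable. For reference,
common_target_probability is fixed-point scaled by REG_BR_PROB_BASE, so the
test reads as follows (self-contained sketch; the #define mirrors GCC's
value and the helper is mine, not the patch's):

  #define REG_BR_PROB_BASE 10000   /* GCC's scale for branch probabilities */

  /* Probabilities are in units of 1/REG_BR_PROB_BASE, so e.g. 7350 means
     the common target was seen on 73.5% of the profiled executions of the
     call site.  The new code skips anything below 50%.  */
  static int
  worth_speculating_p (int common_target_probability)
  {
    return common_target_probability >= REG_BR_PROB_BASE / 2;
  }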
> *************** ipa_profile (void)
> *** 1407,1459 ****
>     bool something_changed = false;
>     int i;
>     gcov_type overall_time = 0, cutoff = 0, cumulated = 0, overall_size = 0;
> !
> !   /* Produce speculative calls: we saved the common target from profiling
> !      into e->common_target_id.  Now, at link time, we can look up the
> !      corresponding function node and produce a speculative call.  */
> !   if (in_lto_p)
> !     {
> !       struct cgraph_edge *e;
> !       struct cgraph_node *n,*n2;
> !
> !       init_node_map (false);
> !       FOR_EACH_DEFINED_FUNCTION (n)
> !         {
> !           bool update = false;
> !
> !           for (e = n->indirect_calls; e; e = e->next_callee)
> !             if (e->indirect_info->common_target_id)
> !               {
> !                 n2 = find_func_by_profile_id (e->indirect_info->common_target_id);
> !                 if (n2)
> !                   {
> !                     if (dump_file)
> !                       {
> !                         fprintf (dump_file, "Indirect call -> direct call from"
> !                                  " other module %s/%i => %s/%i, prob %3.2f\n",
> !                                  xstrdup (cgraph_node_name (n)), n->symbol.order,
> !                                  xstrdup (cgraph_node_name (n2)), n2->symbol.order,
> !                                  e->indirect_info->common_target_probability
> !                                  / (float)REG_BR_PROB_BASE);
> !                       }
> !                     cgraph_turn_edge_to_speculative
> !                       (e, n2,
> !                        apply_scale (e->count,
> !                                     e->indirect_info->common_target_probability),
> !                        apply_scale (e->frequency,
> !                                     e->indirect_info->common_target_probability));
> !                     update = true;
> !                   }
> !                 else
> !                   if (dump_file)
> !                     fprintf (dump_file, "Function with profile-id %i not found.\n",
> !                              e->indirect_info->common_target_id);
> !               }
> !           if (update)
> !             inline_update_overall_summary (n);
> !         }
> !       del_node_map ();
> !     }
>
>     if (dump_file)
>       dump_histogram (dump_file, histogram);
> --- 1403,1411 ----
>     bool something_changed = false;
>     int i;
>     gcov_type overall_time = 0, cutoff = 0, cumulated = 0, overall_size = 0;
> !   struct cgraph_node *n,*n2;
> !   int nindirect = 0, ncommon = 0, nunknown = 0, nuseless = 0, nconverted = 0;
> !   bool node_map_initialized = false;
>
>     if (dump_file)
>       dump_histogram (dump_file, histogram);
> *************** ipa_profile (void)
> *** 1523,1528 ****
> --- 1475,1580 ----
>     histogram.release();
>     free_alloc_pool (histogram_pool);
>
> +   /* Produce speculative calls: we saved the common target from profiling
> +      into e->common_target_id.  Now, at link time, we can look up the
> +      corresponding function node and produce a speculative call.  */
> +
> +   FOR_EACH_DEFINED_FUNCTION (n)
> +     {
> +       bool update = false;
> +
> +       for (e = n->indirect_calls; e; e = e->next_callee)
> +         {
> +           if (n->count)
> +             nindirect++;
> +           if (e->indirect_info->common_target_id)
> +             {
> +               if (!node_map_initialized)
> +                 init_node_map (false);
> +               node_map_initialized = true;
> +               ncommon++;
> +               n2 = find_func_by_profile_id (e->indirect_info->common_target_id);
> +               if (n2)
> +                 {
> +                   if (dump_file)
> +                     {
> +                       fprintf (dump_file, "Indirect call -> direct call from"
> +                                " other module %s/%i => %s/%i, prob %3.2f\n",
> +                                xstrdup (cgraph_node_name (n)), n->symbol.order,
> +                                xstrdup (cgraph_node_name (n2)), n2->symbol.order,
> +                                e->indirect_info->common_target_probability
> +                                / (float)REG_BR_PROB_BASE);
> +                     }
> +                   if (e->indirect_info->common_target_probability
> +                       < REG_BR_PROB_BASE / 2)
> +                     {
> +                       nuseless++;
> +                       if (dump_file)
> +                         fprintf (dump_file,
> +                                  "Not speculating: probability is too low.\n");
> +                     }
> +                   else if (!cgraph_maybe_hot_edge_p (e))
> +                     {
> +                       nuseless++;
> +                       if (dump_file)
> +                         fprintf (dump_file,
> +                                  "Not speculating: call is cold.\n");
> +                     }
> +                   else if (cgraph_function_body_availability (n2)
> +                            <= AVAIL_OVERWRITABLE
> +                            && symtab_can_be_discarded ((symtab_node) n2))
> +                     {
> +                       nuseless++;
> +                       if (dump_file)
> +                         fprintf (dump_file,
> +                                  "Not speculating: target is overwritable "
> +                                  "and can be discarded.\n");
> +                     }
> +                   else
> +                     {
> +                       /* Target may be overwritable, but profile says that
> +                          control flow goes to this particular implementation
> +                          of N2.  Speculate on the local alias to allow inlining.  */
> +                       if (!symtab_can_be_discarded ((symtab_node) n2))
> +                         n2 = cgraph (symtab_nonoverwritable_alias ((symtab_node) n2));
> +                       nconverted++;
> +                       cgraph_turn_edge_to_speculative
> +                         (e, n2,
> +                          apply_scale (e->count,
> +                                       e->indirect_info->common_target_probability),
> +                          apply_scale (e->frequency,
> +                                       e->indirect_info->common_target_probability));
> +                       update = true;
> +                     }
> +                 }
> +               else
> +                 {
> +                   if (dump_file)
> +                     fprintf (dump_file, "Function with profile-id %i not found.\n",
> +                              e->indirect_info->common_target_id);
> +                   nunknown++;
> +                 }
> +             }
> +         }
> +       if (update)
> +         inline_update_overall_summary (n);
> +     }
> +   if (node_map_initialized)
> +     del_node_map ();
> +   if (dump_file && nindirect)
> +     fprintf (dump_file,
> +              "%i indirect calls trained.\n"
> +              "%i (%3.2f%%) have common target.\n"
> +              "%i (%3.2f%%) targets were not found.\n"
> +              "%i (%3.2f%%) speculations seem useless.\n"
> +              "%i (%3.2f%%) speculations produced.\n",
> +              nindirect,
> +              ncommon, ncommon * 100.0 / nindirect,
> +              nunknown, nunknown * 100.0 / nindirect,
> +              nuseless, nuseless * 100.0 / nindirect,
> +              nconverted, nconverted * 100.0 / nindirect);
> +
>     order_pos = ipa_reverse_postorder (order);
>     for (i = order_pos - 1; i >= 0; i--)
>       {