> diff --git a/gcc/ChangeLog b/gcc/ChangeLog > index 93e857df..d5a0ac8 100644 > --- a/gcc/ChangeLog > +++ b/gcc/ChangeLog > @@ -1,3 +1,14 @@ > +2013-12-15 Martin Liska <marxin.li...@gmail.com> > + Jan Hubicka <j...@suse.cz> > + > + * cgraphunit.c (node_cmp): New function. > + (expand_all_functions): Function ordering added. > + * common.opt: New profile based function reordering flag introduced. > + * lto-partition.c: Support for time profile added. > + * lto.c: Likewise. > + * predict.c (handle_missing_profiles): Time profile handled in > + missing profiles. > +
OK, thanks, with the changes bellow! (I tought this patch was already in! Also please be careful about applying the changes - it seems that in the previous commit you M omitted some) > @@ -1842,11 +1859,14 @@ expand_function (struct cgraph_node *node) > to use subsections to make the output functions appear in top-down > order). */ > > + Bogus whitespace > static void > expand_all_functions (void) > { > struct cgraph_node *node; > struct cgraph_node **order = XCNEWVEC (struct cgraph_node *, > cgraph_n_nodes); > + > + unsigned int expanded_func_count = 0, profiled_func_count = 0; > int order_pos, new_order_pos = 0; > int i; > > @@ -1859,20 +1879,39 @@ expand_all_functions (void) > if (order[i]->process) > order[new_order_pos++] = order[i]; > > + if (flag_profile_reorder_functions) > + qsort (order, new_order_pos, sizeof (struct cgraph_node *), node_cmp); > + > for (i = new_order_pos - 1; i >= 0; i--) > { > node = order[i]; > + > if (node->process) > { > + expanded_func_count++; > + if(node->tp_first_run) > + profiled_func_count++; > + > + if (cgraph_dump_file) > + fprintf (cgraph_dump_file, "Time profile order in > expand_all_functions:%s:%d\n", node->asm_name (), node->tp_first_run); > + > node->process = 0; > expand_function (node); > } > } > + > + if (in_lto_p && dump_file) > + fprintf (dump_file, "Expanded functions with time profile > (%s):%u/%u\n", > + main_input_filename, profiled_func_count, > expanded_func_count); > + > + if (cgraph_dump_file && flag_profile_reorder_functions && in_lto_p) > + fprintf (cgraph_dump_file, "Expanded functions with time > profile:%u/%u\n", > + profiled_func_count, expanded_func_count); Make the dumps unconditoinal, I do not see why they should be in_lto_p here. > @@ -689,7 +713,6 @@ lto_balanced_map (void) > best_i = i; > best_n_nodes = lto_symtab_encoder_size (partition->encoder); > best_total_size = total_size; > - best_varpool_pos = varpool_pos; > } > if (cgraph_dump_file) > fprintf (cgraph_dump_file, "Step %i: added %s/%i, size %i, cost %i/%i " > @@ -707,7 +730,6 @@ lto_balanced_map (void) > fprintf (cgraph_dump_file, "Unwinding %i insertions to step > %i\n", > i - best_i, best_i); > undo_partition (partition, best_n_nodes); > - varpool_pos = best_varpool_pos; > } > i = best_i; > /* When we are finished, avoid creating empty partition. */ I already asked you to remove these changes - they revert earlier fix. > diff --git a/gcc/predict.c b/gcc/predict.c > index a5ad34f..1826a06 100644 > --- a/gcc/predict.c > +++ b/gcc/predict.c > @@ -2839,12 +2839,24 @@ handle_missing_profiles (void) > { > struct cgraph_edge *e; > gcov_type call_count = 0; > + gcov_type max_tp_first_run = 0; > struct function *fn = DECL_STRUCT_FUNCTION (node->decl); > > if (node->count) > continue; > for (e = node->callers; e; e = e->next_caller) > + { > call_count += e->count; > + > + if (e->caller->tp_first_run > max_tp_first_run) > + max_tp_first_run = e->caller->tp_first_run; > + } > + > + /* If time profile is missing, let assign the maximum that comes from > + caller functions. */ > + if (!node->tp_first_run && max_tp_first_run) > + node->tp_first_run = max_tp_first_run + 1; > + I believe you also need minizming node->tp_first_run in ipa_merge_profiles. > if (call_count > && fn && fn->cfg > && (call_count * unlikely_count_fraction >= profile_info->runs)) > diff --git a/gcc/varasm.c b/gcc/varasm.c > index 5c5025a..f34946c 100644 > --- a/gcc/varasm.c > +++ b/gcc/varasm.c > @@ -552,7 +552,14 @@ default_function_section (tree decl, enum node_frequency > freq, > unlikely executed (this happens especially with function splitting > where we can split away unnecessary parts of static constructors. */ > if (startup && freq != NODE_FREQUENCY_UNLIKELY_EXECUTED) > - return get_named_text_section (decl, ".text.startup", NULL); > + { > + /* If we do have a profile or(and) LTO phase is executed, we do not need > + these ELF section. */ > + if (!in_lto_p || !flag_profile_values) > + return get_named_text_section (decl, ".text.startup", NULL); > + else > + return NULL; > + } > > /* Similarly for exit. */ > if (exit && freq != NODE_FREQUENCY_UNLIKELY_EXECUTED) > @@ -564,7 +571,10 @@ default_function_section (tree decl, enum node_frequency > freq, > case NODE_FREQUENCY_UNLIKELY_EXECUTED: > return get_named_text_section (decl, ".text.unlikely", NULL); > case NODE_FREQUENCY_HOT: > - return get_named_text_section (decl, ".text.hot", NULL); > + /* If we do have a profile or(and) LTO phase is executed, we do not > need > + these ELF section. */ > + if (!in_lto_p || !flag_profile_values) > + return get_named_text_section (decl, ".text.hot", NULL); > default: > return NULL; Please duplicate these changes into config/darwin.c that has identical code in it. OK with those changes. Honza