Re: [PATCH] Time profiler - phase 2

Jan Hubicka Sun, 15 Dec 2013 15:22:04 -0800

> diff --git a/gcc/ChangeLog b/gcc/ChangeLog
> index 93e857df..d5a0ac8 100644
> --- a/gcc/ChangeLog
> +++ b/gcc/ChangeLog
> @@ -1,3 +1,14 @@
> +2013-12-15  Martin Liska  <marxin.li...@gmail.com>
> +         Jan Hubicka  <j...@suse.cz>
> +
> +     * cgraphunit.c (node_cmp): New function.
> +     (expand_all_functions): Function ordering added.
> +     * common.opt: New profile based function reordering flag introduced.
> +     * lto-partition.c: Support for time profile added.
> +     * lto.c: Likewise.
> +     * predict.c (handle_missing_profiles): Time profile handled in
> +       missing profiles.
> +


OK, thanks, with the changes below!
(I thought this patch was already in! Also please be careful about
applying the changes - it seems that in the previous commit you
omitted some.)
> @@ -1842,11 +1859,14 @@ expand_function (struct cgraph_node *node)
>     to use subsections to make the output functions appear in top-down
>     order).  */
>  
> +
Bogus whitespace
>  static void
>  expand_all_functions (void)
>  {
>    struct cgraph_node *node;
>    struct cgraph_node **order = XCNEWVEC (struct cgraph_node *, 
> cgraph_n_nodes);
> +
> +  unsigned int expanded_func_count = 0, profiled_func_count = 0;
>    int order_pos, new_order_pos = 0;
>    int i;
>  
> @@ -1859,20 +1879,39 @@ expand_all_functions (void)
>      if (order[i]->process)
>        order[new_order_pos++] = order[i];
>  
> +  if (flag_profile_reorder_functions)
> +    qsort (order, new_order_pos, sizeof (struct cgraph_node *), node_cmp);
> +
>    for (i = new_order_pos - 1; i >= 0; i--)
>      {
>        node = order[i];
> +
>        if (node->process)
>       {
> +     expanded_func_count++;
> +     if(node->tp_first_run)
> +       profiled_func_count++;
> +
> +    if (cgraph_dump_file)
> +      fprintf (cgraph_dump_file, "Time profile order in 
> expand_all_functions:%s:%d\n", node->asm_name (), node->tp_first_run);
> +
>         node->process = 0;
>         expand_function (node);
>       }
>      }
> +
> +    if (in_lto_p && dump_file)
> +      fprintf (dump_file, "Expanded functions with time profile 
> (%s):%u/%u\n",
> +               main_input_filename, profiled_func_count, 
> expanded_func_count);
> +
> +  if (cgraph_dump_file && flag_profile_reorder_functions && in_lto_p)
> +    fprintf (cgraph_dump_file, "Expanded functions with time 
> profile:%u/%u\n",
> +             profiled_func_count, expanded_func_count);

Make the dumps unconditional; I do not see why they should depend on in_lto_p here.
> @@ -689,7 +713,6 @@ lto_balanced_map (void)
>         best_i = i;
>         best_n_nodes = lto_symtab_encoder_size (partition->encoder);
>         best_total_size = total_size;
> -       best_varpool_pos = varpool_pos;
>       }
>        if (cgraph_dump_file)
>       fprintf (cgraph_dump_file, "Step %i: added %s/%i, size %i, cost %i/%i "
> @@ -707,7 +730,6 @@ lto_balanced_map (void)
>               fprintf (cgraph_dump_file, "Unwinding %i insertions to step 
> %i\n",
>                        i - best_i, best_i);
>             undo_partition (partition, best_n_nodes);
> -           varpool_pos = best_varpool_pos;
>           }
>         i = best_i;
>         /* When we are finished, avoid creating empty partition.  */

I already asked you to remove these changes - they revert an earlier fix.

> diff --git a/gcc/predict.c b/gcc/predict.c
> index a5ad34f..1826a06 100644
> --- a/gcc/predict.c
> +++ b/gcc/predict.c
> @@ -2839,12 +2839,24 @@ handle_missing_profiles (void)
>      {
>        struct cgraph_edge *e;
>        gcov_type call_count = 0;
> +      gcov_type max_tp_first_run = 0;
>        struct function *fn = DECL_STRUCT_FUNCTION (node->decl);
>  
>        if (node->count)
>          continue;
>        for (e = node->callers; e; e = e->next_caller)
> +      {
>          call_count += e->count;
> +
> +     if (e->caller->tp_first_run > max_tp_first_run)
> +       max_tp_first_run = e->caller->tp_first_run;
> +      }
> +
> +      /* If time profile is missing, let assign the maximum that comes from
> +      caller functions.  */
> +      if (!node->tp_first_run && max_tp_first_run)
> +     node->tp_first_run = max_tp_first_run + 1;
> +

I believe you also need to minimize node->tp_first_run in ipa_merge_profiles.
>        if (call_count
>            && fn && fn->cfg
>            && (call_count * unlikely_count_fraction >= profile_info->runs))
> diff --git a/gcc/varasm.c b/gcc/varasm.c
> index 5c5025a..f34946c 100644
> --- a/gcc/varasm.c
> +++ b/gcc/varasm.c
> @@ -552,7 +552,14 @@ default_function_section (tree decl, enum node_frequency 
> freq,
>       unlikely executed (this happens especially with function splitting
>       where we can split away unnecessary parts of static constructors.  */
>    if (startup && freq != NODE_FREQUENCY_UNLIKELY_EXECUTED)
> -    return get_named_text_section (decl, ".text.startup", NULL);
> +  {
> +    /* If we do have a profile or(and) LTO phase is executed, we do not need
> +    these ELF section.  */
> +    if (!in_lto_p || !flag_profile_values)
> +      return get_named_text_section (decl, ".text.startup", NULL);
> +    else
> +      return NULL;
> +  }
>  
>    /* Similarly for exit.  */
>    if (exit && freq != NODE_FREQUENCY_UNLIKELY_EXECUTED)
> @@ -564,7 +571,10 @@ default_function_section (tree decl, enum node_frequency 
> freq,
>        case NODE_FREQUENCY_UNLIKELY_EXECUTED:
>       return get_named_text_section (decl, ".text.unlikely", NULL);
>        case NODE_FREQUENCY_HOT:
> -     return get_named_text_section (decl, ".text.hot", NULL);
> +        /* If we do have a profile or(and) LTO phase is executed, we do not 
> need
> +           these ELF section.  */
> +        if (!in_lto_p || !flag_profile_values)
> +          return get_named_text_section (decl, ".text.hot", NULL);
>        default:
>       return NULL;
Please duplicate these changes into config/darwin.c, which has identical code
in it.

OK with those changes.

Honza

Re: [PATCH] Time profiler - phase 2

Reply via email to