On Sat, 25 Jun 2011, Jan Hubicka wrote:

> Hi,
> just for those who are interested, this is a quick&dirty patch adding another
> round of local optimization passes at WPA time.  I've added the early inliner
> and IPA-SRA because I was curious how many optimization opportunities we are
> missing by limiting those to the early pass.

At WPA time?  I thought we don't have function bodies around.

Richard.

> With Early inlining it seems to be very little. We inline one extra call when
> building Mozilla in LTO mode.
> 
> IPA SRA is a different story.  While we do 579 IPA SRA clones in the early pass,
> the late pass produces 13014 clones (22 times more ;) suggesting that the pass
> might be interesting at IPA level after all.
> 
> There are 78686 functions after inlining in Mozilla, so one out of 7 functions
> is touched.
> 
> Size difference of libxul is not great, about 100Kb reduction. I will try
> benchmarking it eventually, too.
> 
> Honza
> 
> 
> Index: cgraph.c
> ===================================================================
> *** cgraph.c  (revision 175350)
> --- cgraph.c  (working copy)
> *************** cgraph_release_function_body (struct cgr
> *** 1389,1396 ****
>       }
>         if (cfun->cfg)
>       {
> !       gcc_assert (dom_computed[0] == DOM_NONE);
> !       gcc_assert (dom_computed[1] == DOM_NONE);
>         clear_edges ();
>       }
>         if (cfun->value_histograms)
> --- 1393,1403 ----
>       }
>         if (cfun->cfg)
>       {
> !       /*gcc_assert (dom_computed[0] == DOM_NONE);
> !       gcc_assert (dom_computed[1] == DOM_NONE);*/
> ! free_dominance_info (CDI_DOMINATORS);
> ! free_dominance_info (CDI_POST_DOMINATORS);
> ! 
>         clear_edges ();
>       }
>         if (cfun->value_histograms)
> Index: tree-pass.h
> ===================================================================
> *** tree-pass.h       (revision 175350)
> --- tree-pass.h       (working copy)
> *************** extern struct simple_ipa_opt_pass pass_i
> *** 452,458 ****
>   extern struct simple_ipa_opt_pass pass_ipa_function_and_variable_visibility;
>   extern struct simple_ipa_opt_pass pass_ipa_tree_profile;
>   
> ! extern struct simple_ipa_opt_pass pass_early_local_passes;
>   
>   extern struct ipa_opt_pass_d pass_ipa_whole_program_visibility;
>   extern struct ipa_opt_pass_d pass_ipa_lto_gimple_out;
> --- 452,458 ----
>   extern struct simple_ipa_opt_pass pass_ipa_function_and_variable_visibility;
>   extern struct simple_ipa_opt_pass pass_ipa_tree_profile;
>   
> ! extern struct simple_ipa_opt_pass pass_early_local_passes, pass_late_local_passes, pass_late_local_passes2;
>   
>   extern struct ipa_opt_pass_d pass_ipa_whole_program_visibility;
>   extern struct ipa_opt_pass_d pass_ipa_lto_gimple_out;
> Index: ipa-inline-analysis.c
> ===================================================================
> *** ipa-inline-analysis.c     (revision 175350)
> --- ipa-inline-analysis.c     (working copy)
> *************** estimate_function_body_sizes (struct cgr
> *** 1535,1542 ****
>                 edge->call_stmt_cannot_inline_p = true;
>                 gimple_call_set_cannot_inline (stmt, true);
>               }
> !           else
> !             gcc_assert (!gimple_call_cannot_inline_p (stmt));
>           }
>   
>         /* TODO: When conditional jump or swithc is known to be constant, but
> --- 1535,1542 ----
>                 edge->call_stmt_cannot_inline_p = true;
>                 gimple_call_set_cannot_inline (stmt, true);
>               }
> !           /*else
> !             gcc_assert (!gimple_call_cannot_inline_p (stmt));*/
>           }
>   
>         /* TODO: When conditional jump or swithc is known to be constant, but
> Index: tree-inline.c
> ===================================================================
> *** tree-inline.c     (revision 175350)
> --- tree-inline.c     (working copy)
> *************** expand_call_inline (basic_block bb, gimp
> *** 3891,3897 ****
>     id->src_cfun = DECL_STRUCT_FUNCTION (fn);
>     id->gimple_call = stmt;
>   
> !   gcc_assert (!id->src_cfun->after_inlining);
>   
>     id->entry_bb = bb;
>     if (lookup_attribute ("cold", DECL_ATTRIBUTES (fn)))
> --- 3891,3897 ----
>     id->src_cfun = DECL_STRUCT_FUNCTION (fn);
>     id->gimple_call = stmt;
>   
> !   /*gcc_assert (!id->src_cfun->after_inlining);*/
>   
>     id->entry_bb = bb;
>     if (lookup_attribute ("cold", DECL_ATTRIBUTES (fn)))
> Index: tree-optimize.c
> ===================================================================
> *** tree-optimize.c   (revision 175350)
> --- tree-optimize.c   (working copy)
> *************** struct simple_ipa_opt_pass pass_early_lo
> *** 123,128 ****
> --- 123,189 ----
>   /* Gate: execute, or not, all of the non-trivial optimizations.  */
>   
>   static bool
> + gate_all_late_local_passes (void)
> + {
> +       /* Don't bother doing anything if the program has errors.  */
> +   return (!seen_error () && optimize);
> + }
> + 
> + static unsigned int
> + execute_all_late_local_passes (void)
> + {
> +   /* Once this pass (and its sub-passes) are complete, all functions
> +      will be in SSA form.  Technically this state change is happening
> +      a tad late, since the sub-passes have not yet run, but since
> +      none of the sub-passes are IPA passes and do not create new
> +      functions, this is ok.  We're setting this value for the benefit
> +      of IPA passes that follow.  */
> +   if (cgraph_state < CGRAPH_STATE_IPA_SSA)
> +     cgraph_state = CGRAPH_STATE_IPA_SSA;
> +   return 0;
> + }
> + 
> + struct simple_ipa_opt_pass pass_late_local_passes =
> + {
> +  {
> +   SIMPLE_IPA_PASS,
> +   "late_local_cleanups",            /* name */
> +   gate_all_late_local_passes,               /* gate */
> +   execute_all_late_local_passes,    /* execute */
> +   NULL,                                     /* sub */
> +   NULL,                                     /* next */
> +   0,                                        /* static_pass_number */
> +   TV_EARLY_LOCAL,                   /* tv_id */
> +   0,                                        /* properties_required */
> +   0,                                        /* properties_provided */
> +   0,                                        /* properties_destroyed */
> +   0,                                        /* todo_flags_start */
> +   TODO_remove_functions                     /* todo_flags_finish */
> +  }
> + };
> + 
> + struct simple_ipa_opt_pass pass_late_local_passes2 =
> + {
> +  {
> +   SIMPLE_IPA_PASS,
> +   "late_local_cleanups2",           /* name */
> +   gate_all_late_local_passes,               /* gate */
> +   execute_all_late_local_passes,    /* execute */
> +   NULL,                                     /* sub */
> +   NULL,                                     /* next */
> +   0,                                        /* static_pass_number */
> +   TV_EARLY_LOCAL,                   /* tv_id */
> +   0,                                        /* properties_required */
> +   0,                                        /* properties_provided */
> +   0,                                        /* properties_destroyed */
> +   0,                                        /* todo_flags_start */
> +   TODO_remove_functions                     /* todo_flags_finish */
> +  }
> + };
> + 
> + /* Gate: execute, or not, all of the non-trivial optimizations.  */
> + 
> + static bool
>   gate_all_early_optimizations (void)
>   {
>     return (optimize >= 1
> Index: passes.c
> ===================================================================
> *** passes.c  (revision 175350)
> --- passes.c  (working copy)
> *************** init_optimization_passes (void)
> *** 1263,1268 ****
> --- 1263,1288 ----
>        passes are executed after partitioning and thus see just parts of the
>        compiled unit.  */
>     p = &all_late_ipa_passes;
> +   NEXT_PASS (pass_late_local_passes);
> +     {
> +       struct opt_pass **p = &pass_late_local_passes.pass.sub;
> +       NEXT_PASS (pass_inline_parameters);
> +       NEXT_PASS (pass_release_ssa_names);
> +     }
> +   NEXT_PASS (pass_late_local_passes2);
> +     {
> +       struct opt_pass **p = &pass_late_local_passes2.pass.sub;
> +       NEXT_PASS (pass_early_inline);
> +       NEXT_PASS (pass_remove_cgraph_callee_edges);
> +       NEXT_PASS (pass_ccp);
> +       NEXT_PASS (pass_forwprop);
> +       NEXT_PASS (pass_fre);
> +       NEXT_PASS (pass_cd_dce);
> +       NEXT_PASS (pass_early_ipa_sra);
> +       NEXT_PASS (pass_release_ssa_names);
> +       NEXT_PASS (pass_rebuild_cgraph_edges);
> +       NEXT_PASS (pass_inline_parameters);
> +     }
>     NEXT_PASS (pass_ipa_pta);
>     *p = NULL;
>     /* These passes are run after IPA passes on every function that is being
> Index: statistics.c
> ===================================================================
> *** statistics.c      (revision 175350)
> --- statistics.c      (working copy)
> *************** statistics_fini_pass_3 (void **slot, voi
> *** 171,176 ****
> --- 171,178 ----
>   void
>   statistics_fini_pass (void)
>   {
> +   if (!current_pass)
> +     return;
>     if (current_pass->static_pass_number == -1)
>       return;
>   
> 
> 

-- 
Richard Guenther <rguent...@suse.de>
Novell / SUSE Labs
SUSE LINUX Products GmbH - Nuernberg - AG Nuernberg - HRB 16746
GF: Jeff Hawn, Jennifer Guild, Felix Imendörffer

Reply via email to