> On Sat, 25 Jun 2011, Jan Hubicka wrote:
> 
> > Hi,
> > just for those who are interested, this is a quick&dirty patch adding another
> > pass of local optimization passes at WPA time.  I've added the early inliner and
> > IPA-SRA because I was curious how many optimization opportunities we are
> > missing by limiting those to the early pass.
> 
> At WPA time?  I thought we don't have function bodies around.

I meant LTRANS time, indeed.
Anyway, the tests were made with -flto-partition=none.

Honza
> 
> Richard.
> 
> > > With early inlining it seems to be very little.  We inline one extra call
> > > when building Mozilla in LTO mode.
> > 
> > > IPA SRA is a different story.  While we do 579 IPA SRA clones in the early
> > > pass, the late pass produces 13014 clones (22 times more ;) suggesting that
> > > the pass might be interesting at the IPA level after all.
> > 
> > > There are 78686 functions after inlining in Mozilla, so one out of 7
> > > functions is touched.
> > 
> > > The size difference of libxul is not great, about a 100Kb reduction.  I will
> > > try benchmarking it eventually, too.
> > 
> > Honza
> > 
> > 
> > Index: cgraph.c
> > ===================================================================
> > *** cgraph.c        (revision 175350)
> > --- cgraph.c        (working copy)
> > *************** cgraph_release_function_body (struct cgr
> > *** 1389,1396 ****
> >     }
> >         if (cfun->cfg)
> >     {
> > !     gcc_assert (dom_computed[0] == DOM_NONE);
> > !     gcc_assert (dom_computed[1] == DOM_NONE);
> >       clear_edges ();
> >     }
> >         if (cfun->value_histograms)
> > --- 1393,1403 ----
> >     }
> >         if (cfun->cfg)
> >     {
> > !     /*gcc_assert (dom_computed[0] == DOM_NONE);
> > !     gcc_assert (dom_computed[1] == DOM_NONE);*/
> > ! free_dominance_info (CDI_DOMINATORS);
> > ! free_dominance_info (CDI_POST_DOMINATORS);
> > ! 
> >       clear_edges ();
> >     }
> >         if (cfun->value_histograms)
> > Index: tree-pass.h
> > ===================================================================
> > *** tree-pass.h     (revision 175350)
> > --- tree-pass.h     (working copy)
> > *************** extern struct simple_ipa_opt_pass pass_i
> > *** 452,458 ****
> >   extern struct simple_ipa_opt_pass 
> > pass_ipa_function_and_variable_visibility;
> >   extern struct simple_ipa_opt_pass pass_ipa_tree_profile;
> >   
> > ! extern struct simple_ipa_opt_pass pass_early_local_passes;
> >   
> >   extern struct ipa_opt_pass_d pass_ipa_whole_program_visibility;
> >   extern struct ipa_opt_pass_d pass_ipa_lto_gimple_out;
> > --- 452,458 ----
> >   extern struct simple_ipa_opt_pass 
> > pass_ipa_function_and_variable_visibility;
> >   extern struct simple_ipa_opt_pass pass_ipa_tree_profile;
> >   
> > ! extern struct simple_ipa_opt_pass pass_early_local_passes, 
> > pass_late_local_passes, pass_late_local_passes2;
> >   
> >   extern struct ipa_opt_pass_d pass_ipa_whole_program_visibility;
> >   extern struct ipa_opt_pass_d pass_ipa_lto_gimple_out;
> > Index: ipa-inline-analysis.c
> > ===================================================================
> > *** ipa-inline-analysis.c   (revision 175350)
> > --- ipa-inline-analysis.c   (working copy)
> > *************** estimate_function_body_sizes (struct cgr
> > *** 1535,1542 ****
> >               edge->call_stmt_cannot_inline_p = true;
> >               gimple_call_set_cannot_inline (stmt, true);
> >             }
> > !         else
> > !           gcc_assert (!gimple_call_cannot_inline_p (stmt));
> >         }
> >   
> >       /* TODO: When conditional jump or swithc is known to be constant, but
> > --- 1535,1542 ----
> >               edge->call_stmt_cannot_inline_p = true;
> >               gimple_call_set_cannot_inline (stmt, true);
> >             }
> > !         /*else
> > !           gcc_assert (!gimple_call_cannot_inline_p (stmt));*/
> >         }
> >   
> >       /* TODO: When conditional jump or swithc is known to be constant, but
> > Index: tree-inline.c
> > ===================================================================
> > *** tree-inline.c   (revision 175350)
> > --- tree-inline.c   (working copy)
> > *************** expand_call_inline (basic_block bb, gimp
> > *** 3891,3897 ****
> >     id->src_cfun = DECL_STRUCT_FUNCTION (fn);
> >     id->gimple_call = stmt;
> >   
> > !   gcc_assert (!id->src_cfun->after_inlining);
> >   
> >     id->entry_bb = bb;
> >     if (lookup_attribute ("cold", DECL_ATTRIBUTES (fn)))
> > --- 3891,3897 ----
> >     id->src_cfun = DECL_STRUCT_FUNCTION (fn);
> >     id->gimple_call = stmt;
> >   
> > !   /*gcc_assert (!id->src_cfun->after_inlining);*/
> >   
> >     id->entry_bb = bb;
> >     if (lookup_attribute ("cold", DECL_ATTRIBUTES (fn)))
> > Index: tree-optimize.c
> > ===================================================================
> > *** tree-optimize.c (revision 175350)
> > --- tree-optimize.c (working copy)
> > *************** struct simple_ipa_opt_pass pass_early_lo
> > *** 123,128 ****
> > --- 123,189 ----
> >   /* Gate: execute, or not, all of the non-trivial optimizations.  */
> >   
> >   static bool
> > + gate_all_late_local_passes (void)
> > + {
> > +     /* Don't bother doing anything if the program has errors.  */
> > +   return (!seen_error () && optimize);
> > + }
> > + 
> > + static unsigned int
> > + execute_all_late_local_passes (void)
> > + {
> > +   /* Once this pass (and its sub-passes) are complete, all functions
> > +      will be in SSA form.  Technically this state change is happening
> > +      a tad late, since the sub-passes have not yet run, but since
> > +      none of the sub-passes are IPA passes and do not create new
> > +      functions, this is ok.  We're setting this value for the benefit
> > +      of IPA passes that follow.  */
> > +   if (cgraph_state < CGRAPH_STATE_IPA_SSA)
> > +     cgraph_state = CGRAPH_STATE_IPA_SSA;
> > +   return 0;
> > + }
> > + 
> > + struct simple_ipa_opt_pass pass_late_local_passes =
> > + {
> > +  {
> > +   SIMPLE_IPA_PASS,
> > +   "late_local_cleanups",          /* name */
> > +   gate_all_late_local_passes,             /* gate */
> > +   execute_all_late_local_passes,  /* execute */
> > +   NULL,                                   /* sub */
> > +   NULL,                                   /* next */
> > +   0,                                      /* static_pass_number */
> > +   TV_EARLY_LOCAL,                 /* tv_id */
> > +   0,                                      /* properties_required */
> > +   0,                                      /* properties_provided */
> > +   0,                                      /* properties_destroyed */
> > +   0,                                      /* todo_flags_start */
> > +   TODO_remove_functions                   /* todo_flags_finish */
> > +  }
> > + };
> > + 
> > + struct simple_ipa_opt_pass pass_late_local_passes2 =
> > + {
> > +  {
> > +   SIMPLE_IPA_PASS,
> > +   "late_local_cleanups2",         /* name */
> > +   gate_all_late_local_passes,             /* gate */
> > +   execute_all_late_local_passes,  /* execute */
> > +   NULL,                                   /* sub */
> > +   NULL,                                   /* next */
> > +   0,                                      /* static_pass_number */
> > +   TV_EARLY_LOCAL,                 /* tv_id */
> > +   0,                                      /* properties_required */
> > +   0,                                      /* properties_provided */
> > +   0,                                      /* properties_destroyed */
> > +   0,                                      /* todo_flags_start */
> > +   TODO_remove_functions                   /* todo_flags_finish */
> > +  }
> > + };
> > + 
> > + /* Gate: execute, or not, all of the non-trivial optimizations.  */
> > + 
> > + static bool
> >   gate_all_early_optimizations (void)
> >   {
> >     return (optimize >= 1
> > Index: passes.c
> > ===================================================================
> > *** passes.c        (revision 175350)
> > --- passes.c        (working copy)
> > *************** init_optimization_passes (void)
> > *** 1263,1268 ****
> > --- 1263,1288 ----
> >        passes are executed after partitioning and thus see just parts of the
> >        compiled unit.  */
> >     p = &all_late_ipa_passes;
> > +   NEXT_PASS (pass_late_local_passes);
> > +     {
> > +       struct opt_pass **p = &pass_late_local_passes.pass.sub;
> > +       NEXT_PASS (pass_inline_parameters);
> > +       NEXT_PASS (pass_release_ssa_names);
> > +     }
> > +   NEXT_PASS (pass_late_local_passes2);
> > +     {
> > +       struct opt_pass **p = &pass_late_local_passes2.pass.sub;
> > +       NEXT_PASS (pass_early_inline);
> > +       NEXT_PASS (pass_remove_cgraph_callee_edges);
> > +       NEXT_PASS (pass_ccp);
> > +       NEXT_PASS (pass_forwprop);
> > +       NEXT_PASS (pass_fre);
> > +       NEXT_PASS (pass_cd_dce);
> > +       NEXT_PASS (pass_early_ipa_sra);
> > +       NEXT_PASS (pass_release_ssa_names);
> > +       NEXT_PASS (pass_rebuild_cgraph_edges);
> > +       NEXT_PASS (pass_inline_parameters);
> > +     }
> >     NEXT_PASS (pass_ipa_pta);
> >     *p = NULL;
> >     /* These passes are run after IPA passes on every function that is being
> > Index: statistics.c
> > ===================================================================
> > *** statistics.c    (revision 175350)
> > --- statistics.c    (working copy)
> > *************** statistics_fini_pass_3 (void **slot, voi
> > *** 171,176 ****
> > --- 171,178 ----
> >   void
> >   statistics_fini_pass (void)
> >   {
> > +   if (!current_pass)
> > +     return;
> >     if (current_pass->static_pass_number == -1)
> >       return;
> >   
> > 
> > 
> 
> -- 
> Richard Guenther <rguent...@suse.de>
> Novell / SUSE Labs
> SUSE LINUX Products GmbH - Nuernberg - AG Nuernberg - HRB 16746
> GF: Jeff Hawn, Jennifer Guild, Felix Imendörffer

Reply via email to