On November 16, 2014 8:15:37 AM CET, Jan Hubicka <hubi...@ucw.cz> wrote: >Hi, >late in GCC 4.9 development we broke the feature that ltrans stages do >not read all >functions in ahead. This is because of late IPA passes that do not >like to see functions >without IPA transformations applied. I was originally OK with the >solution based >on fact that we have only IPA-PTA as late IPA pass that is disabled by >default and >eventually probably should become part of WPA in some form. >SIMD streaming was however added and this causes us to stream in all >function bodies >and apply all inlining decisions at the very beginning of the optimization >queue. > >Fixed by this patch. get_body is now responsible for applying >transformations >on demand and late IPA passes need to call get_body on functions that >they >are interested in + are advised to not be interested in every single >function in >the program. > >The patch also hits a bug in i386's ix86_set_current_function. It is >responsible >for initializing backend and it does so lazily remembering the previous >options >backend was initialized for. Pragma parsing however clears the cache >that leads >to wrong settings being used for subsequent functions. > >Bootstrapped/regtested x86_64-linux, will commit it tomorrow after bit >of more testing.
But for example for IPA pta this means we apply all IPA transforms without any garbage collection run? Richard. >Index: gcc/cgraphclones.c >=================================================================== >--- gcc/cgraphclones.c (revision 217612) >+++ gcc/cgraphclones.c (working copy) >@@ -307,7 +307,7 @@ duplicate_thunk_for_node (cgraph_node *t > node = duplicate_thunk_for_node (thunk_of, node); > > if (!DECL_ARGUMENTS (thunk->decl)) >- thunk->get_body (); >+ thunk->get_untransformed_body (); > > cgraph_edge *cs; > for (cs = node->callers; cs; cs = cs->next_caller) >@@ -1067,7 +1067,7 @@ symbol_table::materialize_all_clones (vo > && !gimple_has_body_p (node->decl)) > { > if (!node->clone_of->clone_of) >- node->clone_of->get_body (); >+ node->clone_of->get_untransformed_body (); > if (gimple_has_body_p (node->clone_of->decl)) > { > if (symtab->dump_file) >Index: gcc/ipa-icf.c >=================================================================== >--- gcc/ipa-icf.c (revision 217612) >+++ gcc/ipa-icf.c (working copy) >@@ -706,7 +706,7 @@ void > sem_function::init (void) > { > if (in_lto_p) >- get_node ()->get_body (); >+ get_node ()->get_untransformed_body (); > > tree fndecl = node->decl; > function *func = DECL_STRUCT_FUNCTION (fndecl); >Index: gcc/passes.c >=================================================================== >--- gcc/passes.c (revision 217612) >+++ gcc/passes.c (working copy) >@@ -2214,36 +2214,6 @@ execute_one_pass (opt_pass *pass) > executed. */ > invoke_plugin_callbacks (PLUGIN_PASS_EXECUTION, pass); > >- /* SIPLE IPA passes do not handle callgraphs with IPA transforms in >it. >- Apply all trnasforms first. 
*/ >- if (pass->type == SIMPLE_IPA_PASS) >- { >- struct cgraph_node *node; >- bool applied = false; >- FOR_EACH_DEFINED_FUNCTION (node) >- if (node->analyzed >- && node->has_gimple_body_p () >- && (!node->clone_of || node->decl != node->clone_of->decl)) >- { >- if (!node->global.inlined_to >- && node->ipa_transforms_to_apply.exists ()) >- { >- node->get_body (); >- push_cfun (DECL_STRUCT_FUNCTION (node->decl)); >- execute_all_ipa_transforms (); >- cgraph_edge::rebuild_edges (); >- free_dominance_info (CDI_DOMINATORS); >- free_dominance_info (CDI_POST_DOMINATORS); >- pop_cfun (); >- applied = true; >- } >- } >- if (applied) >- symtab->remove_unreachable_nodes (false, dump_file); >- /* Restore current_pass. */ >- current_pass = pass; >- } >- > if (!quiet_flag && !cfun) > fprintf (stderr, " <%s>", pass->name ? pass->name : ""); > >Index: gcc/cgraphunit.c >=================================================================== >--- gcc/cgraphunit.c (revision 217612) >+++ gcc/cgraphunit.c (working copy) >@@ -197,7 +197,6 @@ along with GCC; see the file COPYING3. > #include "target.h" > #include "diagnostic.h" > #include "params.h" >-#include "fibheap.h" > #include "intl.h" > #include "hash-map.h" > #include "plugin-api.h" >@@ -1469,7 +1468,7 @@ cgraph_node::expand_thunk (bool output_a > } > > if (in_lto_p) >- get_body (); >+ get_untransformed_body (); > a = DECL_ARGUMENTS (thunk_fndecl); > > current_function_decl = thunk_fndecl; >@@ -1522,7 +1521,7 @@ cgraph_node::expand_thunk (bool output_a > gimple ret; > > if (in_lto_p) >- get_body (); >+ get_untransformed_body (); > a = DECL_ARGUMENTS (thunk_fndecl); > > current_function_decl = thunk_fndecl; >@@ -1744,7 +1743,7 @@ cgraph_node::expand (void) > announce_function (decl); > process = 0; > gcc_assert (lowered); >- get_body (); >+ get_untransformed_body (); > > /* Generate RTL for the body of DECL. 
*/ > >Index: gcc/cgraph.c >=================================================================== >--- gcc/cgraph.c (revision 217612) >+++ gcc/cgraph.c (working copy) >@@ -1664,29 +1664,33 @@ release_function_body (tree decl) > { > if (DECL_STRUCT_FUNCTION (decl)) > { >- push_cfun (DECL_STRUCT_FUNCTION (decl)); >- if (cfun->cfg >- && current_loops) >- { >- cfun->curr_properties &= ~PROP_loops; >- loop_optimizer_finalize (); >- } >- if (cfun->gimple_df) >+ if (DECL_STRUCT_FUNCTION (decl)->cfg >+ || DECL_STRUCT_FUNCTION (decl)->gimple_df) > { >- delete_tree_ssa (); >- delete_tree_cfg_annotations (); >- cfun->eh = NULL; >- } >- if (cfun->cfg) >- { >- gcc_assert (!dom_info_available_p (CDI_DOMINATORS)); >- gcc_assert (!dom_info_available_p (CDI_POST_DOMINATORS)); >- clear_edges (); >- cfun->cfg = NULL; >+ push_cfun (DECL_STRUCT_FUNCTION (decl)); >+ if (cfun->cfg >+ && current_loops) >+ { >+ cfun->curr_properties &= ~PROP_loops; >+ loop_optimizer_finalize (); >+ } >+ if (cfun->gimple_df) >+ { >+ delete_tree_ssa (); >+ delete_tree_cfg_annotations (); >+ cfun->eh = NULL; >+ } >+ if (cfun->cfg) >+ { >+ gcc_assert (!dom_info_available_p (CDI_DOMINATORS)); >+ gcc_assert (!dom_info_available_p (CDI_POST_DOMINATORS)); >+ clear_edges (); >+ cfun->cfg = NULL; >+ } >+ if (cfun->value_histograms) >+ free_histograms (); >+ pop_cfun (); > } >- if (cfun->value_histograms) >- free_histograms (); >- pop_cfun (); > gimple_set_body (decl, NULL); > /* Struct function hangs a lot of data that would leak if we didn't > removed all pointers to it. */ >@@ -3138,7 +3142,7 @@ cgraph_node::function_symbol (enum avail > present. */ > > bool >-cgraph_node::get_body (void) >+cgraph_node::get_untransformed_body (void) > { > lto_file_decl_data *file_data; > const char *data, *name; >@@ -3178,6 +3182,44 @@ cgraph_node::get_body (void) > return true; > } > >+/* Prepare function body. When doing LTO, read cgraph_node's body >from disk >+ if it is not already present. 
When some IPA transformations are >scheduled, >+ apply them. */ >+ >+bool >+cgraph_node::get_body (void) >+{ >+ bool updated; >+ >+ updated = get_untransformed_body (); >+ >+ /* Getting transformed body makes no sense for inline clones; >+ we should never use this on real clones becuase they are >materialized >+ early. >+ TODO: Materializing clones here will likely lead to smaller >LTRANS >+ footprint. */ >+ gcc_assert (!global.inlined_to && !clone_of); >+ if (ipa_transforms_to_apply.exists ()) >+ { >+ opt_pass *saved_current_pass = current_pass; >+ FILE *saved_dump_file = dump_file; >+ int saved_dump_flags = dump_flags; >+ >+ push_cfun (DECL_STRUCT_FUNCTION (decl)); >+ execute_all_ipa_transforms (); >+ cgraph_edge::rebuild_edges (); >+ free_dominance_info (CDI_DOMINATORS); >+ free_dominance_info (CDI_POST_DOMINATORS); >+ pop_cfun (); >+ updated = true; >+ >+ current_pass = saved_current_pass; >+ dump_file = saved_dump_file; >+ dump_flags = saved_dump_flags; >+ } >+ return updated; >+} >+ > /* Return the DECL_STRUCT_FUNCTION of the function. */ > > struct function * >Index: gcc/cgraph.h >=================================================================== >--- gcc/cgraph.h (revision 217612) >+++ gcc/cgraph.h (working copy) >@@ -933,6 +933,11 @@ public: > >/* When doing LTO, read cgraph_node's body from disk if it is not >already > present. */ >+ bool get_untransformed_body (void); >+ >+ /* Prepare function body. When doing LTO, read cgraph_node's body >from disk >+ if it is not already present. When some IPA transformations are >scheduled, >+ apply them. */ > bool get_body (void); > > /* Release memory used to represent body of function. >Index: gcc/config/i386/i386.c >=================================================================== >--- gcc/config/i386/i386.c (revision 217612) >+++ gcc/config/i386/i386.c (working copy) >@@ -5029,10 +5029,35 @@ ix86_can_inline_p (tree caller, tree cal > /* Remember the last target of ix86_set_current_function. 
*/ > static GTY(()) tree ix86_previous_fndecl; > >+/* Set target globals to default. */ >+ >+static void >+ix86_reset_to_default_globals (void) >+{ >+ tree old_tree = (ix86_previous_fndecl >+ ? DECL_FUNCTION_SPECIFIC_TARGET (ix86_previous_fndecl) >+ : NULL_TREE); >+ >+ if (old_tree) >+ { >+ tree new_tree = target_option_current_node; >+ cl_target_option_restore (&global_options, >+ TREE_TARGET_OPTION (new_tree)); >+ if (TREE_TARGET_GLOBALS (new_tree)) >+ restore_target_globals (TREE_TARGET_GLOBALS (new_tree)); >+ else if (new_tree == target_option_default_node) >+ restore_target_globals (&default_target_globals); >+ else >+ TREE_TARGET_GLOBALS (new_tree) >+ = save_target_globals_default_opts (); >+ } >+} >+ > /* Invalidate ix86_previous_fndecl cache. */ > void > ix86_reset_previous_fndecl (void) > { >+ ix86_reset_to_default_globals (); > ix86_previous_fndecl = NULL_TREE; > } > >@@ -5071,18 +5096,7 @@ ix86_set_current_function (tree fndecl) > } > > else if (old_tree) >- { >- new_tree = target_option_current_node; >- cl_target_option_restore (&global_options, >- TREE_TARGET_OPTION (new_tree)); >- if (TREE_TARGET_GLOBALS (new_tree)) >- restore_target_globals (TREE_TARGET_GLOBALS (new_tree)); >- else if (new_tree == target_option_default_node) >- restore_target_globals (&default_target_globals); >- else >- TREE_TARGET_GLOBALS (new_tree) >- = save_target_globals_default_opts (); >- } >+ ix86_reset_to_default_globals (); > } > } > >@@ -50972,7 +50986,7 @@ ix86_simd_clone_adjust (struct cgraph_no > bool ok = ix86_valid_target_attribute_p (node->decl, NULL, args, 0); > gcc_assert (ok); > pop_cfun (); >- ix86_previous_fndecl = NULL_TREE; >+ ix86_reset_previous_fndecl (); > ix86_set_current_function (node->decl); > } > >Index: gcc/tree-inline.c >=================================================================== >--- gcc/tree-inline.c (revision 217612) >+++ gcc/tree-inline.c (working copy) >@@ -4338,7 +4338,7 @@ expand_call_inline (basic_block bb, gimp > goto egress; > } > 
fn = cg_edge->callee->decl; >- cg_edge->callee->get_body (); >+ cg_edge->callee->get_untransformed_body (); > > #ifdef ENABLE_CHECKING > if (cg_edge->callee->decl != id->dst_node->decl) >Index: gcc/tree-ssa-structalias.c >=================================================================== >--- gcc/tree-ssa-structalias.c (revision 217612) >+++ gcc/tree-ssa-structalias.c (working copy) >@@ -7086,7 +7086,7 @@ ipa_pta_execute (void) > /* Nodes without a body are not interesting. Especially do not > visit clones at this point for now - we get duplicate decls > there for inline clones at least. */ >- if (!node->has_gimple_body_p () || node->clone_of) >+ if (!node->has_gimple_body_p () || node->global.inlined_to) > continue; > node->get_body (); >