Hi, this is a WIP patch to deal with the unreachable clones problem. It basically renders the clones as unanalyzed cgraph nodes (but with the body still in place) so that IPA passes don't see them.
Honza Index: cgraph.c =================================================================== --- cgraph.c (revision 154127) +++ cgraph.c (working copy) @@ -1132,7 +1132,7 @@ cgraph_release_function_body (struct cgr pop_cfun(); gimple_set_body (node->decl, NULL); VEC_free (ipa_opt_pass, heap, - DECL_STRUCT_FUNCTION (node->decl)->ipa_transforms_to_apply); + node->ipa_transforms_to_apply); /* Struct function hangs a lot of data that would leak if we didn't removed all pointers to it. */ ggc_free (DECL_STRUCT_FUNCTION (node->decl)); @@ -1159,6 +1159,8 @@ cgraph_remove_node (struct cgraph_node * cgraph_call_node_removal_hooks (node); cgraph_node_remove_callers (node); cgraph_node_remove_callees (node); + VEC_free (ipa_opt_pass, heap, + node->ipa_transforms_to_apply); /* Incremental inlining access removed nodes stored in the postorder list. */ Index: cgraph.h =================================================================== --- cgraph.h (revision 154127) +++ cgraph.h (working copy) @@ -190,6 +190,11 @@ struct GTY((chain_next ("%h.next"), chai PTR GTY ((skip)) aux; + /* Interprocedural passes scheduled to have their transform functions + applied next time we execute local pass on them. We maintain it + per-function in order to allow IPA passes to introduce new functions. */ + VEC(ipa_opt_pass,heap) * GTY((skip)) ipa_transforms_to_apply; + struct cgraph_local_info local; struct cgraph_global_info global; struct cgraph_rtl_info rtl; @@ -206,16 +211,24 @@ struct GTY((chain_next ("%h.next"), chai number of cfg nodes with -fprofile-generate and -fprofile-use */ int pid; - /* Set when function must be output - it is externally visible - or its address is taken. */ + /* Set when function must be output for some reason. The primary + use of this flag is to mark functions needed to be output for + non-standard reason. Functions that are externally visible + or reachable from functions needed to be output are marked + by specialized flags. 
*/ unsigned needed : 1; - /* Set when function has address taken. */ + /* Set when function has address taken. + In current implementation it imply needed flag. */ unsigned address_taken : 1; /* Set when decl is an abstract function pointed to by the ABSTRACT_DECL_ORIGIN of a reachable function. */ unsigned abstract_and_needed : 1; /* Set when function is reachable by call from other function - that is either reachable or needed. */ + that is either reachable or needed. + This flag is computed at original cgraph construction and then + updated in cgraph_remove_unreachable_nodes. Note that after + cgraph_remove_unreachable_nodes cgraph still can contain unreachable + nodes when they are needed for virtual clone instantiation. */ unsigned reachable : 1; /* Set once the function is lowered (i.e. its CFG is built). */ unsigned lowered : 1; Index: cgraphunit.c =================================================================== --- cgraphunit.c (revision 154127) +++ cgraphunit.c (working copy) @@ -699,7 +699,7 @@ verify_cgraph_node (struct cgraph_node * error_found = true; } - if (node->analyzed && gimple_has_body_p (node->decl) + if (node->analyzed && node->reachable && gimple_has_body_p (node->decl) && !TREE_ASM_WRITTEN (node->decl) && (!DECL_EXTERNAL (node->decl) || node->global.inlined_to) && !flag_wpa) @@ -1777,8 +1777,8 @@ save_inline_function_body (struct cgraph TREE_PUBLIC (first_clone->decl) = 0; DECL_COMDAT (first_clone->decl) = 0; VEC_free (ipa_opt_pass, heap, - DECL_STRUCT_FUNCTION (first_clone->decl)->ipa_transforms_to_apply); - DECL_STRUCT_FUNCTION (first_clone->decl)->ipa_transforms_to_apply = NULL; + first_clone->ipa_transforms_to_apply); + first_clone->ipa_transforms_to_apply = NULL; #ifdef ENABLE_CHECKING verify_cgraph_node (first_clone); @@ -1810,6 +1810,8 @@ cgraph_materialize_clone (struct cgraph_ node->clone_of->clones = node->next_sibling_clone; node->next_sibling_clone = NULL; node->prev_sibling_clone = NULL; + if (!node->clone_of->analyzed && 
!node->clone_of->clones) + cgraph_remove_node (node->clone_of); node->clone_of = NULL; bitmap_obstack_release (NULL); } Index: ipa-inline.c =================================================================== --- ipa-inline.c (revision 154127) +++ ipa-inline.c (working copy) @@ -1120,7 +1120,7 @@ cgraph_decide_inlining (void) max_count = 0; max_benefit = 0; for (node = cgraph_nodes; node; node = node->next) - if (node->analyzed) + if (node->reachable) { struct cgraph_edge *e; Index: lto-streamer-in.c =================================================================== --- lto-streamer-in.c (revision 154127) +++ lto-streamer-in.c (working copy) @@ -1476,6 +1476,7 @@ lto_read_body (struct lto_file_decl_data /* Restore decl state */ file_data->current_decl_state = file_data->global_decl_state; +#if 0 /* FIXME: ipa_transforms_to_apply holds list of passes that have optimization summaries computed and needs to apply changes. At the moment WHOPR only supports inlining, so we can push it here by hand. In future we need to stream @@ -1485,6 +1486,7 @@ lto_read_body (struct lto_file_decl_data VEC_safe_push (ipa_opt_pass, heap, cfun->ipa_transforms_to_apply, (ipa_opt_pass)&pass_ipa_inline); +#endif pop_cfun (); } else Index: c-decl.c =================================================================== --- c-decl.c (revision 154127) +++ c-decl.c (working copy) @@ -4497,6 +4497,7 @@ build_compound_literal (location_t loc, set_compound_literal_name (decl); DECL_DEFER_OUTPUT (decl) = 1; DECL_COMDAT (decl) = 1; + TREE_PUBLIC (decl) = 1; DECL_ARTIFICIAL (decl) = 1; DECL_IGNORED_P (decl) = 1; pushdecl (decl); Index: function.h =================================================================== --- function.h (revision 154127) +++ function.h (working copy) @@ -522,11 +522,6 @@ struct GTY(()) function { unsigned int curr_properties; unsigned int last_verified; - /* Interprocedural passes scheduled to have their transform functions - applied next time we execute local pass on them. 
We maintain it - per-function in order to allow IPA passes to introduce new functions. */ - VEC(ipa_opt_pass,heap) * GTY((skip)) ipa_transforms_to_apply; - /* Non-null if the function does something that would prevent it from being copied; this applies to both versioning and inlining. Set to a string describing the reason for failure. */ Index: ipa.c =================================================================== --- ipa.c (revision 154128) +++ ipa.c (working copy) @@ -121,6 +121,7 @@ bool cgraph_remove_unreachable_nodes (bool before_inlining_p, FILE *file) { struct cgraph_node *first = (struct cgraph_node *) (void *) 1; + struct cgraph_node *processed = (struct cgraph_node *) (void *) 2; struct cgraph_node *node, *next; bool changed = false; @@ -142,9 +143,13 @@ cgraph_remove_unreachable_nodes (bool be gcc_assert (!node->global.inlined_to); node->aux = first; first = node; + node->reachable = true; } else - gcc_assert (!node->aux); + { + gcc_assert (!node->aux); + node->reachable = false; + } /* Perform reachability analysis. 
As a special case do not consider extern inline functions not inlined as live because we won't output @@ -154,17 +159,26 @@ cgraph_remove_unreachable_nodes (bool be struct cgraph_edge *e; node = first; first = (struct cgraph_node *) first->aux; + node->aux = processed; - for (e = node->callees; e; e = e->next_callee) - if (!e->callee->aux - && node->analyzed - && (!e->inline_failed || !e->callee->analyzed - || (!DECL_EXTERNAL (e->callee->decl)) - || before_inlining_p)) - { - e->callee->aux = first; - first = e->callee; - } + if (node->reachable) + for (e = node->callees; e; e = e->next_callee) + if (!e->callee->reachable + && node->analyzed + && (!e->inline_failed || !e->callee->analyzed + || (!DECL_EXTERNAL (e->callee->decl)) + || before_inlining_p)) + { + bool prev_reachable = e->callee->reachable; + e->callee->reachable |= node->reachable; + if (!e->callee->aux + || (e->callee->aux == processed + && prev_reachable != e->callee->reachable)) + { + e->callee->aux = first; + first = e->callee; + } + } while (node->clone_of && !node->clone_of->aux && !gimple_has_body_p (node->decl)) { node = node->clone_of; @@ -184,13 +198,18 @@ cgraph_remove_unreachable_nodes (bool be for (node = cgraph_nodes; node; node = next) { next = node->next; + if (node->aux && !node->reachable) + { + cgraph_node_remove_callees (node); + node->analyzed = false; + node->local.inlinable = false; + } if (!node->aux) { node->global.inlined_to = NULL; if (file) fprintf (file, " %s", cgraph_node_name (node)); - if (!node->analyzed || !DECL_EXTERNAL (node->decl) - || before_inlining_p) + if (!node->analyzed || !DECL_EXTERNAL (node->decl) || before_inlining_p) cgraph_remove_node (node); else { @@ -204,21 +223,16 @@ cgraph_remove_unreachable_nodes (bool be /* If so, we need to keep node in the callgraph. */ if (e || node->needed) { - struct cgraph_node *clone; - - /* If there are still clones, we must keep body around. - Otherwise we can just remove the body but keep the clone. 
*/ - for (clone = node->clones; clone; - clone = clone->next_sibling_clone) - if (clone->aux) - break; - if (!clone) - { - cgraph_release_function_body (node); - cgraph_node_remove_callees (node); - node->analyzed = false; - node->local.inlinable = false; - } + cgraph_release_function_body (node); + cgraph_node_remove_callees (node); + node->analyzed = false; + node->local.inlinable = false; + if (node->prev_sibling_clone) + node->prev_sibling_clone->next_sibling_clone = node->next_sibling_clone; + else if (node->clone_of) + node->clone_of->clones = node->next_sibling_clone; + if (node->next_sibling_clone) + node->next_sibling_clone->prev_sibling_clone = node->prev_sibling_clone; } else cgraph_remove_node (node); @@ -318,7 +332,7 @@ function_and_variable_visibility (bool w { if (!vnode->finalized) continue; - gcc_assert ((!DECL_WEAK (vnode->decl) && !DECL_COMMON (vnode->decl)) + gcc_assert ((!DECL_WEAK (vnode->decl) && !DECL_COMMON (vnode->decl) && !DECL_COMDAT (vnode->decl)) || TREE_PUBLIC (vnode->decl) || DECL_EXTERNAL (vnode->decl)); if (vnode->needed && (DECL_COMDAT (vnode->decl) || TREE_PUBLIC (vnode->decl)) Index: tree-inline.c =================================================================== --- tree-inline.c (revision 154127) +++ tree-inline.c (working copy) @@ -1983,9 +1983,6 @@ initialize_cfun (tree new_fndecl, tree c cfun->function_end_locus = src_cfun->function_end_locus; cfun->curr_properties = src_cfun->curr_properties; cfun->last_verified = src_cfun->last_verified; - if (src_cfun->ipa_transforms_to_apply) - cfun->ipa_transforms_to_apply = VEC_copy (ipa_opt_pass, heap, - src_cfun->ipa_transforms_to_apply); cfun->va_list_gpr_size = src_cfun->va_list_gpr_size; cfun->va_list_fpr_size = src_cfun->va_list_fpr_size; cfun->function_frequency = src_cfun->function_frequency; @@ -3822,6 +3819,10 @@ expand_call_inline (basic_block bb, gimp (*debug_hooks->outlining_inline_function) (cg_edge->callee->decl); /* Update callgraph if needed. 
*/ + if (cg_edge->callee->clone_of + && !cg_edge->callee->clone_of->next_sibling_clone + && !cg_edge->callee->analyzed) + cgraph_remove_node (cg_edge->callee); cgraph_remove_node (cg_edge->callee); id->block = NULL_TREE; @@ -4848,6 +4849,19 @@ tree_function_versioning (tree old_decl, id.src_node = old_version_node; id.dst_node = new_version_node; id.src_cfun = DECL_STRUCT_FUNCTION (old_decl); + if (id.src_node->ipa_transforms_to_apply) + { + VEC(ipa_opt_pass,heap) * old_transforms_to_apply = id.dst_node->ipa_transforms_to_apply; + unsigned int i; + + id.dst_node->ipa_transforms_to_apply = VEC_copy (ipa_opt_pass, heap, + id.src_node->ipa_transforms_to_apply); + for (i = 0; i < VEC_length (ipa_opt_pass, old_transforms_to_apply); i++) + VEC_safe_push (ipa_opt_pass, heap, id.dst_node->ipa_transforms_to_apply, + VEC_index (ipa_opt_pass, + old_transforms_to_apply, + i)); + } id.copy_decl = copy_decl_no_change; id.transform_call_graph_edges Index: passes.c =================================================================== --- passes.c (revision 154127) +++ passes.c (working copy) @@ -1376,15 +1376,6 @@ update_properties_after_pass (void *data & ~pass->properties_destroyed; } -/* Schedule IPA transform pass DATA for CFUN. */ - -static void -add_ipa_transform_pass (void *data) -{ - struct ipa_opt_pass_d *ipa_pass = (struct ipa_opt_pass_d *) data; - VEC_safe_push (ipa_opt_pass, heap, cfun->ipa_transforms_to_apply, ipa_pass); -} - /* Execute summary generation for all of the passes in IPA_PASS. 
*/ void @@ -1464,19 +1455,22 @@ execute_one_ipa_transform_pass (struct c void execute_all_ipa_transforms (void) { - if (cfun && cfun->ipa_transforms_to_apply) + struct cgraph_node *node; + if (!cfun) + return; + node = cgraph_node (current_function_decl); + if (node->ipa_transforms_to_apply) { unsigned int i; - struct cgraph_node *node = cgraph_node (current_function_decl); - for (i = 0; i < VEC_length (ipa_opt_pass, cfun->ipa_transforms_to_apply); + for (i = 0; i < VEC_length (ipa_opt_pass, node->ipa_transforms_to_apply); i++) execute_one_ipa_transform_pass (node, VEC_index (ipa_opt_pass, - cfun->ipa_transforms_to_apply, + node->ipa_transforms_to_apply, i)); - VEC_free (ipa_opt_pass, heap, cfun->ipa_transforms_to_apply); - cfun->ipa_transforms_to_apply = NULL; + VEC_free (ipa_opt_pass, heap, node->ipa_transforms_to_apply); + node->ipa_transforms_to_apply = NULL; } } @@ -1551,7 +1545,13 @@ execute_one_pass (struct opt_pass *pass) execute_todo (todo_after | pass->todo_flags_finish); verify_interpass_invariants (); if (pass->type == IPA_PASS) - do_per_function (add_ipa_transform_pass, pass); + { + struct cgraph_node *node; + for (node = cgraph_nodes; node; node = node->next) + if (node->analyzed) + VEC_safe_push (ipa_opt_pass, heap, node->ipa_transforms_to_apply, + (struct ipa_opt_pass_d *)pass); + } if (!current_function_decl) cgraph_process_new_functions ();