From: Andi Kleen <a...@linux.intel.com> Some projects need to prevent reordering of specific top level declarations with LTO, in particular declarations defining init calls.
The only way to do that with LTO was to use -fno-toplevel-reorder, which stops reordering for all declarations and makes LTO partitioning less efficient. This patch adds a new no_reorder attribute that stops reordering only for the marked declarations. The program can then mark only, e.g., the initcalls and leave all the other declarations alone. The patch does: - Adds the new no_reorder attribute for the C family. - Initializes a new no_reorder flag in the symtab_nodes in the function visibility pass. - Maintains the no_reorder flag when creating new nodes. - Changes the partition code to always keep a separate sorted queue of ordered nodes and flush them in order with the other nodes. This is used by all nodes with -fno-toplevel-reorder, and only the marked ones without it. Parts of the old -fno-toplevel-reorder code paths are reused. - Adds various checks throughout the tree to make no_reorder marked functions behave the same as with -fno-toplevel-reorder. - Changes the LTO streamer to serialize the no_reorder attribute. Bootstrapped and tested with LTO + -fno-toplevel-reorder, plain LTO and bootstrap w/o LTO on x86_64-linux. Also fixes the reordering in the other large project. gcc/c-family/: 2014-09-15 Andi Kleen <a...@linux.intel.com> * c-common.c (handle_no_reorder_attribute): New function. (c_common_attribute_table): Add no_reorder attribute. gcc/: 2014-09-14 Andi Kleen <a...@linux.intel.com> * cgraph.h (symtab_node): Add no_reorder attribute. (symbol_table::output_asm_statements): Remove. * cgraphclones.c (cgraph_node::create_clone): Copy no_reorder. (cgraph_node::create_version_clone): Ditto. (symbol_table::output_asm_statements): Remove. * trans-mem.c (ipa_tm_create_version_alias): Copy no_reorder. * cgraphunit.c (varpool_node::finalize_decl): Check no_reorder. (output_in_order): Add no_reorder flag. Only handle no_reorder nodes when set. (symbol_table::compile): Add separate pass for no_reorder nodes. * doc/extend.texi (no_reorder): Document no_reorder attribute. 
* ipa-visibility.c (function_and_variable_visibility): Set no_reorder flag in symtab_node from declaration. * lto-cgraph.c (lto_output_node): Serialize no_reorder. (lto_output_varpool_node): Ditto. (input_overwrite_node): Ditto. (input_varpool_node): Ditto. * varpool.c (varpool_node::add): Set no_reorder attribute. (symbol_table::remove_unreferenced_decls): Handle no_reorder. (symbol_table::output_variables): Ditto. * symtab.c (symtab_node::dump_base): Print no_reorder. gcc/lto/: 2014-09-13 Andi Kleen <a...@linux.intel.com> * lto-partition.c (node_cmp): Update comment. (varpool_node_cmp): Use symtab_node for comparison. (add_sorted_nodes): New function. (lto_balanced_map): Change to keep ordered queue of ordered nodes. Handle no_reorder attribute. --- gcc/c-family/c-common.c | 28 ++++++++++++ gcc/cgraph.h | 5 +-- gcc/cgraphclones.c | 2 + gcc/cgraphunit.c | 38 +++++++--------- gcc/doc/extend.texi | 12 +++++- gcc/ipa-visibility.c | 6 +++ gcc/lto-cgraph.c | 4 ++ gcc/lto/lto-partition.c | 112 ++++++++++++++++++++++++++++++------------------ gcc/symtab.c | 2 + gcc/trans-mem.c | 1 + gcc/varpool.c | 21 +++++++-- 11 files changed, 159 insertions(+), 72 deletions(-) diff --git a/gcc/c-family/c-common.c b/gcc/c-family/c-common.c index 39be956..d0845d1 100644 --- a/gcc/c-family/c-common.c +++ b/gcc/c-family/c-common.c @@ -328,6 +328,8 @@ static tree handle_used_attribute (tree *, tree, tree, int, bool *); static tree handle_unused_attribute (tree *, tree, tree, int, bool *); static tree handle_externally_visible_attribute (tree *, tree, tree, int, bool *); +static tree handle_no_reorder_attribute (tree *, tree, tree, int, + bool *); static tree handle_const_attribute (tree *, tree, tree, int, bool *); static tree handle_transparent_union_attribute (tree *, tree, tree, int, bool *); @@ -652,6 +654,8 @@ const struct attribute_spec c_common_attribute_table[] = handle_unused_attribute, false }, { "externally_visible", 0, 0, true, false, false, handle_externally_visible_attribute, 
false }, + { "no_reorder", 0, 0, true, false, false, + handle_no_reorder_attribute, false }, /* The same comments as for noreturn attributes apply to const ones. */ { "const", 0, 0, true, false, false, handle_const_attribute, false }, @@ -6953,6 +6957,30 @@ handle_externally_visible_attribute (tree *pnode, tree name, return NULL_TREE; } +/* Handle the "no_reorder" attribute. Arguments as in + struct attribute_spec.handler. */ + +static tree +handle_no_reorder_attribute (tree *pnode, + tree name, + tree, + int, + bool *no_add_attrs) +{ + tree node = *pnode; + + if ((TREE_CODE (node) != FUNCTION_DECL && TREE_CODE (node) != VAR_DECL) + && !(TREE_STATIC (node) || (DECL_P (node) && DECL_EXTERNAL (node)))) + { + warning (OPT_Wattributes, + "%qE attribute only affects top level objects", + name); + *no_add_attrs = true; + } + + return NULL_TREE; +} + /* Handle a "const" attribute; arguments as in struct attribute_spec.handler. */ diff --git a/gcc/cgraph.h b/gcc/cgraph.h index 030a1c7..0d6e321 100644 --- a/gcc/cgraph.h +++ b/gcc/cgraph.h @@ -400,6 +400,8 @@ public: /* Set when function is visible by other units. */ unsigned externally_visible : 1; + /* Don't reorder to other symbols having this set. */ + unsigned no_reorder : 1; /* The symbol will be assumed to be used in an invisible way (like by an toplevel asm statement). */ unsigned force_output : 1; @@ -1701,9 +1703,6 @@ public: /* Output all variables enqueued to be assembled. */ bool output_variables (void); - /* Output all asm statements we have stored up to be output. */ - void output_asm_statements (void); - /* Weakrefs may be associated to external decls and thus not output at expansion time. Emit all necessary aliases. 
*/ void output_weakrefs (void); diff --git a/gcc/cgraphclones.c b/gcc/cgraphclones.c index 224bb55..38d92f5 100644 --- a/gcc/cgraphclones.c +++ b/gcc/cgraphclones.c @@ -437,6 +437,7 @@ cgraph_node::create_clone (tree decl, gcov_type gcov_count, int freq, new_node->definition = definition; new_node->local = local; new_node->externally_visible = false; + new_node->no_reorder = no_reorder; new_node->local.local = true; new_node->global = global; new_node->global.inlined_to = new_inlined_to; @@ -876,6 +877,7 @@ cgraph_node::create_version_clone (tree new_decl, new_version->definition = definition; new_version->local = local; new_version->externally_visible = false; + new_version->no_reorder = no_reorder; new_version->local.local = new_version->definition; new_version->global = global; new_version->rtl = rtl; diff --git a/gcc/cgraphunit.c b/gcc/cgraphunit.c index 3e3b8d2..d2d3ba8 100644 --- a/gcc/cgraphunit.c +++ b/gcc/cgraphunit.c @@ -545,22 +545,6 @@ cgraph_node::add_new_function (tree fndecl, bool lowered) DECL_FUNCTION_PERSONALITY (fndecl) = lang_hooks.eh_personality (); } -/* Output all asm statements we have stored up to be output. */ - -void -symbol_table::output_asm_statements (void) -{ - asm_node *can; - - if (seen_error ()) - return; - - for (can = first_asm_symbol (); can; can = can->next) - assemble_asm (can->asm_str); - - clear_asm_symbols (); -} - /* Analyze the function scheduled to be output. */ void cgraph_node::analyze (void) @@ -785,8 +769,10 @@ varpool_node::finalize_decl (tree decl) if (TREE_THIS_VOLATILE (decl) || DECL_PRESERVE_P (decl) /* Traditionally we do not eliminate static variables when not optimizing and when not doing toplevel reoder. 
*/ - || (!flag_toplevel_reorder && !DECL_COMDAT (node->decl) - && !DECL_ARTIFICIAL (node->decl))) + || node->no_reorder + || ((!flag_toplevel_reorder + && !DECL_COMDAT (node->decl) + && !DECL_ARTIFICIAL (node->decl)))) node->force_output = true; if (symtab->state == CONSTRUCTION @@ -1922,10 +1908,11 @@ struct cgraph_order_sort according to their order fields, which is the order in which they appeared in the file. This implements -fno-toplevel-reorder. In this mode we may output functions and variables which don't really - need to be output. */ + need to be output. + When NO_REORDER is true only do this for symbols marked no reorder. */ static void -output_in_order (void) +output_in_order (bool no_reorder) { int max; cgraph_order_sort *nodes; @@ -1940,6 +1927,8 @@ output_in_order (void) { if (pf->process && !pf->thunk.thunk_p && !pf->alias) { + if (no_reorder && !pf->no_reorder) + continue; i = pf->order; gcc_assert (nodes[i].kind == ORDER_UNDEFINED); nodes[i].kind = ORDER_FUNCTION; @@ -1950,6 +1939,8 @@ output_in_order (void) FOR_EACH_DEFINED_VARIABLE (pv) if (!DECL_EXTERNAL (pv->decl)) { + if (no_reorder && !pv->no_reorder) + continue; i = pv->order; gcc_assert (nodes[i].kind == ORDER_UNDEFINED); nodes[i].kind = ORDER_VAR; @@ -2203,11 +2194,12 @@ symbol_table::compile (void) state = EXPANSION; if (!flag_toplevel_reorder) - output_in_order (); + output_in_order (false); else { - output_asm_statements (); - + /* Output first asm statements and anything ordered. The process + flag is cleared for these nodes, so we skip them later. 
*/ + output_in_order (true); expand_all_functions (); output_variables (); } diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi index 4eef900..c78ffb2 100644 --- a/gcc/doc/extend.texi +++ b/gcc/doc/extend.texi @@ -2180,7 +2180,7 @@ attributes are currently defined for functions on all targets: @code{returns_nonnull}, @code{gnu_inline}, @code{externally_visible}, @code{hot}, @code{cold}, @code{artificial}, @code{no_sanitize_address}, @code{no_address_safety_analysis}, -@code{no_sanitize_undefined}, +@code{no_sanitize_undefined}, @code{no_reorder}, @code{error} and @code{warning}. Several other attributes are defined for functions on particular target systems. Other attributes, including @code{section} are @@ -3472,6 +3472,16 @@ my_memcpy (void *dest, const void *src, size_t len) __attribute__((nonnull)); @end smallexample +@item no_reorder +@cindex @code{no_reorder} function or variable attribute +Do not reorder functions or variables marked @code{no_reorder} +against each other or top level assembler statements in the executable. +The actual order in the program will depend on the linker command +line. Static variables marked like this are also not removed. +This has a similar effect +as the @option{-fno-toplevel-reorder} option, but only applies to the +marked symbols. 
+ @item returns_nonnull @cindex @code{returns_nonnull} function attribute The @code{returns_nonnull} attribute specifies that the function diff --git a/gcc/ipa-visibility.c b/gcc/ipa-visibility.c index 32199af..541d568 100644 --- a/gcc/ipa-visibility.c +++ b/gcc/ipa-visibility.c @@ -492,6 +492,9 @@ function_and_variable_visibility (bool whole_program) node->externally_visible = false; node->forced_by_abi = false; } + if (lookup_attribute ("no_reorder", + DECL_ATTRIBUTES (node->decl))) + node->no_reorder = 1; if (!node->externally_visible && node->definition && !node->weakref && !DECL_EXTERNAL (node->decl)) @@ -633,6 +636,9 @@ function_and_variable_visibility (bool whole_program) vnode->externally_visible = false; vnode->forced_by_abi = false; } + if (lookup_attribute ("no_reorder", + DECL_ATTRIBUTES (vnode->decl))) + vnode->no_reorder = 1; if (!vnode->externally_visible && !vnode->weakref) { diff --git a/gcc/lto-cgraph.c b/gcc/lto-cgraph.c index 68f6a52..0584946 100644 --- a/gcc/lto-cgraph.c +++ b/gcc/lto-cgraph.c @@ -508,6 +508,7 @@ lto_output_node (struct lto_simple_output_block *ob, struct cgraph_node *node, bp = bitpack_create (ob->main_stream); bp_pack_value (&bp, node->local.local, 1); bp_pack_value (&bp, node->externally_visible, 1); + bp_pack_value (&bp, node->no_reorder, 1); bp_pack_value (&bp, node->definition, 1); bp_pack_value (&bp, node->local.versionable, 1); bp_pack_value (&bp, node->local.can_change_signature, 1); @@ -581,6 +582,7 @@ lto_output_varpool_node (struct lto_simple_output_block *ob, varpool_node *node, lto_output_var_decl_index (ob->decl_state, ob->main_stream, node->decl); bp = bitpack_create (ob->main_stream); bp_pack_value (&bp, node->externally_visible, 1); + bp_pack_value (&bp, node->no_reorder, 1); bp_pack_value (&bp, node->force_output, 1); bp_pack_value (&bp, node->forced_by_abi, 1); bp_pack_value (&bp, node->unique_name, 1); @@ -1041,6 +1043,7 @@ input_overwrite_node (struct lto_file_decl_data *file_data, node->local.local = 
bp_unpack_value (bp, 1); node->externally_visible = bp_unpack_value (bp, 1); + node->no_reorder = bp_unpack_value (bp, 1); node->definition = bp_unpack_value (bp, 1); node->local.versionable = bp_unpack_value (bp, 1); node->local.can_change_signature = bp_unpack_value (bp, 1); @@ -1246,6 +1249,7 @@ input_varpool_node (struct lto_file_decl_data *file_data, bp = streamer_read_bitpack (ib); node->externally_visible = bp_unpack_value (&bp, 1); + node->no_reorder = bp_unpack_value (&bp, 1); node->force_output = bp_unpack_value (&bp, 1); node->forced_by_abi = bp_unpack_value (&bp, 1); node->unique_name = bp_unpack_value (&bp, 1); diff --git a/gcc/lto/lto-partition.c b/gcc/lto/lto-partition.c index 5bd089b..0451a66 100644 --- a/gcc/lto/lto-partition.c +++ b/gcc/lto/lto-partition.c @@ -333,7 +333,8 @@ lto_max_map (void) new_partition ("empty"); } -/* Helper function for qsort; sort nodes by order. */ +/* Helper function for qsort; sort nodes by order. noreorder functions must have + been removed earlier. */ static int node_cmp (const void *pa, const void *pb) { @@ -365,11 +366,26 @@ node_cmp (const void *pa, const void *pb) static int varpool_node_cmp (const void *pa, const void *pb) { - const varpool_node *a = *(const varpool_node * const *) pa; - const varpool_node *b = *(const varpool_node * const *) pb; + const symtab_node *a = *static_cast<const symtab_node * const *> (pa); + const symtab_node *b = *static_cast<const symtab_node * const *> (pb); return b->order - a->order; } +/* Add all symtab nodes from NEXT_NODE to PARTITION in order. */ + +static void +add_sorted_nodes (vec<symtab_node *> &next_nodes, ltrans_partition partition) +{ + unsigned i; + symtab_node *node; + + next_nodes.qsort (varpool_node_cmp); + FOR_EACH_VEC_ELT (next_nodes, i, node) + if (!symbol_partitioned_p (node)) + add_symbol_to_partition (partition, node); +} + + /* Group cgraph nodes into equally-sized partitions. The partitioning algorithm is simple: nodes are taken in predefined order. 
@@ -414,7 +430,8 @@ lto_balanced_map (int n_lto_partitions) int n_nodes = 0; int n_varpool_nodes = 0, varpool_pos = 0, best_varpool_pos = 0; struct cgraph_node **order = XNEWVEC (cgraph_node *, symtab->cgraph_max_uid); - varpool_node **varpool_order = NULL; + auto_vec<cgraph_node *> noreorder; + auto_vec<varpool_node *> varpool_order; int i; struct cgraph_node *node; int total_size = 0, best_total_size = 0; @@ -427,6 +444,7 @@ lto_balanced_map (int n_lto_partitions) INT_MAX, best_internal = 0; int npartitions; int current_order = -1; + int noreorder_pos = 0; FOR_EACH_VARIABLE (vnode) gcc_assert (!vnode->aux); @@ -434,7 +452,10 @@ lto_balanced_map (int n_lto_partitions) FOR_EACH_DEFINED_FUNCTION (node) if (node->get_partitioning_class () == SYMBOL_PARTITION) { - order[n_nodes++] = node; + if (node->no_reorder) + noreorder.safe_push (node); + else + order[n_nodes++] = node; if (!node->alias) total_size += inline_summary (node)->size; } @@ -445,27 +466,26 @@ lto_balanced_map (int n_lto_partitions) get better about minimizing the function bounday, but until that things works smoother if we order in source order. 
*/ qsort (order, n_nodes, sizeof (struct cgraph_node *), node_cmp); + noreorder.qsort (node_cmp); if (symtab->dump_file) - for(i = 0; i < n_nodes; i++) - fprintf (symtab->dump_file, "Balanced map symbol order:%s:%u\n", - order[i]->name (), order[i]->tp_first_run); - - if (!flag_toplevel_reorder) { - FOR_EACH_VARIABLE (vnode) - if (vnode->get_partitioning_class () == SYMBOL_PARTITION) - n_varpool_nodes++; - varpool_order = XNEWVEC (varpool_node *, n_varpool_nodes); - - n_varpool_nodes = 0; - FOR_EACH_VARIABLE (vnode) - if (vnode->get_partitioning_class () == SYMBOL_PARTITION) - varpool_order[n_varpool_nodes++] = vnode; - qsort (varpool_order, n_varpool_nodes, sizeof (varpool_node *), - varpool_node_cmp); + for(i = 0; i < n_nodes; i++) + fprintf (symtab->dump_file, "Balanced map symbol order:%s:%u\n", + order[i]->name (), order[i]->tp_first_run); + for(i = 0; i < (int)noreorder.length(); i++) + fprintf (symtab->dump_file, "Balanced map symbol no_reorder:%s:%u\n", + noreorder[i]->name (), noreorder[i]->tp_first_run); } + /* Collect all variables that should not be reordered. */ + FOR_EACH_VARIABLE (vnode) + if (vnode->get_partitioning_class () == SYMBOL_PARTITION + && (!flag_toplevel_reorder || vnode->no_reorder)) + varpool_order.safe_push (vnode); + n_varpool_nodes = varpool_order.length (); + varpool_order.qsort (varpool_node_cmp); + /* Compute partition size and create the first partition. 
*/ partition_size = total_size / n_lto_partitions; if (partition_size < PARAM_VALUE (MIN_PARTITION_SIZE)) @@ -476,6 +496,8 @@ lto_balanced_map (int n_lto_partitions) fprintf (symtab->dump_file, "Total unit size: %i, partition size: %i\n", total_size, partition_size); + auto_vec<symtab_node *> next_nodes; + for (i = 0; i < n_nodes; i++) { if (symbol_partitioned_p (order[i])) @@ -483,14 +505,19 @@ lto_balanced_map (int n_lto_partitions) current_order = order[i]->order; - if (!flag_toplevel_reorder) - while (varpool_pos < n_varpool_nodes - && varpool_order[varpool_pos]->order < current_order) - { - if (!symbol_partitioned_p (varpool_order[varpool_pos])) - add_symbol_to_partition (partition, varpool_order[varpool_pos]); - varpool_pos++; - } + /* Output noreorder and varpool in program order first. */ + next_nodes.truncate (0); + while (varpool_pos < n_varpool_nodes + && varpool_order[varpool_pos]->order < current_order) + next_nodes.safe_push (varpool_order[varpool_pos++]); + while (noreorder_pos < (int)noreorder.length () + && noreorder[noreorder_pos]->order < current_order) + { + if (!noreorder[noreorder_pos]->alias) + total_size -= inline_summary (noreorder[noreorder_pos])->size; + next_nodes.safe_push (noreorder[noreorder_pos++]); + } + add_sorted_nodes (next_nodes, partition); add_symbol_to_partition (partition, order[i]); if (!order[i]->alias) @@ -580,6 +607,7 @@ lto_balanced_map (int n_lto_partitions) if (!vnode->definition) continue; if (!symbol_partitioned_p (vnode) && flag_toplevel_reorder + && !vnode->no_reorder && vnode->get_partitioning_class () == SYMBOL_PARTITION) add_symbol_to_partition (partition, vnode); index = lto_symtab_encoder_lookup (partition->encoder, @@ -616,6 +644,7 @@ lto_balanced_map (int n_lto_partitions) to be removed. Coupling with objects they refer to only helps to reduce number of symbols promoted to hidden. 
*/ if (!symbol_partitioned_p (vnode) && flag_toplevel_reorder + && !vnode->no_reorder && !vnode->can_remove_if_no_refs_p () && vnode->get_partitioning_class () == SYMBOL_PARTITION) add_symbol_to_partition (partition, vnode); @@ -705,24 +734,25 @@ lto_balanced_map (int n_lto_partitions) } } + next_nodes.truncate (0); + /* Varables that are not reachable from the code go into last partition. */ if (flag_toplevel_reorder) { FOR_EACH_VARIABLE (vnode) if (vnode->get_partitioning_class () == SYMBOL_PARTITION - && !symbol_partitioned_p (vnode)) - add_symbol_to_partition (partition, vnode); - } - else - { - while (varpool_pos < n_varpool_nodes) - { - if (!symbol_partitioned_p (varpool_order[varpool_pos])) - add_symbol_to_partition (partition, varpool_order[varpool_pos]); - varpool_pos++; - } - free (varpool_order); + && !symbol_partitioned_p (vnode) + && !vnode->no_reorder) + next_nodes.safe_push (vnode); } + + /* Output remaining ordered symbols. */ + while (varpool_pos < n_varpool_nodes) + next_nodes.safe_push (varpool_order[varpool_pos++]); + while (noreorder_pos < (int)noreorder.length ()) + next_nodes.safe_push (noreorder[noreorder_pos++]); + add_sorted_nodes (next_nodes, partition); + free (order); } diff --git a/gcc/symtab.c b/gcc/symtab.c index 792b3b5..ff1bdf7 100644 --- a/gcc/symtab.c +++ b/gcc/symtab.c @@ -831,6 +831,8 @@ symtab_node::dump_base (FILE *f) fprintf (f, " forced_by_abi"); if (externally_visible) fprintf (f, " externally_visible"); + if (no_reorder) + fprintf (f, " no_reorder"); if (resolution != LDPR_UNKNOWN) fprintf (f, " %s", ld_plugin_symbol_resolution_names[(int)resolution]); diff --git a/gcc/trans-mem.c b/gcc/trans-mem.c index 8285796..94d896d 100644 --- a/gcc/trans-mem.c +++ b/gcc/trans-mem.c @@ -4849,6 +4849,7 @@ ipa_tm_create_version_alias (struct cgraph_node *node, void *data) new_node = cgraph_node::create_same_body_alias (new_decl, info->new_decl); new_node->tm_clone = true; new_node->externally_visible = 
info->old_node->externally_visible; + new_node->no_reorder = info->old_node->no_reorder; /* ?? Do not traverse aliases here. */ get_cg_data (&node, false)->clone = new_node; diff --git a/gcc/varpool.c b/gcc/varpool.c index 14ef089..8001c93 100644 --- a/gcc/varpool.c +++ b/gcc/varpool.c @@ -449,6 +449,8 @@ varpool_node::add (tree decl) symtab->call_varpool_insertion_hooks (node); if (node->externally_visible_p ()) node->externally_visible = true; + if (lookup_attribute ("no_reorder", decl)) + node->no_reorder = 1; } /* Return variable availability. See cgraph.h for description of individual @@ -640,7 +642,7 @@ symbol_table::remove_unreferenced_decls (void) for (node = first_defined_variable (); node; node = next) { next = next_defined_variable (node); - if (!node->aux) + if (!node->aux && !node->no_reorder) { if (dump_file) fprintf (dump_file, " %s", node->asm_name ()); @@ -687,11 +689,22 @@ symbol_table::output_variables (void) timevar_push (TV_VAROUT); FOR_EACH_DEFINED_VARIABLE (node) - node->finalize_named_section_flags (); + { + /* Handled in output_in_order. */ + if (node->no_reorder) + continue; + + node->finalize_named_section_flags (); + } FOR_EACH_DEFINED_VARIABLE (node) - if (node->assemble_decl ()) - changed = true; + { + /* Handled in output_in_order. */ + if (node->no_reorder) + continue; + if (node->assemble_decl ()) + changed = true; + } timevar_pop (TV_VAROUT); return changed; } -- 2.1.0