Hi.

I'm sending v3 of the patch, where I changed:
- function.cold sections are properly put into .text.unlikely and not
  into a .text.sorted.XYZ section.
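For reference, a minimal way to exercise the pass (the file names below are
only a placeholder example, not part of the patch) would be something like:

  gcc -O2 -flto -fprofile-generate test.c -o test
  ./test
  gcc -O2 -flto -fprofile-use -fprofile-reorder-functions \
      -freorder-functions-algorithm=call-chain-clustering test.c -o test

The pass is gated on flag_profile_reorder_functions, flag_profile_use and
flag_wpa, so non-LTO and non-PGO builds are unaffected.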
I've just finished measurements and I still see the original speed-up for
tramp3d:

Total runs: 10, before: 13.92, after: 13.82, cmp: 99.219%

Thoughts?
Martin
From f3fd85c29a4a2746555294bd30e3c31129030074 Mon Sep 17 00:00:00 2001
From: Martin Liska <mli...@suse.cz>
Date: Thu, 5 Sep 2019 13:32:41 +0200
Subject: [PATCH] Add new ipa-reorder pass.

gcc/ChangeLog:

2019-11-25  Martin Liska  <mli...@suse.cz>

        * Makefile.in: Add ipa-reorder.o.
        * cgraph.c (cgraph_node::dump): Dump text_sorted_order.
        (cgraph_node_cmp_by_text_sorted): New function that sorts
        functions based on text_sorted_order.
        * cgraph.h (cgraph_node): Add text_sorted_order.
        (cgraph_node_cmp_by_text_sorted): New.
        * cgraphclones.c (cgraph_node::create_clone): Clone also
        text_sorted_order.
        * cgraphunit.c (node_cmp): Remove.
        (expand_all_functions): Use new function
        cgraph_node_cmp_by_text_sorted.
        * common.opt: Add new option reorder_functions_algorithm.
        * flag-types.h (enum reorder_functions_algorithm): New enum.
        * ipa-reorder.c: New file.
        * lto-cgraph.c (lto_output_node): Stream in and out
        text_sorted_order.
        (input_node): Likewise.
        * passes.def: Add pass_ipa_reorder.
        * timevar.def (TV_IPA_REORDER): New.
        * tree-pass.h (make_pass_ipa_reorder): New.
        * varasm.c (default_function_section): Assign text.sorted.X
        section.

gcc/lto/ChangeLog:

2019-11-25  Martin Liska  <mli...@suse.cz>

        * lto-partition.c (node_cmp): Remove.
        (lto_balanced_map): Use new cgraph_node_cmp_by_text_sorted.
---
 gcc/Makefile.in         |   1 +
 gcc/cgraph.c            |  20 +++
 gcc/cgraph.h            |   3 +
 gcc/cgraphclones.c      |   1 +
 gcc/cgraphunit.c        |  21 +--
 gcc/common.opt          |  14 ++
 gcc/flag-types.h        |   8 +
 gcc/ipa-reorder.c       | 383 ++++++++++++++++++++++++++++++++++++++++
 gcc/lto-cgraph.c        |   2 +
 gcc/lto/lto-partition.c |  33 +---
 gcc/passes.def          |   1 +
 gcc/timevar.def         |   1 +
 gcc/tree-pass.h         |   1 +
 gcc/varasm.c            |   9 +
 14 files changed, 448 insertions(+), 50 deletions(-)
 create mode 100644 gcc/ipa-reorder.c

diff --git a/gcc/Makefile.in b/gcc/Makefile.in
index 7d3c13230e4..163d47c47f1 100644
--- a/gcc/Makefile.in
+++ b/gcc/Makefile.in
@@ -1369,6 +1369,7 @@ OBJS = \
 	init-regs.o \
 	internal-fn.o \
 	ipa-cp.o \
+	ipa-reorder.o \
 	ipa-sra.o \
 	ipa-devirt.o \
 	ipa-fnsummary.o \
diff --git a/gcc/cgraph.c b/gcc/cgraph.c
index 180d21e4796..9c09d63e4ee 100644
--- a/gcc/cgraph.c
+++ b/gcc/cgraph.c
@@ -1951,6 +1951,8 @@ cgraph_node::dump (FILE *f)
     }
   if (tp_first_run > 0)
     fprintf (f, " first_run:%i", tp_first_run);
+  if (text_sorted_order > 0)
+    fprintf (f, " text_sorted_order:%i", text_sorted_order);
   if (origin)
     fprintf (f, " nested in:%s", origin->asm_name ());
   if (gimple_has_body_p (decl))
@@ -3718,6 +3720,24 @@ cgraph_edge::possibly_call_in_translation_unit_p (void)
   return node->get_availability () >= AVAIL_INTERPOSABLE;
 }
 
+/* Sort cgraph_nodes by text_sorted_order if available, or by order.  */
+
+int
+cgraph_node_cmp_by_text_sorted (const void *pa, const void *pb)
+{
+  const cgraph_node *a = *(const cgraph_node * const *) pa;
+  const cgraph_node *b = *(const cgraph_node * const *) pb;
+
+  /* Functions with text_sorted_order should come before those
+     without it.  */
+  if (a->text_sorted_order == 0 || b->text_sorted_order == 0)
+    return a->text_sorted_order - b->text_sorted_order;
+
+  return a->text_sorted_order != b->text_sorted_order
+    ? b->text_sorted_order - a->text_sorted_order
+    : b->order - a->order;
+}
+
 /* A stashed copy of "symtab" for use by selftest::symbol_table_test.
    This needs to be a global so that it can be a GC root, and thus
    prevent the stashed copy from being garbage-collected if the GC runs
diff --git a/gcc/cgraph.h b/gcc/cgraph.h
index 0d2442c997c..039819db358 100644
--- a/gcc/cgraph.h
+++ b/gcc/cgraph.h
@@ -1435,6 +1435,8 @@ struct GTY((tag ("SYMTAB_FUNCTION"))) cgraph_node : public symtab_node
   unsigned int profile_id;
   /* Time profiler: first run of function.  */
   int tp_first_run;
+  /* Order in .text.sorted.* section.  */
+  int text_sorted_order;
 
   /* Set when decl is an abstract function pointed to by the
      ABSTRACT_DECL_ORIGIN of a reachable function.  */
@@ -2451,6 +2453,7 @@ bool cgraph_function_possibly_inlined_p (tree);
 const char* cgraph_inline_failed_string (cgraph_inline_failed_t);
 cgraph_inline_failed_type_t cgraph_inline_failed_type (cgraph_inline_failed_t);
+int cgraph_node_cmp_by_text_sorted (const void *pa, const void *pb);
 
 /* In cgraphunit.c  */
 void cgraphunit_c_finalize (void);
diff --git a/gcc/cgraphclones.c b/gcc/cgraphclones.c
index a79491e0b88..f624cbee185 100644
--- a/gcc/cgraphclones.c
+++ b/gcc/cgraphclones.c
@@ -372,6 +372,7 @@ cgraph_node::create_clone (tree new_decl, profile_count prof_count,
   new_node->rtl = rtl;
   new_node->frequency = frequency;
   new_node->tp_first_run = tp_first_run;
+  new_node->text_sorted_order = text_sorted_order;
   new_node->tm_clone = tm_clone;
   new_node->icf_merged = icf_merged;
   new_node->merged_comdat = merged_comdat;
diff --git a/gcc/cgraphunit.c b/gcc/cgraphunit.c
index aa26160bf3f..bacf01ccb22 100644
--- a/gcc/cgraphunit.c
+++ b/gcc/cgraphunit.c
@@ -2267,24 +2267,6 @@ cgraph_node::expand (void)
   remove_all_references ();
 }
 
-/* Node comparator that is responsible for the order that corresponds
-   to time when a function was launched for the first time.  */
-
-static int
-node_cmp (const void *pa, const void *pb)
-{
-  const cgraph_node *a = *(const cgraph_node * const *) pa;
-  const cgraph_node *b = *(const cgraph_node * const *) pb;
-
-  /* Functions with time profile must be before these without profile.  */
-  if (!a->tp_first_run || !b->tp_first_run)
-    return a->tp_first_run - b->tp_first_run;
-
-  return a->tp_first_run != b->tp_first_run
-    ? b->tp_first_run - a->tp_first_run
-    : b->order - a->order;
-}
-
 /* Expand all functions that must be output.
 
    Attempt to topologically sort the nodes so function is output when
@@ -2315,7 +2297,8 @@ expand_all_functions (void)
 	order[new_order_pos++] = order[i];
 
   if (flag_profile_reorder_functions)
-    qsort (order, new_order_pos, sizeof (cgraph_node *), node_cmp);
+    qsort (order, new_order_pos, sizeof (cgraph_node *),
+           cgraph_node_cmp_by_text_sorted);
 
   for (i = new_order_pos - 1; i >= 0; i--)
     {
diff --git a/gcc/common.opt b/gcc/common.opt
index 404b6aac298..c2ac1f1f29b 100644
--- a/gcc/common.opt
+++ b/gcc/common.opt
@@ -2276,6 +2276,20 @@ freorder-functions
 Common Report Var(flag_reorder_functions) Optimization
 Reorder functions to improve code placement.
 
+freorder-functions-algorithm=
+Common Joined RejectNegative Enum(reorder_functions_algorithm) Var(flag_reorder_functions_algorithm) Init(REORDER_FUNCTIONS_ALGORITHM_CALL_CHAIN_CLUSTERING) Optimization
+-freorder-functions-algorithm=[first-run|call-chain-clustering] Set the used function reordering algorithm.
+
+Enum
+Name(reorder_functions_algorithm) Type(enum reorder_functions_algorithm) UnknownError(unknown function reordering algorithm %qs)
+
+EnumValue
+Enum(reorder_functions_algorithm) String(first-run) Value(REORDER_FUNCTIONS_ALGORITHM_FIRST_RUN)
+
+EnumValue
+Enum(reorder_functions_algorithm) String(call-chain-clustering) Value(REORDER_FUNCTIONS_ALGORITHM_CALL_CHAIN_CLUSTERING)
+
+
 frerun-cse-after-loop
 Common Report Var(flag_rerun_cse_after_loop) Optimization
 Add a common subexpression elimination pass after loop optimizations.
diff --git a/gcc/flag-types.h b/gcc/flag-types.h
index 0c23aadefed..15901280383 100644
--- a/gcc/flag-types.h
+++ b/gcc/flag-types.h
@@ -138,6 +138,14 @@ enum reorder_blocks_algorithm
   REORDER_BLOCKS_ALGORITHM_STC
 };
 
+/* The algorithm used for function reordering.  */
+enum reorder_functions_algorithm
+{
+  REORDER_FUNCTIONS_ALGORITHM_FIRST_RUN,
+  REORDER_FUNCTIONS_ALGORITHM_CALL_CHAIN_CLUSTERING
+};
+
+
 /* The algorithm used for the integrated register allocator (IRA).  */
 enum ira_algorithm
 {
diff --git a/gcc/ipa-reorder.c b/gcc/ipa-reorder.c
new file mode 100644
index 00000000000..36eb5d760b3
--- /dev/null
+++ b/gcc/ipa-reorder.c
@@ -0,0 +1,383 @@
+/* Reorder functions based on profile.
+   Copyright (C) 2019 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3.  If not see
+<http://www.gnu.org/licenses/>.  */
+
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "tm.h"
+#include "tree.h"
+#include "tree-pass.h"
+#include "cgraph.h"
+#include "alloc-pool.h"
+#include "symbol-summary.h"
+#include "tree-vrp.h"
+#include "alloc-pool.h"
+#include "ipa-prop.h"
+#include "ipa-fnsummary.h"
+#include "fibonacci_heap.h"
+#include <limits>
+
+using namespace std;
+
+namespace {
+
+#define C3_CLUSTER_THRESHOLD 1024
+
+struct cluster_edge;
+
+/* A cluster is a set of functions considered by the C3 algorithm.  */
+
+struct cluster
+{
+  cluster (cgraph_node *node, int size, sreal time):
+    m_functions (), m_callers (), m_size (size), m_time (time)
+  {
+    m_functions.safe_push (node);
+  }
+
+  vec<cgraph_node *> m_functions;
+  hash_map <cluster *, cluster_edge *> m_callers;
+  int m_size;
+  sreal m_time;
+};
+
+/* A cluster edge is an oriented edge between two clusters.  */
+
+struct cluster_edge
+{
+  cluster_edge (cluster *caller, cluster *callee, uint64_t count):
+    m_caller (caller), m_callee (callee), m_count (count), m_heap_node (NULL)
+  {}
+
+  uint64_t inverted_count ()
+  {
+    return numeric_limits<uint64_t>::max () - m_count;
+  }
+
+  cluster *m_caller;
+  cluster *m_callee;
+  uint64_t m_count;
+  fibonacci_node<uint64_t, cluster_edge> *m_heap_node;
+};
+
+/* Sort functions based on the first execution of the function.  */
+
+static void
+sort_functions_by_first_run (void)
+{
+  cgraph_node *node;
+
+  FOR_EACH_DEFINED_FUNCTION (node)
+    if (node->tp_first_run && !node->alias)
+      node->text_sorted_order = node->tp_first_run;
+}
+
+/* Compare clusters by density once they are established.  */
+
+static int
+cluster_cmp (const void *a_p, const void *b_p)
+{
+  const cluster *a = *(cluster * const *) a_p;
+  const cluster *b = *(cluster * const *) b_p;
+
+  unsigned fncounta = a->m_functions.length ();
+  unsigned fncountb = b->m_functions.length ();
+  if (fncounta <= 1 || fncountb <= 1)
+    return fncountb - fncounta;
+
+  sreal r = b->m_time * a->m_size - a->m_time * b->m_size;
+  return (r < 0) ? -1 : ((r > 0) ? 1 : 0);
+}
+
+/* Visit callgraph edge CS until we reach a real cgraph_node (not a clone).
+   Record such an edge in EDGES or traverse recursively.  */
+
+static void
+visit_all_edges_for_caller (auto_vec<cluster_edge *> *edges,
+                            cgraph_node *node, cgraph_edge *cs)
+{
+  if (cs->inline_failed)
+    {
+      gcov_type count;
+      profile_count pcount = cs->count.ipa ();
+      /* A real call edge.  */
+      if (!cs->callee->alias
+          && cs->callee->definition
+          && pcount.initialized_p ()
+          && (count = pcount.to_gcov_type ()) > 0)
+        {
+          cluster *caller = (cluster *) node->aux;
+          cluster *callee = (cluster *) cs->callee->aux;
+          cluster_edge **cedge = callee->m_callers.get (caller);
+
+          if (dump_file && (dump_flags & TDF_DETAILS))
+            fprintf (dump_file, "Adding edge:%s->%s:%" PRIu64 "\n",
+                     node->dump_name (), cs->callee->dump_name (), count);
+
+          if (cedge != NULL)
+            (*cedge)->m_count += count;
+          else
+            {
+              cluster_edge *cedge = new cluster_edge (caller, callee, count);
+              edges->safe_push (cedge);
+              callee->m_callers.put (caller, cedge);
+            }
+        }
+    }
+  else
+    {
+      cgraph_node *clone = cs->callee;
+      for (cgraph_edge *cs = clone->callees; cs; cs = cs->next_callee)
+        visit_all_edges_for_caller (edges, node, cs);
+    }
+}
+
+/* Sort functions based on call chain clustering, the algorithm described
+   in the following article:
+   https://research.fb.com/wp-content/uploads/2017/01/cgo2017-hfsort-final1.pdf
+   .  */
+
+static void
+sort_functions_by_c3 (void)
+{
+  cgraph_node *node;
+  auto_vec<cluster *> clusters;
+
+  /* Create a cluster for each function.  */
+  FOR_EACH_DEFINED_FUNCTION (node)
+    if (!node->alias
+        && !node->inlined_to)
+      {
+        if (dump_file && (dump_flags & TDF_DETAILS))
+          fprintf (dump_file, "Adding node:%s\n", node->dump_name ());
+
+        ipa_size_summary *size_summary = ipa_size_summaries->get (node);
+        ipa_fn_summary *fn_summary = ipa_fn_summaries->get (node);
+        cluster *c = new cluster (node, size_summary->size, fn_summary->time);
+        node->aux = c;
+        clusters.safe_push (c);
+      }
+
+  auto_vec<cluster_edge *> edges;
+
+  /* Insert edges between clusters that have a profile.  */
+  for (unsigned i = 0; i < clusters.length (); i++)
+    {
+      cgraph_node *node = clusters[i]->m_functions[0];
+      for (cgraph_edge *cs = node->callees; cs; cs = cs->next_callee)
+        visit_all_edges_for_caller (&edges, node, cs);
+    }
+
+  /* Now insert all created edges into a heap.  */
+  fibonacci_heap <uint64_t, cluster_edge> heap (0);
+
+  for (unsigned i = 0; i < clusters.length (); i++)
+    {
+      cluster *c = clusters[i];
+      for (hash_map<cluster *, cluster_edge *>::iterator it
+           = c->m_callers.begin (); it != c->m_callers.end (); ++it)
+        {
+          cluster_edge *cedge = (*it).second;
+          cedge->m_heap_node = heap.insert (cedge->inverted_count (), cedge);
+        }
+    }
+
+  while (!heap.empty ())
+    {
+      cluster_edge *cedge = heap.extract_min ();
+      cluster *caller = cedge->m_caller;
+      cluster *callee = cedge->m_callee;
+      cedge->m_heap_node = NULL;
+
+      if (dump_file && (dump_flags & TDF_DETAILS))
+        {
+          fprintf (dump_file, "Processing cluster edge: %p->%p, count: %"
+                   PRIu64 "\n", (void *) caller, (void *) callee,
+                   cedge->m_count);
+          fprintf (dump_file, " source functions (%u): ", caller->m_size);
+          for (unsigned j = 0; j < caller->m_functions.length (); j++)
+            fprintf (dump_file, "%s ", caller->m_functions[j]->dump_name ());
+          fprintf (dump_file, "\n target functions (%u): ", callee->m_size);
+          for (unsigned j = 0; j < callee->m_functions.length (); j++)
+            fprintf (dump_file, "%s ", callee->m_functions[j]->dump_name ());
+          fprintf (dump_file, "\n");
+        }
+
+      if (caller == callee)
+        continue;
+      if (caller->m_size + callee->m_size <= C3_CLUSTER_THRESHOLD)
+        {
+          if (dump_file && (dump_flags & TDF_DETAILS))
+            fprintf (dump_file, " (clusters merged)\n");
+
+          caller->m_size += callee->m_size;
+          caller->m_time += callee->m_time;
+
+          /* Append all cgraph_nodes from callee to caller.  */
+          for (unsigned i = 0; i < callee->m_functions.length (); i++)
+            caller->m_functions.safe_push (callee->m_functions[i]);
+
+          callee->m_functions.truncate (0);
+
+          /* Iterate all cluster_edges of callee and add them to the caller.  */
+          for (hash_map<cluster *, cluster_edge *>::iterator it
+               = callee->m_callers.begin (); it != callee->m_callers.end ();
+               ++it)
+            {
+              (*it).second->m_callee = caller;
+              cluster_edge **ce = caller->m_callers.get ((*it).first);
+
+              if (ce != NULL)
+                {
+                  (*ce)->m_count += (*it).second->m_count;
+                  if ((*ce)->m_heap_node != NULL)
+                    heap.decrease_key ((*ce)->m_heap_node,
+                                       (*ce)->inverted_count ());
+                }
+              else
+                caller->m_callers.put ((*it).first, (*it).second);
+            }
+        }
+      else if (dump_file && (dump_flags & TDF_DETAILS))
+        fprintf (dump_file, " (clusters too big to be merged)\n");
+    }
+
+  /* Sort the candidate clusters.  */
+  clusters.qsort (cluster_cmp);
+
+  /* Dump clusters.  */
+  if (dump_file)
+    {
+      for (unsigned i = 0; i < clusters.length (); i++)
+        {
+          cluster *c = clusters[i];
+          if (c->m_functions.length () == 0)
+            continue;
+
+          fprintf (dump_file, "\nCluster %d with functions: %d, size: %d,"
+                   " density: %f\n", i, c->m_functions.length (), c->m_size,
+                   (c->m_time / c->m_size).to_double ());
+          fprintf (dump_file, " functions: ");
+          for (unsigned j = 0; j < c->m_functions.length (); j++)
+            fprintf (dump_file, "%s ", c->m_functions[j]->dump_name ());
+          fprintf (dump_file, "\n");
+        }
+      fprintf (dump_file, "\n");
+    }
+
+  /* Assign .text.sorted.* section names.  */
+  int counter = 1;
+  for (unsigned i = 0; i < clusters.length (); i++)
+    {
+      cluster *c = clusters[i];
+      if (c->m_functions.length () <= 1)
+        continue;
+
+      for (unsigned j = 0; j < c->m_functions.length (); j++)
+        {
+          cgraph_node *node = c->m_functions[j];
+
+          if (dump_file)
+            fprintf (dump_file, "setting: %d for %s with size:%d\n",
+                     counter, node->dump_asm_name (),
+                     ipa_size_summaries->get (node)->size);
+          node->text_sorted_order = counter++;
+        }
+    }
+
+  /* Release memory.  */
+  FOR_EACH_DEFINED_FUNCTION (node)
+    if (!node->alias)
+      node->aux = NULL;
+
+  for (unsigned i = 0; i < clusters.length (); i++)
+    delete clusters[i];
+
+  for (unsigned i = 0; i < edges.length (); i++)
+    delete edges[i];
+}
+
+/* The main function for function sorting.  */
+
+static unsigned int
+ipa_reorder (void)
+{
+  switch (flag_reorder_functions_algorithm)
+    {
+    case REORDER_FUNCTIONS_ALGORITHM_CALL_CHAIN_CLUSTERING:
+      sort_functions_by_c3 ();
+      break;
+    case REORDER_FUNCTIONS_ALGORITHM_FIRST_RUN:
+      sort_functions_by_first_run ();
+      break;
+    default:
+      gcc_unreachable ();
+    }
+
+  return 0;
+}
+
+const pass_data pass_data_ipa_reorder =
+{
+  IPA_PASS, /* type */
+  "reorder", /* name */
+  OPTGROUP_NONE, /* optinfo_flags */
+  TV_IPA_REORDER, /* tv_id */
+  0, /* properties_required */
+  0, /* properties_provided */
+  0, /* properties_destroyed */
+  0, /* todo_flags_start */
+  0, /* todo_flags_finish */
+};
+
+class pass_ipa_reorder : public ipa_opt_pass_d
+{
+public:
+  pass_ipa_reorder (gcc::context *ctxt)
+    : ipa_opt_pass_d (pass_data_ipa_reorder, ctxt,
+                      NULL, /* generate_summary */
+                      NULL, /* write_summary */
+                      NULL, /* read_summary */
+                      NULL, /* write_optimization_summary */
+                      NULL, /* read_optimization_summary */
+                      NULL, /* stmt_fixup */
+                      0, /* function_transform_todo_flags_start */
+                      NULL, /* function_transform */
+                      NULL) /* variable_transform */
+  {}
+
+  /* opt_pass methods: */
+  virtual bool gate (function *);
+  virtual unsigned int execute (function *) { return ipa_reorder (); }
+
+}; // class pass_ipa_reorder
+
+bool
+pass_ipa_reorder::gate (function *)
+{
+  return flag_profile_reorder_functions && flag_profile_use && flag_wpa;
+}
+
+} // anon namespace
+
+ipa_opt_pass_d *
+make_pass_ipa_reorder (gcc::context *ctxt)
+{
+  return new pass_ipa_reorder (ctxt);
+}
diff --git a/gcc/lto-cgraph.c b/gcc/lto-cgraph.c
index a4a70e7848c..6ff1ac87ad9 100644
--- a/gcc/lto-cgraph.c
+++ b/gcc/lto-cgraph.c
@@ -502,6 +502,7 @@ lto_output_node (struct lto_simple_output_block *ob, struct cgraph_node *node,
     section = "";
 
   streamer_write_hwi_stream (ob->main_stream, node->tp_first_run);
+  streamer_write_hwi_stream (ob->main_stream, node->text_sorted_order);
 
   bp = bitpack_create (ob->main_stream);
   bp_pack_value (&bp, node->local, 1);
@@ -1274,6 +1275,7 @@ input_node (struct lto_file_decl_data *file_data,
 		    "node with uid %d", node->get_uid ());
 
   node->tp_first_run = streamer_read_uhwi (ib);
+  node->text_sorted_order = streamer_read_uhwi (ib);
 
   bp = streamer_read_bitpack (ib);
 
diff --git a/gcc/lto/lto-partition.c b/gcc/lto/lto-partition.c
index 75cfdaabf42..a798243ea6d 100644
--- a/gcc/lto/lto-partition.c
+++ b/gcc/lto/lto-partition.c
@@ -372,35 +372,6 @@ lto_max_map (void)
     new_partition ("empty");
 }
 
-/* Helper function for qsort; sort nodes by order.  noreorder functions must have
-   been removed earlier.  */
-static int
-node_cmp (const void *pa, const void *pb)
-{
-  const struct cgraph_node *a = *(const struct cgraph_node * const *) pa;
-  const struct cgraph_node *b = *(const struct cgraph_node * const *) pb;
-
-  /* Profile reorder flag enables function reordering based on first execution
-     of a function.  All functions with profile are placed in ascending
-     order at the beginning.  */
-
-  if (flag_profile_reorder_functions)
-    {
-      /* Functions with time profile are sorted in ascending order.  */
-      if (a->tp_first_run && b->tp_first_run)
-	return a->tp_first_run != b->tp_first_run
-	       ? a->tp_first_run - b->tp_first_run
-	       : a->order - b->order;
-
-      /* Functions with time profile are sorted before the functions
-	 that do not have the profile.  */
-      if (a->tp_first_run || b->tp_first_run)
-	return b->tp_first_run - a->tp_first_run;
-    }
-
-  return b->order - a->order;
-}
-
 /* Helper function for qsort; sort nodes by order.  */
 static int
 varpool_node_cmp (const void *pa, const void *pb)
@@ -537,8 +508,8 @@ lto_balanced_map (int n_lto_partitions, int max_partition_size)
      unit tends to import a lot of global trees defined there.  We should
      get better about minimizing the function bounday, but until that
      things works smoother if we order in source order.  */
-  order.qsort (node_cmp);
-  noreorder.qsort (node_cmp);
+  order.qsort (cgraph_node_cmp_by_text_sorted);
+  noreorder.qsort (cgraph_node_cmp_by_text_sorted);
 
   if (dump_file)
     {
diff --git a/gcc/passes.def b/gcc/passes.def
index 798a391bd35..983be6ebe3c 100644
--- a/gcc/passes.def
+++ b/gcc/passes.def
@@ -153,6 +153,7 @@ along with GCC; see the file COPYING3.  If not see
       NEXT_PASS (pass_ipa_fn_summary);
       NEXT_PASS (pass_ipa_inline);
       NEXT_PASS (pass_ipa_pure_const);
+      NEXT_PASS (pass_ipa_reorder);
       NEXT_PASS (pass_ipa_free_fn_summary, false /* small_p */);
       NEXT_PASS (pass_ipa_reference);
       /* This pass needs to be scheduled after any IP code duplication.  */
diff --git a/gcc/timevar.def b/gcc/timevar.def
index 357fcfd65c5..4e575ee406c 100644
--- a/gcc/timevar.def
+++ b/gcc/timevar.def
@@ -76,6 +76,7 @@ DEFTIMEVAR (TV_IPA_INHERITANCE       , "ipa inheritance graph")
 DEFTIMEVAR (TV_IPA_VIRTUAL_CALL      , "ipa virtual call target")
 DEFTIMEVAR (TV_IPA_DEVIRT            , "ipa devirtualization")
 DEFTIMEVAR (TV_IPA_CONSTANT_PROP     , "ipa cp")
+DEFTIMEVAR (TV_IPA_REORDER           , "ipa reorder")
 DEFTIMEVAR (TV_IPA_INLINING          , "ipa inlining heuristics")
 DEFTIMEVAR (TV_IPA_FNSPLIT           , "ipa function splitting")
 DEFTIMEVAR (TV_IPA_COMDATS           , "ipa comdats")
diff --git a/gcc/tree-pass.h b/gcc/tree-pass.h
index a987661530e..d59ceef0dc2 100644
--- a/gcc/tree-pass.h
+++ b/gcc/tree-pass.h
@@ -499,6 +499,7 @@ extern ipa_opt_pass_d *make_pass_ipa_inline (gcc::context *ctxt);
 extern simple_ipa_opt_pass *make_pass_ipa_free_lang_data (gcc::context *ctxt);
 extern simple_ipa_opt_pass *make_pass_ipa_free_fn_summary (gcc::context *ctxt);
 extern ipa_opt_pass_d *make_pass_ipa_cp (gcc::context *ctxt);
+extern ipa_opt_pass_d *make_pass_ipa_reorder (gcc::context *ctxt);
 extern ipa_opt_pass_d *make_pass_ipa_sra (gcc::context *ctxt);
 extern ipa_opt_pass_d *make_pass_ipa_icf (gcc::context *ctxt);
 extern ipa_opt_pass_d *make_pass_ipa_devirt (gcc::context *ctxt);
diff --git a/gcc/varasm.c b/gcc/varasm.c
index 57365ad2e76..b3dab695f90 100644
--- a/gcc/varasm.c
+++ b/gcc/varasm.c
@@ -601,6 +601,15 @@ default_function_section (tree decl, enum node_frequency freq,
   if (exit && freq != NODE_FREQUENCY_UNLIKELY_EXECUTED)
     return get_named_text_section (decl, ".text.exit", NULL);
 
+  cgraph_node *node = cgraph_node::get (decl);
+  if (freq != NODE_FREQUENCY_UNLIKELY_EXECUTED && node->text_sorted_order > 0)
+    {
+      char section_name[64];
+      sprintf (section_name, ".text.sorted.%010d",
+               node->text_sorted_order);
+      return get_named_text_section (decl, section_name, NULL);
+    }
+
   /* Group cold functions together, similarly for hot code.  */
   switch (freq)
     {
-- 
2.24.0