Comments inline below. > > This patch brings the following to the linker function reordering plugin > present in gcc-4_7 > > * Node profiles: Callgraph node profiles from the compiler are passed to the > linker plugin. The node profiles are passed as bb entry count and max count > of the corresponding function. The entry count of the split cold function is > also passed when present.
It is the max bb count of the split cold function, not the entry count. > > * With this patch, the plugin will sorts all sections that are not grouped by > the plugin's callgraph according to their node weights. > > * New flags to the plugin to control the following: > > a) sort_name_prefix=<yes|no>: This is off by default. When this is on, the > plugin groups sections by their section name prefix. > b) use_maxcount=<yes|no>: This is on by default. This uses the max of > max_count and the node weights as the actual node weight of a function. When > this is off, the entry count is used as the node weight. > c) edge_cutoff=<a|p><value>: This can used to prune away cold callgraph > edges from the linker plugin constructed callgraph. It can be expressed as > a percent of the max edge value, ex: p25 for 25% or an absolute value, > ex: a15000. The default is to consider all edges to be in the callgraph. > d) unlikely_segment_cutoff=<value>: This decides the profile threshold below > which functions should be considered unlikely. The default is zero. This is > useful when splitting unlikely functions into a separate ELF segment using the > gold linker. > > Handling split cold functions in the plugin will be done as a follow-up patch. > > Index: function_reordering_plugin/callgraph.c > =================================================================== > --- function_reordering_plugin/callgraph.c (revision 198081) > +++ function_reordering_plugin/callgraph.c (working copy) > @@ -144,14 +144,18 @@ const int NUM_FUNCTIONS = 100; > > /* Reads off the next string from the char stream CONTENTS and updates > READ_LENGTH to the length of the string read. The value of CONTENTS > - is updated to start at the next string. */ > + is updated to start at the next string. UPDATE_CONTENTS tells if > + CONTENTS must be moved past the read string to the next string. To > + peek at the string, UPDATE_CONTENTS can be set to false. */ > > static char * > -get_next_string (char **contents, unsigned int *read_length) > +get_next_string (char **contents, unsigned int *read_length, > + int update_contents) Does the plugin have access to type bool? > { > char *s = *contents; > *read_length = strlen (*contents) + 1; > - *contents += *read_length; > + if (update_contents) > + *contents += *read_length; > return s; > } > > @@ -192,7 +196,7 @@ remove_edge_from_list (Edge * curr_edge) > /* Adds the WEIGHT value to the edge count of CALLER and CALLEE. */ > > static void > -update_edge (Node *n1, Node *n2, unsigned int weight) > +update_edge (Node *n1, Node *n2, unsigned long long weight) > { > void **slot; > Raw_edge re, *r; > @@ -227,6 +231,9 @@ static void > e = *slot; > e->weight += weight; > } > + /* Update the computed_weight, the computed node weight, of n2 which is the > + sum of weights of all incoming edges to n2. */ Comment would read clearer as something like "Update the computed node weight for n2, which is the sum of its incoming edge weights." > + n2->computed_weight += weight; > } > > /* Create a unique node for a function. */ > @@ -288,10 +295,14 @@ void dump_edges (FILE *fp) > it != NULL; > it = it->next) > { > - fprintf (fp,"# %s ---- (%u)---- %s\n", > + fprintf (fp,"# %s (%llu, %llu) ---- (%llu)---- %s (%llu, %llu)\n", > it->first_function->name, > + it->first_function->weight, > + it->first_function->computed_weight, > it->weight, > - it->second_function->name); > + it->second_function->name, > + it->second_function->weight, > + it->second_function->computed_weight); > } > } > > @@ -320,6 +331,8 @@ canonicalize_function_name (void *file_handle, cha > call graph edges with appropriate weights. The section contents have the > following format : > Function <caller_name> > + Weight <entry_count> <max_count> (optional line) > + ColdWeight <max_count> (optional line) > <callee_1> > <edge count between caller and callee_1> > <callee_2> > @@ -332,30 +345,85 @@ parse_callgraph_section_contents (void *file_handl > { > char *contents; > char *caller; > + char *node_weight_s = NULL; > unsigned int read_length = 0, curr_length = 0; > Node *caller_node; > > /* HEADER_LEN is the length of string 'Function '. */ > const int HEADER_LEN = 9; > > - /* First string in contents is 'Function <function-name>'. */ > + /* Prefix of line containing node weights. */ > + const char *NODE_WEIGHT_PREFIX = "Weight "; > + /* Prefix of line containing max bb count of cold split part. */ > + const char *SPLIT_FUNCTION_PREFIX = "ColdWeight "; > + > + /* First string in contents is 'Function <function-name>'. */ > assert (length > 0); > contents = (char*) (section_contents); > - caller = get_next_string (&contents, &read_length); > + caller = get_next_string (&contents, &read_length, 1); > assert (read_length > HEADER_LEN); > caller = canonicalize_function_name (file_handle, caller + HEADER_LEN); > curr_length = read_length; > caller_node = get_function_node (caller); > > + /* Check if next string is a node weight, it has the format "which has the format" > + "Weight <entr_count> <max_count>". We could have callgraph > + sections with or without node weights. */ > + > + /* Peek at the next string. */ > + if (curr_length < length) > + node_weight_s = get_next_string (&contents, &read_length, 0); > + if (node_weight_s != NULL > + && is_prefix_of (NODE_WEIGHT_PREFIX, node_weight_s)) > + { > + char *max_count_s; > + unsigned long long max_count; > + unsigned long long node_weight > + = atoll (node_weight_s + strlen (NODE_WEIGHT_PREFIX)); > + /* Functions like comdats only have one caller_node and can > + have multiple node weights from multiple modules. */ > + caller_node->weight += node_weight; > + > + /* Find the space and get the max_count. */ > + max_count_s = strstr (node_weight_s + strlen (NODE_WEIGHT_PREFIX), " > "); > + if (max_count_s != NULL) > + { > + max_count = atoll (max_count_s + 1); > + /* Functions like comdats only have one caller_node and can > + have multiple node weights from multiple modules. */ > + caller_node->max_count += max_count; > + } > + /* Actually read the node weight here. */ > + get_next_string (&contents, &read_length, 1); > + curr_length += read_length; > + } > + > + /* If the function is split it could have the weight of the split cold > + section here as "SplitWeight <max_count>". */ > + > + /* Peek at the next string. */ > + if (curr_length < length) > + node_weight_s = get_next_string (&contents, &read_length, 0); > + if (node_weight_s != NULL > + && is_prefix_of (SPLIT_FUNCTION_PREFIX, node_weight_s)) > + { > + unsigned long long split_weight > + = atoll (node_weight_s + strlen (SPLIT_FUNCTION_PREFIX)); > + caller_node->split_weight = split_weight; > + /* Actually read the node weight here. */ > + get_next_string (&contents, &read_length, 1); > + curr_length += read_length; > + } > + > while (curr_length < length) > { > /* Read callee, weight tuples. */ > char *callee; > char *weight_str; > - unsigned int weight; > + unsigned long long weight; > Node *callee_node; > > - callee = get_next_string (&contents, &read_length); > + callee = get_next_string (&contents, &read_length, 1); > curr_length += read_length; > > /* We can have multiple header lines; such a situation arises when > @@ -369,8 +437,8 @@ parse_callgraph_section_contents (void *file_handl > callee_node = get_function_node (callee); > > assert (curr_length < length); > - weight_str = get_next_string (&contents, &read_length); > - weight = atoi (weight_str); > + weight_str = get_next_string (&contents, &read_length, 1); > + weight = atoll (weight_str); > curr_length += read_length; > update_edge (caller_node, callee_node, weight); > } > @@ -473,16 +541,56 @@ static void set_node_type (Node *n) > slot = htab_find_with_hash (section_map, name, htab_hash_string (name)); > if (slot != NULL) > { > + /* Update the section instance corresponding to the node instance. > + Assign the weights from the node instance to the section instance. */ > + Section_id *s = (Section_id *)(slot); > + Section_id *s_comdat; > + assert (s->weight == 0 && s->computed_weight == 0 && s->max_count == > 0); > + s->weight = n->weight; > + s->computed_weight = n->computed_weight; > + s->max_count = n->max_count; I thought we had some handling for split cold sections here? Otherwise there are no uses of split_weight. > + > + /* If s is comdat, update all the comdat candidates for weight. */ > + s_comdat = s->comdat_group; > + while (s_comdat != NULL) > + { > + s_comdat->weight = s->weight; > + s_comdat->computed_weight = s->computed_weight; > + s_comdat->max_count = s->max_count; > + s_comdat = s_comdat->comdat_group; > + } > set_as_real_node (n); > num_real_nodes++; > } > } > > +/* Return true if WEIGHT is more than the cutoff, specified either as > + as percent, CUTOFF_P, of MAX or as an absolute value, CUTOFF_A. */ > +int > +edge_over_cutoff (unsigned long long weight, unsigned long long max, > + unsigned int cutoff_p, unsigned long long cutoff_a) > +{ > + /* First check if weight if more than cutoff_p% of max. */ > + if (((double)(max) * (cutoff_p/100.0)) >= (double) weight) > + return 0; > + if (cutoff_a >= weight) > + return 0; > + return 1; > +} > + > +/* Edge cutoff is used to discard callgraph edges that are not above a > + certain threshold. cutoff_p is to express this as a percent of the > + maximum value and cutoff_a is used to express this as an absolute > + value. */ > +extern unsigned int edge_cutoff_p; > +extern unsigned long long edge_cutoff_a; > + > void > find_pettis_hansen_function_layout (FILE *fp) > { > Node *n_it; > Edge *it; > + unsigned int max_edge_value = 0; > > assert (node_chain != NULL); > assert (active_edges != NULL); > @@ -500,8 +608,18 @@ find_pettis_hansen_function_layout (FILE *fp) > set_edge_type (it); > > it = find_max_edge (); > + if (it != NULL) > + max_edge_value = it->weight; > while (it != NULL) > { > + if (!edge_over_cutoff (it->weight, max_edge_value, edge_cutoff_p, > + edge_cutoff_a)) > + { > + if (fp !=NULL) > + fprintf (fp, "Not considering edge with weight %llu and below\n", > + it->weight); > + break; > + } > collapse_edge (it); > it = find_max_edge (); > } > @@ -520,12 +638,17 @@ const char *section_types[] = {".text.hot.", > ".text." }; > > /* For sections that are not in the callgraph, the priority gives the > - order in which the sections should be laid out. Sections are grouped > - according to priority, higher priority (lower number), and then laid > - out in priority order. */ > -const int section_priority[] = {0, 3, 4, 2, 1}; > -const int UNLIKELY_SECTION_INDEX = 2; > + importance of each section type. Sections are grouped according to > + priority, higher priority (lower number). */ > +const int section_priority[] = {0, 3, 4, 1, 2}; > > +/* Order in which the sections must be laid out is given by > + section_position[section_type]. The order in which the section > + types are laid out from address low to high are: .text.unlikely, > + .text.cold, .text.startup, .text., .text.hot followed by the Right now we just use text.unlikely, not text.cold. Assuming we use both in the future, what was the intended ordering between these - intuitively it seems like unlikely should be "warmer" than cold, in which case unlikely should be after text.cold. What do you think? > + sections grouped by the callgraph. */ > +const int section_position[] = {4, 1, 0, 2, 3}; > + > /* Maps the function name corresponding to section SECTION_NAME to the > object handle and the section index. */ > > @@ -628,9 +751,118 @@ write_out_node (Section_id *s, Section_id **sectio > } > } > > -int unlikely_segment_start = 0; > -int unlikely_segment_end = 0; > +/* Find the max of a, b and c. */ > +static unsigned long long > +get_max (unsigned long long a, unsigned long long b, unsigned long long c) > +{ > + unsigned long long max = a; > + if (b > max) > + max = b; > + if (c > max) > + max = c; > + return max; > +} > > +/* This is true if the max count of any bb in a function should be used as > + the node weight rather than the count of the entry bb. */ > +extern int use_max_count; > + > +/* Comparison function for sorting two sections a and b by their > + weight. */ > +static > +int section_weight_compare (const void *a, const void *b) > +{ > + Section_id *s_a = *(Section_id **)a; > + Section_id *s_b = *(Section_id **)b; > + assert (use_max_count <= 1); > + unsigned long long max_sa_weight = get_max (s_a->weight, > s_a->computed_weight, > + s_a->max_count * use_max_count); > + unsigned long long max_sb_weight = get_max (s_b->weight, > s_b->computed_weight, > + s_b->max_count * use_max_count); > + > + if (max_sa_weight < max_sb_weight) > + return -1; > + else if (max_sa_weight == max_sb_weight) > + return 0; > + > + return 1; > +} > + > +/* s is a pointer to a section and the group of sections is linked > + via s->group. The output is the list of sections sorted by their > + node weights (which is the maximum of their profile count, computed > + weights or the max bb count if use_max_count is true). */ > +static Section_id * > +sort_section_group (Section_id *s) > +{ > + Section_id **sort_array; > + Section_id *s_tmp; > + int num_elements = 0; > + int i; > + > + if (s == NULL) > + return s; > + > + s_tmp = s; > + while (s_tmp != NULL) > + { > + num_elements++; > + s_tmp = s_tmp->group; > + } > + > + if (num_elements == 1) > + return s; > + > + XNEWVEC_ALLOC (sort_array, Section_id *, num_elements); > + s_tmp = s; > + for (i = 0; i < num_elements; ++i) > + { > + sort_array[i] = s_tmp; > + s_tmp = s_tmp->group; > + } > + > + for (i = 0; i < num_elements; ++i) > + { > + sort_array[i]->group = NULL; > + } > + > + qsort (sort_array, num_elements, sizeof (Section_id *), > + section_weight_compare); > + > + s_tmp = sort_array[0]; > + for (i = 1; i < num_elements; ++i) > + { > + s_tmp->group = sort_array[i]; > + s_tmp = s_tmp->group; > + } > + s_tmp->group = NULL; > + return sort_array[0]; > +} > + > +/* If sort_name_prefix is true then the sections not touched by the callgraph > + are grouped according to their name prefix. When sort_name_prefix is > zero, > + all the sections are put together and sorted according to their node > + weights. The default value of sort_name_prefix is 0. Even when sections > + are grouped by their prefix, each group is sorted by the node weights. */ > +extern int sort_name_prefix; > +static int section_position_index (int section_type) > +{ > + assert (section_type >= 0 && section_type < NUM_SECTION_TYPES); > + if (!sort_name_prefix) > + return 0; > + else > + return section_position[section_type]; > +} > + > +/* Track where the unlikely sections start and end. This will be needed if > + the unlikely sections need to be split into a separate segment. */ > +int unlikely_segment_start = -1; > +int unlikely_segment_end = -1; > + > +/* This value is used to determine the profile threshold below which the > + section is considered unlikely. The default is zero. */ > +extern unsigned long long unlikely_segment_profile_cutoff; > + > /* Visit each node and print the chain of merged nodes to the file. Update > HANDLES and SHNDX to contain the ordered list of sections. */ > > @@ -642,12 +874,15 @@ get_layout (FILE *fp, void*** handles, > int i = 0; > int position; > void *slot; > + int unlikely_section_index; > + const int CALLGRAPH_POSITION = 5; > > - /* Form NUM_SECTION_TYPES + 1 groups of sections. Index 0 corresponds > + /* Form NUM_SECTION_TYPES + 1 groups of sections. Index 5 corresponds > to the list of sections that correspond to functions in the callgraph. > For other sections, they are grouped by section_type and stored in > - index: (section_type + 1). SECTION_START points to the first > - section in each section group and SECTION_END points to the last. */ > + index: section_position[section_type]). > + SECTION_START points to the first section in each section group and > + SECTION_END points to the last. */ > Section_id *section_start[NUM_SECTION_TYPES + 1]; > Section_id *section_end[NUM_SECTION_TYPES + 1]; > Section_id *s_it; > @@ -675,7 +910,8 @@ get_layout (FILE *fp, void*** handles, > htab_hash_string (n_it->name)); > assert (slot != NULL); > s = (Section_id *)slot; > - write_out_node (s, §ion_start[0], §ion_end[0]); > + write_out_node (s, §ion_start[CALLGRAPH_POSITION], > + §ion_end[CALLGRAPH_POSITION]); > > if (fp) > fprintf (fp, "# Callgraph group : %s", n_it->name); > @@ -689,8 +925,8 @@ get_layout (FILE *fp, void*** handles, > htab_hash_string (node->name)); > assert (slot != NULL); > s = (Section_id *)slot; > - write_out_node (s, §ion_start[0], > - §ion_end[0]); > + write_out_node (s, §ion_start[CALLGRAPH_POSITION], > + §ion_end[CALLGRAPH_POSITION]); > if (fp) > fprintf (fp, " %s", node->name); > } > @@ -707,30 +943,87 @@ get_layout (FILE *fp, void*** handles, > while (s_it) > { > if (!s_it->processed) > - write_out_node (s_it, §ion_start[s_it->section_type + 1], > - §ion_end[s_it->section_type + 1]); > + { > + int index = section_position_index(s_it->section_type); > + write_out_node (s_it, §ion_start[index], §ion_end[index]); > + } > s_it = s_it->next; > } > > + /* Determine the unlikely section index */ > + unlikely_section_index = -1; > + for (i = 0; i < ARRAY_SIZE (section_types); ++i) > + if (strcmp (".text.unlikely.", section_types[i]) == 0) > + break; > > + assert (i < ARRAY_SIZE (section_types)); > + unlikely_section_index = section_position_index(i); > + > position = 0; > for (i = 0; i < NUM_SECTION_TYPES + 1; ++i) > { > s_it = section_start[i]; > - if (i == UNLIKELY_SECTION_INDEX + 1) > - unlikely_segment_start = position; > - while (s_it) > + > + if (s_it == NULL) > + continue; > + > + /* Sort all section groups by weight except the callgraph group. */ What prevents us from doing the below sorting on the callgraph group? In that case is i==NUM_SECTION_TYPES? Can there be a clearer way of marking that? > + if (i < NUM_SECTION_TYPES) > + s_it = sort_section_group (s_it); > + > + /* Start the unlikely segment if necessary. */ > + assert (use_max_count <= 1); > + if (i == unlikely_section_index > + && (get_max (s_it->weight, s_it->computed_weight, > + s_it->max_count * use_max_count) > + <= unlikely_segment_profile_cutoff)) > + { > + assert (unlikely_segment_start == -1); > + unlikely_segment_start = position; > + if (fp != NULL) > + fprintf (fp, "=== Unlikely sections start ===\n"); > + } > + > + do > { > assert (position < num_sections); > (*handles)[position] = s_it->handle; > (*shndx)[position] = s_it->shndx; > + > + /* Check if this section will end the unlikely segment. */ > + if (i == unlikely_section_index > + && unlikely_segment_start >= 0 > + && unlikely_segment_start != position > + && unlikely_segment_end == -1 > + && (get_max (s_it->weight, s_it->computed_weight, > + s_it->max_count * use_max_count) > + > unlikely_segment_profile_cutoff)) > + { > + unlikely_segment_end = position - 1; > + if (fp != NULL) > + fprintf (fp, "=== Unlikely sections end ===\n"); > + } > + > position++; > if (fp != NULL) > - fprintf (fp, "%s\n", s_it->full_name); > + { > + fprintf (fp, "%s entry count = %llu computed = %llu " > + "max count = %llu\n", s_it->full_name, s_it->weight, > + s_it->computed_weight, s_it->max_count); > + } > s_it = s_it->group; > } > - if (i == UNLIKELY_SECTION_INDEX + 1) > - unlikely_segment_end = position; > + while (s_it); > + > + /* End the unlikely segment if it has not been done already. */ > + if (i == unlikely_section_index > + && unlikely_segment_start != -1 > + && unlikely_segment_end == -1) > + { > + unlikely_segment_end = position - 1; > + if (fp != NULL) > + fprintf (fp, "=== Unlikely sections end ===\n"); > + } > } > return position; > } > Index: function_reordering_plugin/callgraph.h > =================================================================== > --- function_reordering_plugin/callgraph.h (revision 198081) > +++ function_reordering_plugin/callgraph.h (working copy) > @@ -61,6 +61,15 @@ typedef struct node_d > { > unsigned int id; > char *name; > + /* Node weight, execution count of entry bb. */ > + unsigned long long weight; > + /* Weight computed by adding weights of incoming edges to > + this node. */ > + unsigned long long computed_weight; > + /* Max count of any bb executed. */ > + unsigned long long max_count; > + /* Stores the max count of any bb in the split cold section. */ > + unsigned long long split_weight; > /* Chain all the Nodes created. */ > struct node_d *next; > /* Pointer to the next node in the chain of merged nodes. */ > @@ -81,6 +90,10 @@ make_node (unsigned int id, char *name) > XNEW_ALLOC (node, Node); > node->id = id; > node->name = name; > + node->weight = 0; > + node->computed_weight = 0; > + node->max_count = 0; > + node->split_weight = 0; > node->is_real_node = 0; > node->next = NULL; > node->edge_list = NULL; > @@ -133,7 +146,7 @@ struct edge_d > { > Node *first_function; > Node *second_function; > - unsigned int weight; > + unsigned long long weight; > Edge_type type; > /* 1 if the nodes corresponding to this edge have been merged. */ > unsigned int is_merged; > @@ -143,7 +156,7 @@ struct edge_d > }; > > inline static Edge * > -make_edge (Node *first, Node *second, unsigned int weight) > +make_edge (Node *first, Node *second, unsigned long long weight) > { > Edge *edge; > XNEW_ALLOC (edge, Edge); > @@ -205,6 +218,12 @@ typedef struct section_id_ > char *full_name; > void *handle; > int shndx; > + /* Corresponds to node weight. */ > + unsigned long long weight; > + /* Corresponds to node's computed weight. */ > + unsigned long long computed_weight; > + /* Max count of bb executed in this function. */ > + unsigned long long max_count; > /* Type of prefix in section name. */ > int section_type; > /* Pointer to the next section in the same comdat_group. */ > @@ -213,6 +232,10 @@ typedef struct section_id_ > struct section_id_ *next; > /* Used for grouping sections. */ > struct section_id_ *group; > + /* Pointer to the cold split section if any. If this function > + is comdat hot and kept, pointer to the kept cold split > + section. */ > + struct section_id_ *split_section; > /* Check if this section has been considered for output. */ > char processed; > } Section_id; > @@ -233,6 +256,10 @@ make_section_id (char *name, char *full_name, > s->next = NULL; > s->group = NULL; > s->processed = 0; > + s->weight = 0; > + s->computed_weight = 0; > + s->max_count = 0; > + s->split_section = NULL; > > return s; > } > Index: function_reordering_plugin/function_reordering_plugin.c > =================================================================== > --- function_reordering_plugin/function_reordering_plugin.c (revision 198081) > +++ function_reordering_plugin/function_reordering_plugin.c (working copy) > @@ -97,6 +97,29 @@ static int no_op = 0; > "--plugin-opt,split_segment=yes". */ > static int split_segment = 0; > > +/* If SORT_NAME_PREFIX is true then the sections not touched by the callgraph > + are grouped according to their name prefix. When SORT_NAME_PREFIX is > zero, > + all the sections are put together and sorted according to their node > + weights. The default value of SORT_NAME_PREFIX is 0. Even when sections > + are grouped by their prefix, each group is sorted by the node weights. */ > +int sort_name_prefix = 0; > + > +/* Edge cutoff is used to discard callgraph edges that are not above a > + certain threshold. cutoff_p is to express this as a percent of the > + maximum value and cutoff_a is used to express this as an absolute > + value. The default is to consider all edges. */ > +unsigned int edge_cutoff_p = 0; > +unsigned long long edge_cutoff_a = 0; > + > +/* This is true if the max count of any bb in a function should be used as > + the node weight rather than the count of the entry bb. */ > +int use_max_count = 1; > + > +/* This is used to decide which sections are considered unlikely. If the > + section profile is greater than this value then it is not unlikely > + executed. */ > +unsigned long long unlikely_segment_profile_cutoff = 0; > + > /* Copies new output file name out_file */ > void get_filename (const char *name) > { > @@ -133,6 +156,10 @@ process_option (const char *name) > const char *option_group = "group="; > const char *option_file = "file="; > const char *option_segment = "split_segment="; > + const char *option_edge_cutoff = "edge_cutoff="; > + const char *option_sort_name_prefix = "sort_name_prefix="; > + const char *option_max_count = "use_maxcount="; > + const char *option_unlikely_cutoff = "unlikely_cutoff="; > > /* Check if option is "group=" */ > if (strncmp (name, option_group, strlen (option_group)) == 0) > @@ -164,6 +191,65 @@ process_option (const char *name) > return 0; > } > } > + else if (strncmp (name, option_edge_cutoff, > + strlen (option_edge_cutoff)) == 0) > + { > + const char *a_or_p = name + strlen (option_edge_cutoff); > + if (a_or_p[0] == 'p') > + { > + edge_cutoff_p = atoi (a_or_p + 1); > + return 0; > + } > + else if (a_or_p[0] == 'a') > + { > + edge_cutoff_a = atoll (a_or_p + 1); > + return 0; > + } > + else > + { > + fprintf (stderr, "Wrong format for edge_cutoff, " > + "use edge_cutoff=[p|a]<value>\n"); > + } > + } For the below two, what happens if I pass something other than yes or no? Looks like it will silently use default? Should we warn? > + else if (strncmp (name, option_sort_name_prefix, > + strlen (option_sort_name_prefix)) == 0) > + { > + const char *option_val = name + strlen (option_sort_name_prefix); > + if (strcmp (option_val, "no") == 0) > + { > + sort_name_prefix = 0; > + return 0; > + } > + else if (strcmp (option_val, "yes") == 0) > + { > + sort_name_prefix = 1; > + return 0; > + } > + } > + else if (strncmp (name, option_max_count, > + strlen (option_max_count)) == 0) > + { > + const char *option_val = name + strlen (option_max_count); > + if (strcmp (option_val, "no") == 0) > + { > + use_max_count = 0; > + return 0; > + } > + else if (strcmp (option_val, "yes") == 0) > + { > + use_max_count = 1; > + return 0; > + } > + } > + /* Check if option is unlikely_cutoff. This decides what sections are > + considered unlikely for segment splitting. The default cutoff is 0. */ > + else if (strncmp (name, option_unlikely_cutoff, > + strlen (option_unlikely_cutoff)) == 0) > + { > + const char *option_val = name + strlen (option_unlikely_cutoff); > + unlikely_segment_profile_cutoff = atoll (option_val); What happens if I pass a bad value for this option? > + return 0; > + } > > /* Flag error on unknown plugin option. */ > MSG_ERROR ("Unknown option to function reordering plugin :%s\n", name); > @@ -365,24 +451,27 @@ all_symbols_read_hook (void) > section_list[i].shndx = shndx[i]; > } > > - if (split_segment == 1) > + if (split_segment == 1 > + && unlikely_segment_start >= 0 > + && (unlikely_segment_end >= unlikely_segment_start)) > { > /* Pass the new order of functions to the linker. */ > /* Fix the order of all sections upto the beginning of the > unlikely section. */ > update_section_order (section_list, unlikely_segment_start); > - assert (num_entries >= unlikely_segment_end); > + assert (num_entries > unlikely_segment_end); > /* Fix the order of all sections after the end of the unlikely > section. */ > - update_section_order (section_list, num_entries - > unlikely_segment_end); > + update_section_order (section_list + unlikely_segment_end + 1, > + num_entries - unlikely_segment_end - 1); > /* Map all unlikely code into a new segment. */ > unique_segment_for_sections ( > ".text.unlikely_executed", 0, 0x1000, > section_list + unlikely_segment_start, > - unlikely_segment_end - unlikely_segment_start); > + unlikely_segment_end - unlikely_segment_start + 1); > if (fp != NULL) > fprintf (fp, "Moving %u section(s) to new segment\n", > - unlikely_segment_end - unlikely_segment_start); > + unlikely_segment_end - unlikely_segment_start + 1); > } > else > { > Index: gcc/final.c > =================================================================== > --- gcc/final.c (revision 198081) > +++ gcc/final.c (working copy) > @@ -204,6 +204,9 @@ rtx current_insn_predicate; > /* True if printing into -fdump-final-insns= dump. */ > bool final_insns_dump_p; > > +/* True if the function has a split cold section. */ > +static bool has_cold_section_p; > + > #ifdef HAVE_ATTR_length > static int asm_insn_count (rtx); > #endif > @@ -1934,6 +1937,7 @@ final_scan_insn (rtx insn, FILE *file, int optimiz > targetm.asm_out.function_switched_text_sections (asm_out_file, > current_function_decl, > in_cold_section_p); > + has_cold_section_p = true; > break; > > case NOTE_INSN_BASIC_BLOCK: > @@ -4330,6 +4334,24 @@ dump_cgraph_profiles (void) > } > } > > +/* Iterate through the basic blocks in DECL and get the max count. > + If COLD is true, find the max count of the cold part of the split. */ > +static gcov_type > +get_max_count (tree decl, bool cold) > +{ > + basic_block bb; > + gcov_type max_count = (cgraph_get_node (decl))->count; The above line needs the fix whereby we init max_count to 0 if cold is true. > + > + FOR_EACH_BB (bb) > + { > + if (cold && BB_PARTITION (bb) != BB_COLD_PARTITION) > + continue; > + if (bb->count > max_count) > + max_count = bb->count; > + } > + return max_count; > +} > + > /* Turn the RTL into assembly. */ > static unsigned int > rest_of_handle_final (void) > @@ -4348,6 +4370,8 @@ rest_of_handle_final (void) > gcc_assert (GET_CODE (x) == SYMBOL_REF); > fnname = XSTR (x, 0); > > + has_cold_section_p = false; > + > assemble_start_function (current_function_decl, fnname); > final_start_function (get_insns (), asm_out_file, optimize); > final (get_insns (), asm_out_file, optimize); > @@ -4403,12 +4427,27 @@ rest_of_handle_final (void) > if ((flag_reorder_functions > 1) > && flag_profile_use > && cgraph_get_node (current_function_decl) != NULL > - && (cgraph_get_node (current_function_decl))->callees != NULL) > + && ((cgraph_get_node (current_function_decl))->callees != NULL > + || (cgraph_get_node (current_function_decl))->count > 0)) > { > flags = SECTION_DEBUG | SECTION_EXCLUDE; > asprintf (&profile_fnname, ".gnu.callgraph.text.%s", fnname); > switch_to_section (get_section (profile_fnname, flags, NULL)); > fprintf (asm_out_file, "\t.string \"Function %s\"\n", fnname); > + fprintf (asm_out_file, "\t.string \"Weight " > + HOST_WIDEST_INT_PRINT_DEC > + " " > + HOST_WIDEST_INT_PRINT_DEC > + "\"\n", > + (cgraph_get_node (current_function_decl))->count, > + get_max_count (current_function_decl, false)); > + /* If this function is split into a cold section, record that weight > + here. */ > + if (has_cold_section_p) > + fprintf (asm_out_file, "\t.string \"ColdWeight " > + HOST_WIDEST_INT_PRINT_DEC > + "\"\n", > + get_max_count (current_function_decl, true)); > dump_cgraph_profiles (); > free (profile_fnname); > } > Index: gcc/testsuite/g++.dg/tree-prof/callgraph-profiles.C > =================================================================== > --- gcc/testsuite/g++.dg/tree-prof/callgraph-profiles.C (revision 198081) > +++ gcc/testsuite/g++.dg/tree-prof/callgraph-profiles.C (working copy) > @@ -1,41 +0,0 @@ > -/* Verify if call-graph profile sections are created > - with -freorder-functions=. */ > -/* { dg-require-section-exclude "" } */ > -/* { dg-require-linker-function-reordering-plugin "" } */ > -/* { dg-options "-O2 -freorder-functions=callgraph > -ffunction-sections --save-temps > -Wl,-plugin-opt,file=callgraph-profiles.C.dump > -Wl,-plugin-opt,split_segment=yes" } */ > - > -int > -notcalled () > -{ > - return 0; > -} > - > -int __attribute__ ((noinline)) > -foo () > -{ > - return 1; > -} > - > -int __attribute__ ((noinline)) > -bar () > -{ > - return 0; > -} > - > -int main () > -{ > - int sum; > - for (int i = 0; i< 1000; i++) > - { > - sum = foo () + bar(); > - } > - return sum * bar (); > -} > - > -/* { dg-final-use { scan-assembler "\.gnu\.callgraph\.text\.main" } } */ > -/* { dg-final-use { scan-assembler "\.string \"1000\"" } } */ > -/* { dg-final-use { scan-file callgraph-profiles.C.dump "Callgraph > group : main _Z3barv _Z3foov\n" } } */ > -/* { dg-final-use { scan-file callgraph-profiles.C.dump > "\.text\.*\.main\n.text\.*\._Z3barv\n\.text\.*\._Z3foov\n\.text\.*\._Z9notcalledv" > } } */ > -/* { dg-final-use { scan-file callgraph-profiles.C.dump "Moving 1 > section\\(s\\) to new segment" } } */ > -/* { dg-final-use { cleanup-saved-temps } } */ > -/* { dg-final-use { remove-build-file "callgraph-profiles.C.dump" } } */ > Index: gcc/testsuite/g++.dg/tree-prof/func_reorder_gold_plugin_1.C > =================================================================== > --- gcc/testsuite/g++.dg/tree-prof/func_reorder_gold_plugin_1.C (revision 0) > +++ gcc/testsuite/g++.dg/tree-prof/func_reorder_gold_plugin_1.C (revision 0) > @@ -0,0 +1,45 @@ > +/* Verify if call-graph profile sections are created with > -freorder-functions=. > + Check of edge profiles and node profiles are present in the profile > + sections. Check if the segment splitting API is invoked. */ > +/* { dg-require-section-exclude "" } */ > +/* { dg-require-linker-function-reordering-plugin "" } */ > +/* { dg-options "-O2 -freorder-functions=callgraph > -ffunction-sections --save-temps -Wl,-plugin-opt,file=linker.dump > -Wl,-plugin-opt,split_segment=yes" } */ > + > +int > +notcalled () > +{ > + return 0; > +} > + > +int __attribute__ ((noinline)) > +foo () > +{ > + return 1; > +} > + > +int __attribute__ ((noinline)) > +bar () > +{ > + return 0; > +} > + > +int main () > +{ > + int sum; > + for (int i = 0; i< 1000; i++) > + { > + sum = foo () + bar(); > + } > + return sum * bar (); > +} > + > +/* { dg-final-use { scan-assembler "\.gnu\.callgraph\.text\.main" } } */ > +/* { dg-final-use { scan-assembler "\.string \"1000\"" } } */ > +/* { dg-final-use { scan-assembler "\.string \"Weight 1000 1000\"" } } */ > +/* { dg-final-use { scan-assembler "\.string \"Weight 1001 1001\"" } } */ > +/* { dg-final-use { scan-file linker.dump "Callgraph group : _Z3foov > _Z3barv main\n" } } */ > +/* { dg-final-use { scan-file linker.dump ".text\.*\._Z9notcalledv > entry count = 0 computed = 0 max count = 0" } } */ > +/* { dg-final-use { scan-file linker.dump > "\.text\.*\._Z3foov.*\n\.text\.*\._Z3barv.*\n\.text\.*\.main.*\n" } } > */ > +/* { dg-final-use { scan-file linker.dump "Moving . section\\(s\\) to > new segment" } } */ > +/* { dg-final-use { cleanup-saved-temps } } */ > +/* { dg-final-use { remove-build-file "linker.dump" } } */ > Index: gcc/testsuite/g++.dg/tree-prof/func_reorder_gold_plugin_2.C > =================================================================== > --- gcc/testsuite/g++.dg/tree-prof/func_reorder_gold_plugin_2.C (revision 0) > +++ gcc/testsuite/g++.dg/tree-prof/func_reorder_gold_plugin_2.C (revision 0) > @@ -0,0 +1,25 @@ > +/* Check if the edge_cutoffa option to the function reordering plugin works > as > + expected. */ > +/* { dg-require-section-exclude "" } */ > +/* { dg-require-linker-function-reordering-plugin "" } */ > +/* { dg-options "-O2 -freorder-functions=callgraph > -ffunction-sections -Wl,-plugin-opt,file=linker.dump > -Wl,-plugin-opt,edge_cutoff=a1000" } */ > + > +int __attribute__ ((noinline)) > +foo () > +{ > + return 1; > +} > + > +int main () > +{ > + int sum = 0; > + for (int i = 0; i< 1000; i++) > + { > + sum += foo (); > + } > + return sum - 1000; > +} > + > +/* { dg-final-use { scan-file linker.dump "Not considering edge with > weight 1000 and below" } } */ > +/* { dg-final-use { scan-file-not linker.dump "Callgraph group" } } */ > +/* { dg-final-use { remove-build-file "linker.dump" } } */ > Index: gcc/testsuite/g++.dg/tree-prof/func_reorder_gold_plugin_3.C > =================================================================== > --- gcc/testsuite/g++.dg/tree-prof/func_reorder_gold_plugin_3.C (revision 0) > +++ gcc/testsuite/g++.dg/tree-prof/func_reorder_gold_plugin_3.C (revision 0) > @@ -0,0 +1,25 @@ > +/* Check if the edge_cutoffp option to the function reordering plugin works > as > + expected. */ > +/* { dg-require-section-exclude "" } */ > +/* { dg-require-linker-function-reordering-plugin "" } */ > +/* { dg-options "-O2 -freorder-functions=callgraph > -ffunction-sections -Wl,-plugin-opt,file=linker.dump > -Wl,-plugin-opt,edge_cutoff=p100" } */ > + > +int __attribute__ ((noinline)) > +foo () > +{ > + return 1; > +} > + > +int main () > +{ > + int sum = 0; > + for (int i = 0; i< 1000; i++) > + { > + sum += foo (); > + } > + return sum - 1000; > +} > + > +/* { dg-final-use { scan-file linker.dump "Not considering edge with > weight 1000 and below" } } */ > +/* { dg-final-use { scan-file-not linker.dump "Callgraph group" } } */ > +/* { dg-final-use { remove-build-file "linker.dump" } } */ > Index: gcc/testsuite/g++.dg/tree-prof/func_reorder_gold_plugin_4.C > =================================================================== > --- gcc/testsuite/g++.dg/tree-prof/func_reorder_gold_plugin_4.C (revision 0) > +++ gcc/testsuite/g++.dg/tree-prof/func_reorder_gold_plugin_4.C (revision 0) > @@ -0,0 +1,41 @@ > +/* Check if cutting off callgraph gets all functions laid out only according > to > + function profiles and not prefixes. foo_200 is as hot as the other foo's > but > + has a unlikely section prefix. This should not matter as sort_name_prefix > + is turned off. */ > +/* { dg-require-section-exclude "" } */ > +/* { dg-require-linker-function-reordering-plugin "" } */ > +/* { dg-options "-O2 -freorder-functions=callgraph > -ffunction-sections > -Wl,-plugin-opt,file=linker.dump,-plugin-opt,edge_cutoff=p100,-plugin-opt,sort_name_prefix=no" > } */ > + > +int __attribute__ ((noinline, section(".text.unlikely._Z7foo_200v"))) > +foo_200 () > +{ > + return 1; > +} > + > +int __attribute__ ((noinline)) > +foo_100 () > +{ > + return 1; > +} > + > +int __attribute__ ((noinline)) > +foo_300 () > +{ > + return 1; > +} > +int main () > +{ > + int sum = 0; > + for (int i = 0; i< 200; i++) > + sum += foo_200 (); > + for (int i = 0; i< 100; i++) > + sum += foo_100 (); > + for (int i = 0; i< 300; i++) > + sum += foo_300 (); > + return sum - 600; > +} > + > +/* { dg-final-use { scan-file-not linker.dump "Callgraph group" } } */ > +/* { dg-final-use { scan-file linker.dump ".text.unlikely._Z7foo_200v > entry count = 200 computed = 200 max count = 200" } } */ > +/* { dg-final-use { scan-file linker.dump > "\.text\.*\._Z7foo_100v.*\n\.text\.unlikely\._Z7foo_200v.*\n\.text\.*\._Z7foo_300v.*\n" > } } */ > +/* { dg-final-use { remove-build-file "linker.dump" } } */ > Index: gcc/testsuite/g++.dg/tree-prof/func_reorder_gold_plugin_5.C > =================================================================== > --- gcc/testsuite/g++.dg/tree-prof/func_reorder_gold_plugin_5.C (revision 0) > +++ gcc/testsuite/g++.dg/tree-prof/func_reorder_gold_plugin_5.C (revision 0) > @@ -0,0 +1,41 @@ > +/* Check if cutting off callgraph and using sort_name_prefix gets all > functions laid out > + according to prefixes. foo_200 is almost as hot as the other > foo's but should > + not be grouped with them as it has a different section prefix and > sort_name_prefix is > + turned on. */ > +/* { dg-require-section-exclude "" } */ > +/* { dg-require-linker-function-reordering-plugin "" } */ > +/* { dg-options "-O2 -freorder-functions=callgraph > -ffunction-sections > -Wl,-plugin-opt,file=linker.dump,-plugin-opt,edge_cutoff=p100,-plugin-opt,sort_name_prefix=yes" > } */ > + > +int __attribute__ ((noinline, section(".text.unlikely._Z7foo_200v"))) > +foo_200 () > +{ > + return 1; > +} > + > +int __attribute__ ((noinline)) > +foo_100 () > +{ > + return 1; > +} > + > +int __attribute__ ((noinline)) > +foo_300 () > +{ > + return 1; > +} > +int main () > +{ > + int sum = 0; > + for (int i = 0; i< 200; i++) > + sum += foo_200 (); > + for (int i = 0; i< 100; i++) > + sum += foo_100 (); > + for (int i = 0; i< 300; i++) > + sum += foo_300 (); > + return sum - 600; > +} > + > +/* { dg-final-use { scan-file-not linker.dump "Callgraph group" } } */ > +/* { dg-final-use { scan-file linker.dump ".text.unlikely._Z7foo_200v > entry count = 200 computed = 200 max count = 200" } } */ > +/* { dg-final-use { scan-file linker.dump > "\.text\.unlikely\._Z7foo_200v.*\n\.text\.*\._Z7foo_100v.*\n\.text\.*\._Z7foo_300v.*\n" > } } */ > +/* { dg-final-use { remove-build-file "linker.dump" } } */ > Index: gcc/testsuite/g++.dg/tree-prof/func_reorder_gold_plugin_6.C > =================================================================== > --- gcc/testsuite/g++.dg/tree-prof/func_reorder_gold_plugin_6.C (revision 0) > +++ gcc/testsuite/g++.dg/tree-prof/func_reorder_gold_plugin_6.C (revision 0) > @@ -0,0 +1,53 @@ > +/* Check if use_maxcount works as expected. This makes the node > profile weight to > + be equal to the maximum count of any basic block in a function > rather than the > + entry count. foo_100's maxcount > foo_200's max count */ > +/* { dg-require-section-exclude "" } */ > +/* { dg-require-linker-function-reordering-plugin "" } */ > +/* { dg-options "-O2 -freorder-functions=callgraph > -ffunction-sections -Wl,-plugin-opt,file=linker.dump > -Wl,-plugin-opt,edge_cutoff=p100,-plugin-opt,use_maxcount=yes" } */ > + > + > +int __attribute__ ((noinline)) > +bar (int *i) > +{ > + (*i)--; > + if (*i >= 0) > + return 1; > + return 0; > +} > + > +int __attribute__ ((noinline)) > +foo_100 (int count) > +{ > + int sum = 0; > + while (count > 0) > + { > + sum += bar(&count); > + } > + return sum; > +} > + > +int __attribute__ ((noinline)) > +foo_200 (int count) > +{ > + int sum = 0; > + while (count > 0) > + { > + sum += bar(&count); > + } > + return sum; > +} > + > +int main () > +{ > + int sum = 0; > + for (int i = 0; i< 200; i++) > + sum += foo_200 (100); > + for (int i = 0; i< 100; i++) > + sum += foo_100 (400); > + return sum - 60000; > +} > +/* { dg-final-use { scan-file-not linker.dump "Callgraph group" } } */ > +/* { dg-final-use { scan-file linker.dump "\.text\.*\._Z7foo_100i > entry count = 100 computed = 100 max count = 40000" } } */ > +/* { dg-final-use { scan-file linker.dump "\.text\.*\._Z7foo_200i > entry count = 200 computed = 200 max count = 20000" } } */ > +/* { dg-final-use { scan-file linker.dump > "\.text\.*\._Z7foo_200i.*\n\.text\.*\._Z7foo_100i.*\n\.text\.*\._Z3barPi.*\n" > } } */ > +/* { dg-final-use { remove-build-file "linker.dump" } } */ > Index: gcc/testsuite/g++.dg/tree-prof/func_reorder_gold_plugin_7.C > =================================================================== > --- gcc/testsuite/g++.dg/tree-prof/func_reorder_gold_plugin_7.C (revision 0) > +++ gcc/testsuite/g++.dg/tree-prof/func_reorder_gold_plugin_7.C (revision 0) > @@ -0,0 +1,55 @@ > +/* Check if turning off use_maxcount works as expected. This makes the node > + profile weight to be equal to the entry count of any basic block in a > + function rather than the max count. > + foo_100's maxcount > foo_200's max count but > + foo_100's entry count < foo_200's entry count. */ > +/* { dg-require-section-exclude "" } */ > +/* { dg-require-linker-function-reordering-plugin "" } */ > +/* { dg-options "-O2 -freorder-functions=callgraph > -ffunction-sections -Wl,-plugin-opt,file=linker.dump > -Wl,-plugin-opt,edge_cutoff=p100,-plugin-opt,use_maxcount=no" } */ > + > + > +int __attribute__ ((noinline)) > +bar (int *i) > +{ > + (*i)--; > + if (*i >= 0) > + return 1; > + return 0; > +} > + > +int __attribute__ ((noinline)) > +foo_100 (int count) > +{ > + int sum = 0; > + while (count > 0) > + { > + sum += bar(&count); > + } > + return sum; > +} > + > +int __attribute__ ((noinline)) > +foo_200 (int count) > +{ > + int sum = 0; > + while (count > 0) > + { > + sum += bar(&count); > + } > + return sum; > +} > + > +int main () > +{ > + int sum = 0; > + for (int i = 0; i< 200; i++) > + sum += foo_200 (100); > + for (int i = 0; i< 100; i++) > + sum += foo_100 (400); > + return sum - 60000; > +} > +/* { dg-final-use { scan-file-not linker.dump "Callgraph group" } } */ > +/* { dg-final-use { scan-file linker.dump "\.text\.*\._Z7foo_100i > entry count = 100 computed = 100 max count = 40000" } } */ > +/* { dg-final-use { scan-file linker.dump "\.text\.*\._Z7foo_200i > entry count = 200 computed = 200 max count = 20000" } } */ > +/* { dg-final-use { scan-file linker.dump > "\.text\.*\._Z7foo_100i.*\n\.text\.*\._Z7foo_200i.*\n\.text\.*\._Z3barPi.*\n" > } } */ > +/* { dg-final-use { remove-build-file "linker.dump" } } */ > Index: gcc/testsuite/g++.dg/tree-prof/func_reorder_gold_plugin_8.C > =================================================================== > --- gcc/testsuite/g++.dg/tree-prof/func_reorder_gold_plugin_8.C (revision 0) > +++ gcc/testsuite/g++.dg/tree-prof/func_reorder_gold_plugin_8.C (revision 0) > @@ -0,0 +1,19 @@ > +/* Check if unlikely_cutoff works as expected. Function foo is > unlikely because of the cutoff. */ > +/* { dg-require-section-exclude "" } */ > +/* { dg-require-linker-function-reordering-plugin "" } */ > +/* { dg-options "-O2 -freorder-functions=callgraph > -ffunction-sections -Wl,-plugin-opt,file=linker.dump > -Wl,-plugin-opt,edge_cutoff=p100,-plugin-opt,unlikely_cutoff=1" } */ > + > +int __attribute__ ((noinline,section(".text.hot._Z3foov"))) > +foo () > +{ > + return 0; > +} > + > +int main() > +{ > + return foo (); > +} > + > +/* { dg-final-use { scan-file-not linker.dump "Callgraph group" } } */ > +/* { dg-final-use { scan-file linker.dump "=== Unlikely sections > start ===\n.*\.text\.hot\._Z3foov.* entry count = 1 computed = 1 max > count = 1\n.*=== Unlikely sections end ===" } } */ > +/* { dg-final-use { remove-build-file "linker.dump" } } */ > >> >> Thanks, >> Teresa >> >>> >>> Thanks >>> Sri >> >> >> >> -- >> Teresa Johnson | Software Engineer | tejohn...@google.com | 408-460-2413 > > > > -- > Teresa Johnson | Software Engineer | tejohn...@google.com | 408-460-2413 -- Teresa Johnson | Software Engineer | tejohn...@google.com | 408-460-2413