Hi, this patch makes indirect call profiling work across modules. Unlike LIPO, I am not adding module IDs, since I do not know how to make them stable across multiple uses of the same .o files. Instead I simply assign a unique ID to each possibly indirectly called function in the program. This is done by combining its assembler name, file and line, and gcov filename into a single hash. For GCC this gives no collisions.
The rest of the updates are quite obvious. Currently we have module-local __gcov_indirect_call_callee and __gcov_indirect_call_counters to track the calls. I made them global and defined them in libgcov. __gcov_indirect_call_profiler used to take these two as parameters, and I replaced it with __gcov_indirect_call_profiler_v2, which has those two hard-coded to simplify the call sequence. The only purpose of this patch is to measure cross-module calls and get sane histograms attached to indirect calls. In the third patch of the series I will actually make them used by the LTO ipa-profile pass. Bootstrapped/regtested on x86_64-linux; will commit it shortly. Honza Index: libgcc/libgcov.c =================================================================== --- libgcc/libgcov.c (revision 201539) +++ libgcc/libgcov.c (working copy) @@ -1121,6 +1121,20 @@ __gcov_one_value_profiler (gcov_type *co #ifdef L_gcov_indirect_call_profiler +/* These two variables are used to actually track caller and callee. Keep + them in TLS memory so races are not common (they are written to often). + The variables are set directly by GCC instrumented code, so declaration + here must match one in tree-profile.c */ + +#ifdef HAVE_CC_TLS +__thread +#endif +void * __gcov_indirect_call_callee; +#ifdef HAVE_CC_TLS +__thread +#endif +gcov_type * __gcov_indirect_call_counters; + /* By default, the C++ compiler will use function addresses in the vtable entries. Setting TARGET_VTABLE_USES_DESCRIPTORS to nonzero tells the compiler to use function descriptors instead. The value @@ -1140,19 +1154,43 @@ __gcov_one_value_profiler (gcov_type *co /* Tries to determine the most common value among its inputs. 
*/ void -__gcov_indirect_call_profiler (gcov_type* counter, gcov_type value, - void* cur_func, void* callee_func) +__gcov_indirect_call_profiler_v2 (gcov_type value, void* cur_func) { /* If the C++ virtual tables contain function descriptors then one function may have multiple descriptors and we need to dereference the descriptors to see if they point to the same function. */ - if (cur_func == callee_func - || (VTABLE_USES_DESCRIPTORS && callee_func - && *(void **) cur_func == *(void **) callee_func)) - __gcov_one_value_profiler_body (counter, value); + if (cur_func == __gcov_indirect_call_callee + || (VTABLE_USES_DESCRIPTORS && __gcov_indirect_call_callee + && *(void **) cur_func == *(void **) __gcov_indirect_call_callee)) + __gcov_one_value_profiler_body (__gcov_indirect_call_counters, value); } #endif #ifdef L_gcov_average_profiler /* Increase corresponding COUNTER by VALUE. FIXME: Perhaps we want Index: gcc/value-prof.c =================================================================== --- gcc/value-prof.c (revision 201632) +++ gcc/value-prof.c (working copy) @@ -1173,24 +1173,56 @@ gimple_mod_subtract_transform (gimple_st return true; } -static vec<cgraph_node_ptr> cgraph_node_map - = vNULL; +static pointer_map_t *cgraph_node_map; /* Initialize map from FUNCDEF_NO to CGRAPH_NODE. 
*/ void -init_node_map (void) +init_node_map (bool local) { struct cgraph_node *n; + cgraph_node_map = pointer_map_create (); - if (get_last_funcdef_no ()) - cgraph_node_map.safe_grow_cleared (get_last_funcdef_no ()); - - FOR_EACH_FUNCTION (n) - { - if (DECL_STRUCT_FUNCTION (n->symbol.decl)) - cgraph_node_map[DECL_STRUCT_FUNCTION (n->symbol.decl)->funcdef_no] = n; - } + FOR_EACH_DEFINED_FUNCTION (n) + if (cgraph_function_with_gimple_body_p (n) + && !cgraph_only_called_directly_p (n)) + { + void **val; + if (local) + { + n->profile_id = coverage_compute_profile_id (n); + while ((val = pointer_map_contains (cgraph_node_map, (void *)(size_t)n->profile_id)) || !n->profile_id) + { + if (dump_file) + fprintf (dump_file, "Local profile-id %i conflict with nodes %s/%i %s/%i\n", + n->profile_id, + cgraph_node_name (n), + n->symbol.order, + symtab_node_name (*(symtab_node*)val), + (*(symtab_node *)val)->symbol.order); + n->profile_id = (n->profile_id + 1) & 0x7fffffff; + } + } + else if (!n->profile_id) + { + if (dump_file) + fprintf (dump_file, "Node %s/%i has no profile-id (profile feedback missing?)\n", + cgraph_node_name (n), + n->symbol.order); + continue; + } + else if ((val = pointer_map_contains (cgraph_node_map, (void *)(size_t)n->profile_id))) + { + if (dump_file) + fprintf (dump_file, "Node %s/%i has IP profile-id %i conflict. Giving up.\n", + cgraph_node_name (n), + n->symbol.order, + n->profile_id); + *val = NULL; + continue; + } + *pointer_map_insert (cgraph_node_map, (void *)(size_t)n->profile_id) = (void *)n; + } } /* Delete the CGRAPH_NODE_MAP. 
*/ @@ -1198,27 +1230,20 @@ init_node_map (void) void del_node_map (void) { - cgraph_node_map.release (); + pointer_map_destroy (cgraph_node_map); } /* Return cgraph node for function with pid */ -static inline struct cgraph_node* -find_func_by_funcdef_no (int func_id) +struct cgraph_node* +find_func_by_profile_id (int func_id) { - int max_id = get_last_funcdef_no (); - if (func_id >= max_id || cgraph_node_map[func_id] == NULL) - { - if (flag_profile_correction) - inform (DECL_SOURCE_LOCATION (current_function_decl), - "Inconsistent profile: indirect call target (%d) does not exist", func_id); - else - error ("Inconsistent profile: indirect call target (%d) does not exist", func_id); - - return NULL; - } - - return cgraph_node_map[func_id]; + void **val = pointer_map_contains (cgraph_node_map, + (void *)(size_t)func_id); + if (val) + return (struct cgraph_node *)*val; + else + return NULL; } /* Perform sanity check on the indirect call target. Due to race conditions, @@ -1415,10 +1440,12 @@ gimple_ic_transform (gimple_stmt_iterato val = histogram->hvalue.counters [0]; count = histogram->hvalue.counters [1]; all = histogram->hvalue.counters [2]; - gimple_remove_histogram_value (cfun, stmt, histogram); if (4 * count <= 3 * all) - return false; + { + gimple_remove_histogram_value (cfun, stmt, histogram); + return false; + } bb_all = gimple_bb (stmt)->count; /* The order of CHECK_COUNTER calls is important - @@ -1426,16 +1453,31 @@ gimple_ic_transform (gimple_stmt_iterato and we want to make count <= all <= bb_all. 
*/ if ( check_counter (stmt, "ic", &all, &bb_all, bb_all) || check_counter (stmt, "ic", &count, &all, all)) - return false; + { + gimple_remove_histogram_value (cfun, stmt, histogram); + return false; + } if (all > 0) prob = GCOV_COMPUTE_SCALE (count, all); else prob = 0; - direct_call = find_func_by_funcdef_no ((int)val); + direct_call = find_func_by_profile_id ((int)val); if (direct_call == NULL) - return false; + { + if (val) + { + if (dump_file) + { + fprintf (dump_file, "Indirect call -> direct call from other module"); + print_generic_expr (dump_file, gimple_call_fn (stmt), TDF_SLIM); + fprintf (dump_file, "=> %i (will resolve only with LTO)\n", (int)val); + } + } + return false; + } + gimple_remove_histogram_value (cfun, stmt, histogram); if (!check_ic_target (stmt, direct_call)) return false; Index: gcc/value-prof.h =================================================================== --- gcc/value-prof.h (revision 201632) +++ gcc/value-prof.h (working copy) @@ -103,6 +103,10 @@ extern void gimple_gen_average_profiler extern void gimple_gen_ior_profiler (histogram_value, unsigned, unsigned); extern void stream_out_histogram_value (struct output_block *, histogram_value); extern void stream_in_histogram_value (struct lto_input_block *, gimple); +extern struct cgraph_node* find_func_by_profile_id (int func_id); +extern gimple gimple_ic (gimple, struct cgraph_node *, + int, gcov_type, gcov_type); + /* In profile.c. 
*/ extern void init_branch_prob (void); Index: gcc/gcov-io.h =================================================================== --- gcc/gcov-io.h (revision 201632) +++ gcc/gcov-io.h (working copy) @@ -515,7 +515,7 @@ extern void __gcov_merge_ior (gcov_type extern void __gcov_interval_profiler (gcov_type *, gcov_type, int, unsigned); extern void __gcov_pow2_profiler (gcov_type *, gcov_type); extern void __gcov_one_value_profiler (gcov_type *, gcov_type); -extern void __gcov_indirect_call_profiler (gcov_type *, gcov_type, void *, void *); +extern void __gcov_indirect_call_profiler_v2 (gcov_type, void *); extern void __gcov_average_profiler (gcov_type *, gcov_type); extern void __gcov_ior_profiler (gcov_type *, gcov_type); Index: gcc/profile.h =================================================================== --- gcc/profile.h (revision 201632) +++ gcc/profile.h (working copy) @@ -43,7 +43,7 @@ extern void mcf_smooth_cfg (void); extern gcov_type sum_edge_counts (vec<edge, va_gc> *edges); -extern void init_node_map (void); +extern void init_node_map (bool); extern void del_node_map (void); extern void get_working_sets (void); Index: gcc/coverage.c =================================================================== --- gcc/coverage.c (revision 201632) +++ gcc/coverage.c (working copy) @@ -539,6 +539,28 @@ coverage_compute_lineno_checksum (void) return chksum; } +/* Compute profile ID. This is better to be unique in whole program. 
*/ + +unsigned +coverage_compute_profile_id (struct cgraph_node *n) +{ + expanded_location xloc + = expand_location (DECL_SOURCE_LOCATION (n->symbol.decl)); + unsigned chksum = xloc.line; + + chksum = coverage_checksum_string (chksum, xloc.file); + chksum = coverage_checksum_string + (chksum, IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (n->symbol.decl))); + if (first_global_object_name) + chksum = coverage_checksum_string + (chksum, first_global_object_name); + chksum = coverage_checksum_string + (chksum, aux_base_name); + + /* Non-negative integers are hopefully small enough to fit in all targets. */ + return chksum & 0x7fffffff; +} + /* Compute cfg checksum for the current function. The checksum is calculated carefully so that source code changes that doesn't affect the control flow graph Index: gcc/coverage.h =================================================================== --- gcc/coverage.h (revision 201632) +++ gcc/coverage.h (working copy) @@ -35,6 +35,9 @@ extern void coverage_end_function (unsig /* Compute the control flow checksum for the current function. */ extern unsigned coverage_compute_cfg_checksum (void); +/* Compute the profile id of function N. */ +extern unsigned coverage_compute_profile_id (struct cgraph_node *n); + /* Compute the line number checksum for the current function. */ extern unsigned coverage_compute_lineno_checksum (void); Index: gcc/tree-profile.c =================================================================== --- gcc/tree-profile.c (revision 201632) +++ gcc/tree-profile.c (working copy) @@ -57,8 +57,8 @@ static GTY(()) tree ptr_void; /* Do initialization work for the edge profiler. 
*/ /* Add code: - static gcov* __gcov_indirect_call_counters; // pointer to actual counter - static void* __gcov_indirect_call_callee; // actual callee address + __thread gcov* __gcov_indirect_call_counters; // pointer to actual counter + __thread void* __gcov_indirect_call_callee; // actual callee address */ static void init_ic_make_global_vars (void) @@ -72,7 +72,8 @@ init_ic_make_global_vars (void) get_identifier ("__gcov_indirect_call_callee"), ptr_void); TREE_STATIC (ic_void_ptr_var) = 1; - TREE_PUBLIC (ic_void_ptr_var) = 0; + TREE_PUBLIC (ic_void_ptr_var) = 1; + DECL_EXTERNAL (ic_void_ptr_var) = 1; DECL_ARTIFICIAL (ic_void_ptr_var) = 1; DECL_INITIAL (ic_void_ptr_var) = NULL; if (targetm.have_tls) @@ -87,7 +88,8 @@ init_ic_make_global_vars (void) get_identifier ("__gcov_indirect_call_counters"), gcov_type_ptr); TREE_STATIC (ic_gcov_type_ptr_var) = 1; - TREE_PUBLIC (ic_gcov_type_ptr_var) = 0; + TREE_PUBLIC (ic_gcov_type_ptr_var) = 1; + DECL_EXTERNAL (ic_gcov_type_ptr_var) = 1; DECL_ARTIFICIAL (ic_gcov_type_ptr_var) = 1; DECL_INITIAL (ic_gcov_type_ptr_var) = NULL; if (targetm.have_tls) @@ -155,14 +157,14 @@ gimple_init_edge_profiler (void) init_ic_make_global_vars (); - /* void (*) (gcov_type *, gcov_type, void *, void *) */ + /* void (*) (gcov_type, void *) */ ic_profiler_fn_type = build_function_type_list (void_type_node, - gcov_type_ptr, gcov_type_node, + gcov_type_node, ptr_void, - ptr_void, NULL_TREE); + NULL_TREE); tree_indirect_call_profiler_fn - = build_fn_decl ("__gcov_indirect_call_profiler", + = build_fn_decl ("__gcov_indirect_call_profiler_v2", ic_profiler_fn_type); TREE_NOTHROW (tree_indirect_call_profiler_fn) = 1; DECL_ATTRIBUTES (tree_indirect_call_profiler_fn) @@ -352,7 +354,7 @@ gimple_gen_ic_func_profiler (void) struct cgraph_node * c_node = cgraph_get_node (current_function_decl); gimple_stmt_iterator gsi; gimple stmt1, stmt2; - tree tree_uid, cur_func, counter_ptr, ptr_var, void0; + tree tree_uid, cur_func, void0; if 
(cgraph_only_called_directly_p (c_node)) return; @@ -361,27 +363,19 @@ gimple_gen_ic_func_profiler (void) /* Insert code: - stmt1: __gcov_indirect_call_profiler (__gcov_indirect_call_counters, - current_function_funcdef_no, - &current_function_decl, - __gcov_indirect_call_callee); + stmt1: __gcov_indirect_call_profiler (profile_id, + &current_function_decl) */ - gsi = gsi_after_labels (single_succ (ENTRY_BLOCK_PTR)); + gsi = gsi_after_labels (split_edge (single_succ_edge (ENTRY_BLOCK_PTR))); cur_func = force_gimple_operand_gsi (&gsi, build_addr (current_function_decl, current_function_decl), true, NULL_TREE, true, GSI_SAME_STMT); - counter_ptr = force_gimple_operand_gsi (&gsi, ic_gcov_type_ptr_var, - true, NULL_TREE, true, - GSI_SAME_STMT); - ptr_var = force_gimple_operand_gsi (&gsi, ic_void_ptr_var, - true, NULL_TREE, true, - GSI_SAME_STMT); - tree_uid = build_int_cst (gcov_type_node, current_function_funcdef_no); - stmt1 = gimple_build_call (tree_indirect_call_profiler_fn, 4, - counter_ptr, tree_uid, cur_func, ptr_var); + tree_uid = build_int_cst (gcov_type_node, cgraph_get_node (current_function_decl)->profile_id); + stmt1 = gimple_build_call (tree_indirect_call_profiler_fn, 2, + tree_uid, cur_func); gsi_insert_before (&gsi, stmt1, GSI_SAME_STMT); /* Set __gcov_indirect_call_callee to 0, @@ -461,7 +455,7 @@ tree_profiling (void) cgraphunit.c:ipa_passes(). */ gcc_assert (cgraph_state == CGRAPH_STATE_IPA_SSA); - init_node_map(); + init_node_map (true); FOR_EACH_DEFINED_FUNCTION (node) {