https://gcc.gnu.org/g:09db37f7cea79f1cfcede455763e5e2da28ae2d5
commit r16-1961-g09db37f7cea79f1cfcede455763e5e2da28ae2d5 Author: Jan Hubicka <hubi...@ucw.cz> Date: Tue Jul 1 08:32:56 2025 +0200 Auto-FDO/FDO profile comparator The patch I sent from the airport only worked if you produced the gcda files with an unpatched compiler. For some reason auto-profile reading is intertwined with gcov reading, which is not necessary. Here is a cleaner version which also makes the format a bit more convenient. One can now grep as: grep "bb.*fdo.*very hot.*cold" *.profile | sort -n -k 5 -r | less digits_2/30 bb 307 fdo 10273284651 (very hot) afdo 0 (auto FDO) (cold) scaled 0 diff -10273284651, -100.00% digits_2/30 bb 201 fdo 2295561442 (very hot) afdo 19074 (auto FDO) (cold) scaled 1341585 diff -2294219857, -99.94% digits_2/30 bb 203 fdo 1236123372 (very hot) afdo 9537 (auto FDO) (cold) scaled 670792 diff -1235452580, -99.95% digits_2/30 bb 200 fdo 1236123372 (very hot) afdo 9537 (auto FDO) (cold) scaled 670792 diff -1235452580, -99.95% digits_2/30 bb 202 fdo 1059438070 (very hot) afdo 9537 (auto FDO) (cold) scaled 670792 diff -1058767278, -99.94% new_solver/9 bb 246 fdo 413879041 (very hot) afdo 76594 (guessed) (cold) scaled 5387299 diff -408491742, -98.70% new_solver/9 bb 167 fdo 413792205 (very hot) afdo 76594 (guessed) (cold) scaled 5387299 diff -408404906, -98.70% new_solver/9 bb 159 fdo 387809230 (very hot) afdo 57182 (guessed) (cold) scaled 4021940 diff -383787290, -98.96% new_solver/9 bb 158 fdo 387809230 (very hot) afdo 60510 (guessed) (cold) scaled 4256018 diff -383553212, -98.90% new_solver/9 bb 138 fdo 387809230 (very hot) afdo 40917 (guessed) (cold) scaled 2877929 diff -384931301, -99.26% new_solver/9 bb 137 fdo 387809230 (very hot) afdo 43298 (guessed) (cold) scaled 3045398 diff -384763832, -99.21% This dumps basic blocks that have large counts under normal profile feedback but to which autofdo gives a small count (so they become cold). These indeed seem to be mostly basic blocks controlling loops. 
gcc/ChangeLog: * auto-profile.cc (afdo_hot_bb_threshod): New global variable. (maybe_hot_afdo_count_p): New function. (autofdo_source_profile::read): Do not set up dump file; set afdo_hot_bb_threshod. (afdo_annotate_cfg): Handle partial training. (afdo_callsite_hot_enough_for_early_inline): Use maybe_hot_afdo_count_p. (auto_profile_offline::execute): Read autofdo file. * auto-profile.h (maybe_hot_afdo_count_p): Declare. (afdo_hot_bb_threshold): Declare. * coverage.cc (read_counts_file): Also set gcov_profile_info. (coverage_init): Do not read autofdo file. * opts.cc (enable_fdo_optimizations): Add autofdo parameter; do not set flag_branch_probabilities and flag_profile_values with it. (common_handle_option): Update. * passes.cc (finish_optimization_passes): Do not end branch prob here. (pass_manager::dump_profile_report): Also mark change after autofdo pass. * profile.cc: Include auto-profile.h. (gcov_profile_info): New global variable. (struct afdo_fdo_record): New structure. (compute_branch_probabilities): Record afdo profile. (end_branch_prob): Dump afdo/fdo profile comparison. * profile.h (gcov_profile_info): Declare. * tree-profile.cc (tree_profiling): Call end_branch_prob. (pass_ipa_tree_profile::gate): Also enable with autoFDO. Diff: --- gcc/auto-profile.cc | 53 ++++++++++++++++++------- gcc/auto-profile.h | 7 ++++ gcc/coverage.cc | 6 +-- gcc/opts.cc | 13 +++--- gcc/passes.cc | 8 +--- gcc/profile.cc | 111 +++++++++++++++++++++++++++++++++++++++++++++++++++- gcc/profile.h | 2 +- gcc/tree-profile.cc | 7 ++-- 8 files changed, 170 insertions(+), 37 deletions(-) diff --git a/gcc/auto-profile.cc b/gcc/auto-profile.cc index d78f2cb42b5c..743b005938c1 100644 --- a/gcc/auto-profile.cc +++ b/gcc/auto-profile.cc @@ -123,6 +123,18 @@ along with GCC; see the file COPYING3. If not see #define DEFAULT_AUTO_PROFILE_FILE "fbdata.afdo" #define AUTO_PROFILE_VERSION 2 +/* profile counts determined by AFDO smaller than afdo_hot_bb_threshold are + considered cols. 
*/ +gcov_type afdo_hot_bb_threshod = -1; + +/* Return ture if COUNT is possiby hot. */ +bool +maybe_hot_afdo_count_p (profile_count count) +{ + gcc_checking_assert (count.ipa ().initialized_p ()); + return count.ipa ().to_gcov_type () >= afdo_hot_bb_threshod; +} + namespace autofdo { @@ -1908,9 +1920,6 @@ autofdo_source_profile::read () /* Read in the function/callsite profile, and store it in local data structure. */ unsigned function_num = gcov_read_unsigned (); - int profile_pass_num - = g->get_passes ()->get_pass_auto_profile ()->static_pass_number; - g->get_dumps ()->dump_start (profile_pass_num, NULL); for (unsigned i = 0; i < function_num; i++) { function_instance::function_instance_stack stack; @@ -1926,12 +1935,18 @@ autofdo_source_profile::read () "auto-profile contains duplicated function instance %s", afdo_string_table->get_name (s->name ())); } + int hot_frac = param_hot_bb_count_fraction; /* Scale up the profile, but leave some bits in case some counts gets bigger than sum_max eventually. */ if (afdo_profile_info->sum_max) afdo_count_scale = MAX (((gcov_type)1 << (profile_count::n_bits / 2)) / afdo_profile_info->sum_max, 1); + afdo_hot_bb_threshod + = hot_frac + ? 
afdo_profile_info->sum_max * afdo_count_scale / hot_frac + : (gcov_type)profile_count::max_count; + set_hot_bb_threshold (afdo_hot_bb_threshod); if (dump_file) fprintf (dump_file, "Max count in profile %" PRIu64 "\n" "Setting scale %" PRIu64 "\n" @@ -1940,10 +1955,8 @@ autofdo_source_profile::read () (int64_t)afdo_profile_info->sum_max, (int64_t)afdo_count_scale, (int64_t)(afdo_profile_info->sum_max * afdo_count_scale), - (int64_t)(afdo_profile_info->sum_max * afdo_count_scale - / param_hot_bb_count_fraction)); + (int64_t)afdo_hot_bb_threshod); afdo_profile_info->sum_max *= afdo_count_scale; - g->get_dumps ()->dump_finish (profile_pass_num); return true; } @@ -3083,6 +3096,16 @@ afdo_annotate_cfg (void) if (dump_file) fprintf (dump_file, "No afdo profile for %s\n", cgraph_node::get (current_function_decl)->dump_name ()); + /* create_gcov only dumps symbols with some samples in them. + This means that we get nonempty zero_bbs only if some + nonzero counts in profile were not matched with statements. */ + if (!flag_profile_partial_training) + { + FOR_ALL_BB_FN (bb, cfun) + if (bb->count.quality () == GUESSED_LOCAL) + bb->count = bb->count.global0afdo (); + update_max_bb_count (); + } return; } @@ -3153,9 +3176,13 @@ afdo_annotate_cfg (void) if (dump_file) fprintf (dump_file, "Setting global count to afdo0\n"); } - FOR_ALL_BB_FN (bb, cfun) - if (bb->count.quality () == GUESSED_LOCAL) - bb->count = bb->count.global0afdo (); + if (!flag_profile_partial_training) + { + FOR_ALL_BB_FN (bb, cfun) + if (bb->count.quality () == GUESSED_LOCAL) + bb->count = bb->count.global0afdo (); + update_max_bb_count (); + } loop_optimizer_finalize (); free_dominance_info (CDI_DOMINATORS); @@ -3305,11 +3332,7 @@ afdo_callsite_hot_enough_for_early_inline (struct cgraph_edge *edge) { bool is_hot; profile_count pcount = profile_count::from_gcov_type (count).afdo (); - gcov_summary *saved_profile_info = profile_info; - /* At early inline stage, profile_info is not set yet. 
We need to - temporarily set it to afdo_profile_info to calculate hotness. */ - profile_info = autofdo::afdo_profile_info; - is_hot = maybe_hot_count_p (NULL, pcount); + is_hot = maybe_hot_afdo_count_p (pcount); if (dump_file) { fprintf (dump_file, "Call %s -> %s has %s afdo profile count ", @@ -3318,7 +3341,6 @@ afdo_callsite_hot_enough_for_early_inline (struct cgraph_edge *edge) pcount.dump (dump_file); fprintf (dump_file, "\n"); } - profile_info = saved_profile_info; return is_hot; } @@ -3471,6 +3493,7 @@ public: unsigned int execute (function *) final override { + read_autofdo_file (); if (autofdo::afdo_source_profile) autofdo::afdo_source_profile->offline_external_functions (); return 0; diff --git a/gcc/auto-profile.h b/gcc/auto-profile.h index d31a20811038..639e263ef7a9 100644 --- a/gcc/auto-profile.h +++ b/gcc/auto-profile.h @@ -35,4 +35,11 @@ extern bool afdo_vpt_for_early_inline (cgraph_node *node); do not repeat it later. */ extern void remove_afdo_speculative_target (cgraph_edge *); +/* profile counts determined by AFDO smaller than afdo_hot_bb_threshold are + considered cols. */ +extern gcov_type afdo_hot_bb_threshold; + +/* Return ture if COUNT is possiby hot. 
*/ +extern bool maybe_hot_afdo_count_p (profile_count count); + #endif /* AUTO_PROFILE_H */ diff --git a/gcc/coverage.cc b/gcc/coverage.cc index c0ae76a40ef1..dd3ed2ed8429 100644 --- a/gcc/coverage.cc +++ b/gcc/coverage.cc @@ -235,7 +235,7 @@ read_counts_file (void) } else if (tag == GCOV_TAG_OBJECT_SUMMARY) { - profile_info = XCNEW (gcov_summary); + gcov_profile_info = profile_info = XCNEW (gcov_summary); profile_info->runs = gcov_read_unsigned (); profile_info->sum_max = gcov_read_unsigned (); } @@ -1315,9 +1315,7 @@ coverage_init (const char *filename) strcpy (da_file_name + prefix_len + len, GCOV_DATA_SUFFIX); bbg_file_stamp = local_tick; - if (flag_auto_profile) - read_autofdo_file (); - else if (flag_branch_probabilities) + if (flag_branch_probabilities) read_counts_file (); /* Name of bbg file. */ diff --git a/gcc/opts.cc b/gcc/opts.cc index 4e39b9591522..6ca1ec7e865d 100644 --- a/gcc/opts.cc +++ b/gcc/opts.cc @@ -2080,10 +2080,13 @@ print_specific_help (unsigned int include_flags, static void enable_fdo_optimizations (struct gcc_options *opts, struct gcc_options *opts_set, - int value) + int value, bool autofdo) { - SET_OPTION_IF_UNSET (opts, opts_set, flag_branch_probabilities, value); - SET_OPTION_IF_UNSET (opts, opts_set, flag_profile_values, value); + if (!autofdo) + { + SET_OPTION_IF_UNSET (opts, opts_set, flag_branch_probabilities, value); + SET_OPTION_IF_UNSET (opts, opts_set, flag_profile_values, value); + } SET_OPTION_IF_UNSET (opts, opts_set, flag_unroll_loops, value); SET_OPTION_IF_UNSET (opts, opts_set, flag_peel_loops, value); SET_OPTION_IF_UNSET (opts, opts_set, flag_tracer, value); @@ -3124,7 +3127,7 @@ common_handle_option (struct gcc_options *opts, /* No break here - do -fprofile-use processing. 
*/ /* FALLTHRU */ case OPT_fprofile_use: - enable_fdo_optimizations (opts, opts_set, value); + enable_fdo_optimizations (opts, opts_set, value, false); SET_OPTION_IF_UNSET (opts, opts_set, flag_profile_reorder_functions, value); /* Indirect call profiling should do all useful transformations @@ -3141,7 +3144,7 @@ common_handle_option (struct gcc_options *opts, /* No break here - do -fauto-profile processing. */ /* FALLTHRU */ case OPT_fauto_profile: - enable_fdo_optimizations (opts, opts_set, value); + enable_fdo_optimizations (opts, opts_set, value, true); SET_OPTION_IF_UNSET (opts, opts_set, flag_profile_correction, value); break; diff --git a/gcc/passes.cc b/gcc/passes.cc index 6c67ffe56ba9..a33c8d924a52 100644 --- a/gcc/passes.cc +++ b/gcc/passes.cc @@ -355,13 +355,6 @@ finish_optimization_passes (void) gcc::dump_manager *dumps = m_ctxt->get_dumps (); timevar_push (TV_DUMP); - if (coverage_instrumentation_p () || flag_test_coverage - || flag_branch_probabilities) - { - dumps->dump_start (m_pass_profile_1->static_pass_number, NULL); - end_branch_prob (); - dumps->dump_finish (m_pass_profile_1->static_pass_number); - } /* Do whatever is necessary to finish printing the graphs. */ for (i = TDI_end; (dfi = dumps->get_dump_file_info (i)) != NULL; ++i) @@ -2036,6 +2029,7 @@ pass_manager::dump_profile_report () const fprintf (dump_file, "| %12.0f", profile_record[i].time); /* Time units changes with profile estimate and feedback. */ if (i == m_pass_profile_1->static_pass_number + || i == m_pass_ipa_auto_profile_1->static_pass_number || i == m_pass_ipa_tree_profile_1->static_pass_number) fprintf (dump_file, "-------------"); else if (rel_time_change) diff --git a/gcc/profile.cc b/gcc/profile.cc index 6234dd2d4e2d..5d581e7b5ec1 100644 --- a/gcc/profile.cc +++ b/gcc/profile.cc @@ -68,6 +68,7 @@ along with GCC; see the file COPYING3. 
If not see #include "file-prefix-map.h" #include "profile.h" +#include "auto-profile.h" struct condcov; struct condcov *find_conditions (struct function*); @@ -97,7 +98,7 @@ struct bb_profile_info { /* Counter summary from the last set of coverage counts read. */ -gcov_summary *profile_info; +gcov_summary *profile_info, *gcov_profile_info; /* Collect statistics on the performance of this pass for the entire source file. */ @@ -113,6 +114,27 @@ static int total_hist_br_prob[20]; static int total_num_branches; static int total_num_conds; +/* Map between auto-fdo and fdo counts used to compare quality + of the profiles. */ +struct afdo_fdo_record +{ + cgraph_node *node; + struct bb_record + { + /* Index of the basic block. */ + int index; + profile_count afdo; + profile_count fdo; + + /* Successors and predecessors in CFG. */ + vec <int> preds; + vec <int> succs; + }; + vec <bb_record> bbs; +}; + +static vec <afdo_fdo_record> afdo_fdo_records; + /* Forward declarations. */ static void find_spanning_tree (struct edge_list *); @@ -472,6 +494,22 @@ compute_branch_probabilities (unsigned cfg_checksum, unsigned lineno_checksum) BB_INFO (EXIT_BLOCK_PTR_FOR_FN (cfun))->succ_count = 2; BB_INFO (ENTRY_BLOCK_PTR_FOR_FN (cfun))->pred_count = 2; + afdo_fdo_record record = {cgraph_node::get (current_function_decl), vNULL};; + if (dump_file && flag_auto_profile) + { + FOR_ALL_BB_FN (bb, cfun) + { + record.bbs.safe_push ({bb->index, bb->count.ipa (), + profile_count::uninitialized (), vNULL, vNULL}); + record.bbs.last ().preds.reserve (EDGE_COUNT (bb->preds)); + for (auto &e : bb->preds) + record.bbs.last ().preds.safe_push (e->src->index); + record.bbs.last ().succs.reserve (EDGE_COUNT (bb->succs)); + for (auto &e : bb->succs) + record.bbs.last ().succs.safe_push (e->dest->index); + } + } + num_edges = read_profile_edge_counts (exec_counts); if (dump_file) @@ -812,6 +850,18 @@ compute_branch_probabilities (unsigned cfg_checksum, unsigned lineno_checksum) delete edge_gcov_counts; 
edge_gcov_counts = NULL; + if (dump_file && flag_auto_profile) + { + int i = 0; + FOR_ALL_BB_FN (bb, cfun) + { + gcc_checking_assert (record.bbs[i].index == bb->index); + record.bbs[i].fdo = bb->count.ipa (); + i++; + } + afdo_fdo_records.safe_push (record); + } + update_max_bb_count (); if (dump_file) @@ -1804,6 +1854,65 @@ end_branch_prob (void) } fprintf (dump_file, "Total number of conditions: %d\n", total_num_conds); + if (afdo_fdo_records.length ()) + { + profile_count fdo_sum = profile_count::zero (); + profile_count afdo_sum = profile_count::zero (); + for (const auto &r : afdo_fdo_records) + for (const auto &b : r.bbs) + if (b.fdo.initialized_p () && b.afdo.initialized_p ()) + { + fdo_sum += b.fdo; + afdo_sum += b.afdo; + } + for (auto &r : afdo_fdo_records) + { + for (auto &b : r.bbs) + if (b.fdo.initialized_p () && b.afdo.initialized_p ()) + { + fprintf (dump_file, "%s bb %i fdo %" PRIu64 " (%s) afdo ", + r.node->dump_name (), b.index, + (int64_t)b.fdo.to_gcov_type (), + maybe_hot_count_p + (NULL, b.fdo.apply_scale (1, 1000)) + ? "very hot" + : maybe_hot_count_p (NULL, b.fdo) + ? "hot" : "cold"); + b.afdo.dump (dump_file); + fprintf (dump_file, " (%s) ", + maybe_hot_afdo_count_p + (b.afdo.apply_scale (1, 1000)) + ? "very hot" + : maybe_hot_afdo_count_p (b.afdo) + ? 
"hot" : "cold"); + if (afdo_sum.nonzero_p ()) + { + profile_count scaled + = b.afdo.apply_scale (fdo_sum, afdo_sum); + fprintf (dump_file, "scaled %" PRIu64, + scaled.to_gcov_type ()); + if (b.fdo.to_gcov_type ()) + fprintf (dump_file, " diff %" PRId64 ", %+2.2f%%", + scaled.to_gcov_type () + - b.fdo.to_gcov_type (), + (scaled.to_gcov_type () + - b.fdo.to_gcov_type ()) * 100.0 + / b.fdo.to_gcov_type ()); + } + fprintf (dump_file, "\n preds"); + for (int val : b.preds) + fprintf (dump_file, " %i", val); + b.preds.release (); + fprintf (dump_file, "\n succs"); + for (int val : b.succs) + fprintf (dump_file, " %i", val); + b.succs.release (); + fprintf (dump_file, "\n"); + } + r.bbs.release (); + } + } + afdo_fdo_records.release (); } } diff --git a/gcc/profile.h b/gcc/profile.h index a97445b8f6f3..8ccdfc4673d4 100644 --- a/gcc/profile.h +++ b/gcc/profile.h @@ -75,7 +75,7 @@ extern void get_working_sets (void); /* Counter summary from the last set of coverage counts read by profile.cc. */ -extern struct gcov_summary *profile_info; +extern struct gcov_summary *profile_info, *gcov_profile_info; /* Return true if any cfg coverage/profiling is enabled; -fprofile-arcs -fcondition-coverage -fpath-coverage. */ diff --git a/gcc/tree-profile.cc b/gcc/tree-profile.cc index fed218eb60bc..fe20e84838d8 100644 --- a/gcc/tree-profile.cc +++ b/gcc/tree-profile.cc @@ -2031,6 +2031,7 @@ tree_profiling (void) handle_missing_profiles (); del_node_map (); + end_branch_prob (); return 0; } @@ -2065,10 +2066,8 @@ public: bool pass_ipa_tree_profile::gate (function *) { - /* When profile instrumentation, use or test coverage shall be performed. - But for AutoFDO, this there is no instrumentation, thus this pass is - disabled. */ - return (!in_lto_p && !flag_auto_profile + /* When profile instrumentation, use or test coverage shall be performed. */ + return (!in_lto_p && (flag_branch_probabilities || flag_test_coverage || coverage_instrumentation_p ()) && !seen_error ());