> > Index: passes.c > > +/* Hold statistic about profile consistency. */ > ... > > I don't see why this should live in passes.c, can you please put it in > a more logical place (profile.c, perhaps)?
Hmm, the problem here is that the code is using passmanager's dumping bits to order the passes and assign them names. So moving it elsewhere requires exporting it that is not nice. passes.c does similar stuff already, so I decided to keep it there. Here is patch I comitted that prints more detailed report. It reports also changes in overall unit time/size estimates and when pass did nothing it reports it as uneffective. For tramp3d it looks this way: Pass name |mismatch in |mismated out|Overall |freq count |freq count |size time cfg (after TODO)| | | -1.5474% ssa | | | -2.0148% inline_param ------------| | | einline | | | -0.4991% einline (after TODO)| | | -0.0129% early_optimizations ------------| | | copyrename ------------| | | ccp | | | -0.2273% forwprop | | | -0.0688% ealias ------------| | | esra | | | -0.1892% fre | | | -7.9369% copyprop (after TODO)| | | -0.0187% mergephi ------------| | | cddce | | | -0.1655% eipa_sra | | | -0.0237% tailr | | | -0.5305% switchconv | | | +0.0190% profile_estimate | +1 | | local-pure-const ------------| | | fnsplit | +20 | | +0.2333% +0.8267% fnsplit (after TODO)| -20 | | -1.8146% -1.3300% release_ssa ------------| | | inline_param ------------| | | inline | +229 | | +14.5138% -10.6020% inline (after TODO)| -225 | | -0.2662% -10.2195% copyrename | | | -2.7026% -2.9448% cunrolli ------------| | | ccp | | | -0.0174% -0.0142% ccp (after TODO)| +8 | | -0.3571% -0.1213% forwprop | | | -0.1442% -0.5343% alias ------------| | | retslot ------------| | | phiprop ------------| | | fre | | | -0.2801% -0.2814% fre (after TODO)| | | -0.2897% -0.0362% copyprop | | | -0.0264% -0.0251% mergephi ------------| | | vrp | +80 | | +0.1233% -1.0271% vrp (after TODO)| +3 | | -0.8618% -0.2421% dce | | | -0.0089% -0.0021% dce (after TODO)| -7 | | -0.0089% -0.0347% cdce ------------| | | cselim | | | -0.0177% +0.0000% ifcombine | | | +0.0044% +0.0001% ifcombine (after TODO)| | | -0.0089% +0.0000% phiopt | | | -0.1730% -0.1203% tailr ------------| | | ch | +2 | | +2.0535% +0.0000% ch (after TODO)| | | +0.0002% cplxlower ------------| | | sra | | | +0.0087% copyrename ------------| | | dom | +86 | | +1.1802% -0.1366% dom (after TODO)| +5 | | -0.1894% -0.3042% phicprop | | | -0.0043% phicprop (after TODO)| -6 | | +0.2117% dse | | | -0.0086% -0.0009% reassoc | | | +0.0302% +0.0116% dce | | | -0.6640% -0.1408% dce (after TODO)| -2 | | forwprop | | | -0.1129% -0.1826% phiopt ------------| | | objsz ------------| | | strlen ------------| | | ccp (after TODO)| | | -0.0087% copyprop ------------| | | sincos ------------| | | bswap ------------| | | crited ------------| | | pre | +1 | | +1.2168% -1.3703% pre (after TODO)| -2 | | -0.0215% +0.0764% sink | | | -0.0256% loop ------------| | | loopinit ------------| | | lim | | | +0.0086% -0.1529% copyprop (after TODO)| -11 | | +0.1483% dceloop | | | -0.0172% -0.0028% unswitch | +7 | | +1.0265% +0.0404% unswitch (after TODO)| +21 | | -0.5569% -0.7164% sccp ------------| | | ldist ------------| | | copyprop ------------| | | ivcanon | | | +0.0214% +0.0661% ifcvt ------------| | | vect ------------| | | dceloop | | | -0.0085% -0.0009% pcom | | | -0.0002% cunroll | -2 | | +0.8891% -0.0781% slp ------------| | | ivopts | | | +0.1822% +0.0837% lim ------------| | | loopdone ------------| | | veclower2 ------------| | | reassoc | | | -0.0042% -0.0169% vrp | +17 | | -0.1015% -0.2523% vrp (after TODO)| +1 | | -0.2540% -0.3505% slsr | | | +0.1952% +0.6254% dom | +16 | | -0.0254% -0.6641% dom (after TODO)| +5 | | -0.0424% -0.0243% phicprop | | | -0.0170% phicprop (after TODO)| +2 | | +0.0141% cddce | | | -0.1229% -0.0515% cddce (after TODO)| -1 | | -0.0085% dse ------------| | | forwprop | | | -0.0382% -0.0254% phiopt | | | -0.0255% -0.0462% fab ------------| | | widening_mul ------------| | | tailc ------------| | | copyrename ------------| | | uncprop ------------| | | local-pure-const ------------| | | nrv ------------| | | optimized ------------| | | expand | +430 | |---------- vregs ------------| | | into_cfglayout | | | -4.4596% -3.2103% jump | +6 | | -0.5852% -0.5775% subreg1 | | | -0.0589% dfinit ------------| | | cse1 | | | -0.0966% -0.0620% fwprop1 | | | -2.5152% -3.0170% cprop | -8 | | -0.4209% -0.4256% rtl pre | | | +0.8649% +0.7079% hoist ------------| | | cprop | -7 | | -0.9099% -1.0555% cse_local | | | -0.1787% -0.2454% ce1 | -1 | | +0.0913% +0.3673% reginfo ------------| | | loop2 ------------| | | loop2_init ------------| | | loop2_invariant | | | +0.0523% +0.1014% loop2_unswitch ------------| | | loop2_done ------------| | | cprop | | | -0.3951% -0.2580% cse2 | | | -0.3698% -0.3355% dse1 | | | -0.0025% -0.0011% fwprop2 | | | -0.0012% -0.0013% init-regs ------------| | | ud_dce | | | -0.0784% -0.0126% combine | | | -3.1886% -3.4658% ce2 | | | +0.0001% regmove | | | +0.0025% +0.0000% outof_cfglayout | | | +3.9667% +1.8602% split1 | | | +0.0828% +0.0878% subreg2 ------------| | | mode_sw ------------| | | asmcons ------------| | | ira | | | +3.0210% +0.5602% reload | -4 | | -2.3169% -3.8620% postreload | | | -0.3193% -0.0548% gcse2 | | | -0.0170% -0.0051% split2 | | | +0.2646% +0.3206% ree | | | +0.0012% +0.0175% pro_and_epilogue | +4 | | +3.8163% +18.4507% dse2 ------------| | | csa ------------| | | jump2 | | | -4.4781% -0.0612% peephole2 | | | +0.0073% -0.0539% ce3 | | | -0.0049% +0.0092% cprop_hardreg ------------| | | rtl_dce | | | -0.0903% -0.0736% bbro | -7 | | +0.4776% -1.0224% split4 ------------| | | sched2 ------------| | | stack ------------| | | alignments ------------| | | compgotos ------------| | | Honza * doc/invoke.texi (-fprofile-report): Document. * common.opt (-fprofile-report): New option. * toplev.c (finalize): Call dump_profile_report. * toplev.h (profile_report): Declare. * passes.c (profile_record): New static var. (check_profile_consistency): New function. (dump_profile_record): New function. (execute_one_ipa_transform_pass): Call check_profile_consistency. (execute_one_pass): Likewise. Index: doc/invoke.texi =================================================================== --- doc/invoke.texi (revision 192116) +++ doc/invoke.texi (working copy) @@ -388,7 +388,7 @@ Objective-C and Objective-C++ Dialects}. -fno-toplevel-reorder -fno-trapping-math -fno-zero-initialized-in-bss @gol -fomit-frame-pointer -foptimize-register-move -foptimize-sibling-calls @gol -fpartial-inlining -fpeel-loops -fpredictive-commoning @gol --fprefetch-loop-arrays @gol +-fprefetch-loop-arrays -fprofile-report @gol -fprofile-correction -fprofile-dir=@var{path} -fprofile-generate @gol -fprofile-generate=@var{path} @gol -fprofile-use -fprofile-use=@var{path} -fprofile-values @gol @@ -5153,6 +5153,11 @@ allocation for the WPA phase only. Makes the compiler print some statistics about permanent memory allocation before or after interprocedural optimization. +@item -fprofile-report +@opindex fprofile-report +Makes the compiler print some statistics about consistency of the +(estimated) profile and effect of individual passes. + @item -fstack-usage @opindex fstack-usage Makes the compiler output stack usage information for the program, on a Index: common.opt =================================================================== --- common.opt (revision 192116) +++ common.opt (working copy) @@ -1654,6 +1654,10 @@ fprofile-values Common Report Var(flag_profile_values) Insert code to profile values of expressions +fprofile-report +Common Report Var(profile_report) +Report on consistency of profile + frandom-seed Common Var(common_deferred_options) Defer Index: toplev.c =================================================================== --- toplev.c (revision 192116) +++ toplev.c (working copy) @@ -1815,6 +1815,9 @@ finalize (bool no_backend) if (mem_report) dump_memory_report (true); + if (dump_profile_report) + dump_profile_report (); + /* Language-specific end of compilation actions. */ lang_hooks.finish (); } Index: toplev.h =================================================================== --- toplev.h (revision 192116) +++ toplev.h (working copy) @@ -49,6 +49,7 @@ extern void emit_debug_global_declaratio extern void write_global_declarations (void); extern void dump_memory_report (bool); +extern void dump_profile_report (void); extern void target_reinit (void); Index: passes.c =================================================================== --- passes.c (revision 192116) +++ passes.c (working copy) @@ -1778,6 +1780,209 @@ execute_function_dump (void *data ATTRIB } } +/* Make statistic about profile consistency. */ + +struct profile_record +{ + int num_mismatched_freq_in[2]; + int num_mismatched_freq_out[2]; + int num_mismatched_count_in[2]; + int num_mismatched_count_out[2]; + bool run; + gcov_type time[2]; + int size[2]; +}; + +static struct profile_record *profile_record; + +static void +check_profile_consistency (int index, int subpass, bool run) +{ + basic_block bb; + edge_iterator ei; + edge e; + int sum; + gcov_type lsum; + + if (index == -1) + return; + if (!profile_record) + profile_record = XCNEWVEC (struct profile_record, + passes_by_id_size); + gcc_assert (index < passes_by_id_size && index >= 0); + gcc_assert (subpass < 2); + profile_record[index].run |= run; + + FOR_ALL_BB (bb) + { + if (bb != EXIT_BLOCK_PTR_FOR_FUNCTION (cfun) + && profile_status != PROFILE_ABSENT) + { + sum = 0; + FOR_EACH_EDGE (e, ei, bb->succs) + sum += e->probability; + if (EDGE_COUNT (bb->succs) && abs (sum - REG_BR_PROB_BASE) > 100) + profile_record[index].num_mismatched_freq_out[subpass]++; + lsum = 0; + FOR_EACH_EDGE (e, ei, bb->succs) + lsum += e->count; + if (EDGE_COUNT (bb->succs) + && (lsum - bb->count > 100 || lsum - bb->count < -100)) + profile_record[index].num_mismatched_count_out[subpass]++; + } + if (bb != ENTRY_BLOCK_PTR_FOR_FUNCTION (cfun) + && profile_status != PROFILE_ABSENT) + { + sum = 0; + FOR_EACH_EDGE (e, ei, bb->preds) + sum += EDGE_FREQUENCY (e); + if (abs (sum - bb->frequency) > 100 + || (MAX (sum, bb->frequency) > 10 + && abs ((sum - bb->frequency) * 100 / (MAX (sum, bb->frequency) + 1)) > 10)) + profile_record[index].num_mismatched_freq_in[subpass]++; + lsum = 0; + FOR_EACH_EDGE (e, ei, bb->preds) + lsum += e->count; + if (lsum - bb->count > 100 || lsum - bb->count < -100) + profile_record[index].num_mismatched_count_in[subpass]++; + } + if (bb == ENTRY_BLOCK_PTR_FOR_FUNCTION (cfun) + || bb == EXIT_BLOCK_PTR_FOR_FUNCTION (cfun)) + continue; + if ((cfun && (cfun->curr_properties & PROP_trees))) + { + gimple_stmt_iterator i; + + for (i = gsi_start_bb (bb); !gsi_end_p (i); gsi_next (&i)) + { + profile_record[index].size[subpass] + += estimate_num_insns (gsi_stmt (i), &eni_size_weights); + if (profile_status == PROFILE_READ) + profile_record[index].time[subpass] + += estimate_num_insns (gsi_stmt (i), + &eni_time_weights) * bb->count; + else if (profile_status == PROFILE_GUESSED) + profile_record[index].time[subpass] + += estimate_num_insns (gsi_stmt (i), + &eni_time_weights) * bb->frequency; + } + } + else if (cfun && (cfun->curr_properties & PROP_rtl)) + { + rtx insn; + for (insn = NEXT_INSN (BB_HEAD (bb)); insn && insn != NEXT_INSN (BB_END (bb)); + insn = NEXT_INSN (insn)) + if (INSN_P (insn)) + { + profile_record[index].size[subpass] + += insn_rtx_cost (PATTERN (insn), false); + if (profile_status == PROFILE_READ) + profile_record[index].time[subpass] + += insn_rtx_cost (PATTERN (insn), true) * bb->count; + else if (profile_status == PROFILE_GUESSED) + profile_record[index].time[subpass] + += insn_rtx_cost (PATTERN (insn), true) * bb->frequency; + } + } + } +} + +/* Output profile consistency. */ + +void +dump_profile_report (void) +{ + int i, j; + int last_freq_in = 0, last_count_in = 0, last_freq_out = 0, last_count_out = 0; + gcov_type last_time, last_size; + double rel_time_change, rel_size_change; + int last_reported; + + if (!profile_record) + return; + fprintf (stderr, "\nProfile consistency report:\n\n"); + fprintf (stderr, "Pass name |mismatch in |mismated out|Overall\n"); + fprintf (stderr, " |freq count |freq count |size time\n"); + + for (i = 0; i < passes_by_id_size; i++) + for (j = 0 ; j < 2; j++) + if (profile_record[i].run) + { + if (last_time) + rel_time_change = (profile_record[i].time[j] + - (double)last_time) * 100 / (double)last_time; + else + rel_time_change = 0; + if (last_size) + rel_size_change = (profile_record[i].size[j] + - (double)last_size) * 100 / (double)last_size; + else + rel_size_change = 0; + + if (profile_record[i].num_mismatched_freq_in[j] != last_freq_in + || profile_record[i].num_mismatched_freq_out[j] != last_freq_out + || profile_record[i].num_mismatched_count_in[j] != last_count_in + || profile_record[i].num_mismatched_count_out[j] != last_count_out + || rel_time_change || rel_size_change) + { + last_reported = i; + fprintf (stderr, "%-20s %s", + passes_by_id [i]->name, + j ? "(after TODO)" : " "); + if (profile_record[i].num_mismatched_freq_in[j] != last_freq_in) + fprintf (stderr, "| %+5i", + profile_record[i].num_mismatched_freq_in[j] + - last_freq_in); + else + fprintf (stderr, "| "); + if (profile_record[i].num_mismatched_count_in[j] != last_count_in) + fprintf (stderr, " %+5i", + profile_record[i].num_mismatched_count_in[j] + - last_count_in); + else + fprintf (stderr, " "); + if (profile_record[i].num_mismatched_freq_out[j] != last_freq_out) + fprintf (stderr, "| %+5i", + profile_record[i].num_mismatched_freq_out[j] + - last_freq_out); + else + fprintf (stderr, "| "); + if (profile_record[i].num_mismatched_count_out[j] != last_count_out) + fprintf (stderr, " %+5i", + profile_record[i].num_mismatched_count_out[j] + - last_count_out); + else + fprintf (stderr, " "); + + /* Size/time units change across gimple and RTL. */ + if (i == pass_expand.pass.static_pass_number) + fprintf (stderr, "|----------"); + else + { + if (rel_size_change) + fprintf (stderr, "| %+8.4f%%", rel_size_change); + else + fprintf (stderr, "| "); + if (rel_time_change) + fprintf (stderr, " %+8.4f%%", rel_time_change); + } + fprintf (stderr, "\n"); + last_freq_in = profile_record[i].num_mismatched_freq_in[j]; + last_freq_out = profile_record[i].num_mismatched_freq_out[j]; + last_count_in = profile_record[i].num_mismatched_count_in[j]; + last_count_out = profile_record[i].num_mismatched_count_out[j]; + } + else if (j && last_reported != i) + { + last_reported = i; + fprintf (stderr, "%-20s ------------| | |\n", + passes_by_id [i]->name); + } + last_time = profile_record[i].time[j]; + last_size = profile_record[i].size[j]; + } +} + /* Perform all TODO actions that ought to be done on each function. */ static void @@ -2042,9 +2247,14 @@ execute_one_ipa_transform_pass (struct c if (pass->tv_id != TV_NONE) timevar_pop (pass->tv_id); + if (profile_report && cfun && (cfun->curr_properties & PROP_cfg)) + check_profile_consistency (pass->static_pass_number, 0, true); + /* Run post-pass cleanup and verification. */ execute_todo (todo_after); verify_interpass_invariants (); + if (profile_report && cfun && (cfun->curr_properties & PROP_cfg)) + check_profile_consistency (pass->static_pass_number, 1, true); do_per_function (execute_function_dump, NULL); pass_fini_dump_file (pass); @@ -2144,6 +2354,13 @@ execute_one_pass (struct opt_pass *pass) if (!gate_status) { + /* Run so passes selectively disabling themselves on a given function + are not miscounted. */ + if (profile_report && cfun && (cfun->curr_properties & PROP_cfg)) + { + check_profile_consistency (pass->static_pass_number, 0, false); + check_profile_consistency (pass->static_pass_number, 1, false); + } current_pass = NULL; return false; } @@ -2210,8 +2427,14 @@ execute_one_pass (struct opt_pass *pass) clean_graph_dump_file (dump_file_name); } + if (profile_report && cfun && (cfun->curr_properties & PROP_cfg)) + check_profile_consistency (pass->static_pass_number, 0, true); + /* Run post-pass cleanup and verification. */ execute_todo (todo_after | pass->todo_flags_finish); + if (profile_report && cfun && (cfun->curr_properties & PROP_cfg)) + check_profile_consistency (pass->static_pass_number, 1, true); + verify_interpass_invariants (); do_per_function (execute_function_dump, NULL); if (pass->type == IPA_PASS)