Hi,
this patch implements -fprofile-consistency-report that is useful to get an
statistic about what pass are major offenders in keeping profile up-to-date.

For example the following is output for combine.c
 Pass: fnsplit              (after pass) mismatched in:  +16 (freqs)   +0 
(counts); michmatched out:   +0 (freqs)   +0 (counts)
 Pass: fnsplit              (after TODO) mismatched in:  -16 (freqs)   +0 
(counts); michmatched out:   +0 (freqs)   +0 (counts)
 Pass: inline               (after pass) mismatched in: +197 (freqs)   +0 
(counts); michmatched out:   +0 (freqs)   +0 (counts)
 Pass: inline               (after TODO) mismatched in: -209 (freqs)   +0 
(counts); michmatched out:   +0 (freqs)   +0 (counts)
 Pass: ccp                  (after TODO) mismatched in:   +8 (freqs)   +0 
(counts); michmatched out:   +0 (freqs)   +0 (counts)
 Pass: vrp                  (after pass) mismatched in: +191 (freqs)   +0 
(counts); michmatched out:   +0 (freqs)   +0 (counts)
 Pass: vrp                  (after TODO) mismatched in:  +25 (freqs)   +0 
(counts); michmatched out:   +0 (freqs)   +0 (counts)
 Pass: dce                  (after TODO) mismatched in:  -19 (freqs)   +0 
(counts); michmatched out:   +0 (freqs)   +0 (counts)
 Pass: cdce                 (after pass) mismatched in:   -1 (freqs)   +0 
(counts); michmatched out:   +0 (freqs)   +0 (counts)
 Pass: cselim               (after pass) mismatched in:   +1 (freqs)   +0 
(counts); michmatched out:   +0 (freqs)   +0 (counts)
 Pass: ifcombine            (after TODO) mismatched in:   +1 (freqs)   +0 
(counts); michmatched out:   +0 (freqs)   +0 (counts)
 Pass: phiopt               (after pass) mismatched in:   -2 (freqs)   +0 
(counts); michmatched out:   +0 (freqs)   +0 (counts)
 Pass: ch                   (after pass) mismatched in:   +2 (freqs)   +0 
(counts); michmatched out:   +0 (freqs)   +0 (counts)
 Pass: ch                   (after TODO) mismatched in:   +1 (freqs)   +0 
(counts); michmatched out:   +0 (freqs)   +0 (counts)
 Pass: dom                  (after pass) mismatched in:  +89 (freqs)   +0 
(counts); michmatched out:   +0 (freqs)   +0 (counts)
 Pass: dom                  (after TODO) mismatched in:   -3 (freqs)   +0 
(counts); michmatched out:   +0 (freqs)   +0 (counts)
 Pass: phicprop             (after TODO) mismatched in:   -6 (freqs)   +0 
(counts); michmatched out:   +0 (freqs)   +0 (counts)
 Pass: dce                  (after TODO) mismatched in:   -2 (freqs)   +0 
(counts); michmatched out:   +0 (freqs)   +0 (counts)
 Pass: copyprop             (after TODO) mismatched in:  -17 (freqs)   +0 
(counts); michmatched out:   +0 (freqs)   +0 (counts)
 Pass: unswitch             (after pass) mismatched in:   +7 (freqs)   +0 
(counts); michmatched out:   +0 (freqs)   +0 (counts)
 Pass: unswitch             (after TODO) mismatched in:  +19 (freqs)   +0 
(counts); michmatched out:   +0 (freqs)   +0 (counts)
 Pass: cunroll              (after pass) mismatched in:  +10 (freqs)   +0 
(counts); michmatched out:   +0 (freqs)   +0 (counts)
 Pass: vrp                  (after pass) mismatched in:  +18 (freqs)   +0 
(counts); michmatched out:   +0 (freqs)   +0 (counts)
 Pass: vrp                  (after TODO) mismatched in:   -8 (freqs)   +0 
(counts); michmatched out:   +0 (freqs)   +0 (counts)
 Pass: dom                  (after pass) mismatched in:  +14 (freqs)   +0 
(counts); michmatched out:   +0 (freqs)   +0 (counts)
 Pass: dom                  (after TODO) mismatched in:   +4 (freqs)   +0 
(counts); michmatched out:   +0 (freqs)   +0 (counts)
 Pass: phicprop             (after TODO) mismatched in:   +1 (freqs)   +0 
(counts); michmatched out:   +0 (freqs)   +0 (counts)
 Pass: cddce                (after TODO) mismatched in:   -1 (freqs)   +0 
(counts); michmatched out:   +0 (freqs)   +0 (counts)
 Pass: expand               (after pass) mismatched in: +435 (freqs)   +0 
(counts); michmatched out:   +0 (freqs)   +0 (counts)
 Pass: jump                 (after pass) mismatched in:   +6 (freqs)   +0 
(counts); michmatched out:   +0 (freqs)   +0 (counts)
 Pass: cse1                 (after pass) mismatched in:   +1 (freqs)   +0 
(counts); michmatched out:   +0 (freqs)   +0 (counts)
 Pass: cprop                (after pass) mismatched in:   -8 (freqs)   +0 
(counts); michmatched out:   +0 (freqs)   +0 (counts)
 Pass: rtl pre              (after pass) mismatched in:   -1 (freqs)   +0 
(counts); michmatched out:   +0 (freqs)   +0 (counts)
 Pass: cse_local            (after pass) mismatched in:   -7 (freqs)   +0 
(counts); michmatched out:   +0 (freqs)   +0 (counts)
 Pass: ce1                  (after pass) mismatched in:   +5 (freqs)   +0 
(counts); michmatched out:   +0 (freqs)   +0 (counts)
 Pass: loop2_init           (after pass) mismatched in:   +1 (freqs)   +0 
(counts); michmatched out:   +0 (freqs)   +0 (counts)
 Pass: loop2_done           (after pass) mismatched in:   -1 (freqs)   +0 
(counts); michmatched out:   +0 (freqs)   +0 (counts)
 Pass: reload               (after pass) mismatched in:   -4 (freqs)   +0 
(counts); michmatched out:   +0 (freqs)   +0 (counts)
 Pass: gcse2                (after pass) mismatched in:   -1 (freqs)   +0 
(counts); michmatched out:   +0 (freqs)   +0 (counts)
 Pass: split2               (after pass) mismatched in:   +1 (freqs)   +0 
(counts); michmatched out:   +0 (freqs)   +0 (counts)
 Pass: pro_and_epilogue     (after pass) mismatched in:   +4 (freqs)   +0 
(counts); michmatched out:   +0 (freqs)   +0 (counts)
 Pass: dse2                 (after pass) mismatched in:   -2 (freqs)   +0 
(counts); michmatched out:   +0 (freqs)   +0 (counts)
 Pass: jump2                (after pass) mismatched in:   -1 (freqs)   +0 
(counts); michmatched out:   +0 (freqs)   +0 (counts)
 Pass: ce3                  (after pass) mismatched in:   -1 (freqs)   +0 
(counts); michmatched out:   +0 (freqs)   +0 (counts)
 Pass: bbro                 (after pass) mismatched in:   -6 (freqs)   +0 
(counts); michmatched out:   +0 (freqs)   +0 (counts)
 Pass: compgotos            (after pass) mismatched in:   -1 (freqs)   +0 
(counts); michmatched out:   +0 (freqs)   +0 (counts)

As a quick explanation, inliner is recomputing frequencies as part of TODO, so
the divergence.  VRP and DOM breaks a lot of probabilities by jump threading
where the original estimate was inconsistent (it would make sense to do some of
jump threading prior profile estimate).
Expansion obviously needs a lot of TLC, not only in switch expansion that
is currently all wrong and quite commonly excercised in combine.c

Regtested/bootstrapped x86_64-linux.  If there are no complains I will commit
the patch tomorrow.

Honza
        * doc/invoke.texi (-fprofile-consistency-report): Document.
        * common.opt (fprofile-consistency-report): New.
        * toplev.h (dump_profile_consistency_report): Declare.
        * toplev.c (finalize): Call dump_profile_consistency_report.
        * passes.c (profile_record): New global var.
        (check_profile_consistency): New function.
        (dump_profile_consistency_report): New function.
        (execute_one_ipa_transform_pass): Call check_profile_consistency.
        (execute_one_pass): Likewise.
Index: doc/invoke.texi
===================================================================
--- doc/invoke.texi     (revision 191852)
+++ doc/invoke.texi     (working copy)
@@ -386,7 +386,7 @@ Objective-C and Objective-C++ Dialects}.
 -fno-toplevel-reorder -fno-trapping-math -fno-zero-initialized-in-bss @gol
 -fomit-frame-pointer -foptimize-register-move -foptimize-sibling-calls @gol
 -fpartial-inlining -fpeel-loops -fpredictive-commoning @gol
--fprefetch-loop-arrays @gol
+-fprefetch-loop-arrays -fprofile-consistency-report @gol
 -fprofile-correction -fprofile-dir=@var{path} -fprofile-generate @gol
 -fprofile-generate=@var{path} @gol
 -fprofile-use -fprofile-use=@var{path} -fprofile-values @gol
@@ -5149,6 +5149,11 @@ allocation for the WPA phase only.
 Makes the compiler print some statistics about permanent memory
 allocation before or after interprocedural optimization.
 
+@item -fprofile-consistency-report
+@opindex fprofile-consistency-report
+Makes the compiler print some statistics about consistency of the
+(estimated) profile.
+
 @item -fstack-usage
 @opindex fstack-usage
 Makes the compiler output stack usage information for the program, on a
Index: common.opt
===================================================================
--- common.opt  (revision 191852)
+++ common.opt  (working copy)
@@ -1649,6 +1649,10 @@ fprofile-values
 Common Report Var(flag_profile_values)
 Insert code to profile values of expressions
 
+fprofile-consistency-report
+Common Report Var(profile_report)
+Report on consistency of profile
+
 frandom-seed
 Common Var(common_deferred_options) Defer
 
Index: toplev.c
===================================================================
--- toplev.c    (revision 191852)
+++ toplev.c    (working copy)
@@ -1815,6 +1815,9 @@ finalize (bool no_backend)
   if (mem_report)
     dump_memory_report (true);
 
+  if (profile_report)
+    dump_profile_consistency_report ();
+
   /* Language-specific end of compilation actions.  */
   lang_hooks.finish ();
 }
Index: passes.c
===================================================================
--- passes.c    (revision 191852)
+++ passes.c    (working copy)
@@ -1782,6 +1784,108 @@ execute_function_dump (void *data ATTRIB
     }
 }
 
+/* Hold statistic about profile consistency.  */
+
+struct profile_record
+{
+  int num_mismatched_freq_in[2];
+  int num_mismatched_freq_out[2];
+  int num_mismatched_count_in[2];
+  int num_mismatched_count_out[2];
+  bool tested;
+};
+
+static struct profile_record *profile_record;
+
+/* Account profile inconsistencies for pass INDEX. If SUBPASS is non-zero, the
+   accounting happens after TODO.  */
+
+static void
+check_profile_consistency (int index, int subpass)
+{
+  basic_block bb;
+  edge_iterator ei;
+  edge e;
+  int sum;
+  gcov_type lsum;
+
+  if (index == -1)
+    return;
+  if (!profile_record)
+    profile_record = XCNEWVEC (struct profile_record,
+                              passes_by_id_size);
+  gcc_assert (index < passes_by_id_size && index >= 0);
+  gcc_assert (subpass < 2);
+  profile_record[index].tested = true;
+
+  FOR_ALL_BB (bb)
+   {
+      if (bb != EXIT_BLOCK_PTR_FOR_FUNCTION (cfun))
+       {
+         sum = 0;
+         FOR_EACH_EDGE (e, ei, bb->succs)
+           sum += e->probability;
+         if (EDGE_COUNT (bb->succs) && abs (sum - REG_BR_PROB_BASE) > 100)
+           profile_record[index].num_mismatched_freq_out[subpass]++;
+         lsum = 0;
+         FOR_EACH_EDGE (e, ei, bb->succs)
+           lsum += e->count;
+         if (EDGE_COUNT (bb->succs)
+             && (lsum - bb->count > 100 || lsum - bb->count < -100))
+           profile_record[index].num_mismatched_count_out[subpass]++;
+       }
+       if (bb != ENTRY_BLOCK_PTR_FOR_FUNCTION (cfun))
+         {
+           sum = 0;
+           FOR_EACH_EDGE (e, ei, bb->preds)
+             sum += EDGE_FREQUENCY (e);
+           if (abs (sum - bb->frequency) > 100
+               || (MAX (sum, bb->frequency) > 10
+                   && abs ((sum - bb->frequency) * 100 / (MAX (sum, 
bb->frequency) + 1)) > 10))
+             profile_record[index].num_mismatched_freq_in[subpass]++;
+           lsum = 0;
+           FOR_EACH_EDGE (e, ei, bb->preds)
+             lsum += e->count;
+           if (lsum - bb->count > 100 || lsum - bb->count < -100)
+             profile_record[index].num_mismatched_count_in[subpass]++;
+         }
+   }
+}
+
+/* Output profile consistency.  */
+
+void
+dump_profile_consistency_report (void)
+{
+  int i, j;
+  int last_freq_in = 0, last_count_in = 0, last_freq_out = 0, last_count_out = 
0;
+
+  if (!profile_record)
+    return;
+  fprintf (stderr, "\nProfile consistency report:\n");
+  for (i = 0; i < passes_by_id_size; i++)
+    for (j = 0 ; j < 2; j++)
+      if ((profile_record[i].num_mismatched_freq_in[j] != last_freq_in
+          || profile_record[i].num_mismatched_freq_out[j] != last_freq_out
+          || profile_record[i].num_mismatched_count_in[j] != last_count_in
+          || profile_record[i].num_mismatched_count_out[j] != last_count_out)
+         && profile_record[i].tested)
+      {
+       fprintf (stderr," Pass: %-20s %s mismatched in: %+4i (freqs) %+4i 
(counts); "
+                "michmatched out: %+4i (freqs) %+4i (counts)\n",
+                passes_by_id [i]->name,
+                j ? "(after TODO)" : "(after pass)",
+                profile_record[i].num_mismatched_freq_in[j] - last_freq_in,
+                profile_record[i].num_mismatched_count_in[j] - last_count_in,
+                profile_record[i].num_mismatched_freq_out[j] - last_freq_out,
+                profile_record[i].num_mismatched_count_out[j] - 
last_count_out);
+       last_freq_in = profile_record[i].num_mismatched_freq_in[j];
+       last_freq_out = profile_record[i].num_mismatched_freq_out[j];
+       last_count_in = profile_record[i].num_mismatched_count_in[j];
+       last_count_out = profile_record[i].num_mismatched_count_out[j];
+      }
+}
+
 /* Perform all TODO actions that ought to be done on each function.  */
 
 static void
@@ -2050,9 +2150,16 @@ execute_one_ipa_transform_pass (struct c
   if (pass->tv_id != TV_NONE)
     timevar_pop (pass->tv_id);
 
+  if (profile_report && cfun && (cfun->curr_properties & PROP_cfg)
+      && profile_status != PROFILE_ABSENT)
+    check_profile_consistency (pass->static_pass_number, 0);
+
   /* Run post-pass cleanup and verification.  */
   execute_todo (todo_after);
   verify_interpass_invariants ();
+  if (profile_report && cfun && (cfun->curr_properties & PROP_cfg)
+      && profile_status != PROFILE_ABSENT)
+    check_profile_consistency (pass->static_pass_number, 1);
 
   do_per_function (execute_function_dump, NULL);
   pass_fini_dump_file (pass);
@@ -2218,8 +2325,16 @@ execute_one_pass (struct opt_pass *pass)
       clean_graph_dump_file (dump_file_name);
     }
 
+  if (profile_report && cfun && (cfun->curr_properties & PROP_cfg)
+      && profile_status != PROFILE_ABSENT)
+    check_profile_consistency (pass->static_pass_number, 0);
+
   /* Run post-pass cleanup and verification.  */
   execute_todo (todo_after | pass->todo_flags_finish);
+  if (profile_report && cfun && (cfun->curr_properties & PROP_cfg)
+      && profile_status != PROFILE_ABSENT)
+    check_profile_consistency (pass->static_pass_number, 1);
+
   verify_interpass_invariants ();
   do_per_function (execute_function_dump, NULL);
   if (pass->type == IPA_PASS)
Index: toplev.h
===================================================================
--- toplev.h    (revision 191852)
+++ toplev.h    (working copy)
@@ -49,6 +49,7 @@ extern void emit_debug_global_declaratio
 extern void write_global_declarations (void);
 
 extern void dump_memory_report (bool);
+extern void dump_profile_consistency_report (void);
 
 extern void target_reinit (void);
 

Reply via email to