On Thu, Feb 2, 2017 at 12:29 PM, Jan Hubicka <hubi...@ucw.cz> wrote: > Hi, > this patches fixes profile updating in the ifcombine. This is not hard to do > and ifcombine is #2 profile update offender out of tree passes (#1 is the > vectorizer). > > I think this counts as a regression, becuase one can trigger arbitrarily > bad profile after ifconversion and defnitly construct a testcase where this > will cause us optimize for size where we optimized for speed previously.
This might be related to PR 78200 but I could be wrong. Thanks, Andrew > > Bootstrapped/regtested x86_64-linux. Will commit it tomorrow (after testers > pick up the threading fix) unless there are complains. > > * gcc.dg/tree-ssa/ssa-ifcombine-1.c: Check for no profile mismatches. > * gcc.dg/tree-ssa/ssa-ifcombine-2.c: Check for no profile mismatches. > * gcc.dg/tree-ssa/ssa-ifcombine-3.c: Check for no profile mismatches. > * gcc.dg/tree-ssa/ssa-ifcombine-4.c: Check for no profile mismatches. > * gcc.dg/tree-ssa/ssa-ifcombine-5.c: Check for no profile mismatches. > * gcc.dg/tree-ssa/ssa-ifcombine-6.c: Check for no profile mismatches. > * gcc.dg/tree-ssa/ssa-ifcombine-7.c: Check for no profile mismatches. > * gcc.dg/tree-ssa/ssa-ifcombine-8.c: Check for no profile mismatches. > * gcc.dg/tree-ssa/ssa-ifcombine-9.c: Check for no profile mismatches. > * gcc.dg/tree-ssa/ssa-ifcombine-10.c: Check for no profile mismatches. > * gcc.dg/tree-ssa/ssa-ifcombine-11.c: Check for no profile mismatches. > * gcc.dg/tree-ssa/ssa-ifcombine-12.c: Check for no profile mismatches. > * gcc.dg/tree-ssa/ssa-ifcombine-13.c: Check for no profile mismatches. > > * tree-ssa-ifcombine.c (update_profile_after_ifcombine): New function. > (ifcombine_ifandif): Use it. > > Index: testsuite/gcc.dg/tree-ssa/ssa-ifcombine-1.c > =================================================================== > --- testsuite/gcc.dg/tree-ssa/ssa-ifcombine-1.c (revision 245134) > +++ testsuite/gcc.dg/tree-ssa/ssa-ifcombine-1.c (working copy) > @@ -1,5 +1,5 @@ > /* { dg-do compile } */ > -/* { dg-options "-O -fdump-tree-optimized" } */ > +/* { dg-options "-O -fdump-tree-optimized-details-blocks" } */ > > /* Testcase for PR31657. */ > > @@ -14,3 +14,4 @@ int foo (int x, int a, int b) > } > > /* { dg-final { scan-tree-dump "\\|" "optimized" } } */ > +/* { dg-final { scan-tree-dump-not "Invalid sum" "optimized" } } */ > Index: testsuite/gcc.dg/tree-ssa/ssa-ifcombine-10.c > =================================================================== > --- testsuite/gcc.dg/tree-ssa/ssa-ifcombine-10.c (revision 245134) > +++ testsuite/gcc.dg/tree-ssa/ssa-ifcombine-10.c (working copy) > @@ -1,5 +1,5 @@ > /* { dg-do compile } */ > -/* { dg-options "-O2 -fdump-tree-optimized" } */ > +/* { dg-options "-O2 -fdump-tree-optimized-details-blocks" } */ > > /* Testcase for PR31657. */ > > @@ -17,3 +17,4 @@ int f(int x, int a, int b) > return t; > } > /* { dg-final { scan-tree-dump "& 5" "optimized" } } */ > +/* { dg-final { scan-tree-dump-not "Invalid sum" "optimized" } } */ > Index: testsuite/gcc.dg/tree-ssa/ssa-ifcombine-11.c > =================================================================== > --- testsuite/gcc.dg/tree-ssa/ssa-ifcombine-11.c (revision 245134) > +++ testsuite/gcc.dg/tree-ssa/ssa-ifcombine-11.c (working copy) > @@ -1,5 +1,5 @@ > /* { dg-do compile } */ > -/* { dg-options "-O -fdump-tree-optimized" } */ > +/* { dg-options "-O -fdump-tree-optimized-details-blocks" } */ > > /* Testcase for PR31657. */ > int g(void); > @@ -18,3 +18,4 @@ int f(int x, int a, int b) > } > > /* { dg-final { scan-tree-dump "& 5" "optimized" } } */ > +/* { dg-final { scan-tree-dump-not "Invalid sum" "optimized" } } */ > Index: testsuite/gcc.dg/tree-ssa/ssa-ifcombine-12.c > =================================================================== > --- testsuite/gcc.dg/tree-ssa/ssa-ifcombine-12.c (revision 245134) > +++ testsuite/gcc.dg/tree-ssa/ssa-ifcombine-12.c (working copy) > @@ -1,5 +1,5 @@ > /* { dg-do compile } */ > -/* { dg-options "-O2 -fno-tree-vrp -fdump-tree-optimized" } */ > +/* { dg-options "-O2 -fno-tree-vrp -fdump-tree-optimized-details-blocks" } */ > > /* Testcase for PR31657. */ > > @@ -17,3 +17,4 @@ int f(int x, int a, int b) > return t; > } > /* { dg-final { scan-tree-dump "& 5" "optimized" } } */ > +/* { dg-final { scan-tree-dump-not "Invalid sum" "optimized" } } */ > Index: testsuite/gcc.dg/tree-ssa/ssa-ifcombine-13.c > =================================================================== > --- testsuite/gcc.dg/tree-ssa/ssa-ifcombine-13.c (revision 245134) > +++ testsuite/gcc.dg/tree-ssa/ssa-ifcombine-13.c (working copy) > @@ -1,5 +1,5 @@ > /* { dg-do compile } */ > -/* { dg-options "-O1 -fdump-tree-optimized" } */ > +/* { dg-options "-O1 -fdump-tree-optimized-details-blocks" } */ > /* { dg-additional-options "-mbranch-cost=2" { target { i?86-*-* x86_64-*-* > s390*-*-* avr*-*-* } } } */ > > _Bool f1(_Bool a, _Bool b) > @@ -18,3 +18,4 @@ _Bool f1(_Bool a, _Bool b) > /* For LOGICAL_OP_NON_SHORT_CIRCUIT, this should be optimized > into return a & b;, with no ifs. */ > /* { dg-final { scan-tree-dump-not "if" "optimized" { target { i?86-*-* > x86_64-*-* s390*-*-* avr*-*-* } } } } */ > +/* { dg-final { scan-tree-dump-not "Invalid sum" "optimized" } } */ > Index: testsuite/gcc.dg/tree-ssa/ssa-ifcombine-2.c > =================================================================== > --- testsuite/gcc.dg/tree-ssa/ssa-ifcombine-2.c (revision 245134) > +++ testsuite/gcc.dg/tree-ssa/ssa-ifcombine-2.c (working copy) > @@ -1,5 +1,5 @@ > /* { dg-do compile } */ > -/* { dg-options "-O -fdump-tree-optimized" } */ > +/* { dg-options "-O -fdump-tree-optimized-details-blocks" } */ > > /* Testcase for PR31657. */ > > @@ -20,3 +20,4 @@ doit: > } > > /* { dg-final { scan-tree-dump "\\|" "optimized" } } */ > +/* { dg-final { scan-tree-dump-not "Invalid sum" "optimized" } } */ > Index: testsuite/gcc.dg/tree-ssa/ssa-ifcombine-3.c > =================================================================== > --- testsuite/gcc.dg/tree-ssa/ssa-ifcombine-3.c (revision 245134) > +++ testsuite/gcc.dg/tree-ssa/ssa-ifcombine-3.c (working copy) > @@ -1,5 +1,5 @@ > /* { dg-do compile } */ > -/* { dg-options "-O -fdump-tree-optimized" } */ > +/* { dg-options "-O -fdump-tree-optimized-details-blocks" } */ > > /* Testcase extracted from PR15353. */ > > @@ -20,3 +20,4 @@ doit: > } > > /* { dg-final { scan-tree-dump ">=" "optimized" } } */ > +/* { dg-final { scan-tree-dump-not "Invalid sum" "optimized" } } */ > Index: testsuite/gcc.dg/tree-ssa/ssa-ifcombine-4.c > =================================================================== > --- testsuite/gcc.dg/tree-ssa/ssa-ifcombine-4.c (revision 245134) > +++ testsuite/gcc.dg/tree-ssa/ssa-ifcombine-4.c (working copy) > @@ -1,5 +1,5 @@ > /* { dg-do compile } */ > -/* { dg-options "-O -fdump-tree-optimized" } */ > +/* { dg-options "-O -fdump-tree-optimized-details-blocks" } */ > > /* Testcase extracted from PR15353. */ > > @@ -18,3 +18,4 @@ void foo (int x, int a) > } > > /* { dg-final { scan-tree-dump "!=" "optimized" } } */ > +/* { dg-final { scan-tree-dump-not "Invalid sum" "optimized" } } */ > Index: testsuite/gcc.dg/tree-ssa/ssa-ifcombine-5.c > =================================================================== > --- testsuite/gcc.dg/tree-ssa/ssa-ifcombine-5.c (revision 245134) > +++ testsuite/gcc.dg/tree-ssa/ssa-ifcombine-5.c (working copy) > @@ -1,5 +1,5 @@ > /* { dg-do compile } */ > -/* { dg-options "-O -fdump-tree-optimized" } */ > +/* { dg-options "-O -fdump-tree-optimized-details-blocks" } */ > > /* Testcase from PR15353. */ > > @@ -17,3 +17,4 @@ int f(int *i, int *j) > } > > /* { dg-final { scan-tree-dump ">=" "optimized" } } */ > +/* { dg-final { scan-tree-dump-not "Invalid sum" "optimized" } } */ > Index: testsuite/gcc.dg/tree-ssa/ssa-ifcombine-6.c > =================================================================== > --- testsuite/gcc.dg/tree-ssa/ssa-ifcombine-6.c (revision 245134) > +++ testsuite/gcc.dg/tree-ssa/ssa-ifcombine-6.c (working copy) > @@ -1,5 +1,5 @@ > /* { dg-do compile } */ > -/* { dg-options "-O -fdump-tree-ifcombine" } */ > +/* { dg-options "-O -fdump-tree-ifcombine-details-blocks" } */ > > void bar (void); > > @@ -34,3 +34,4 @@ foo2 (unsigned int a) > > /* { dg-final { scan-tree-dump "optimizing bits or bits test" "ifcombine" } > } */ > /* { dg-final { scan-tree-dump "optimizing double bit test" "ifcombine" } } > */ > +/* { dg-final { scan-tree-dump-not "Invalid sum" "ifcombine" } } */ > Index: testsuite/gcc.dg/tree-ssa/ssa-ifcombine-7.c > =================================================================== > --- testsuite/gcc.dg/tree-ssa/ssa-ifcombine-7.c (revision 245134) > +++ testsuite/gcc.dg/tree-ssa/ssa-ifcombine-7.c (working copy) > @@ -1,5 +1,5 @@ > /* { dg-do compile } */ > -/* { dg-options "-O -fdump-tree-ifcombine" } */ > +/* { dg-options "-O -fdump-tree-ifcombine-details-blocks" } */ > > int test1 (int i, int j) > { > @@ -12,3 +12,4 @@ int test1 (int i, int j) > /* The above should be optimized to a i > j test by ifcombine. */ > > /* { dg-final { scan-tree-dump " > " "ifcombine" } } */ > +/* { dg-final { scan-tree-dump-not "Invalid sum" "ifcombine" } } */ > Index: testsuite/gcc.dg/tree-ssa/ssa-ifcombine-8.c > =================================================================== > --- testsuite/gcc.dg/tree-ssa/ssa-ifcombine-8.c (revision 245134) > +++ testsuite/gcc.dg/tree-ssa/ssa-ifcombine-8.c (working copy) > @@ -1,5 +1,5 @@ > /* { dg-do compile } */ > -/* { dg-options "-O -fno-trapping-math -fdump-tree-ifcombine" } */ > +/* { dg-options "-O -fno-trapping-math -fdump-tree-ifcombine-details-blocks" > } */ > > double test1 (double i, double j) > { > @@ -22,3 +22,4 @@ plouf: > Instead we get u<=, which is acceptable with -fno-trapping-math. */ > > /* { dg-final { scan-tree-dump " u<= " "ifcombine" } } */ > +/* { dg-final { scan-tree-dump-not "Invalid sum" "ifcombine" } } */ > Index: testsuite/gcc.dg/tree-ssa/ssa-ifcombine-9.c > =================================================================== > --- testsuite/gcc.dg/tree-ssa/ssa-ifcombine-9.c (revision 245134) > +++ testsuite/gcc.dg/tree-ssa/ssa-ifcombine-9.c (working copy) > @@ -1,5 +1,5 @@ > /* { dg-do compile } */ > -/* { dg-options "-O2 -fno-trapping-math -fdump-tree-ifcombine" } */ > +/* { dg-options "-O2 -fno-trapping-math > -fdump-tree-ifcombine-details-blocks" } */ > > void f (); > enum Sign { NEG=-1, ZERO, POS }; > @@ -19,3 +19,4 @@ void g (double x) > The transformation would also be legal with -ftrapping-math. */ > > /* { dg-final { scan-tree-dump "optimizing.* < " "ifcombine" } } */ > +/* { dg-final { scan-tree-dump-not "Invalid sum" "ifcombine" } } */ > Index: testsuite/gcc.dg/tree-ssa/threadbackward-1.c > =================================================================== > --- testsuite/gcc.dg/tree-ssa/threadbackward-1.c (revision 0) > +++ testsuite/gcc.dg/tree-ssa/threadbackward-1.c (working copy) > @@ -0,0 +1,9 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O2 -fdump-tree-ethread" } */ > +char *c; > +int t() > +{ > + for (int i=0;i<5000;i++) > + c[i]=i; > +} > +/* { dg-final { scan-tree-dump-times "Registering FSM jump thread" 1 > "ethread"} } */ > Index: tree-ssa-ifcombine.c > =================================================================== > --- tree-ssa-ifcombine.c (revision 245134) > +++ tree-ssa-ifcombine.c (working copy) > @@ -332,6 +332,51 @@ recognize_bits_test (gcond *cond, tree * > return true; > } > > + > +/* Update profile after code in outer_cond_bb was adjuted so > + outer_cond_bb has no condition. */ > + > +static void > +update_profile_after_ifcombine (basic_block inner_cond_bb, > + basic_block outer_cond_bb) > +{ > + edge outer_to_inner = find_edge (outer_cond_bb, inner_cond_bb); > + edge outer2 = (EDGE_SUCC (outer_cond_bb, 0) == outer_to_inner > + ? EDGE_SUCC (outer_cond_bb, 1) > + : EDGE_SUCC (outer_cond_bb, 0)); > + edge inner_taken = EDGE_SUCC (inner_cond_bb, 0); > + edge inner_not_taken = EDGE_SUCC (inner_cond_bb, 1); > + > + if (inner_taken->dest != outer2->dest) > + std::swap (inner_taken, inner_not_taken); > + gcc_assert (inner_taken->dest == outer2->dest); > + > + /* In the following we assume that inner_cond_bb has single predecessor. > */ > + gcc_assert (single_pred_p (inner_cond_bb)); > + > + /* Path outer_cond_bb->(outer2) needs to be merged into path > + outer_cond_bb->(outer_to_inner)->inner_cond_bb->(inner_taken) > + and probability of inner_not_taken updated. */ > + > + outer_to_inner->count = outer_cond_bb->count; > + inner_cond_bb->count = outer_cond_bb->count; > + inner_taken->count += outer2->count; > + outer2->count = 0; > + > + inner_taken->probability = outer2->probability > + + RDIV (outer_to_inner->probability > + * inner_taken->probability, > + REG_BR_PROB_BASE); > + if (inner_taken->probability > REG_BR_PROB_BASE) > + inner_taken->probability = REG_BR_PROB_BASE; > + inner_not_taken->probability = REG_BR_PROB_BASE > + - inner_taken->probability; > + > + outer_to_inner->probability = REG_BR_PROB_BASE; > + inner_cond_bb->frequency = outer_cond_bb->frequency; > + outer2->probability = 0; > +} > + > /* If-convert on a and pattern with a common else block. The inner > if is specified by its INNER_COND_BB, the outer by OUTER_COND_BB. > inner_inv, outer_inv and result_inv indicate whether the conditions > @@ -394,6 +439,7 @@ ifcombine_ifandif (basic_block inner_con > outer_inv ? boolean_false_node : boolean_true_node); > update_stmt (outer_cond); > + update_profile_after_ifcombine (inner_cond_bb, outer_cond_bb); > > if (dump_file) > { > fprintf (dump_file, "optimizing double bit test to "); > @@ -471,6 +518,7 @@ ifcombine_ifandif (basic_block inner_con > gimple_cond_set_condition_from_tree (outer_cond, > outer_inv ? boolean_false_node : boolean_true_node); > update_stmt (outer_cond); > + update_profile_after_ifcombine (inner_cond_bb, outer_cond_bb); > > if (dump_file) > { > @@ -554,6 +602,7 @@ ifcombine_ifandif (basic_block inner_con > gimple_cond_set_condition_from_tree (outer_cond, > outer_inv ? boolean_false_node : boolean_true_node); > update_stmt (outer_cond); > + update_profile_after_ifcombine (inner_cond_bb, outer_cond_bb); > > if (dump_file) > {