On Mon, Apr 14, 2014 at 6:40 PM, Marc Glisse <marc.gli...@inria.fr> wrote: > On Mon, 14 Apr 2014, Richard Biener wrote: > >>> + /* If the special case has a high probability, keep it. */ >>> + if (EDGE_PRED (middle_bb, 0)->probability < PROB_EVEN) >> >> >> I suppose Honza has a comment on how to test this properly >> (not sure if ->probability or ->frequency is always initialized properly). >> for example single_likely_edge tests profile_status_for_fn != >> PROFILE_ABSENT (and uses a fixed probability value ...). >> Anyway, the comparison looks backwards to me, but maybe I'm >> missing sth - I'd use >= PROB_LIKELY ;) > > > Maybe the comment is confusing? middle_bb contains the expensive operation > (say a/b) that the special case skips entirely. If the division happens in > less than 50% of cases (that's the proba of the edge going from cond to > middle_bb), then doing the comparison+jump may be cheaper and I abort the > optimization. At least the testcase with __builtin_expect should prove that > I didn't do it backwards.
Ah, indeed. My mistake. > value-prof seems to use 50% as the cut-off where it may become interesting > to special case division, hence my choice of PROB_EVEN. I am not sure which > way you want to use PROB_LIKELY (80%). If we have more than 80% chances of > executing the division, always perform it? Or if we have more than 80% > chances of skipping the division, keep the branch? Ok, if it's from value-prof then that's fine. The patch is ok if Honza doesn't have any comments on whether it's ok to look at ->probability unconditionally. Thanks, Richard. > Attached is the latest version (passed the testsuite). > Index: gcc/testsuite/gcc.dg/tree-ssa/phi-opt-12.c > =================================================================== > --- gcc/testsuite/gcc.dg/tree-ssa/phi-opt-12.c (revision 0) > +++ gcc/testsuite/gcc.dg/tree-ssa/phi-opt-12.c (working copy) > @@ -0,0 +1,23 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O -fdump-tree-phiopt1" } */ > + > +int f(int a, int b, int c) { > + if (c > 5) return c; > + if (a == 0) return b; > + return a + b; > +} > + > +unsigned rot(unsigned x, int n) { > + const int bits = __CHAR_BIT__ * __SIZEOF_INT__; > + return (n == 0) ? x : ((x << n) | (x >> (bits - n))); > +} > + > +unsigned m(unsigned a, unsigned b) { > + if (a == 0) > + return 0; > + else > + return a & b; > +} > + > +/* { dg-final { scan-tree-dump-times "goto" 2 "phiopt1" } } */ > +/* { dg-final { cleanup-tree-dump "phiopt1" } } */ > Index: gcc/testsuite/gcc.dg/tree-ssa/phi-opt-13.c > =================================================================== > --- gcc/testsuite/gcc.dg/tree-ssa/phi-opt-13.c (revision 0) > +++ gcc/testsuite/gcc.dg/tree-ssa/phi-opt-13.c (working copy) > @@ -0,0 +1,19 @@ > +/* { dg-do compile { target x86_64-*-* } } */ > +/* { dg-options "-O2 -fdump-tree-optimized" } */ > + > +int f(int a, int b) { > + if (__builtin_expect(a == 0, 1)) return b; > + return a + b; > +} > + > +// optab_handler can handle if(b==1) but not a/b > +// so we consider a/b too expensive. > +unsigned __int128 g(unsigned __int128 a, unsigned __int128 b) { > + if (b == 1) > + return a; > + else > + return a / b; > +} > + > +/* { dg-final { scan-tree-dump-times "goto " 4 "optimized" } } */ > +/* { dg-final { cleanup-tree-dump "optimized" } } */ > Index: gcc/tree-ssa-phiopt.c > =================================================================== > --- gcc/tree-ssa-phiopt.c (revision 209353) > +++ gcc/tree-ssa-phiopt.c (working copy) > @@ -140,20 +140,37 @@ static bool gate_hoist_loads (void); > x = PHI (CONST, a) > > Gets replaced with: > bb0: > bb2: > t1 = a == CONST; > t2 = b > c; > t3 = t1 & t2; > x = a; > > + > + It also replaces > + > + bb0: > + if (a != 0) goto bb1; else goto bb2; > + bb1: > + c = a + b; > + bb2: > + x = PHI <c (bb1), b (bb0), ...>; > + > + with > + > + bb0: > + c = a + b; > + bb2: > + x = PHI <c (bb0), ...>; > + > ABS Replacement > --------------- > > This transformation, implemented in abs_replacement, replaces > > bb0: > if (a >= 0) goto bb2; else goto bb1; > bb1: > x = -a; > bb2: > @@ -809,20 +826,103 @@ operand_equal_for_value_replacement (con > if (rhs_is_fed_for_value_replacement (arg0, arg1, code, tmp)) > return true; > > tmp = gimple_assign_rhs2 (def); > if (rhs_is_fed_for_value_replacement (arg0, arg1, code, tmp)) > return true; > > return false; > } > > +/* Returns true if ARG is a neutral element for operation CODE > + on the RIGHT side. */ > + > +static bool > +neutral_element_p (tree_code code, tree arg, bool right) > +{ > + switch (code) > + { > + case PLUS_EXPR: > + case BIT_IOR_EXPR: > + case BIT_XOR_EXPR: > + return integer_zerop (arg); > + > + case LROTATE_EXPR: > + case RROTATE_EXPR: > + case LSHIFT_EXPR: > + case RSHIFT_EXPR: > + case MINUS_EXPR: > + case POINTER_PLUS_EXPR: > + return right && integer_zerop (arg); > + > + case MULT_EXPR: > + return integer_onep (arg); > + > + case TRUNC_DIV_EXPR: > + case CEIL_DIV_EXPR: > + case FLOOR_DIV_EXPR: > + case ROUND_DIV_EXPR: > + case EXACT_DIV_EXPR: > + return right && integer_onep (arg); > + > + case BIT_AND_EXPR: > + return integer_all_onesp (arg); > + > + default: > + return false; > + } > +} > + > +/* Returns true if ARG is an absorbing element for operation CODE. */ > + > +static bool > +absorbing_element_p (tree_code code, tree arg) > +{ > + switch (code) > + { > + case BIT_IOR_EXPR: > + return integer_all_onesp (arg); > + > + case MULT_EXPR: > + case BIT_AND_EXPR: > + return integer_zerop (arg); > + > + default: > + return false; > + } > +} > + > +/* Returns true if the statement is cheap. The simple heuristic used here > + is that anything the optab knows is cheap. */ > + > +static bool > +is_cheap_stmt (gimple stmt) > +{ > + tree type; > + if (is_gimple_assign (stmt)) > + { > + type = TREE_TYPE (gimple_assign_rhs1 (stmt)); > + enum tree_code code = gimple_assign_rhs_code (stmt); > + optab op = optab_for_tree_code (code, type, optab_scalar); > + return (op != unknown_optab > + && optab_handler (op, TYPE_MODE (type)) != CODE_FOR_nothing); > + } > + else if (gimple_code (stmt) == GIMPLE_COND) > + { > + type = TREE_TYPE (gimple_cond_lhs (stmt)); > + enum rtx_code code = (gimple_cond_code (stmt) == EQ_EXPR) ? EQ : NE; > + return can_compare_p (code, TYPE_MODE (type), ccp_jump); > + } > + else > + gcc_unreachable (); > +} > + > /* The function value_replacement does the main work of doing the value > replacement. Return non-zero if the replacement is done. Otherwise > return > 0. If we remove the middle basic block, return 2. > BB is the basic block where the replacement is going to be done on. > ARG0 > is argument 0 from the PHI. Likewise for ARG1. */ > > static int > value_replacement (basic_block cond_bb, basic_block middle_bb, > edge e0, edge e1, gimple phi, > tree arg0, tree arg1) > @@ -833,23 +933,21 @@ value_replacement (basic_block cond_bb, > enum tree_code code; > bool emtpy_or_with_defined_p = true; > > /* If the type says honor signed zeros we cannot do this > optimization. */ > if (HONOR_SIGNED_ZEROS (TYPE_MODE (TREE_TYPE (arg1)))) > return 0; > > /* If there is a statement in MIDDLE_BB that defines one of the PHI > arguments, then adjust arg0 or arg1. */ > - gsi = gsi_after_labels (middle_bb); > - if (!gsi_end_p (gsi) && is_gimple_debug (gsi_stmt (gsi))) > - gsi_next_nondebug (&gsi); > + gsi = gsi_start_nondebug_after_labels_bb (middle_bb); > while (!gsi_end_p (gsi)) > { > gimple stmt = gsi_stmt (gsi); > tree lhs; > gsi_next_nondebug (&gsi); > if (!is_gimple_assign (stmt)) > { > emtpy_or_with_defined_p = false; > continue; > } > @@ -931,20 +1029,67 @@ value_replacement (basic_block cond_bb, > print_generic_expr (dump_file, gimple_phi_result (phi), 0); > fprintf (dump_file, " reduced for COND_EXPR in block %d to ", > cond_bb->index); > print_generic_expr (dump_file, arg, 0); > fprintf (dump_file, ".\n"); > } > return 1; > } > > } > + > + /* Now optimize (x != 0) ? x + y : y to just y. > + The following condition is too restrictive, there can easily be > another > + stmt in middle_bb, for instance a CONVERT_EXPR for the second > argument. */ > + gimple assign = last_and_only_stmt (middle_bb); > + if (!assign || gimple_code (assign) != GIMPLE_ASSIGN > + || gimple_assign_rhs_class (assign) != GIMPLE_BINARY_RHS > + || (!INTEGRAL_TYPE_P (TREE_TYPE (arg0)) > + && !POINTER_TYPE_P (TREE_TYPE (arg0)))) > + return 0; > + > + /* assign may call a libgcc routine, which is slow. */ > + if (!is_cheap_stmt (assign) && is_cheap_stmt (cond)) > + return 0; > + > + /* If the special case has a high probability, keep it. */ > + if (EDGE_PRED (middle_bb, 0)->probability < PROB_EVEN) > + return 0; > + > + tree lhs = gimple_assign_lhs (assign); > + tree rhs1 = gimple_assign_rhs1 (assign); > + tree rhs2 = gimple_assign_rhs2 (assign); > + enum tree_code code_def = gimple_assign_rhs_code (assign); > + tree cond_lhs = gimple_cond_lhs (cond); > + tree cond_rhs = gimple_cond_rhs (cond); > + > + if (((code == NE_EXPR && e1 == false_edge) > + || (code == EQ_EXPR && e1 == true_edge)) > + && arg0 == lhs > + && ((arg1 == rhs1 > + && operand_equal_for_phi_arg_p (rhs2, cond_lhs) > + && neutral_element_p (code_def, cond_rhs, true)) > + || (arg1 == rhs2 > + && operand_equal_for_phi_arg_p (rhs1, cond_lhs) > + && neutral_element_p (code_def, cond_rhs, false)) > + || (operand_equal_for_phi_arg_p (arg1, cond_rhs) > + && (operand_equal_for_phi_arg_p (rhs2, cond_lhs) > + || operand_equal_for_phi_arg_p (rhs1, cond_lhs)) > + && absorbing_element_p (code_def, cond_rhs)))) > + { > + gsi = gsi_for_stmt (cond); > + gimple_stmt_iterator gsi_from = gsi_for_stmt (assign); > + gsi_move_before (&gsi_from, &gsi); > + replace_phi_edge_with_variable (cond_bb, e1, phi, lhs); > + return 2; > + } > + > return 0; > } > > /* The function minmax_replacement does the main work of doing the minmax > replacement. Return true if the replacement is done. Otherwise return > false. > BB is the basic block where the replacement is going to be done on. > ARG0 > is argument 0 from the PHI. Likewise for ARG1. */ > > static bool >