On Wed, 12 Sep 2018, Jakub Jelinek wrote:

> Hi!
>
> This patch optimizes signed x % C1 == C2 expansion if it is beneficial.
> x % C1 == 0 should be handled unconditionally in match.pd (previous patch);
> this only handles the cases where C1 is a positive power of two and abs (C2)
> is smaller than it and non-zero.
>
> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?
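Concretely, for C1 == 16 on a 32-bit int the patch's mask is C3 = (1 << 31) | 15 == 0x8000000f: the low bits carry the remainder and the sign bit separates the C2 > 0 and C2 < 0 cases (for negative C2 the compare value becomes C2 & C3). Below is a minimal standalone sketch, not part of the patch, that brute-force checks the identity the new expansion relies on:

#include <limits.h>
#include <stdio.h>

/* Check for C1 == 16, prec == 32, C3 == INT_MIN | (C1 - 1) == 0x8000000f:
   x % 16 == 13   <=>  (x & C3) == 13
   x % 16 == -13  <=>  (x & C3) == (-13 & C3).  */

int
main (void)
{
  const int c3 = INT_MIN | 15;
  for (long long v = -100000; v <= 100000; v++)
    {
      int x = (int) v;
      if ((x % 16 == 13) != ((x & c3) == 13)
          || (x % 16 == -13) != ((x & c3) == (-13 & c3)))
        {
          printf ("mismatch at x = %d\n", x);
          return 1;
        }
    }
  printf ("identity holds on the tested range\n");
  return 0;
}

The same picture explains the guard for d < 0 && d <= -c below: a truncated remainder of x % c always lies in (-c, c), so such compares are always false.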
OK.

Richard.

> 2018-09-12  Jakub Jelinek  <ja...@redhat.com>
> 	    Kyrylo Tkachov  <kyrylo.tkac...@arm.com>
>
> 	PR middle-end/87290
> 	* expr.c (maybe_optimize_pow2p_mod_cmp): New function.
> 	(maybe_optimize_mod_cmp): Use it if integer_pow2p treeop1.
>
> 	* gcc.target/i386/pr87290.c: New test.
> 	* gcc.c-torture/execute/pr87290.c: New test.
>
> --- gcc/expr.c.jj	2018-09-12 15:20:37.448807135 +0200
> +++ gcc/expr.c	2018-09-12 18:16:41.000000000 +0200
> @@ -11523,6 +11523,98 @@ mod_inv (const wide_int &a, const wide_i
>    return x1;
>  }
>
> +/* Optimize x % C1 == C2 for signed modulo if C1 is a power of two and C2
> +   is non-zero and C3 ((1<<(prec-1)) | (C1 - 1)):
> +   for C2 > 0 to x & C3 == C2
> +   for C2 < 0 to x & C3 == (C2 & C3).  */
> +enum tree_code
> +maybe_optimize_pow2p_mod_cmp (enum tree_code code, tree *arg0, tree *arg1)
> +{
> +  gimple *stmt = get_def_for_expr (*arg0, TRUNC_MOD_EXPR);
> +  tree treeop0 = gimple_assign_rhs1 (stmt);
> +  tree treeop1 = gimple_assign_rhs2 (stmt);
> +  tree type = TREE_TYPE (*arg0);
> +  scalar_int_mode mode;
> +  if (!is_a <scalar_int_mode> (TYPE_MODE (type), &mode))
> +    return code;
> +  if (GET_MODE_BITSIZE (mode) != TYPE_PRECISION (type)
> +      || TYPE_PRECISION (type) <= 1
> +      || TYPE_UNSIGNED (type)
> +      /* Signed x % c == 0 should have been optimized into unsigned modulo
> +	 earlier.  */
> +      || integer_zerop (*arg1)
> +      /* If c is known to be non-negative, modulo will be expanded as unsigned
> +	 modulo.  */
> +      || get_range_pos_neg (treeop0) == 1)
> +    return code;
> +
> +  /* x % c == d where d < 0 && d <= -c should be always false.  */
> +  if (tree_int_cst_sgn (*arg1) == -1
> +      && -wi::to_widest (treeop1) >= wi::to_widest (*arg1))
> +    return code;
> +
> +  int prec = TYPE_PRECISION (type);
> +  wide_int w = wi::to_wide (treeop1) - 1;
> +  w |= wi::shifted_mask (0, prec - 1, true, prec);
> +  tree c3 = wide_int_to_tree (type, w);
> +  tree c4 = *arg1;
> +  if (tree_int_cst_sgn (*arg1) == -1)
> +    c4 = wide_int_to_tree (type, w & wi::to_wide (*arg1));
> +
> +  rtx op0 = expand_normal (treeop0);
> +  treeop0 = make_tree (TREE_TYPE (treeop0), op0);
> +
> +  bool speed_p = optimize_insn_for_speed_p ();
> +
> +  do_pending_stack_adjust ();
> +
> +  location_t loc = gimple_location (stmt);
> +  struct separate_ops ops;
> +  ops.code = TRUNC_MOD_EXPR;
> +  ops.location = loc;
> +  ops.type = TREE_TYPE (treeop0);
> +  ops.op0 = treeop0;
> +  ops.op1 = treeop1;
> +  ops.op2 = NULL_TREE;
> +  start_sequence ();
> +  rtx mor = expand_expr_real_2 (&ops, NULL_RTX, TYPE_MODE (ops.type),
> +				EXPAND_NORMAL);
> +  rtx_insn *moinsns = get_insns ();
> +  end_sequence ();
> +
> +  unsigned mocost = seq_cost (moinsns, speed_p);
> +  mocost += rtx_cost (mor, mode, EQ, 0, speed_p);
> +  mocost += rtx_cost (expand_normal (*arg1), mode, EQ, 1, speed_p);
> +
> +  ops.code = BIT_AND_EXPR;
> +  ops.location = loc;
> +  ops.type = TREE_TYPE (treeop0);
> +  ops.op0 = treeop0;
> +  ops.op1 = c3;
> +  ops.op2 = NULL_TREE;
> +  start_sequence ();
> +  rtx mur = expand_expr_real_2 (&ops, NULL_RTX, TYPE_MODE (ops.type),
> +				EXPAND_NORMAL);
> +  rtx_insn *muinsns = get_insns ();
> +  end_sequence ();
> +
> +  unsigned mucost = seq_cost (muinsns, speed_p);
> +  mucost += rtx_cost (mur, mode, EQ, 0, speed_p);
> +  mucost += rtx_cost (expand_normal (c4), mode, EQ, 1, speed_p);
> +
> +  if (mocost <= mucost)
> +    {
> +      emit_insn (moinsns);
> +      *arg0 = make_tree (TREE_TYPE (*arg0), mor);
> +      return code;
> +    }
> +
> +  emit_insn (muinsns);
> +  *arg0 = make_tree (TREE_TYPE (*arg0), mur);
> +  *arg1 = c4;
> +  return code;
> +}
> +
>  /* Attempt to optimize unsigned (X % C1) == C2 (or (X % C1) != C2).
>     If C1 is odd to:
>     (X - C2) * C3 <= C4 (or >), where
> @@ -11561,8 +11653,6 @@ maybe_optimize_mod_cmp (enum tree_code c
>    tree treeop1 = gimple_assign_rhs2 (stmt);
>    if (TREE_CODE (treeop0) != SSA_NAME
>        || TREE_CODE (treeop1) != INTEGER_CST
> -      /* x % pow2 is handled right already.  */
> -      || integer_pow2p (treeop1)
>        /* Don't optimize the undefined behavior case x % 0;
> 	 x % 1 should have been optimized into zero, punt if
> 	 it makes it here for whatever reason;
> @@ -11572,6 +11662,11 @@ maybe_optimize_mod_cmp (enum tree_code c
>        || tree_int_cst_le (treeop1, *arg1))
>      return code;
>
> +  /* Unsigned x % pow2 is handled right already, for signed
> +     modulo handle it in maybe_optimize_pow2p_mod_cmp.  */
> +  if (integer_pow2p (treeop1))
> +    return maybe_optimize_pow2p_mod_cmp (code, arg0, arg1);
> +
>    tree type = TREE_TYPE (*arg0);
>    scalar_int_mode mode;
>    if (!is_a <scalar_int_mode> (TYPE_MODE (type), &mode))
> --- gcc/testsuite/gcc.target/i386/pr87290.c.jj	2018-09-12 18:32:57.575345992 +0200
> +++ gcc/testsuite/gcc.target/i386/pr87290.c	2018-09-12 18:32:44.462560195 +0200
> @@ -0,0 +1,34 @@
> +/* PR middle-end/87290 */
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -masm=att" } */
> +/* { dg-final { scan-assembler-times "and\[^\n\r]*-2147483633" 4 } } */
> +/* { dg-final { scan-assembler-times "\t\\\$13," 2 } } */
> +/* { dg-final { scan-assembler-times "\t\\\$-2147483645," 2 } } */
> +
> +void f0 (void);
> +
> +int
> +f1 (int x)
> +{
> +  return x % 16 == 13;
> +}
> +
> +int
> +f2 (int x)
> +{
> +  return x % 16 == -13;
> +}
> +
> +void
> +f3 (int x)
> +{
> +  if (x % 16 == 13)
> +    f0 ();
> +}
> +
> +void
> +f4 (int x)
> +{
> +  if (x % 16 == -13)
> +    f0 ();
> +}
> --- gcc/testsuite/gcc.c-torture/execute/pr87290.c.jj	2018-09-12 18:44:30.327029520 +0200
> +++ gcc/testsuite/gcc.c-torture/execute/pr87290.c	2018-09-12 18:43:55.191603472 +0200
> @@ -0,0 +1,63 @@
> +/* PR middle-end/87290 */
> +
> +int c;
> +
> +__attribute__((noipa)) void
> +f0 (void)
> +{
> +  c++;
> +}
> +
> +__attribute__((noipa)) int
> +f1 (int x)
> +{
> +  return x % 16 == 13;
> +}
> +
> +__attribute__((noipa)) int
> +f2 (int x)
> +{
> +  return x % 16 == -13;
> +}
> +
> +__attribute__((noipa)) void
> +f3 (int x)
> +{
> +  if (x % 16 == 13)
> +    f0 ();
> +}
> +
> +__attribute__((noipa)) void
> +f4 (int x)
> +{
> +  if (x % 16 == -13)
> +    f0 ();
> +}
> +
> +int
> +main ()
> +{
> +  int i, j;
> +  for (i = -30; i < 30; i++)
> +    {
> +      if (f1 (13 + i * 16) != (i >= 0) || f2 (-13 + i * 16) != (i <= 0))
> +	__builtin_abort ();
> +      f3 (13 + i * 16);
> +      if (c != (i >= 0))
> +	__builtin_abort ();
> +      f4 (-13 + i * 16);
> +      if (c != 1 + (i == 0))
> +	__builtin_abort ();
> +      for (j = 1; j < 16; j++)
> +	{
> +	  if (f1 (13 + i * 16 + j) || f2 (-13 + i * 16 + j))
> +	    __builtin_abort ();
> +	  f3 (13 + i * 16 + j);
> +	  f4 (-13 + i * 16 + j);
> +	}
> +      if (c != 1 + (i == 0))
> +	__builtin_abort ();
> +      c = 0;
> +    }
> +  return 0;
> +}
>
> 	Jakub

-- 
Richard Biener <rguent...@suse.de>
SUSE LINUX GmbH, GF: Felix Imendoerffer, Jane Smithard, Graham Norton,
HRB 21284 (AG Nuernberg)
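A note on the execute test's expected values: C's % truncates toward zero, so a negative operand yields a non-positive remainder, and 13 + i * 16 has remainder 13 only for i >= 0 (for i < 0 it is -3). A tiny sketch, separate from the testcase, printing the relevant values:

#include <stdio.h>

int
main (void)
{
  /* Truncated division: (13 + i * 16) % 16 is -3 for i < 0 and 13 for
     i >= 0, so only non-negative i hits the x % 16 == 13 case.  */
  for (int i = -2; i <= 2; i++)
    printf ("i = %2d: (13 + i * 16) %% 16 = %3d\n", i, (13 + i * 16) % 16);
  return 0;
}

That is why f1 (13 + i * 16) is expected to be true exactly for i >= 0, and symmetrically f2 (-13 + i * 16) exactly for i <= 0.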