Hi, This patch is for pr105735/pr101991. It will enable below optimization: { - long unsigned int bit; - - <bb 2> [local count: 32534376]: - - <bb 3> [local count: 1041207449]: - # tmp_10 = PHI <tmp_7(5), tmp_4(D)(2)> - # bit_12 = PHI <bit_8(5), 0(2)> - tmp_7 = bit2_6(D) & tmp_10; - bit_8 = bit_12 + 1; - if (bit_8 != 32) - goto <bb 5>; [96.97%] - else - goto <bb 4>; [3.03%] - - <bb 5> [local count: 1009658865]: - goto <bb 3>; [100.00%] - - <bb 4> [local count: 32534376]: - # tmp_11 = PHI <tmp_7(3)> - return tmp_11; + tmp_11 = tmp_4(D) & bit2_6(D); + return tmp_11;
} Ok for master ? gcc/ChangeLog: PR middle-end/105735 * match.pd (bitop_with_inv_p): New match. * tree-scalar-evolution.cc (gimple_bitop_with_inv_p): Declare. (analyze_and_compute_bitop_with_inv_effect): New function. (final_value_replacement_loop): Enhanced to handle bitop with inv induction. gcc/testsuite/ChangeLog: * gcc.target/i386/pr105735-1.c: New test. * gcc.target/i386/pr105735-2.c: New test. --- gcc/match.pd | 4 + gcc/testsuite/gcc.target/i386/pr105735-1.c | 88 ++++++++++++++++++++++ gcc/testsuite/gcc.target/i386/pr105735-2.c | 28 +++++++ gcc/tree-scalar-evolution.cc | 59 +++++++++++++++ 4 files changed, 179 insertions(+) create mode 100644 gcc/testsuite/gcc.target/i386/pr105735-1.c create mode 100644 gcc/testsuite/gcc.target/i386/pr105735-2.c diff --git a/gcc/match.pd b/gcc/match.pd index 562138a8034..cfe593ebb02 100644 --- a/gcc/match.pd +++ b/gcc/match.pd @@ -8050,6 +8050,10 @@ and, (bit_not (nop_convert1? (bit_xor@0 (convert2? (lshift integer_onep@1 @2)) @3)))) +(for bit_op (bit_and bit_ior bit_xor) + (match (bitop_with_inv_p @0 @1) + (bit_op:c @0 @1))) + /* n - (((n > C1) ? n : C1) & -C2) -> n & C1 for unsigned case. n - (((n > C1) ? n : C1) & -C2) -> (n <= C1) ? n : (n & C1) for signed case. */ (simplify diff --git a/gcc/testsuite/gcc.target/i386/pr105735-1.c b/gcc/testsuite/gcc.target/i386/pr105735-1.c new file mode 100644 index 00000000000..8d2123ed351 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr105735-1.c @@ -0,0 +1,88 @@ +/* { dg-do compile } */ +/* { dg-options "-O1 -fdump-tree-sccp-details" } */ +/* { dg-final { scan-tree-dump-times {final value replacement} 8 "sccp" +} } */ + +unsigned long long +__attribute__((noipa)) +foo (unsigned long long tmp, unsigned long long bit2) { + for (int bit = 0; bit < 64; bit++) + tmp &= bit2; + return tmp; +} + +unsigned long long +__attribute__((noipa)) +foo1 (unsigned long long tmp, unsigned long long bit2) { + for (int bit = 63; bit >= 0; bit -=3) + tmp &= bit2; + return tmp; +} + +unsigned long long +__attribute__((noipa)) +foo2 (unsigned long long tmp, unsigned long long bit2) { + for (int bit = 0; bit < 64; bit++) + tmp |= bit2; + return tmp; +} + +unsigned long long +__attribute__((noipa)) +foo3 (unsigned long long tmp, unsigned long long bit2) { + for (int bit = 63; bit >= 0; bit -=3) + tmp |= bit2; + return tmp; +} + +unsigned long long +__attribute__((noipa)) +foo4 (unsigned long long tmp, unsigned long long bit2) { + for (int bit = 0; bit < 64; bit++) + tmp ^= bit2; + return tmp; +} + +unsigned long long +__attribute__((noipa)) +foo5 (unsigned long long tmp, unsigned long long bit2) { + for (int bit = 0; bit < 63; bit++) + tmp ^= bit2; + return tmp; +} + +unsigned long long +__attribute__((noipa)) +f (unsigned long long tmp, long long bit, unsigned long long bit2) { + unsigned long long res = tmp; + for (long long i = 0; i < bit; i++) + res &= bit2; + return res; +} + +unsigned long long +__attribute__((noipa)) +f1 (unsigned long long tmp, long long bit, unsigned long long bit2) { + unsigned long long res = tmp; + for (long long i = 0; i < bit; i++) + res |= bit2; + return res; +} + +unsigned long long +__attribute__((noipa)) +f2 (unsigned long long tmp, long long bit, unsigned long long bit2) { + unsigned long long res = tmp; + for (long long i = 0; i < bit; i++) + res ^= bit2; + return res; +} + diff --git a/gcc/testsuite/gcc.target/i386/pr105735-2.c b/gcc/testsuite/gcc.target/i386/pr105735-2.c new file mode 100644 index 00000000000..79c1d300b1b --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr105735-2.c @@ -0,0 +1,28 @@ +/* { dg-do run } */ +/* { dg-options "-O1" } */ + +#include "pr105735-1.c" + +int main() +{ + unsigned long long tmp = 0x1101101ULL; + unsigned long long bit2 = 0x1111111011110111ULL; + if (foo (tmp, bit2) != 0x1100101ULL) + __builtin_abort (); + if (foo1 (tmp, bit2) != 0x1100101ULL) + __builtin_abort (); + if (foo2 (tmp, bit2) != 0x1111111011111111ULL) + __builtin_abort (); + if (foo3 (tmp, bit2) != 0x1111111011111111ULL) + __builtin_abort (); + if (foo4 (tmp, bit2) != 0x1101101ULL) + __builtin_abort (); + if (foo5 (tmp, bit2) != 0x1111111010011010ULL) + __builtin_abort (); + if (f (tmp, 64, bit2) != 0x1100101ULL) + __builtin_abort (); + if (f1 (tmp, 64, bit2) != 0x1111111011111111ULL) + __builtin_abort (); + if (f2 (tmp, 64, bit2) != 0x1101101ULL) + __builtin_abort (); +} diff --git a/gcc/tree-scalar-evolution.cc b/gcc/tree-scalar-evolution.cc index fc59d035b19..81220f08d2e 100644 --- a/gcc/tree-scalar-evolution.cc +++ b/gcc/tree-scalar-evolution.cc @@ -3635,6 +3635,53 @@ enum bit_op_kind return fold_build2 (code1, type, inv, wide_int_to_tree (type, bits)); } +/* Match.pd function to match bitop with invariant expression + .i.e. + tmp_7 = _0 & _1; */ +extern bool gimple_bitop_with_inv_p (tree, tree *, tree (*)(tree)); + +/* Return the inductive expression of bitop with invariant if possible, + otherwise returns DEF. */ +static tree +analyze_and_compute_bitop_with_inv_effect (class loop* loop, tree phidef, + tree niter) +{ + tree match_op[2],inv; + tree type = TREE_TYPE (phidef); + gphi* header_phi = NULL; + /* match thing like op0 (match[0]), op1(match[1]), phidef(PHIDEF) + + op1 = PHI <phidef, inv> + phidef = op0 & op1 + if op0 is an invariant, it could change to + phidef = op0 & inv. */ + if (!gimple_bitop_with_inv_p (phidef, &match_op[0], NULL) + || TREE_CODE (match_op[1]) != SSA_NAME + || !expr_invariant_in_loop_p (loop, match_op[0]) + || !(header_phi = dyn_cast <gphi *> (SSA_NAME_DEF_STMT (match_op[1]))) + || gimple_phi_num_args (header_phi) != 2) + return NULL_TREE; + + if (PHI_ARG_DEF_FROM_EDGE (header_phi, loop_latch_edge (loop)) != phidef) + return NULL_TREE; + + enum tree_code code1 + = gimple_assign_rhs_code (SSA_NAME_DEF_STMT (phidef)); + + if (code1 == BIT_XOR_EXPR) + { + if (!tree_fits_uhwi_p (niter)) + return NULL_TREE; + unsigned HOST_WIDE_INT niter_num; + niter_num = tree_to_uhwi (niter); + if (niter_num % 2 != 0) + match_op[0] = build_zero_cst (type); + } + + inv = PHI_ARG_DEF_FROM_EDGE (header_phi, loop_preheader_edge (loop)); + return fold_build2 (code1, type, inv, match_op[0]); } + /* Do final value replacement for LOOP, return true if we did anything. */ bool @@ -3687,6 +3734,18 @@ final_value_replacement_loop (class loop *loop) &folded_casts); def = compute_overall_effect_of_inner_loop (ex_loop, def); + /* Handle bitop with invariant induction expression. + + .i.e + for (int i =0 ;i < 32; i++) + tmp &= bit2; + if bit2 is an invariant in loop which could simple to + tmp &= bit2. */ + tree bitinv_def; + if ((bitinv_def + = analyze_and_compute_bitop_with_inv_effect (loop, phidef, niter))) + def = bitinv_def; + /* Handle bitwise induction expression. .i.e. -- 2.18.2