Hi Richard, could you help to have a look for the patch ? > Hi, > > This patch is for pr105735/pr101991. It will enable below optimization: > { > - long unsigned int bit; > - > - <bb 2> [local count: 32534376]: > - > - <bb 3> [local count: 1041207449]: > - # tmp_10 = PHI <tmp_7(5), tmp_4(D)(2)> > - # bit_12 = PHI <bit_8(5), 0(2)> > - tmp_7 = bit2_6(D) & tmp_10; > - bit_8 = bit_12 + 1; > - if (bit_8 != 32) > - goto <bb 5>; [96.97%] > - else > - goto <bb 4>; [3.03%] > - > - <bb 5> [local count: 1009658865]: > - goto <bb 3>; [100.00%] > - > - <bb 4> [local count: 32534376]: > - # tmp_11 = PHI <tmp_7(3)> > - return tmp_11; > + tmp_11 = tmp_4(D) & bit2_6(D); > + return tmp_11; > > } > > Ok for master ? > > gcc/ChangeLog: > > PR middle-end/105735 > * match.pd (bitop_with_inv_p): New match. > * tree-scalar-evolution.cc (gimple_bitop_with_inv_p): Declare. > (analyze_and_compute_bitop_with_inv_effect): New function. > (final_value_replacement_loop): Enhanced to handle bitop > with inv induction. > > gcc/testsuite/ChangeLog: > > * gcc.target/i386/pr105735-1.c: New test. > * gcc.target/i386/pr105735-2.c: New test. > --- > gcc/match.pd | 4 + > gcc/testsuite/gcc.target/i386/pr105735-1.c | 88 ++++++++++++++++++++++ > gcc/testsuite/gcc.target/i386/pr105735-2.c | 28 +++++++ > gcc/tree-scalar-evolution.cc | 59 +++++++++++++++ > 4 files changed, 179 insertions(+) > create mode 100644 gcc/testsuite/gcc.target/i386/pr105735-1.c > create mode 100644 gcc/testsuite/gcc.target/i386/pr105735-2.c > > diff --git a/gcc/match.pd b/gcc/match.pd index 562138a8034..cfe593ebb02 > 100644 > --- a/gcc/match.pd > +++ b/gcc/match.pd > @@ -8050,6 +8050,10 @@ and, > (bit_not > (nop_convert1? (bit_xor@0 (convert2? (lshift integer_onep@1 @2)) @3)))) > > +(for bit_op (bit_and bit_ior bit_xor) > + (match (bitop_with_inv_p @0 @1) > + (bit_op:c @0 @1))) > + > /* n - (((n > C1) ? n : C1) & -C2) -> n & C1 for unsigned case. > n - (((n > C1) ? n : C1) & -C2) -> (n <= C1) ? n : (n & C1) for signed > case. */ > (simplify diff --git a/gcc/testsuite/gcc.target/i386/pr105735-1.c > b/gcc/testsuite/gcc.target/i386/pr105735-1.c > new file mode 100644 > index 00000000000..8d2123ed351 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/i386/pr105735-1.c > @@ -0,0 +1,88 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O1 -fdump-tree-sccp-details" } */ > +/* { dg-final { scan-tree-dump-times {final value replacement} 8 "sccp" > +} } */ > + > +unsigned long long > +__attribute__((noipa)) > +foo (unsigned long long tmp, unsigned long long bit2) { > + for (int bit = 0; bit < 64; bit++) > + tmp &= bit2; > + return tmp; > +} > + > +unsigned long long > +__attribute__((noipa)) > +foo1 (unsigned long long tmp, unsigned long long bit2) { > + for (int bit = 63; bit >= 0; bit -=3) > + tmp &= bit2; > + return tmp; > +} > + > +unsigned long long > +__attribute__((noipa)) > +foo2 (unsigned long long tmp, unsigned long long bit2) { > + for (int bit = 0; bit < 64; bit++) > + tmp |= bit2; > + return tmp; > +} > + > +unsigned long long > +__attribute__((noipa)) > +foo3 (unsigned long long tmp, unsigned long long bit2) { > + for (int bit = 63; bit >= 0; bit -=3) > + tmp |= bit2; > + return tmp; > +} > + > +unsigned long long > +__attribute__((noipa)) > +foo4 (unsigned long long tmp, unsigned long long bit2) { > + for (int bit = 0; bit < 64; bit++) > + tmp ^= bit2; > + return tmp; > +} > + > +unsigned long long > +__attribute__((noipa)) > +foo5 (unsigned long long tmp, unsigned long long bit2) { > + for (int bit = 0; bit < 63; bit++) > + tmp ^= bit2; > + return tmp; > +} > + > +unsigned long long > +__attribute__((noipa)) > +f (unsigned long long tmp, long long bit, unsigned long long bit2) { > + unsigned long long res = tmp; > + for (long long i = 0; i < bit; i++) > + res &= bit2; > + return res; > +} > + > +unsigned long long > +__attribute__((noipa)) > +f1 (unsigned long long tmp, long long bit, unsigned long long bit2) { > + unsigned long long res = tmp; > + for (long long i = 0; i < bit; i++) > + res |= bit2; > + return res; > +} > + > +unsigned long long > +__attribute__((noipa)) > +f2 (unsigned long long tmp, long long bit, unsigned long long bit2) { > + unsigned long long res = tmp; > + for (long long i = 0; i < bit; i++) > + res ^= bit2; > + return res; > +} > + > diff --git a/gcc/testsuite/gcc.target/i386/pr105735-2.c > b/gcc/testsuite/gcc.target/i386/pr105735-2.c > new file mode 100644 > index 00000000000..79c1d300b1b > --- /dev/null > +++ b/gcc/testsuite/gcc.target/i386/pr105735-2.c > @@ -0,0 +1,28 @@ > +/* { dg-do run } */ > +/* { dg-options "-O1" } */ > + > +#include "pr105735-1.c" > + > +int main() > +{ > + unsigned long long tmp = 0x1101101ULL; > + unsigned long long bit2 = 0x1111111011110111ULL; > + if (foo (tmp, bit2) != 0x1100101ULL) > + __builtin_abort (); > + if (foo1 (tmp, bit2) != 0x1100101ULL) > + __builtin_abort (); > + if (foo2 (tmp, bit2) != 0x1111111011111111ULL) > + __builtin_abort (); > + if (foo3 (tmp, bit2) != 0x1111111011111111ULL) > + __builtin_abort (); > + if (foo4 (tmp, bit2) != 0x1101101ULL) > + __builtin_abort (); > + if (foo5 (tmp, bit2) != 0x1111111010011010ULL) > + __builtin_abort (); > + if (f (tmp, 64, bit2) != 0x1100101ULL) > + __builtin_abort (); > + if (f1 (tmp, 64, bit2) != 0x1111111011111111ULL) > + __builtin_abort (); > + if (f2 (tmp, 64, bit2) != 0x1101101ULL) > + __builtin_abort (); > +} > diff --git a/gcc/tree-scalar-evolution.cc b/gcc/tree-scalar-evolution.cc index > fc59d035b19..81220f08d2e 100644 > --- a/gcc/tree-scalar-evolution.cc > +++ b/gcc/tree-scalar-evolution.cc > @@ -3635,6 +3635,53 @@ enum bit_op_kind > return fold_build2 (code1, type, inv, wide_int_to_tree (type, bits)); } > > +/* Match.pd function to match bitop with invariant expression > + .i.e. > + tmp_7 = _0 & _1; */ > +extern bool gimple_bitop_with_inv_p (tree, tree *, tree (*)(tree)); > + > +/* Return the inductive expression of bitop with invariant if possible, > + otherwise returns DEF. */ > +static tree > +analyze_and_compute_bitop_with_inv_effect (class loop* loop, tree phidef, > + tree niter) > +{ > + tree match_op[2],inv; > + tree type = TREE_TYPE (phidef); > + gphi* header_phi = NULL; > + /* match thing like op0 (match[0]), op1(match[1]), phidef(PHIDEF) > + > + op1 = PHI <phidef, inv> > + phidef = op0 & op1 > + if op0 is an invariant, it could change to > + phidef = op0 & inv. */ > + if (!gimple_bitop_with_inv_p (phidef, &match_op[0], NULL) > + || TREE_CODE (match_op[1]) != SSA_NAME > + || !expr_invariant_in_loop_p (loop, match_op[0]) > + || !(header_phi = dyn_cast <gphi *> (SSA_NAME_DEF_STMT (match_op[1]))) > + || gimple_phi_num_args (header_phi) != 2) > + return NULL_TREE; > + > + if (PHI_ARG_DEF_FROM_EDGE (header_phi, loop_latch_edge (loop)) != phidef) > + return NULL_TREE; > + > + enum tree_code code1 > + = gimple_assign_rhs_code (SSA_NAME_DEF_STMT (phidef)); > + > + if (code1 == BIT_XOR_EXPR) > + { > + if (!tree_fits_uhwi_p (niter)) > + return NULL_TREE; > + unsigned HOST_WIDE_INT niter_num; > + niter_num = tree_to_uhwi (niter); > + if (niter_num % 2 != 0) > + match_op[0] = build_zero_cst (type); > + } > + > + inv = PHI_ARG_DEF_FROM_EDGE (header_phi, loop_preheader_edge (loop)); > + return fold_build2 (code1, type, inv, match_op[0]); } > + > /* Do final value replacement for LOOP, return true if we did anything. */ > > bool > @@ -3687,6 +3734,18 @@ final_value_replacement_loop (class loop *loop) > &folded_casts); > def = compute_overall_effect_of_inner_loop (ex_loop, def); > > + /* Handle bitop with invariant induction expression. > + > + .i.e > + for (int i =0 ;i < 32; i++) > + tmp &= bit2; > + if bit2 is an invariant in loop which could simple to > + tmp &= bit2. */ > + tree bitinv_def; > + if ((bitinv_def > + = analyze_and_compute_bitop_with_inv_effect (loop, phidef, niter))) > + def = bitinv_def; > + > /* Handle bitwise induction expression. > > .i.e. > -- > 2.18.2
RE: [PATCH] Enhance final_value_replacement_loop to handle bitop with an invariant induction.[PR105735]
Kong, Lingling via Gcc-patches Mon, 22 Aug 2022 19:17:33 -0700
- [PATCH] Enhance final_value_replacement_loo... Kong, Lingling via Gcc-patches
- RE: [PATCH] Enhance final_value_replac... Kong, Lingling via Gcc-patches
- Re: [PATCH] Enhance final_value_replac... Richard Biener via Gcc-patches
- RE: [PATCH] Enhance final_value_re... Kong, Lingling via Gcc-patches
- Re: [PATCH] Enhance final_valu... Richard Biener via Gcc-patches
- RE: [PATCH] Enhance final_... Kong, Lingling via Gcc-patches
- RE: [PATCH] Enhance f... Kong, Lingling via Gcc-patches