Hi  Richard,  could you help to have a look for the patch ?
 
> Hi,
> 
> This patch is for pr105735/pr101991. It will enable below optimization:
> {
> -  long unsigned int bit;
> -
> -  <bb 2> [local count: 32534376]:
> -
> -  <bb 3> [local count: 1041207449]:
> -  # tmp_10 = PHI <tmp_7(5), tmp_4(D)(2)>
> -  # bit_12 = PHI <bit_8(5), 0(2)>
> -  tmp_7 = bit2_6(D) & tmp_10;
> -  bit_8 = bit_12 + 1;
> -  if (bit_8 != 32)
> -    goto <bb 5>; [96.97%]
> -  else
> -    goto <bb 4>; [3.03%]
> -
> -  <bb 5> [local count: 1009658865]:
> -  goto <bb 3>; [100.00%]
> -
> -  <bb 4> [local count: 32534376]:
> -  # tmp_11 = PHI <tmp_7(3)>
> -  return tmp_11;
> +  tmp_11 = tmp_4(D) & bit2_6(D);
> +  return tmp_11;
> 
> }
> 
> Ok for master ?
> 
> gcc/ChangeLog:
> 
>       PR middle-end/105735
>       * match.pd (bitop_with_inv_p): New match.
>       * tree-scalar-evolution.cc (gimple_bitop_with_inv_p): Declare.
>       (analyze_and_compute_bitop_with_inv_effect): New function.
>       (final_value_replacement_loop): Enhanced to handle bitop
>       with inv induction.
> 
> gcc/testsuite/ChangeLog:
> 
>       * gcc.target/i386/pr105735-1.c: New test.
>       * gcc.target/i386/pr105735-2.c: New test.
> ---
>  gcc/match.pd                               |  4 +
>  gcc/testsuite/gcc.target/i386/pr105735-1.c | 88 ++++++++++++++++++++++
> gcc/testsuite/gcc.target/i386/pr105735-2.c | 28 +++++++
>  gcc/tree-scalar-evolution.cc               | 59 +++++++++++++++
>  4 files changed, 179 insertions(+)
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr105735-1.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr105735-2.c
> 
> diff --git a/gcc/match.pd b/gcc/match.pd index 562138a8034..cfe593ebb02
> 100644
> --- a/gcc/match.pd
> +++ b/gcc/match.pd
> @@ -8050,6 +8050,10 @@ and,
>   (bit_not
>    (nop_convert1? (bit_xor@0 (convert2? (lshift integer_onep@1 @2)) @3))))
> 
> +(for bit_op (bit_and bit_ior bit_xor)
> + (match (bitop_with_inv_p @0 @1)
> +  (bit_op:c @0 @1)))
> +
>  /* n - (((n > C1) ? n : C1) & -C2) ->  n & C1 for unsigned case.
>     n - (((n > C1) ? n : C1) & -C2) ->  (n <= C1) ? n : (n & C1) for signed 
> case.  */
> (simplify diff --git a/gcc/testsuite/gcc.target/i386/pr105735-1.c
> b/gcc/testsuite/gcc.target/i386/pr105735-1.c
> new file mode 100644
> index 00000000000..8d2123ed351
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr105735-1.c
> @@ -0,0 +1,88 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O1 -fdump-tree-sccp-details" } */
> +/* { dg-final { scan-tree-dump-times {final value replacement} 8 "sccp"
> +} } */
> +
> +unsigned long long
> +__attribute__((noipa))
> +foo (unsigned long long tmp, unsigned long long bit2) {
> +  for (int bit = 0; bit < 64; bit++)
> +    tmp &= bit2;
> +  return tmp;
> +}
> +
> +unsigned long long
> +__attribute__((noipa))
> +foo1 (unsigned long long tmp, unsigned long long bit2) {
> +  for (int bit = 63; bit >= 0; bit -=3)
> +    tmp &= bit2;
> +  return tmp;
> +}
> +
> +unsigned long long
> +__attribute__((noipa))
> +foo2 (unsigned long long tmp, unsigned long long bit2) {
> +  for (int bit = 0; bit < 64; bit++)
> +    tmp |= bit2;
> +  return tmp;
> +}
> +
> +unsigned long long
> +__attribute__((noipa))
> +foo3 (unsigned long long tmp, unsigned long long bit2) {
> +  for (int bit = 63; bit >= 0; bit -=3)
> +    tmp |= bit2;
> +  return tmp;
> +}
> +
> +unsigned long long
> +__attribute__((noipa))
> +foo4 (unsigned long long tmp, unsigned long long bit2) {
> +  for (int bit = 0; bit < 64; bit++)
> +    tmp ^= bit2;
> +  return tmp;
> +}
> +
> +unsigned long long
> +__attribute__((noipa))
> +foo5 (unsigned long long tmp, unsigned long long bit2) {
> +  for (int bit = 0; bit < 63; bit++)
> +    tmp ^= bit2;
> +  return tmp;
> +}
> +
> +unsigned long long
> +__attribute__((noipa))
> +f (unsigned long long tmp, long long bit, unsigned long long bit2) {
> +  unsigned long long res = tmp;
> +  for (long long i = 0; i < bit; i++)
> +    res &= bit2;
> +  return res;
> +}
> +
> +unsigned long long
> +__attribute__((noipa))
> +f1 (unsigned long long tmp, long long bit, unsigned long long bit2) {
> +  unsigned long long res = tmp;
> +  for (long long i = 0; i < bit; i++)
> +    res |= bit2;
> +  return res;
> +}
> +
> +unsigned long long
> +__attribute__((noipa))
> +f2 (unsigned long long tmp, long long bit, unsigned long long bit2) {
> +  unsigned long long res = tmp;
> +  for (long long i = 0; i < bit; i++)
> +    res ^= bit2;
> +  return res;
> +}
> +
> diff --git a/gcc/testsuite/gcc.target/i386/pr105735-2.c
> b/gcc/testsuite/gcc.target/i386/pr105735-2.c
> new file mode 100644
> index 00000000000..79c1d300b1b
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr105735-2.c
> @@ -0,0 +1,28 @@
> +/* { dg-do run } */
> +/* { dg-options "-O1" } */
> +
> +#include "pr105735-1.c"
> +
> +int main()
> +{
> +  unsigned long long tmp = 0x1101101ULL;
> +  unsigned long long bit2 = 0x1111111011110111ULL;
> +  if (foo (tmp, bit2) != 0x1100101ULL)
> +    __builtin_abort ();
> +  if (foo1 (tmp, bit2) != 0x1100101ULL)
> +    __builtin_abort ();
> +  if (foo2 (tmp, bit2) != 0x1111111011111111ULL)
> +    __builtin_abort ();
> +  if (foo3 (tmp, bit2) != 0x1111111011111111ULL)
> +    __builtin_abort ();
> +  if (foo4 (tmp, bit2) != 0x1101101ULL)
> +    __builtin_abort ();
> +  if (foo5 (tmp, bit2) != 0x1111111010011010ULL)
> +    __builtin_abort ();
> +  if (f (tmp, 64, bit2) != 0x1100101ULL)
> +    __builtin_abort ();
> +  if (f1 (tmp, 64, bit2) != 0x1111111011111111ULL)
> +    __builtin_abort ();
> +  if (f2 (tmp, 64, bit2) != 0x1101101ULL)
> +    __builtin_abort ();
> +}
> diff --git a/gcc/tree-scalar-evolution.cc b/gcc/tree-scalar-evolution.cc index
> fc59d035b19..81220f08d2e 100644
> --- a/gcc/tree-scalar-evolution.cc
> +++ b/gcc/tree-scalar-evolution.cc
> @@ -3635,6 +3635,53 @@ enum bit_op_kind
>    return fold_build2 (code1, type, inv, wide_int_to_tree (type, bits));  }
> 
> +/* Match.pd function to match bitop with invariant expression
> +  .i.e.
> +  tmp_7 = _0 & _1; */
> +extern bool gimple_bitop_with_inv_p (tree, tree *, tree (*)(tree));
> +
> +/* Return the inductive expression of bitop with invariant if possible,
> +   otherwise returns DEF.  */
> +static tree
> +analyze_and_compute_bitop_with_inv_effect (class loop* loop, tree phidef,
> +                                        tree niter)
> +{
> +  tree match_op[2],inv;
> +  tree type = TREE_TYPE (phidef);
> +  gphi* header_phi = NULL;
> +  /* match thing like op0 (match[0]), op1(match[1]), phidef(PHIDEF)
> +
> +    op1 =  PHI <phidef, inv>
> +    phidef = op0 & op1
> +    if op0 is an invariant, it could change to
> +    phidef = op0 & inv.  */
> +  if (!gimple_bitop_with_inv_p (phidef, &match_op[0], NULL)
> +      || TREE_CODE (match_op[1]) != SSA_NAME
> +      || !expr_invariant_in_loop_p (loop, match_op[0])
> +      || !(header_phi = dyn_cast <gphi *> (SSA_NAME_DEF_STMT (match_op[1])))
> +      || gimple_phi_num_args (header_phi) != 2)
> +    return NULL_TREE;
> +
> +  if (PHI_ARG_DEF_FROM_EDGE (header_phi, loop_latch_edge (loop)) != phidef)
> +    return NULL_TREE;
> +
> +  enum tree_code code1
> +    = gimple_assign_rhs_code (SSA_NAME_DEF_STMT (phidef));
> +
> +  if (code1 == BIT_XOR_EXPR)
> +    {
> +       if (!tree_fits_uhwi_p (niter))
> +     return NULL_TREE;
> +       unsigned HOST_WIDE_INT niter_num;
> +       niter_num = tree_to_uhwi (niter);
> +       if (niter_num % 2 != 0)
> +     match_op[0] =  build_zero_cst (type);
> +    }
> +
> +  inv = PHI_ARG_DEF_FROM_EDGE (header_phi, loop_preheader_edge (loop));
> + return fold_build2 (code1, type, inv, match_op[0]); }
> +
>  /* Do final value replacement for LOOP, return true if we did anything.  */
> 
>  bool
> @@ -3687,6 +3734,18 @@ final_value_replacement_loop (class loop *loop)
>                                             &folded_casts);
>        def = compute_overall_effect_of_inner_loop (ex_loop, def);
> 
> +      /* Handle bitop with invariant induction expression.
> +
> +     .i.e
> +     for (int i =0 ;i < 32; i++)
> +       tmp &= bit2;
> +     if bit2 is an invariant in loop which could simple to
> +     tmp &= bit2.  */
> +      tree bitinv_def;
> +      if ((bitinv_def
> +        = analyze_and_compute_bitop_with_inv_effect (loop, phidef, niter)))
> +     def = bitinv_def;
> +
>        /* Handle bitwise induction expression.
> 
>        .i.e.
> --
> 2.18.2

Reply via email to