Richard Biener <rguent...@suse.de> writes:
> On Fri, 18 Oct 2024, Richard Sandiford wrote:
>
>> match.pd had a rule to simplify ((X /[ex] A) +- B) * A -> X +- A * B
>> when A and B are INTEGER_CSTs.  This patch extends it to handle the
>> case where the outer multiplication is by a factor of A, not just
>> A itself.  It also handles addition and multiplication of poly_ints.
>> (Exact division by a poly_int seems unlikely.)
>> 
>> I'm not sure why minus is handled here.  Wouldn't minus of a constant be
>> canonicalised to a plus?
>
> All but A - INT_MIN, yes.  For A - INT_MIN we'd know A == INT_MIN.
> For unsigned we canonicalize all constants IIRC.  So I agree the
> minus case can go away.

Ah, right, thanks.  Hadn't thought about that special case.  Given that..

> OK unchanged or with the minus removed.

...I ended up leaving it unchanged. :)

Richard

>
> Thanks,
> Richard.
>
>> gcc/
>>      * match.pd: Generalise ((X /[ex] A) +- B) * A -> X +- A * B rule
>>      to ((X /[ex] C1) +- C2) * (C1 * C3) -> (X * C3) +- (C1 * C2 * C3).
>> 
>> gcc/testsuite/
>>      * gcc.dg/tree-ssa/mulexactdiv-5.c: New test.
>>      * gcc.dg/tree-ssa/mulexactdiv-6.c: Likewise.
>>      * gcc.dg/tree-ssa/mulexactdiv-7.c: Likewise.
>>      * gcc.dg/tree-ssa/mulexactdiv-8.c: Likewise.
>>      * gcc.target/aarch64/sve/cnt_fold_3.c: Likewise.
>> ---
>>  gcc/match.pd                                  | 38 +++++++-----
>>  gcc/testsuite/gcc.dg/tree-ssa/mulexactdiv-5.c | 29 +++++++++
>>  gcc/testsuite/gcc.dg/tree-ssa/mulexactdiv-6.c | 59 +++++++++++++++++++
>>  gcc/testsuite/gcc.dg/tree-ssa/mulexactdiv-7.c | 22 +++++++
>>  gcc/testsuite/gcc.dg/tree-ssa/mulexactdiv-8.c | 20 +++++++
>>  .../gcc.target/aarch64/sve/cnt_fold_3.c       | 40 +++++++++++++
>>  6 files changed, 194 insertions(+), 14 deletions(-)
>>  create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/mulexactdiv-5.c
>>  create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/mulexactdiv-6.c
>>  create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/mulexactdiv-7.c
>>  create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/mulexactdiv-8.c
>>  create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/cnt_fold_3.c
>> 
>> diff --git a/gcc/match.pd b/gcc/match.pd
>> index 6677bc06d80..268316456c3 100644
>> --- a/gcc/match.pd
>> +++ b/gcc/match.pd
>> @@ -5493,24 +5493,34 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
>>                                  optab_vector)))
>>     (eq (trunc_mod @0 @1) { build_zero_cst (TREE_TYPE (@0)); })))
>>  
>> -/* ((X /[ex] A) +- B) * A  -->  X +- A * B.  */
>> +/* ((X /[ex] C1) +- C2) * (C1 * C3)  -->  (X * C3) +- (C1 * C2 * C3).  */
>>  (for op (plus minus)
>>   (simplify
>> -  (mult (convert1? (op (convert2? (exact_div @0 INTEGER_CST@@1)) 
>> INTEGER_CST@2)) @1)
>> -  (if (tree_nop_conversion_p (type, TREE_TYPE (@2))
>> -       && tree_nop_conversion_p (TREE_TYPE (@0), TREE_TYPE (@2)))
>> -   (with
>> -     {
>> -       wi::overflow_type overflow;
>> -       wide_int mul = wi::mul (wi::to_wide (@1), wi::to_wide (@2),
>> -                           TYPE_SIGN (type), &overflow);
>> -     }
>> +  (mult (convert1? (op (convert2? (exact_div @0 INTEGER_CST@1))
>> +                   poly_int_tree_p@2))
>> +    poly_int_tree_p@3)
>> +  (with { poly_widest_int factor; }
>> +   (if (tree_nop_conversion_p (type, TREE_TYPE (@2))
>> +    && tree_nop_conversion_p (TREE_TYPE (@0), TREE_TYPE (@2))
>> +    && multiple_p (wi::to_poly_widest (@3), wi::to_widest (@1), &factor))
>> +    (with
>> +      {
>> +    wi::overflow_type overflow;
>> +        wide_int mul;
>> +      }
>>       (if (types_match (type, TREE_TYPE (@2))
>> -     && types_match (TREE_TYPE (@0), TREE_TYPE (@2)) && !overflow)
>> -      (op @0 { wide_int_to_tree (type, mul); })
>> +      && types_match (TREE_TYPE (@0), TREE_TYPE (@2))
>> +      && TREE_CODE (@2) == INTEGER_CST
>> +      && TREE_CODE (@3) == INTEGER_CST
>> +      && (mul = wi::mul (wi::to_wide (@2), wi::to_wide (@3),
>> +                         TYPE_SIGN (type), &overflow),
>> +          !overflow))
>> +      (op (mult @0 { wide_int_to_tree (type, factor); })
>> +      { wide_int_to_tree (type, mul); })
>>        (with { tree utype = unsigned_type_for (type); }
>> -       (convert (op (convert:utype @0)
>> -                (mult (convert:utype @1) (convert:utype @2))))))))))
>> +       (convert (op (mult (convert:utype @0)
>> +                      { wide_int_to_tree (utype, factor); })
>> +                (mult (convert:utype @3) (convert:utype @2)))))))))))
>>  
>>  /* Canonicalization of binary operations.  */
>>  
>> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/mulexactdiv-5.c 
>> b/gcc/testsuite/gcc.dg/tree-ssa/mulexactdiv-5.c
>> new file mode 100644
>> index 00000000000..37cd676fff6
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.dg/tree-ssa/mulexactdiv-5.c
>> @@ -0,0 +1,29 @@
>> +/* { dg-options "-O2 -fdump-tree-optimized-raw" } */
>> +
>> +#define TEST_CMP(FN, DIV, ADD, MUL)         \
>> +  int                                               \
>> +  FN (int x)                                        \
>> +  {                                         \
>> +    if (x & 7)                                      \
>> +      __builtin_unreachable ();                     \
>> +    x /= DIV;                                       \
>> +    x += ADD;                                       \
>> +    return x * MUL;                         \
>> +  }
>> +
>> +TEST_CMP (f1, 2, 1, 6)
>> +TEST_CMP (f2, 2, 2, 10)
>> +TEST_CMP (f3, 4, 3, 80)
>> +TEST_CMP (f4, 8, 4, 200)
>> +
>> +/* { dg-final { scan-tree-dump-not {<[a-z]*_div_expr,} "optimized" } } */
>> +/* { dg-final { scan-tree-dump-not {<rshift_expr,} "optimized" } } */
>> +/* { dg-final { scan-tree-dump-not {<nop_expr,} "optimized" } } */
>> +/* { dg-final { scan-tree-dump {<mult_expr, [^,]*, [^,]*, 3,} "optimized" } 
>> } */
>> +/* { dg-final { scan-tree-dump {<mult_expr, [^,]*, [^,]*, 5,} "optimized" } 
>> } */
>> +/* { dg-final { scan-tree-dump {<mult_expr, [^,]*, [^,]*, 20,} "optimized" 
>> } } */
>> +/* { dg-final { scan-tree-dump {<mult_expr, [^,]*, [^,]*, 25,} "optimized" 
>> } } */
>> +/* { dg-final { scan-tree-dump {<plus_expr, [^,]*, [^,]*, 6,} "optimized" } 
>> } */
>> +/* { dg-final { scan-tree-dump {<plus_expr, [^,]*, [^,]*, 20,} "optimized" 
>> } } */
>> +/* { dg-final { scan-tree-dump {<plus_expr, [^,]*, [^,]*, 240,} "optimized" 
>> } } */
>> +/* { dg-final { scan-tree-dump {<plus_expr, [^,]*, [^,]*, 800,} "optimized" 
>> } } */
>> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/mulexactdiv-6.c 
>> b/gcc/testsuite/gcc.dg/tree-ssa/mulexactdiv-6.c
>> new file mode 100644
>> index 00000000000..927cb95b82b
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.dg/tree-ssa/mulexactdiv-6.c
>> @@ -0,0 +1,59 @@
>> +/* { dg-options "-O2 -fwrapv -fdump-tree-optimized-raw" } */
>> +
>> +#define TEST_CMP(FN, DIV, ADD, MUL)         \
>> +  int                                               \
>> +  FN (int x)                                        \
>> +  {                                         \
>> +    if (x & 7)                                      \
>> +      __builtin_unreachable ();                     \
>> +    x /= DIV;                                       \
>> +    x += (int) (ADD);                               \
>> +    return x * MUL;                         \
>> +  }
>> +
>> +TEST_CMP (f1, 2, ~0U >> 1, 6)
>> +TEST_CMP (f2, 2, ~(~0U >> 2), 10)
>> +
>> +void
>> +cmp1 (int x)
>> +{
>> +  if (x & 3)
>> +    __builtin_unreachable ();
>> +
>> +  int y = x / 4;
>> +  y += (int) (~0U / 3U);
>> +  y *= 8;
>> +
>> +  unsigned z = x;
>> +  z *= 2U;
>> +  z += ~0U / 3U * 8U;
>> +
>> +  if (y != (int) z)
>> +    __builtin_abort ();
>> +}
>> +
>> +
>> +void
>> +cmp2 (int x)
>> +{
>> +  if (x & 63)
>> +    __builtin_unreachable ();
>> +
>> +  unsigned y = x / 64;
>> +  y += 100U;
>> +  int y2 = (int) y * 256;
>> +
>> +  unsigned z = x;
>> +  z *= 4U;
>> +  z += 25600U;
>> +
>> +  if (y2 != (int) z)
>> +    __builtin_abort ();
>> +}
>> +
>> +/* { dg-final { scan-tree-dump-not {<[a-z]*_div_expr,} "optimized" } } */
>> +/* { dg-final { scan-tree-dump-not {<rshift_expr,} "optimized" } } */
>> +/* { dg-final { scan-tree-dump-not {gimple_call <} "optimized" } } */
>> +/* { dg-final { scan-tree-dump-times {<nop_expr,} 4 "optimized" } } */
>> +/* { dg-final { scan-tree-dump-times {<mult_expr,} 2 "optimized" } } */
>> +/* { dg-final { scan-tree-dump-times {<plus_expr,} 2 "optimized" } } */
>> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/mulexactdiv-7.c 
>> b/gcc/testsuite/gcc.dg/tree-ssa/mulexactdiv-7.c
>> new file mode 100644
>> index 00000000000..c6abce371db
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.dg/tree-ssa/mulexactdiv-7.c
>> @@ -0,0 +1,22 @@
>> +/* { dg-options "-O2 -fdump-tree-optimized-raw" } */
>> +
>> +typedef __PTRDIFF_TYPE__ ptrdiff_t;
>> +typedef __SIZE_TYPE__ size_t;
>> +
>> +void
>> +cmp1 (int *ptr1, int *ptr2)
>> +{
>> +  unsigned char x1 = ptr2 - ptr1;
>> +  x1 += 0x40;
>> +  ptrdiff_t x2 = (ptrdiff_t) x1 * (ptrdiff_t) 4;
>> +
>> +  ptrdiff_t y = ((char *) ptr2 - (char *) ptr1) + (ptrdiff_t) 0x100;
>> +
>> +  size_t z = (char *) ptr2 - (char *) ptr1;
>> +  z += (size_t) 0x100;
>> +
>> +  if (x2 != y && x2 != (ptrdiff_t) z)
>> +    __builtin_abort ();
>> +}
>> +
>> +/* { dg-final { scan-tree-dump {gimple_call <} "optimized" } } */
>> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/mulexactdiv-8.c 
>> b/gcc/testsuite/gcc.dg/tree-ssa/mulexactdiv-8.c
>> new file mode 100644
>> index 00000000000..f452245d95d
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.dg/tree-ssa/mulexactdiv-8.c
>> @@ -0,0 +1,20 @@
>> +/* { dg-options "-O2 -fdump-tree-optimized-raw" } */
>> +
>> +#define TEST_CMP(FN, DIV, ADD, MUL)         \
>> +  int                                               \
>> +  FN (int x)                                        \
>> +  {                                         \
>> +    if (x & 7)                                      \
>> +      __builtin_unreachable ();                     \
>> +    x /= DIV;                                       \
>> +    x += ADD;                                       \
>> +    return x * MUL;                         \
>> +  }
>> +
>> +TEST_CMP (f1, 2, 1, 3)
>> +TEST_CMP (f2, 4, 2, 2)
>> +TEST_CMP (f3, 4, 3, 6)
>> +TEST_CMP (f4, 8, 4, 2)
>> +TEST_CMP (f5, 8, 5, 4)
>> +
>> +/* { dg-final { scan-tree-dump-times {<[a-z]*_div_expr,} 5 "optimized" } } 
>> */
>> diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cnt_fold_3.c 
>> b/gcc/testsuite/gcc.target/aarch64/sve/cnt_fold_3.c
>> new file mode 100644
>> index 00000000000..f9c50e26e48
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.target/aarch64/sve/cnt_fold_3.c
>> @@ -0,0 +1,40 @@
>> +/* { dg-do compile } */
>> +/* { dg-options "-O2" } */
>> +/* { dg-final { check-function-bodies "**" "" } } */
>> +
>> +#include <arm_sve.h>
>> +
>> +/*
>> +** f1:
>> +**  sub     x0, x1, x0
>> +**  incb    x0
>> +**  ret
>> +*/
>> +uint64_t
>> +f1 (int *ptr1, int *ptr2)
>> +{
>> +  return ((ptr2 - ptr1) + svcntw ()) * 4;
>> +}
>> +
>> +/*
>> +** f2:
>> +**  sub     x0, x1, x0
>> +**  incb    x0, all, mul #4
>> +**  ret
>> +*/
>> +uint64_t
>> +f2 (int *ptr1, int *ptr2)
>> +{
>> +  return ((ptr2 - ptr1) + svcntb ()) * 4;
>> +}
>> +
>> +/*
>> +** f3:
>> +**  sub     x0, x1, x0
>> +**  ret
>> +*/
>> +uint64_t
>> +f3 (int *ptr1, int *ptr2)
>> +{
>> +  return (((int *) ((char *) ptr2 + svcntb ()) - ptr1) - svcntw ()) * 4;
>> +}
>> 

Reply via email to