On Fri, 18 Oct 2024, Richard Sandiford wrote:
> match.pd had a rule to simplify ((X /[ex] A) +- B) * A -> X +- A * B
> when A and B are INTEGER_CSTs. This patch extends it to handle the
> case where the outer multiplication is by a factor of A, not just
> A itself. It also handles addition and multiplication of poly_ints.
> (Exact division by a poly_int seems unlikely.)
>
> I'm not sure why minus is handled here. Wouldn't minus of a constant be
> canonicalised to a plus?
All but A - INT_MIN, yes. For A - INT_MIN we'd know A == INT_MIN.
For unsigned we canonicalize all constants IIRC. So I agree the
minus case can go away.
OK unchanged or with the minus removed.
Thanks,
Richard.
> gcc/
> * match.pd: Generalise ((X /[ex] A) +- B) * A -> X +- A * B rule
> to ((X /[ex] C1) +- C2) * (C1 * C3) -> (X * C3) +- (C1 * C2 * C3).
>
> gcc/testsuite/
> * gcc.dg/tree-ssa/mulexactdiv-5.c: New test.
> * gcc.dg/tree-ssa/mulexactdiv-6.c: Likewise.
> * gcc.dg/tree-ssa/mulexactdiv-7.c: Likewise.
> * gcc.dg/tree-ssa/mulexactdiv-8.c: Likewise.
> * gcc.target/aarch64/sve/cnt_fold_3.c: Likewise.
> ---
> gcc/match.pd | 38 +++++++-----
> gcc/testsuite/gcc.dg/tree-ssa/mulexactdiv-5.c | 29 +++++++++
> gcc/testsuite/gcc.dg/tree-ssa/mulexactdiv-6.c | 59 +++++++++++++++++++
> gcc/testsuite/gcc.dg/tree-ssa/mulexactdiv-7.c | 22 +++++++
> gcc/testsuite/gcc.dg/tree-ssa/mulexactdiv-8.c | 20 +++++++
> .../gcc.target/aarch64/sve/cnt_fold_3.c | 40 +++++++++++++
> 6 files changed, 194 insertions(+), 14 deletions(-)
> create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/mulexactdiv-5.c
> create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/mulexactdiv-6.c
> create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/mulexactdiv-7.c
> create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/mulexactdiv-8.c
> create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/cnt_fold_3.c
>
> diff --git a/gcc/match.pd b/gcc/match.pd
> index 6677bc06d80..268316456c3 100644
> --- a/gcc/match.pd
> +++ b/gcc/match.pd
> @@ -5493,24 +5493,34 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
> optab_vector)))
> (eq (trunc_mod @0 @1) { build_zero_cst (TREE_TYPE (@0)); })))
>
> -/* ((X /[ex] A) +- B) * A --> X +- A * B. */
> +/* ((X /[ex] C1) +- C2) * (C1 * C3) --> (X * C3) +- (C1 * C2 * C3). */
> (for op (plus minus)
> (simplify
> - (mult (convert1? (op (convert2? (exact_div @0 INTEGER_CST@@1))
> INTEGER_CST@2)) @1)
> - (if (tree_nop_conversion_p (type, TREE_TYPE (@2))
> - && tree_nop_conversion_p (TREE_TYPE (@0), TREE_TYPE (@2)))
> - (with
> - {
> - wi::overflow_type overflow;
> - wide_int mul = wi::mul (wi::to_wide (@1), wi::to_wide (@2),
> - TYPE_SIGN (type), &overflow);
> - }
> + (mult (convert1? (op (convert2? (exact_div @0 INTEGER_CST@1))
> + poly_int_tree_p@2))
> + poly_int_tree_p@3)
> + (with { poly_widest_int factor; }
> + (if (tree_nop_conversion_p (type, TREE_TYPE (@2))
> + && tree_nop_conversion_p (TREE_TYPE (@0), TREE_TYPE (@2))
> + && multiple_p (wi::to_poly_widest (@3), wi::to_widest (@1), &factor))
> + (with
> + {
> + wi::overflow_type overflow;
> + wide_int mul;
> + }
> (if (types_match (type, TREE_TYPE (@2))
> - && types_match (TREE_TYPE (@0), TREE_TYPE (@2)) && !overflow)
> - (op @0 { wide_int_to_tree (type, mul); })
> + && types_match (TREE_TYPE (@0), TREE_TYPE (@2))
> + && TREE_CODE (@2) == INTEGER_CST
> + && TREE_CODE (@3) == INTEGER_CST
> + && (mul = wi::mul (wi::to_wide (@2), wi::to_wide (@3),
> + TYPE_SIGN (type), &overflow),
> + !overflow))
> + (op (mult @0 { wide_int_to_tree (type, factor); })
> + { wide_int_to_tree (type, mul); })
> (with { tree utype = unsigned_type_for (type); }
> - (convert (op (convert:utype @0)
> - (mult (convert:utype @1) (convert:utype @2))))))))))
> + (convert (op (mult (convert:utype @0)
> + { wide_int_to_tree (utype, factor); })
> + (mult (convert:utype @3) (convert:utype @2)))))))))))
>
> /* Canonicalization of binary operations. */
>
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/mulexactdiv-5.c
> b/gcc/testsuite/gcc.dg/tree-ssa/mulexactdiv-5.c
> new file mode 100644
> index 00000000000..37cd676fff6
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/mulexactdiv-5.c
> @@ -0,0 +1,29 @@
> +/* { dg-options "-O2 -fdump-tree-optimized-raw" } */
> +
> +#define TEST_CMP(FN, DIV, ADD, MUL) \
> + int \
> + FN (int x) \
> + { \
> + if (x & 7) \
> + __builtin_unreachable (); \
> + x /= DIV; \
> + x += ADD; \
> + return x * MUL; \
> + }
> +
> +TEST_CMP (f1, 2, 1, 6)
> +TEST_CMP (f2, 2, 2, 10)
> +TEST_CMP (f3, 4, 3, 80)
> +TEST_CMP (f4, 8, 4, 200)
> +
> +/* { dg-final { scan-tree-dump-not {<[a-z]*_div_expr,} "optimized" } } */
> +/* { dg-final { scan-tree-dump-not {<rshift_expr,} "optimized" } } */
> +/* { dg-final { scan-tree-dump-not {<nop_expr,} "optimized" } } */
> +/* { dg-final { scan-tree-dump {<mult_expr, [^,]*, [^,]*, 3,} "optimized" }
> } */
> +/* { dg-final { scan-tree-dump {<mult_expr, [^,]*, [^,]*, 5,} "optimized" }
> } */
> +/* { dg-final { scan-tree-dump {<mult_expr, [^,]*, [^,]*, 20,} "optimized" }
> } */
> +/* { dg-final { scan-tree-dump {<mult_expr, [^,]*, [^,]*, 25,} "optimized" }
> } */
> +/* { dg-final { scan-tree-dump {<plus_expr, [^,]*, [^,]*, 6,} "optimized" }
> } */
> +/* { dg-final { scan-tree-dump {<plus_expr, [^,]*, [^,]*, 20,} "optimized" }
> } */
> +/* { dg-final { scan-tree-dump {<plus_expr, [^,]*, [^,]*, 240,} "optimized"
> } } */
> +/* { dg-final { scan-tree-dump {<plus_expr, [^,]*, [^,]*, 800,} "optimized"
> } } */
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/mulexactdiv-6.c
> b/gcc/testsuite/gcc.dg/tree-ssa/mulexactdiv-6.c
> new file mode 100644
> index 00000000000..927cb95b82b
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/mulexactdiv-6.c
> @@ -0,0 +1,59 @@
> +/* { dg-options "-O2 -fwrapv -fdump-tree-optimized-raw" } */
> +
> +#define TEST_CMP(FN, DIV, ADD, MUL) \
> + int \
> + FN (int x) \
> + { \
> + if (x & 7) \
> + __builtin_unreachable (); \
> + x /= DIV; \
> + x += (int) (ADD); \
> + return x * MUL; \
> + }
> +
> +TEST_CMP (f1, 2, ~0U >> 1, 6)
> +TEST_CMP (f2, 2, ~(~0U >> 2), 10)
> +
> +void
> +cmp1 (int x)
> +{
> + if (x & 3)
> + __builtin_unreachable ();
> +
> + int y = x / 4;
> + y += (int) (~0U / 3U);
> + y *= 8;
> +
> + unsigned z = x;
> + z *= 2U;
> + z += ~0U / 3U * 8U;
> +
> + if (y != (int) z)
> + __builtin_abort ();
> +}
> +
> +
> +void
> +cmp2 (int x)
> +{
> + if (x & 63)
> + __builtin_unreachable ();
> +
> + unsigned y = x / 64;
> + y += 100U;
> + int y2 = (int) y * 256;
> +
> + unsigned z = x;
> + z *= 4U;
> + z += 25600U;
> +
> + if (y2 != (int) z)
> + __builtin_abort ();
> +}
> +
> +/* { dg-final { scan-tree-dump-not {<[a-z]*_div_expr,} "optimized" } } */
> +/* { dg-final { scan-tree-dump-not {<rshift_expr,} "optimized" } } */
> +/* { dg-final { scan-tree-dump-not {gimple_call <} "optimized" } } */
> +/* { dg-final { scan-tree-dump-times {<nop_expr,} 4 "optimized" } } */
> +/* { dg-final { scan-tree-dump-times {<mult_expr,} 2 "optimized" } } */
> +/* { dg-final { scan-tree-dump-times {<plus_expr,} 2 "optimized" } } */
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/mulexactdiv-7.c
> b/gcc/testsuite/gcc.dg/tree-ssa/mulexactdiv-7.c
> new file mode 100644
> index 00000000000..c6abce371db
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/mulexactdiv-7.c
> @@ -0,0 +1,22 @@
> +/* { dg-options "-O2 -fdump-tree-optimized-raw" } */
> +
> +typedef __PTRDIFF_TYPE__ ptrdiff_t;
> +typedef __SIZE_TYPE__ size_t;
> +
> +void
> +cmp1 (int *ptr1, int *ptr2)
> +{
> + unsigned char x1 = ptr2 - ptr1;
> + x1 += 0x40;
> + ptrdiff_t x2 = (ptrdiff_t) x1 * (ptrdiff_t) 4;
> +
> + ptrdiff_t y = ((char *) ptr2 - (char *) ptr1) + (ptrdiff_t) 0x100;
> +
> + size_t z = (char *) ptr2 - (char *) ptr1;
> + z += (size_t) 0x100;
> +
> + if (x2 != y && x2 != (ptrdiff_t) z)
> + __builtin_abort ();
> +}
> +
> +/* { dg-final { scan-tree-dump {gimple_call <} "optimized" } } */
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/mulexactdiv-8.c
> b/gcc/testsuite/gcc.dg/tree-ssa/mulexactdiv-8.c
> new file mode 100644
> index 00000000000..f452245d95d
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/mulexactdiv-8.c
> @@ -0,0 +1,20 @@
> +/* { dg-options "-O2 -fdump-tree-optimized-raw" } */
> +
> +#define TEST_CMP(FN, DIV, ADD, MUL) \
> + int \
> + FN (int x) \
> + { \
> + if (x & 7) \
> + __builtin_unreachable (); \
> + x /= DIV; \
> + x += ADD; \
> + return x * MUL; \
> + }
> +
> +TEST_CMP (f1, 2, 1, 3)
> +TEST_CMP (f2, 4, 2, 2)
> +TEST_CMP (f3, 4, 3, 6)
> +TEST_CMP (f4, 8, 4, 2)
> +TEST_CMP (f5, 8, 5, 4)
> +
> +/* { dg-final { scan-tree-dump-times {<[a-z]*_div_expr,} 5 "optimized" } } */
> diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cnt_fold_3.c
> b/gcc/testsuite/gcc.target/aarch64/sve/cnt_fold_3.c
> new file mode 100644
> index 00000000000..f9c50e26e48
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/sve/cnt_fold_3.c
> @@ -0,0 +1,40 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2" } */
> +/* { dg-final { check-function-bodies "**" "" } } */
> +
> +#include <arm_sve.h>
> +
> +/*
> +** f1:
> +** sub x0, x1, x0
> +** incb x0
> +** ret
> +*/
> +uint64_t
> +f1 (int *ptr1, int *ptr2)
> +{
> + return ((ptr2 - ptr1) + svcntw ()) * 4;
> +}
> +
> +/*
> +** f2:
> +** sub x0, x1, x0
> +** incb x0, all, mul #4
> +** ret
> +*/
> +uint64_t
> +f2 (int *ptr1, int *ptr2)
> +{
> + return ((ptr2 - ptr1) + svcntb ()) * 4;
> +}
> +
> +/*
> +** f3:
> +** sub x0, x1, x0
> +** ret
> +*/
> +uint64_t
> +f3 (int *ptr1, int *ptr2)
> +{
> + return (((int *) ((char *) ptr2 + svcntb ()) - ptr1) - svcntw ()) * 4;
> +}
>
--
Richard Biener <[email protected]>
SUSE Software Solutions Germany GmbH,
Frankenstrasse 146, 90461 Nuernberg, Germany;
GF: Ivo Totev, Andrew McDonald, Werner Knoblich; (HRB 36809, AG Nuernberg)