On Fri, 18 Oct 2024, Richard Sandiford wrote: > match.pd had a rule to simplify ((X /[ex] A) +- B) * A -> X +- A * B > when A and B are INTEGER_CSTs. This patch extends it to handle the > case where the outer multiplication is by a factor of A, not just > A itself. It also handles addition and multiplication of poly_ints. > (Exact division by a poly_int seems unlikely.) > > I'm not sure why minus is handled here. Wouldn't minus of a constant be > canonicalised to a plus?
All but A - INT_MIN, yes. For A - INT_MIN we'd know A == INT_MIN. For unsigned we canonicalize all constants IIRC. So I agree the minus case can go away. OK unchanged or with the minus removed. Thanks, Richard. > gcc/ > * match.pd: Generalise ((X /[ex] A) +- B) * A -> X +- A * B rule > to ((X /[ex] C1) +- C2) * (C1 * C3) -> (X * C3) +- (C1 * C2 * C3). > > gcc/testsuite/ > * gcc.dg/tree-ssa/mulexactdiv-5.c: New test. > * gcc.dg/tree-ssa/mulexactdiv-6.c: Likewise. > * gcc.dg/tree-ssa/mulexactdiv-7.c: Likewise. > * gcc.dg/tree-ssa/mulexactdiv-8.c: Likewise. > * gcc.target/aarch64/sve/cnt_fold_3.c: Likewise. > --- > gcc/match.pd | 38 +++++++----- > gcc/testsuite/gcc.dg/tree-ssa/mulexactdiv-5.c | 29 +++++++++ > gcc/testsuite/gcc.dg/tree-ssa/mulexactdiv-6.c | 59 +++++++++++++++++++ > gcc/testsuite/gcc.dg/tree-ssa/mulexactdiv-7.c | 22 +++++++ > gcc/testsuite/gcc.dg/tree-ssa/mulexactdiv-8.c | 20 +++++++ > .../gcc.target/aarch64/sve/cnt_fold_3.c | 40 +++++++++++++ > 6 files changed, 194 insertions(+), 14 deletions(-) > create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/mulexactdiv-5.c > create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/mulexactdiv-6.c > create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/mulexactdiv-7.c > create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/mulexactdiv-8.c > create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/cnt_fold_3.c > > diff --git a/gcc/match.pd b/gcc/match.pd > index 6677bc06d80..268316456c3 100644 > --- a/gcc/match.pd > +++ b/gcc/match.pd > @@ -5493,24 +5493,34 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) > optab_vector))) > (eq (trunc_mod @0 @1) { build_zero_cst (TREE_TYPE (@0)); }))) > > -/* ((X /[ex] A) +- B) * A --> X +- A * B. */ > +/* ((X /[ex] C1) +- C2) * (C1 * C3) --> (X * C3) +- (C1 * C2 * C3). */ > (for op (plus minus) > (simplify > - (mult (convert1? (op (convert2? (exact_div @0 INTEGER_CST@@1)) > INTEGER_CST@2)) @1) > - (if (tree_nop_conversion_p (type, TREE_TYPE (@2)) > - && tree_nop_conversion_p (TREE_TYPE (@0), TREE_TYPE (@2))) > - (with > - { > - wi::overflow_type overflow; > - wide_int mul = wi::mul (wi::to_wide (@1), wi::to_wide (@2), > - TYPE_SIGN (type), &overflow); > - } > + (mult (convert1? (op (convert2? (exact_div @0 INTEGER_CST@1)) > + poly_int_tree_p@2)) > + poly_int_tree_p@3) > + (with { poly_widest_int factor; } > + (if (tree_nop_conversion_p (type, TREE_TYPE (@2)) > + && tree_nop_conversion_p (TREE_TYPE (@0), TREE_TYPE (@2)) > + && multiple_p (wi::to_poly_widest (@3), wi::to_widest (@1), &factor)) > + (with > + { > + wi::overflow_type overflow; > + wide_int mul; > + } > (if (types_match (type, TREE_TYPE (@2)) > - && types_match (TREE_TYPE (@0), TREE_TYPE (@2)) && !overflow) > - (op @0 { wide_int_to_tree (type, mul); }) > + && types_match (TREE_TYPE (@0), TREE_TYPE (@2)) > + && TREE_CODE (@2) == INTEGER_CST > + && TREE_CODE (@3) == INTEGER_CST > + && (mul = wi::mul (wi::to_wide (@2), wi::to_wide (@3), > + TYPE_SIGN (type), &overflow), > + !overflow)) > + (op (mult @0 { wide_int_to_tree (type, factor); }) > + { wide_int_to_tree (type, mul); }) > (with { tree utype = unsigned_type_for (type); } > - (convert (op (convert:utype @0) > - (mult (convert:utype @1) (convert:utype @2)))))))))) > + (convert (op (mult (convert:utype @0) > + { wide_int_to_tree (utype, factor); }) > + (mult (convert:utype @3) (convert:utype @2))))))))))) > > /* Canonicalization of binary operations. */ > > diff --git a/gcc/testsuite/gcc.dg/tree-ssa/mulexactdiv-5.c > b/gcc/testsuite/gcc.dg/tree-ssa/mulexactdiv-5.c > new file mode 100644 > index 00000000000..37cd676fff6 > --- /dev/null > +++ b/gcc/testsuite/gcc.dg/tree-ssa/mulexactdiv-5.c > @@ -0,0 +1,29 @@ > +/* { dg-options "-O2 -fdump-tree-optimized-raw" } */ > + > +#define TEST_CMP(FN, DIV, ADD, MUL) \ > + int \ > + FN (int x) \ > + { \ > + if (x & 7) \ > + __builtin_unreachable (); \ > + x /= DIV; \ > + x += ADD; \ > + return x * MUL; \ > + } > + > +TEST_CMP (f1, 2, 1, 6) > +TEST_CMP (f2, 2, 2, 10) > +TEST_CMP (f3, 4, 3, 80) > +TEST_CMP (f4, 8, 4, 200) > + > +/* { dg-final { scan-tree-dump-not {<[a-z]*_div_expr,} "optimized" } } */ > +/* { dg-final { scan-tree-dump-not {<rshift_expr,} "optimized" } } */ > +/* { dg-final { scan-tree-dump-not {<nop_expr,} "optimized" } } */ > +/* { dg-final { scan-tree-dump {<mult_expr, [^,]*, [^,]*, 3,} "optimized" } > } */ > +/* { dg-final { scan-tree-dump {<mult_expr, [^,]*, [^,]*, 5,} "optimized" } > } */ > +/* { dg-final { scan-tree-dump {<mult_expr, [^,]*, [^,]*, 20,} "optimized" } > } */ > +/* { dg-final { scan-tree-dump {<mult_expr, [^,]*, [^,]*, 25,} "optimized" } > } */ > +/* { dg-final { scan-tree-dump {<plus_expr, [^,]*, [^,]*, 6,} "optimized" } > } */ > +/* { dg-final { scan-tree-dump {<plus_expr, [^,]*, [^,]*, 20,} "optimized" } > } */ > +/* { dg-final { scan-tree-dump {<plus_expr, [^,]*, [^,]*, 240,} "optimized" > } } */ > +/* { dg-final { scan-tree-dump {<plus_expr, [^,]*, [^,]*, 800,} "optimized" > } } */ > diff --git a/gcc/testsuite/gcc.dg/tree-ssa/mulexactdiv-6.c > b/gcc/testsuite/gcc.dg/tree-ssa/mulexactdiv-6.c > new file mode 100644 > index 00000000000..927cb95b82b > --- /dev/null > +++ b/gcc/testsuite/gcc.dg/tree-ssa/mulexactdiv-6.c > @@ -0,0 +1,59 @@ > +/* { dg-options "-O2 -fwrapv -fdump-tree-optimized-raw" } */ > + > +#define TEST_CMP(FN, DIV, ADD, MUL) \ > + int \ > + FN (int x) \ > + { \ > + if (x & 7) \ > + __builtin_unreachable (); \ > + x /= DIV; \ > + x += (int) (ADD); \ > + return x * MUL; \ > + } > + > +TEST_CMP (f1, 2, ~0U >> 1, 6) > +TEST_CMP (f2, 2, ~(~0U >> 2), 10) > + > +void > +cmp1 (int x) > +{ > + if (x & 3) > + __builtin_unreachable (); > + > + int y = x / 4; > + y += (int) (~0U / 3U); > + y *= 8; > + > + unsigned z = x; > + z *= 2U; > + z += ~0U / 3U * 8U; > + > + if (y != (int) z) > + __builtin_abort (); > +} > + > + > +void > +cmp2 (int x) > +{ > + if (x & 63) > + __builtin_unreachable (); > + > + unsigned y = x / 64; > + y += 100U; > + int y2 = (int) y * 256; > + > + unsigned z = x; > + z *= 4U; > + z += 25600U; > + > + if (y2 != (int) z) > + __builtin_abort (); > +} > + > +/* { dg-final { scan-tree-dump-not {<[a-z]*_div_expr,} "optimized" } } */ > +/* { dg-final { scan-tree-dump-not {<rshift_expr,} "optimized" } } */ > +/* { dg-final { scan-tree-dump-not {gimple_call <} "optimized" } } */ > +/* { dg-final { scan-tree-dump-times {<nop_expr,} 4 "optimized" } } */ > +/* { dg-final { scan-tree-dump-times {<mult_expr,} 2 "optimized" } } */ > +/* { dg-final { scan-tree-dump-times {<plus_expr,} 2 "optimized" } } */ > diff --git a/gcc/testsuite/gcc.dg/tree-ssa/mulexactdiv-7.c > b/gcc/testsuite/gcc.dg/tree-ssa/mulexactdiv-7.c > new file mode 100644 > index 00000000000..c6abce371db > --- /dev/null > +++ b/gcc/testsuite/gcc.dg/tree-ssa/mulexactdiv-7.c > @@ -0,0 +1,22 @@ > +/* { dg-options "-O2 -fdump-tree-optimized-raw" } */ > + > +typedef __PTRDIFF_TYPE__ ptrdiff_t; > +typedef __SIZE_TYPE__ size_t; > + > +void > +cmp1 (int *ptr1, int *ptr2) > +{ > + unsigned char x1 = ptr2 - ptr1; > + x1 += 0x40; > + ptrdiff_t x2 = (ptrdiff_t) x1 * (ptrdiff_t) 4; > + > + ptrdiff_t y = ((char *) ptr2 - (char *) ptr1) + (ptrdiff_t) 0x100; > + > + size_t z = (char *) ptr2 - (char *) ptr1; > + z += (size_t) 0x100; > + > + if (x2 != y && x2 != (ptrdiff_t) z) > + __builtin_abort (); > +} > + > +/* { dg-final { scan-tree-dump {gimple_call <} "optimized" } } */ > diff --git a/gcc/testsuite/gcc.dg/tree-ssa/mulexactdiv-8.c > b/gcc/testsuite/gcc.dg/tree-ssa/mulexactdiv-8.c > new file mode 100644 > index 00000000000..f452245d95d > --- /dev/null > +++ b/gcc/testsuite/gcc.dg/tree-ssa/mulexactdiv-8.c > @@ -0,0 +1,20 @@ > +/* { dg-options "-O2 -fdump-tree-optimized-raw" } */ > + > +#define TEST_CMP(FN, DIV, ADD, MUL) \ > + int \ > + FN (int x) \ > + { \ > + if (x & 7) \ > + __builtin_unreachable (); \ > + x /= DIV; \ > + x += ADD; \ > + return x * MUL; \ > + } > + > +TEST_CMP (f1, 2, 1, 3) > +TEST_CMP (f2, 4, 2, 2) > +TEST_CMP (f3, 4, 3, 6) > +TEST_CMP (f4, 8, 4, 2) > +TEST_CMP (f5, 8, 5, 4) > + > +/* { dg-final { scan-tree-dump-times {<[a-z]*_div_expr,} 5 "optimized" } } */ > diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cnt_fold_3.c > b/gcc/testsuite/gcc.target/aarch64/sve/cnt_fold_3.c > new file mode 100644 > index 00000000000..f9c50e26e48 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/aarch64/sve/cnt_fold_3.c > @@ -0,0 +1,40 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O2" } */ > +/* { dg-final { check-function-bodies "**" "" } } */ > + > +#include <arm_sve.h> > + > +/* > +** f1: > +** sub x0, x1, x0 > +** incb x0 > +** ret > +*/ > +uint64_t > +f1 (int *ptr1, int *ptr2) > +{ > + return ((ptr2 - ptr1) + svcntw ()) * 4; > +} > + > +/* > +** f2: > +** sub x0, x1, x0 > +** incb x0, all, mul #4 > +** ret > +*/ > +uint64_t > +f2 (int *ptr1, int *ptr2) > +{ > + return ((ptr2 - ptr1) + svcntb ()) * 4; > +} > + > +/* > +** f3: > +** sub x0, x1, x0 > +** ret > +*/ > +uint64_t > +f3 (int *ptr1, int *ptr2) > +{ > + return (((int *) ((char *) ptr2 + svcntb ()) - ptr1) - svcntw ()) * 4; > +} > -- Richard Biener <rguent...@suse.de> SUSE Software Solutions Germany GmbH, Frankenstrasse 146, 90461 Nuernberg, Germany; GF: Ivo Totev, Andrew McDonald, Werner Knoblich; (HRB 36809, AG Nuernberg)