On Fri, 18 Oct 2024, Richard Sandiford wrote:

> This patch adds a rule to simplify (X >> C1) * (C2 << C1) -> X * C2
> when the low C1 bits of X are known to be zero. As with the earlier
> X >> C1 << (C2 + C1) patch, any single conversion is allowed between
> the shift and the multiplication.
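For the archives, the arithmetic behind the rule: when the low C1 bits of X
are zero, X >> C1 is an exact division by 2^C1, so (X >> C1) * (C2 << C1)
equals (X / 2^C1) * (C2 * 2^C1), which is X * C2.  A minimal sketch of the
kind of source this catches (my own illustration, mirroring the new f1 test
below; the function name is made up):

  /* The low two bits of x are known to be zero, so (x >> 2) * 20
     can be rewritten as x * 5, since 20 == 5 << 2.  */
  unsigned int
  scaled_by_20 (unsigned int x)
  {
    if (x & 3)
      __builtin_unreachable ();   /* promises x % 4 == 0 */
    return (x >> 2) * 20;         /* expected to fold to x * 5 */
  }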
OK. Thanks, Richard.

> gcc/
>         * match.pd: Simplify (X >> C1) * (C2 << C1) -> X * C2 if the
>         low C1 bits of X are zero.
>
> gcc/testsuite/
>         * gcc.dg/tree-ssa/shifts-3.c: New test.
>         * gcc.dg/tree-ssa/shifts-4.c: Likewise.
>         * gcc.target/aarch64/sve/cnt_fold_5.c: Likewise.
> ---
>  gcc/match.pd                                  | 13 ++++
>  gcc/testsuite/gcc.dg/tree-ssa/shifts-3.c      | 65 +++++++++++++++++++
>  gcc/testsuite/gcc.dg/tree-ssa/shifts-4.c      | 23 +++++++
>  .../gcc.target/aarch64/sve/cnt_fold_5.c       | 38 +++++++++++
>  4 files changed, 139 insertions(+)
>  create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/shifts-3.c
>  create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/shifts-4.c
>  create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/cnt_fold_5.c
>
> diff --git a/gcc/match.pd b/gcc/match.pd
> index 41903554478..85f5eeefa08 100644
> --- a/gcc/match.pd
> +++ b/gcc/match.pd
> @@ -4915,6 +4915,19 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
>        && wi::to_widest (@2) >= wi::to_widest (@1)
>        && wi::to_widest (@1) <= wi::ctz (get_nonzero_bits (@0)))
>    (lshift (convert @0) (minus @2 @1))))
> +
> +/* (X >> C1) * (C2 << C1) -> X * C2 if the low C1 bits of X are zero.  */
> +(simplify
> + (mult (convert? (rshift (with_possible_nonzero_bits2 @0) INTEGER_CST@1))
> +       poly_int_tree_p@2)
> + (with { poly_widest_int factor; }
> +  (if (INTEGRAL_TYPE_P (type)
> +       && wi::ltu_p (wi::to_wide (@1), element_precision (type))
> +       && wi::to_widest (@1) <= wi::ctz (get_nonzero_bits (@0))
> +       && multiple_p (wi::to_poly_widest (@2),
> +                      widest_int (1) << tree_to_uhwi (@1),
> +                      &factor))
> +   (mult (convert @0) { wide_int_to_tree (type, factor); }))))
>  #endif
>
>  /* For (x << c) >> c, optimize into x & ((unsigned)-1 >> c) for
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/shifts-3.c b/gcc/testsuite/gcc.dg/tree-ssa/shifts-3.c
> new file mode 100644
> index 00000000000..dcff518e630
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/shifts-3.c
> @@ -0,0 +1,65 @@
> +/* { dg-options "-O2 -fdump-tree-optimized-raw" } */
> +
> +unsigned int
> +f1 (unsigned int x)
> +{
> +  if (x & 3)
> +    __builtin_unreachable ();
> +  x >>= 2;
> +  return x * 20;
> +}
> +
> +unsigned int
> +f2 (unsigned int x)
> +{
> +  if (x & 3)
> +    __builtin_unreachable ();
> +  unsigned char y = x;
> +  y >>= 2;
> +  return y * 36;
> +}
> +
> +unsigned long
> +f3 (unsigned int x)
> +{
> +  if (x & 3)
> +    __builtin_unreachable ();
> +  x >>= 2;
> +  return (unsigned long) x * 88;
> +}
> +
> +int
> +f4 (int x)
> +{
> +  if (x & 15)
> +    __builtin_unreachable ();
> +  x >>= 4;
> +  return x * 48;
> +}
> +
> +unsigned int
> +f5 (int x)
> +{
> +  if (x & 31)
> +    __builtin_unreachable ();
> +  x >>= 5;
> +  return x * 3200;
> +}
> +
> +unsigned int
> +f6 (unsigned int x)
> +{
> +  if (x & 1)
> +    __builtin_unreachable ();
> +  x >>= 1;
> +  return x * (~0U / 3 & -2);
> +}
> +
> +/* { dg-final { scan-tree-dump-not {<[a-z]*_div_expr,} "optimized" } } */
> +/* { dg-final { scan-tree-dump-not {<rshift_expr,} "optimized" } } */
> +/* { dg-final { scan-tree-dump {<mult_expr, [^,]*, [^,]*, 5,} "optimized" } } */
> +/* { dg-final { scan-tree-dump {<mult_expr, [^,]*, [^,]*, 9,} "optimized" } } */
> +/* { dg-final { scan-tree-dump {<(?:widen_)?mult_expr, [^,]*, [^,]*, 22,} "optimized" } } */
> +/* { dg-final { scan-tree-dump {<mult_expr, [^,]*, [^,]*, 3,} "optimized" } } */
> +/* { dg-final { scan-tree-dump {<mult_expr, [^,]*, [^,]*, 100,} "optimized" } } */
> +/* { dg-final { scan-tree-dump {<mult_expr, [^,]*, [^,]*, 715827882,} "optimized" { target int32 } } } */
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/shifts-4.c b/gcc/testsuite/gcc.dg/tree-ssa/shifts-4.c
> new file mode 100644
> index 00000000000..5638653d0c2
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/shifts-4.c
> @@ -0,0 +1,23 @@
> +/* { dg-options "-O2 -fdump-tree-optimized-raw" } */
> +
> +unsigned int
> +f1 (unsigned int x)
> +{
> +  if (x & 3)
> +    __builtin_unreachable ();
> +  x >>= 2;
> +  return x * 10;
> +}
> +
> +unsigned int
> +f2 (unsigned int x)
> +{
> +  if (x & 3)
> +    __builtin_unreachable ();
> +  x >>= 3;
> +  return x * 24;
> +}
> +
> +/* { dg-final { scan-tree-dump-times {<rshift_expr,} 2 "optimized" } } */
> +/* { dg-final { scan-tree-dump {<mult_expr, [^,]*, [^,]*, 10,} "optimized" } } */
> +/* { dg-final { scan-tree-dump {<mult_expr, [^,]*, [^,]*, 24,} "optimized" } } */
> diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cnt_fold_5.c b/gcc/testsuite/gcc.target/aarch64/sve/cnt_fold_5.c
> new file mode 100644
> index 00000000000..3f60e9b4941
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/sve/cnt_fold_5.c
> @@ -0,0 +1,38 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2" } */
> +/* { dg-final { check-function-bodies "**" "" } } */
> +
> +#include <arm_sve.h>
> +
> +/*
> +** f1:
> +**	...
> +**	cntd	[^\n]+
> +**	...
> +**	mul	[^\n]+
> +**	ret
> +*/
> +uint64_t
> +f1 (int x)
> +{
> +  if (x & 3)
> +    __builtin_unreachable ();
> +  x >>= 2;
> +  return (uint64_t) x * svcnth ();
> +}
> +
> +/*
> +** f2:
> +**	...
> +**	asr	[^\n]+
> +**	...
> +**	ret
> +*/
> +uint64_t
> +f2 (int x)
> +{
> +  if (x & 3)
> +    __builtin_unreachable ();
> +  x >>= 2;
> +  return (uint64_t) x * svcntw ();
> +}

-- 
Richard Biener <rguent...@suse.de>
SUSE Software Solutions Germany GmbH,
Frankenstrasse 146, 90461 Nuernberg, Germany;
GF: Ivo Totev, Andrew McDonald, Werner Knoblich; (HRB 36809, AG Nuernberg)