This patch adds a rule to simplify (X >> C1) * (C2 << C1) -> X * C2 when the low C1 bits of X are known to be zero. As with the earlier (X >> C1) << (C2 + C1) patch, any single conversion is allowed between the shift and the multiplication.
gcc/ * match.pd: Simplify (X >> C1) * (C2 << C1) -> X * C2 if the low C1 bits of X are zero. gcc/testsuite/ * gcc.dg/tree-ssa/shifts-3.c: New test. * gcc.dg/tree-ssa/shifts-4.c: Likewise. * gcc.target/aarch64/sve/cnt_fold_5.c: Likewise. --- gcc/match.pd | 13 ++++ gcc/testsuite/gcc.dg/tree-ssa/shifts-3.c | 65 +++++++++++++++++++ gcc/testsuite/gcc.dg/tree-ssa/shifts-4.c | 23 +++++++ .../gcc.target/aarch64/sve/cnt_fold_5.c | 38 +++++++++++ 4 files changed, 139 insertions(+) create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/shifts-3.c create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/shifts-4.c create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/cnt_fold_5.c diff --git a/gcc/match.pd b/gcc/match.pd index 41903554478..85f5eeefa08 100644 --- a/gcc/match.pd +++ b/gcc/match.pd @@ -4915,6 +4915,19 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) && wi::to_widest (@2) >= wi::to_widest (@1) && wi::to_widest (@1) <= wi::ctz (get_nonzero_bits (@0))) (lshift (convert @0) (minus @2 @1)))) + +/* (X >> C1) * (C2 << C1) -> X * C2 if the low C1 bits of X are zero. */ +(simplify + (mult (convert? 
(rshift (with_possible_nonzero_bits2 @0) INTEGER_CST@1)) + poly_int_tree_p@2) + (with { poly_widest_int factor; } + (if (INTEGRAL_TYPE_P (type) + && wi::ltu_p (wi::to_wide (@1), element_precision (type)) + && wi::to_widest (@1) <= wi::ctz (get_nonzero_bits (@0)) + && multiple_p (wi::to_poly_widest (@2), + widest_int (1) << tree_to_uhwi (@1), + &factor)) + (mult (convert @0) { wide_int_to_tree (type, factor); })))) #endif /* For (x << c) >> c, optimize into x & ((unsigned)-1 >> c) for diff --git a/gcc/testsuite/gcc.dg/tree-ssa/shifts-3.c b/gcc/testsuite/gcc.dg/tree-ssa/shifts-3.c new file mode 100644 index 00000000000..dcff518e630 --- /dev/null +++ b/gcc/testsuite/gcc.dg/tree-ssa/shifts-3.c @@ -0,0 +1,65 @@ +/* { dg-options "-O2 -fdump-tree-optimized-raw" } */ + +unsigned int +f1 (unsigned int x) +{ + if (x & 3) + __builtin_unreachable (); + x >>= 2; + return x * 20; +} + +unsigned int +f2 (unsigned int x) +{ + if (x & 3) + __builtin_unreachable (); + unsigned char y = x; + y >>= 2; + return y * 36; +} + +unsigned long +f3 (unsigned int x) +{ + if (x & 3) + __builtin_unreachable (); + x >>= 2; + return (unsigned long) x * 88; +} + +int +f4 (int x) +{ + if (x & 15) + __builtin_unreachable (); + x >>= 4; + return x * 48; +} + +unsigned int +f5 (int x) +{ + if (x & 31) + __builtin_unreachable (); + x >>= 5; + return x * 3200; +} + +unsigned int +f6 (unsigned int x) +{ + if (x & 1) + __builtin_unreachable (); + x >>= 1; + return x * (~0U / 3 & -2); +} + +/* { dg-final { scan-tree-dump-not {<[a-z]*_div_expr,} "optimized" } } */ +/* { dg-final { scan-tree-dump-not {<rshift_expr,} "optimized" } } */ +/* { dg-final { scan-tree-dump {<mult_expr, [^,]*, [^,]*, 5,} "optimized" } } */ +/* { dg-final { scan-tree-dump {<mult_expr, [^,]*, [^,]*, 9,} "optimized" } } */ +/* { dg-final { scan-tree-dump {<(?:widen_)?mult_expr, [^,]*, [^,]*, 22,} "optimized" } } */ +/* { dg-final { scan-tree-dump {<mult_expr, [^,]*, [^,]*, 3,} "optimized" } } */ +/* { dg-final { scan-tree-dump 
{<mult_expr, [^,]*, [^,]*, 100,} "optimized" } } */ +/* { dg-final { scan-tree-dump {<mult_expr, [^,]*, [^,]*, 715827882,} "optimized" { target int32 } } } */ diff --git a/gcc/testsuite/gcc.dg/tree-ssa/shifts-4.c b/gcc/testsuite/gcc.dg/tree-ssa/shifts-4.c new file mode 100644 index 00000000000..5638653d0c2 --- /dev/null +++ b/gcc/testsuite/gcc.dg/tree-ssa/shifts-4.c @@ -0,0 +1,23 @@ +/* { dg-options "-O2 -fdump-tree-optimized-raw" } */ + +unsigned int +f1 (unsigned int x) +{ + if (x & 3) + __builtin_unreachable (); + x >>= 2; + return x * 10; +} + +unsigned int +f2 (unsigned int x) +{ + if (x & 3) + __builtin_unreachable (); + x >>= 3; + return x * 24; +} + +/* { dg-final { scan-tree-dump-times {<rshift_expr,} 2 "optimized" } } */ +/* { dg-final { scan-tree-dump {<mult_expr, [^,]*, [^,]*, 10,} "optimized" } } */ +/* { dg-final { scan-tree-dump {<mult_expr, [^,]*, [^,]*, 24,} "optimized" } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cnt_fold_5.c b/gcc/testsuite/gcc.target/aarch64/sve/cnt_fold_5.c new file mode 100644 index 00000000000..3f60e9b4941 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/cnt_fold_5.c @@ -0,0 +1,38 @@ +/* { dg-do compile } */ +/* { dg-options "-O2" } */ +/* { dg-final { check-function-bodies "**" "" } } */ + +#include <arm_sve.h> + +/* +** f1: +** ... +** cntd [^\n]+ +** ... +** mul [^\n]+ +** ret +*/ +uint64_t +f1 (int x) +{ + if (x & 3) + __builtin_unreachable (); + x >>= 2; + return (uint64_t) x * svcnth (); +} + +/* +** f2: +** ... +** asr [^\n]+ +** ... +** ret +*/ +uint64_t +f2 (int x) +{ + if (x & 3) + __builtin_unreachable (); + x >>= 2; + return (uint64_t) x * svcntw (); +} -- 2.25.1