This patch adds a rule to simplify (X >> C1) << (C1 + C2) -> X << C2 when the low C1 bits of X are known to be zero.
Any single conversion can take place between the shifts. E.g. for
a truncating conversion, any extra bits of X that are preserved by
truncating after the shift are immediately lost by the shift left.
And the sign bits used for an extending conversion are the same as
the sign bits used for the rshift. (A double conversion of say
int->unsigned->uint64_t would be wrong though.)

gcc/
	* match.pd: Simplify (X >> C1) << (C1 + C2) -> X << C2 if the
	low C1 bits of X are zero.

gcc/testsuite/
	* gcc.dg/tree-ssa/shifts-1.c: New test.
	* gcc.dg/tree-ssa/shifts-2.c: Likewise.
---
 gcc/match.pd                             | 13 +++++
 gcc/testsuite/gcc.dg/tree-ssa/shifts-1.c | 61 ++++++++++++++++++++++++
 gcc/testsuite/gcc.dg/tree-ssa/shifts-2.c | 21 ++++++++
 3 files changed, 95 insertions(+)
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/shifts-1.c
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/shifts-2.c

diff --git a/gcc/match.pd b/gcc/match.pd
index 268316456c3..540582dc984 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -4902,6 +4902,19 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
                     - TYPE_PRECISION (TREE_TYPE (@2)))))
   (bit_and (convert @0) (lshift { build_minus_one_cst (type); } @1))))
 
+#if GIMPLE
+/* (X >> C1) << (C1 + C2) -> X << C2 if the low C1 bits of X are zero. */
+(simplify
+ (lshift (convert? (rshift (with_possible_nonzero_bits2 @0) INTEGER_CST@1))
+         INTEGER_CST@2)
+ (if (INTEGRAL_TYPE_P (type)
+      && wi::ltu_p (wi::to_wide (@1), element_precision (type))
+      && wi::ltu_p (wi::to_wide (@2), element_precision (type))
+      && wi::to_widest (@2) >= wi::to_widest (@1)
+      && wi::to_widest (@1) <= wi::ctz (get_nonzero_bits (@0)))
+  (lshift (convert @0) (minus @2 @1))))
+#endif
+
 /* For (x << c) >> c, optimize into x & ((unsigned)-1 >> c) for
    unsigned x OR truncate into the precision(type) - c lowest bits
    of signed x (if they have mode precision or a precision of 1). */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/shifts-1.c b/gcc/testsuite/gcc.dg/tree-ssa/shifts-1.c
new file mode 100644
index 00000000000..d88500ca8dd
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/shifts-1.c
@@ -0,0 +1,61 @@
+/* { dg-options "-O2 -fdump-tree-optimized-raw" } */
+
+unsigned int
+f1 (unsigned int x)
+{
+  if (x & 3)
+    __builtin_unreachable ();
+  x >>= 2;
+  return x << 3;
+}
+
+unsigned int
+f2 (unsigned int x)
+{
+  if (x & 3)
+    __builtin_unreachable ();
+  unsigned char y = x;
+  y >>= 2;
+  return y << 3;
+}
+
+unsigned long
+f3 (unsigned int x)
+{
+  if (x & 3)
+    __builtin_unreachable ();
+  x >>= 2;
+  return (unsigned long) x << 3;
+}
+
+int
+f4 (int x)
+{
+  if (x & 15)
+    __builtin_unreachable ();
+  x >>= 4;
+  return x << 5;
+}
+
+unsigned int
+f5 (int x)
+{
+  if (x & 31)
+    __builtin_unreachable ();
+  x >>= 5;
+  return x << 6;
+}
+
+unsigned int
+f6 (unsigned int x)
+{
+  if (x & 1)
+    __builtin_unreachable ();
+  x >>= 1;
+  return x << (sizeof (int) * __CHAR_BIT__ - 1);
+}
+
+/* { dg-final { scan-tree-dump-not {<[a-z]*_div_expr,} "optimized" } } */
+/* { dg-final { scan-tree-dump-not {<rshift_expr,} "optimized" } } */
+/* { dg-final { scan-tree-dump-times {<lshift_expr, [^,]*, [^,]*, 1,} 5 "optimized" } } */
+/* { dg-final { scan-tree-dump {<lshift_expr, [^,]*, [^,]*, 30,} "optimized" { target int32 } } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/shifts-2.c b/gcc/testsuite/gcc.dg/tree-ssa/shifts-2.c
new file mode 100644
index 00000000000..67ba4a75aec
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/shifts-2.c
@@ -0,0 +1,21 @@
+/* { dg-options "-O2 -fdump-tree-optimized-raw" } */
+
+unsigned int
+f1 (unsigned int x)
+{
+  if (x & 3)
+    __builtin_unreachable ();
+  x >>= 3;
+  return x << 4;
+}
+
+unsigned int
+f2 (unsigned int x)
+{
+  if (x & 3)
+    __builtin_unreachable ();
+  x >>= 2;
+  return x << 1;
+}
+
+/* { dg-final { scan-tree-dump-times {<rshift_expr,} 2 "optimized" } } */
-- 
2.25.1