Hi. The attached patch adds new match-and-simplify patterns, which fold ~((~a) >> b) into (a >> b) for arithmetic shifts (i.e. when A is signed) and perform similar folds for rotations. It also fixes PR tree-optimization/54579 (because we already fold (-a - 1) into ~a).
A couple of questions: 1. Should we limit folding to this special case or rather introduce some canonical order of bitnot and shifts (when they are commutative)? In the latter case, which order is better: bitnot as shift/rotate operand or vise-versa? 2. I noticed that some rotation patterns are folded on tree, while other are folded rather late (during second forward propagation). For example on LP64: #define INT_BITS (sizeof (int) * 8) unsigned int rol(unsigned int a, unsigned int b) { return a << b | a >> (INT_BITS - b); } INT_BITS has type unsigned long, so b and (INT_BITS - b) have different types and tree folding fails (if I change int to long, everything is OK). Should this be addressed somehow? 3. Do the new patterns require any special handling of nop-conversions? -- Regards, Mikhail Maltsev
gcc/ChangeLog: 2015-06-15 Mikhail Maltsev <malts...@gmail.com> * match.pd: (~((~X) >> Y) -> X >> Y): New pattern. (~((~X) r>> Y) -> X r>> Y): New pattern. (~((~X) r<< Y) -> X r<< Y): New pattern. gcc/testsuite/ChangeLog: 2015-06-15 Mikhail Maltsev <malts...@gmail.com> * gcc.dg/fold-notrotate-1.c: New test. * gcc.dg/fold-notshift-1.c: New test. * gcc.dg/fold-notshift-2.c: New test.
diff --git a/gcc/match.pd b/gcc/match.pd index 1ab2b1c..487af72 100644 --- a/gcc/match.pd +++ b/gcc/match.pd @@ -696,6 +696,21 @@ along with GCC; see the file COPYING3. If not see && wi::eq_p (wi::lshift (@0, cand), @2)) (cmp @1 { build_int_cst (TREE_TYPE (@1), cand); }))))) +/* ~((~X) >> Y) -> X >> Y (for arithmetic shift). */ +(simplify + (bit_not (rshift (bit_not @0) @1)) + (if (!TYPE_UNSIGNED (TREE_TYPE (@0))) + (rshift @0 @1))) + +/* Same as above, but for rotations. */ +(for rotate (lrotate rrotate) + (simplify + (bit_not (rotate (bit_not @0) @1)) + (rotate @0 @1))) + +/* TODO: ~((-X + CST) >> Y) -> (X - (CST + 1)) >> Y, + if overflow does not trap. */ + /* Simplifications of conversions. */ /* Basic strip-useless-type-conversions / strip_nops. */ diff --git a/gcc/testsuite/gcc.dg/fold-notrotate-1.c b/gcc/testsuite/gcc.dg/fold-notrotate-1.c new file mode 100644 index 0000000..7fc43d4 --- /dev/null +++ b/gcc/testsuite/gcc.dg/fold-notrotate-1.c @@ -0,0 +1,36 @@ +/* { dg-do compile } */ +/* { dg-options "-O -fdump-tree-optimized" } */ + +#define INT_BITS (sizeof (int) * __CHAR_BIT__) +#define ROL(x, y) ((x) << (y) | (x) >> (INT_BITS - (y))) +#define ROR(x, y) ((x) >> (y) | (x) << (INT_BITS - (y))) + +unsigned int +rol (unsigned int a, unsigned int b) +{ + return ~ROL (~a, b); +} + +unsigned int +ror (unsigned int a, unsigned int b) +{ + return ~ROR (~a, b); +} + +#define LONG_BITS (sizeof (long) * __CHAR_BIT__) +#define ROLL(x, y) ((x) << (y) | (x) >> (LONG_BITS - (y))) +#define RORL(x, y) ((x) >> (y) | (x) << (LONG_BITS - (y))) + +unsigned long +roll (unsigned long a, unsigned long b) +{ + return ~ROLL (~a, b); +} + +unsigned long +rorl (unsigned long a, unsigned long b) +{ + return ~RORL (~a, b); +} + +/* { dg-final { scan-tree-dump-not "~" "optimized" } } */ diff --git a/gcc/testsuite/gcc.dg/fold-notshift-1.c b/gcc/testsuite/gcc.dg/fold-notshift-1.c new file mode 100644 index 0000000..32a55a0 --- /dev/null +++ b/gcc/testsuite/gcc.dg/fold-notshift-1.c @@ -0,0 +1,44 @@ +/* PR tree-optimization/54579 + PR middle-end/55299 */ + +/* { dg-do compile } */ +/* { dg-options "-O -fdump-tree-cddce1" } */ + +int +asr1 (int a, int b) +{ + return ~((~a) >> b); +} + +long +asr1l (long a, long b) +{ + return ~((~a) >> b); +} + +int +asr2 (int a, int b) +{ + return -((-a - 1) >> b) - 1; +} + +int +asr3 (int a, int b) +{ + return a < 0 ? ~((~a) >> b) : a >> b; +} + +long +asr3l (long a, int b) +{ + return a < 0 ? ~((~a) >> b) : a >> b; +} + +int +asr4 (int a, int b) +{ + return a < 0 ? -(-a - 1 >> b) - 1 : a >> b; +} + +/* { dg-final { scan-tree-dump-times ">>" 6 "cddce1" } } */ +/* { dg-final { scan-tree-dump-not "~" "cddce1" } } */ diff --git a/gcc/testsuite/gcc.dg/fold-notshift-2.c b/gcc/testsuite/gcc.dg/fold-notshift-2.c new file mode 100644 index 0000000..5287610 --- /dev/null +++ b/gcc/testsuite/gcc.dg/fold-notshift-2.c @@ -0,0 +1,18 @@ +/* PR middle-end/55299 */ + +/* { dg-do compile } */ +/* { dg-options "-O -fdump-tree-cddce1" } */ + +unsigned int +lsr (unsigned int a, unsigned int b) +{ + return ~((~a) >> b); +} + +int +sl (int a, int b) +{ + return ~((~a) << b); +} + +/* { dg-final { scan-tree-dump-times "~" 4 "cddce1" } } */