This short patch teaches fold that it is "safe" to change the signedness of a left shift (i.e. to perform it in the corresponding unsigned or signed type of the same precision), to reduce the number of type conversions in gimple. As an example:
unsigned int foo(unsigned int i) { return (int)i << 8; } is currently optimized to: unsigned int foo (unsigned int i) { int i.0_1; int _2; unsigned int _4; <bb 2> [local count: 1073741824]: i.0_1 = (int) i_3(D); _2 = i.0_1 << 8; _4 = (unsigned int) _2; return _4; } with this patch, this now becomes: unsigned int foo (unsigned int i) { unsigned int _2; <bb 2> [local count: 1073741824]: _2 = i_1(D) << 8; return _2; } which generates exactly the same assembly language. Aside from the reduced memory usage, the real benefit is that no-op conversions tend to interfere with many folding optimizations. For example, unsigned int bar(unsigned char i) { return (i ^ (i<<16)) | (i<<8); } currently gets (tangled in conversions and) optimized to: unsigned int bar (unsigned char i) { unsigned int _1; unsigned int _2; int _3; int _4; unsigned int _6; unsigned int _8; <bb 2> [local count: 1073741824]: _1 = (unsigned int) i_5(D); _2 = _1 * 65537; _3 = (int) i_5(D); _4 = _3 << 8; _8 = (unsigned int) _4; _6 = _2 | _8; return _6; } but with this patch, bar now optimizes down to: unsigned int bar(unsigned char i) { unsigned int _1; unsigned int _4; <bb 2> [local count: 1073741824]: _1 = (unsigned int) i_3(D); _4 = _1 * 65793; return _4; } This patch has been tested on x86_64-pc-linux-gnu with "make bootstrap" and "make -k check" with no new failures. Ok for mainline? 2021-08-23 Roger Sayle <ro...@nextmovesoftware.com> gcc/ChangeLog * match.pd (shift transformations): Change the sign of an LSHIFT_EXPR if it reduces the number of explicit conversions. gcc/testsuite/ChangeLog * gcc.dg/fold-convlshift-1.c: New test case. * gcc.dg/fold-convlshift-2.c: New test case. Roger --
diff --git a/gcc/match.pd b/gcc/match.pd index 0fcfd0e..978a1b0 100644 --- a/gcc/match.pd +++ b/gcc/match.pd @@ -3385,6 +3385,15 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) (if (integer_zerop (@2) || integer_all_onesp (@2)) (cmp @0 @2))))) +/* Both signed and unsigned lshift produce the same result, so use + the form that minimizes the number of conversions. */ +(simplify + (convert (lshift:s@0 (convert:s@1 @2) INTEGER_CST@3)) + (if (tree_nop_conversion_p (type, TREE_TYPE (@0)) + && INTEGRAL_TYPE_P (TREE_TYPE (@2)) + && TYPE_PRECISION (TREE_TYPE (@2)) <= TYPE_PRECISION (type)) + (lshift (convert @2) @3))) + /* Simplifications of conversions. */ /* Basic strip-useless-type-conversions / strip_nops. */
/* { dg-do compile } */
/* { dg-options "-O2 -fdump-tree-optimized" } */

/* Left shift carried out in a signed type on an unsigned argument, with
   the result converted back to unsigned on return.  Argument and return
   type have the same precision.  */
unsigned int
foo (unsigned int i)
{
  int as_signed = i;
  int shifted = as_signed << 8;
  return shifted;
}

/* Mirror image of foo: the shift is carried out in an unsigned type on a
   signed argument, and the result is converted back to signed.  */
int
bar (int i)
{
  unsigned int as_unsigned = i;
  unsigned int shifted = as_unsigned << 8;
  return shifted;
}

/* The checks below require that neither conversion spelling appears in
   the optimized dump.  */
/* { dg-final { scan-tree-dump-not "\\(int\\)" "optimized" } } */
/* { dg-final { scan-tree-dump-not "\\(unsigned int\\)" "optimized" } } */
/* { dg-do compile } */
/* { dg-options "-O2 -fdump-tree-optimized" } */

/* Widen an unsigned char to int, shift left by 8 in the signed type, and
   return the result as unsigned int.  */
unsigned int
foo (unsigned char c)
{
  int widened = c;
  int shifted = widened << 8;
  return shifted;
}

/* Widen an unsigned char to unsigned int, shift left by 8 in the unsigned
   type, and return the result as int.  */
int
bar (unsigned char c)
{
  unsigned int widened = c;
  unsigned int shifted = widened << 8;
  return shifted;
}

/* The checks below require exactly one occurrence of each conversion
   spelling in the optimized dump.  */
/* { dg-final { scan-tree-dump-times "\\(int\\)" 1 "optimized" } } */
/* { dg-final { scan-tree-dump-times "\\(unsigned int\\)" 1 "optimized" } } */