Hi Jakub and Marc,

Here's version #3 of the patch to recognize bswap32 and bswap64. It now also implements Jakub's suggestion to support addition and xor, as well as bitwise ior, when recognizing the union of the highpart and lowpart, and it adds two tests to check these variants.
This revised patch has been tested on x86_64-pc-linux-gnu with a "make bootstrap" and "make -k check" with no new failures, and all four new tests pass. Ok for mainline?

2020-08-17  Roger Sayle  <ro...@nextmovesoftware.com>
	    Marc Glisse  <marc.gli...@inria.fr>
	    Jakub Jelinek  <ja...@redhat.com>

gcc/ChangeLog
	* match.pd (((T)bswapX(x)<<C)|bswapX(x>>C) -> bswapY(x)): New
	simplifications to recognize __builtin_bswap{32,64}.

gcc/testsuite/ChangeLog
	* gcc.dg/fold-bswap-1.c: New test.
	* gcc.dg/fold-bswap-2.c: New test.
	* gcc.dg/fold-bswap-3.c: New test.
	* gcc.dg/fold-bswap-4.c: New test.

Thanks in advance,
Roger
--

-----Original Message-----
From: Jakub Jelinek <ja...@redhat.com>
Sent: 15 August 2020 14:26
To: Roger Sayle <ro...@nextmovesoftware.com>
Cc: 'GCC Patches' <gcc-patches@gcc.gnu.org>; 'Marc Glisse' <marc.gli...@inria.fr>
Subject: Re: [PATCH] middle-end: Recognize idioms for bswap32 and bswap64 in match.pd.

On Sat, Aug 15, 2020 at 11:09:17AM +0100, Roger Sayle wrote:
> +/* Recognize ((T)bswap32(x)<<32)|bswap32(x>>32) as bswap64(x). */
> +(simplify
> + (bit_ior:c

Any reason for supporting bit_ior only? Don't plus:c or bit_xor:c work the same (i.e. use (for op (bit_ior bit_xor plus) ...)?

Jakub
diff --git a/gcc/match.pd b/gcc/match.pd
index c3b8816..3d7a0db 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -3410,6 +3410,35 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
   (bswap (bitop:c (bswap @0) @1))
   (bitop @0 (bswap @1)))))
+/* Recognize ((T)bswap32(x)<<32)|bswap32(x>>32) as bswap64(x). */
+(for op (bit_ior bit_xor plus)
+ (simplify
+  (op:c
+   (lshift (convert (BUILT_IN_BSWAP32 (convert@0 @1)))
+           INTEGER_CST@2)
+   (convert (BUILT_IN_BSWAP32 (convert@3 (rshift @1 @2)))))
+  (if (INTEGRAL_TYPE_P (type)
+       && TYPE_PRECISION (type) == 64
+       && types_match (TREE_TYPE (@1), uint64_type_node)
+       && types_match (TREE_TYPE (@0), uint32_type_node)
+       && types_match (TREE_TYPE (@3), uint32_type_node)
+       && wi::to_widest (@2) == 32)
+   (convert (BUILT_IN_BSWAP64 @1)))))
+
+/* Recognize ((T)bswap16(x)<<16)|bswap16(x>>16) as bswap32(x). */
+(for op (bit_ior bit_xor plus)
+ (simplify
+  (op:c
+   (lshift
+    (convert (BUILT_IN_BSWAP16 (convert (bit_and @0 INTEGER_CST@1))))
+    (INTEGER_CST@2))
+   (convert (BUILT_IN_BSWAP16 (convert (rshift @0 @2)))))
+  (if (INTEGRAL_TYPE_P (type)
+       && TYPE_PRECISION (type) == 32
+       && types_match (TREE_TYPE (@0), uint32_type_node)
+       && wi::to_widest (@1) == 65535
+       && wi::to_widest (@2) == 16)
+   (convert (BUILT_IN_BSWAP32 @0)))))
 /* Combine COND_EXPRs and VEC_COND_EXPRs. */
diff --git a/gcc/testsuite/gcc.dg/fold-bswap-1.c b/gcc/testsuite/gcc.dg/fold-bswap-1.c
new file mode 100644
index 0000000..3abb862
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/fold-bswap-1.c
@@ -0,0 +1,26 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-optimized" } */
+
+unsigned int swap32(unsigned int x)
+{
+  if (sizeof(unsigned int)==4 && sizeof(unsigned short)==2) {
+    unsigned int a = __builtin_bswap16(x);
+    x >>= 16;
+    a <<= 16;
+    return __builtin_bswap16(x) | a;
+  } else return __builtin_bswap32(x);
+}
+
+unsigned long swap64(unsigned long x)
+{
+  if (sizeof(unsigned long)==8 && sizeof(unsigned int)==4) {
+    unsigned long a = __builtin_bswap32(x);
+    x >>= 32;
+    a <<= 32;
+    return __builtin_bswap32(x) | a;
+  } else return __builtin_bswap64(x);
+}
+
+/* { dg-final { scan-tree-dump-times "__builtin_bswap32" 1 "optimized" } } */
+/* { dg-final { scan-tree-dump-times "__builtin_bswap64" 1 "optimized" } } */
+
diff --git a/gcc/testsuite/gcc.dg/fold-bswap-2.c b/gcc/testsuite/gcc.dg/fold-bswap-2.c
new file mode 100644
index 0000000..a581fd6
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/fold-bswap-2.c
@@ -0,0 +1,26 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-optimized" } */
+
+int swap32(unsigned int x)
+{
+  if (sizeof(int)==4 && sizeof(short)==2) {
+    int a = __builtin_bswap16(x);
+    x >>= 16;
+    a <<= 16;
+    return __builtin_bswap16(x) | a;
+  } else return __builtin_bswap32(x);
+}
+
+long swap64(unsigned long x)
+{
+  if (sizeof(long)==8 && sizeof(int)==4) {
+    long a = __builtin_bswap32(x);
+    x >>= 32;
+    a <<= 32;
+    return __builtin_bswap32(x) | a;
+  } else return __builtin_bswap64(x);
+}
+
+/* { dg-final { scan-tree-dump-times "__builtin_bswap32" 1 "optimized" } } */
+/* { dg-final { scan-tree-dump-times "__builtin_bswap64" 1 "optimized" } } */
+
diff --git a/gcc/testsuite/gcc.dg/fold-bswap-3.c b/gcc/testsuite/gcc.dg/fold-bswap-3.c
new file mode 100644
index 0000000..13bb6eb
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/fold-bswap-3.c
@@ -0,0 +1,26 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-optimized" } */
+
+unsigned int swap32(unsigned int x)
+{
+  if (sizeof(unsigned int)==4 && sizeof(unsigned short)==2) {
+    unsigned int a = __builtin_bswap16(x);
+    x >>= 16;
+    a <<= 16;
+    return __builtin_bswap16(x) + a;
+  } else return __builtin_bswap32(x);
+}
+
+unsigned long swap64(unsigned long x)
+{
+  if (sizeof(unsigned long)==8 && sizeof(unsigned int)==4) {
+    unsigned long a = __builtin_bswap32(x);
+    x >>= 32;
+    a <<= 32;
+    return __builtin_bswap32(x) + a;
+  } else return __builtin_bswap64(x);
+}
+
+/* { dg-final { scan-tree-dump-times "__builtin_bswap32" 1 "optimized" } } */
+/* { dg-final { scan-tree-dump-times "__builtin_bswap64" 1 "optimized" } } */
+
diff --git a/gcc/testsuite/gcc.dg/fold-bswap-4.c b/gcc/testsuite/gcc.dg/fold-bswap-4.c
new file mode 100644
index 0000000..1ae2084
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/fold-bswap-4.c
@@ -0,0 +1,26 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-optimized" } */
+
+unsigned int swap32(unsigned int x)
+{
+  if (sizeof(unsigned int)==4 && sizeof(unsigned short)==2) {
+    unsigned int a = __builtin_bswap16(x);
+    x >>= 16;
+    a <<= 16;
+    return __builtin_bswap16(x) ^ a;
+  } else return __builtin_bswap32(x);
+}
+
+unsigned long swap64(unsigned long x)
+{
+  if (sizeof(unsigned long)==8 && sizeof(unsigned int)==4) {
+    unsigned long a = __builtin_bswap32(x);
+    x >>= 32;
+    a <<= 32;
+    return __builtin_bswap32(x) ^ a;
+  } else return __builtin_bswap64(x);
+}
+
+/* { dg-final { scan-tree-dump-times "__builtin_bswap32" 1 "optimized" } } */
+/* { dg-final { scan-tree-dump-times "__builtin_bswap64" 1 "optimized" } } */
+