Currently the bswap pass only looks for bswap patterns by examining bitwise OR statements and following def-use chains. However, a rotation (left or right) can finish a manual byteswap, as shown in the following example:
unsigned byteswap_ending_with_rotation (unsigned in) { in = ((in & 0xff00ff00) >> 8) | ((in & 0x00ff00ff) << 8); in = ((in & 0xffff0000) >> 16) | ((in & 0x0000ffff) << 16); return in; } which is compiled into: byteswap_ending_with_rotation (unsigned int in) { unsigned int _2; unsigned int _3; unsigned int _4; unsigned int _5; <bb 2>: _2 = in_1(D) & 4278255360; _3 = _2 >> 8; _4 = in_1(D) & 16711935; _5 = _4 << 8; in_6 = _5 | _3; in_7 = in_6 r>> 16; return in_7; } This patch adds rotation (left and right) to the list of statements to consider for byte swap. ChangeLog entries are as follows: *** gcc/ChangeLog *** 2014-09-30 Thomas Preud'homme <thomas.preudho...@arm.com> PR tree-optimization/63259 * tree-ssa-math-opts.c (pass_optimize_bswap::execute): Also consider bswap in LROTATE_EXPR and RROTATE_EXPR statements. *** gcc/testsuite/ChangeLog *** 2014-09-30 Thomas Preud'homme <thomas.preudho...@arm.com> PR tree-optimization/63259 * optimize-bswapsi-1.c (swap32_e): New bswap pass test. diff --git a/gcc/testsuite/gcc.dg/optimize-bswapsi-1.c b/gcc/testsuite/gcc.dg/optimize-bswapsi-1.c index 580e6e0..d4b5740 100644 --- a/gcc/testsuite/gcc.dg/optimize-bswapsi-1.c +++ b/gcc/testsuite/gcc.dg/optimize-bswapsi-1.c @@ -64,5 +64,16 @@ swap32_d (SItype in) | (((in >> 24) & 0xFF) << 0); } -/* { dg-final { scan-tree-dump-times "32 bit bswap implementation found at" 4 "bswap" } } */ +/* This variant comes from PR63259. It compiles to a gimple sequence that ends + with a rotation instead of a bitwise OR. 
*/ + +unsigned +swap32_e (unsigned in) +{ + in = ((in & 0xff00ff00) >> 8) | ((in & 0x00ff00ff) << 8); + in = ((in & 0xffff0000) >> 16) | ((in & 0x0000ffff) << 16); + return in; +} + +/* { dg-final { scan-tree-dump-times "32 bit bswap implementation found at" 5 "bswap" } } */ /* { dg-final { cleanup-tree-dump "bswap" } } */ diff --git a/gcc/tree-ssa-math-opts.c b/gcc/tree-ssa-math-opts.c index 3c6e935..2023f2e 100644 --- a/gcc/tree-ssa-math-opts.c +++ b/gcc/tree-ssa-math-opts.c @@ -2377,11 +2377,16 @@ pass_optimize_bswap::execute (function *fun) { gimple src_stmt, cur_stmt = gsi_stmt (gsi); tree fndecl = NULL_TREE, bswap_type = NULL_TREE, load_type; + enum tree_code code; struct symbolic_number n; bool bswap; - if (!is_gimple_assign (cur_stmt) - || gimple_assign_rhs_code (cur_stmt) != BIT_IOR_EXPR) + if (!is_gimple_assign (cur_stmt)) + continue; + + code = gimple_assign_rhs_code (cur_stmt); + if (code != BIT_IOR_EXPR && code != LROTATE_EXPR + && code != RROTATE_EXPR) continue; src_stmt = find_bswap_or_nop (cur_stmt, &n, &bswap); Testing was done by running the testsuite on an arm-none-eabi target with QEMU emulating Cortex-M3: no regressions were found. Due to the potential increase in compilation time, a bootstrap with sequential build (no -j option when calling make) and with default options was made with and without the patch. The results show no increase in compilation time: r215662 with patch: make 6167.48s user 401.03s system 99% cpu 1:49:52.07 total r215662 without patch: make 6136.63s user 400.32s system 99% cpu 1:49:27.28 total Is it ok for trunk? Best regards, Thomas Preud'homme