Currently the bswap pass only looks for bswap patterns by examining bitwise
OR statements and following their def-use chains. However, a rotation
(left or right) can also finish a manual byteswap, as shown in the following
example:

/* Manual byte swap of a 32-bit value, written as two mask-and-shift steps.
   The second step is the one that appears as a rotation (r>> 16) rather than
   a bitwise OR in the GIMPLE dump below.  */
unsigned
byteswap_ending_with_rotation (unsigned in)
{
    /* Exchange the two bytes inside each 16-bit half.  */
    in = ((in & 0xff00ff00) >>  8) | ((in & 0x00ff00ff) <<  8);
    /* Exchange the two 16-bit halves, i.e. rotate the value by 16 bits.  */
    in = ((in & 0xffff0000) >> 16) | ((in & 0x0000ffff) << 16);
    return in;
}

which is compiled into:

byteswap_ending_with_rotation (unsigned int in)
{
  unsigned int _2;
  unsigned int _3;
  unsigned int _4;
  unsigned int _5;

  <bb 2>:
  _2 = in_1(D) & 4278255360;
  _3 = _2 >> 8;
  _4 = in_1(D) & 16711935;
  _5 = _4 << 8;
  in_6 = _5 | _3;
  in_7 = in_6 r>> 16;
  return in_7;

}

This patch adds rotations (left and right) to the list of statements the
bswap pass considers as a possible end of a byte swap.

The ChangeLog entries are as follows:

*** gcc/ChangeLog ***

2014-09-30  Thomas Preud'homme  <thomas.preudho...@arm.com>

        PR tree-optimization/63259
        * tree-ssa-math-opts.c (pass_optimize_bswap::execute): Also consider
        bswap in LROTATE_EXPR and RROTATE_EXPR statements.

*** gcc/testsuite/ChangeLog ***

2014-09-30  Thomas Preud'homme  <thomas.preudho...@arm.com>

        PR tree-optimization/63259
        * optimize-bswapsi-1.c (swap32_e): New bswap pass test.


diff --git a/gcc/testsuite/gcc.dg/optimize-bswapsi-1.c 
b/gcc/testsuite/gcc.dg/optimize-bswapsi-1.c
index 580e6e0..d4b5740 100644
--- a/gcc/testsuite/gcc.dg/optimize-bswapsi-1.c
+++ b/gcc/testsuite/gcc.dg/optimize-bswapsi-1.c
@@ -64,5 +64,16 @@ swap32_d (SItype in)
         | (((in >> 24) & 0xFF) << 0);
 }
 
-/* { dg-final { scan-tree-dump-times "32 bit bswap implementation found at" 4 
"bswap" } } */
+/* This variant comes from PR63259.  It compiles to a gimple sequence that ends
+   with a rotation instead of a bitwise OR.  */
+
+unsigned
+swap32_e (unsigned in)
+{
+  in = ((in & 0xff00ff00) >>  8) | ((in & 0x00ff00ff) <<  8);
+  in = ((in & 0xffff0000) >> 16) | ((in & 0x0000ffff) << 16);
+  return in;
+}
+
+/* { dg-final { scan-tree-dump-times "32 bit bswap implementation found at" 5 
"bswap" } } */
 /* { dg-final { cleanup-tree-dump "bswap" } } */
diff --git a/gcc/tree-ssa-math-opts.c b/gcc/tree-ssa-math-opts.c
index 3c6e935..2023f2e 100644
--- a/gcc/tree-ssa-math-opts.c
+++ b/gcc/tree-ssa-math-opts.c
@@ -2377,11 +2377,16 @@ pass_optimize_bswap::execute (function *fun)
         {
          gimple src_stmt, cur_stmt = gsi_stmt (gsi);
          tree fndecl = NULL_TREE, bswap_type = NULL_TREE, load_type;
+         enum tree_code code;
          struct symbolic_number n;
          bool bswap;
 
-         if (!is_gimple_assign (cur_stmt)
-             || gimple_assign_rhs_code (cur_stmt) != BIT_IOR_EXPR)
+         if (!is_gimple_assign (cur_stmt))
+           continue;
+
+         code = gimple_assign_rhs_code (cur_stmt);
+         if (code != BIT_IOR_EXPR && code != LROTATE_EXPR
+             && code != RROTATE_EXPR)
            continue;
 
          src_stmt = find_bswap_or_nop (cur_stmt, &n, &bswap);

Testing was done by running the testsuite on an arm-none-eabi target with QEMU
emulating a Cortex-M3: no regressions were found. Because of the potential
increase in compilation time, a bootstrap with a sequential build (no -j option
when calling make) and with default options was done with and without the
patch. The results show no significant increase in compilation time:

r215662 with patch:
make  6167.48s user 401.03s system 99% cpu 1:49:52.07 total

r215662 without patch:
make  6136.63s user 400.32s system 99% cpu 1:49:27.28 total

Is it ok for trunk?

Best regards,

Thomas Preud'homme



Reply via email to