On 20 September 2012 09:55, Christophe Lyon <christophe.l...@linaro.org> wrote:
> On 20 September 2012 09:12, Eric Botcazou <ebotca...@adacore.com> wrote:
>>> The attached patch catches C constructs:
>>> (A << 8) | (A >> 8)
>>> where A is unsigned 16 bits
>>> and maps them to builtin_bswap16(A) which can provide more efficient
>>> implementations on some targets.
>>
>> This belongs in tree-ssa-math-opts.c:execute_optimize_bswap instead.
>>
> OK I'll have a look at that. Actually I modified fold-const.c because
> it's here that the similar 32 bits pattern is turned into a rotate.
>
>> When I implemented __builtin_bswap16, I didn't add this because I thought 
>> this
>> would be overkill since the RTL combiner should be able to catch the pattern.
>> Have you investigated on this front?  But I don't have a strong opinion.
>>
> No I didn't. As I said above, I looked at where the 32 bits pattern
> was handled and added the 16 bits one.
>
> Christophe.

Here is a new patch, modifying tree-ssa-math-opts.c as you suggested.
It's indeed simpler :-)

Validated with qemu-arm on target arm-none-linux-gnueabi.

OK?

Christophe.
2012-09-21  Christophe Lyon <christophe.l...@linaro.org>

        gcc/
        * tree-ssa-math-opts.c (bswap_stats): Add found_16bit field.
        (execute_optimize_bswap): Add support for builtin_bswap16.

        gcc/testsuite/
        * gcc.target/arm/builtin-bswap16-1.c: New testcase.

diff --git a/gcc/tree-ssa-math-opts.c b/gcc/tree-ssa-math-opts.c
index c3392fb..340f0df 100644
--- a/gcc/tree-ssa-math-opts.c
+++ b/gcc/tree-ssa-math-opts.c
@@ -154,6 +154,9 @@ static struct
 
 static struct
 {
+  /* Number of hand-written 16-bit bswaps found.  */
+  int found_16bit;
+
   /* Number of hand-written 32-bit bswaps found.  */
   int found_32bit;
 
@@ -1792,9 +1795,9 @@ static unsigned int
 execute_optimize_bswap (void)
 {
   basic_block bb;
-  bool bswap32_p, bswap64_p;
+  bool bswap16_p, bswap32_p, bswap64_p;
   bool changed = false;
-  tree bswap32_type = NULL_TREE, bswap64_type = NULL_TREE;
+  tree bswap16_type = NULL_TREE, bswap32_type = NULL_TREE, bswap64_type = 
NULL_TREE;
 
   if (BITS_PER_UNIT != 8)
     return 0;
@@ -1802,17 +1805,25 @@ execute_optimize_bswap (void)
   if (sizeof (HOST_WIDEST_INT) < 8)
     return 0;
 
+  bswap16_p = (builtin_decl_explicit_p (BUILT_IN_BSWAP16)
+              && optab_handler (bswap_optab, HImode) != CODE_FOR_nothing);
   bswap32_p = (builtin_decl_explicit_p (BUILT_IN_BSWAP32)
               && optab_handler (bswap_optab, SImode) != CODE_FOR_nothing);
   bswap64_p = (builtin_decl_explicit_p (BUILT_IN_BSWAP64)
               && (optab_handler (bswap_optab, DImode) != CODE_FOR_nothing
                   || (bswap32_p && word_mode == SImode)));
 
-  if (!bswap32_p && !bswap64_p)
+  if (!bswap16_p && !bswap32_p && !bswap64_p)
     return 0;
 
   /* Determine the argument type of the builtins.  The code later on
      assumes that the return and argument type are the same.  */
+  if (bswap16_p)
+    {
+      tree fndecl = builtin_decl_explicit (BUILT_IN_BSWAP16);
+      bswap16_type = TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (fndecl)));
+    }
+
   if (bswap32_p)
     {
       tree fndecl = builtin_decl_explicit (BUILT_IN_BSWAP32);
@@ -1852,6 +1863,13 @@ execute_optimize_bswap (void)
 
          switch (type_size)
            {
+           case 16:
+             if (bswap16_p)
+               {
+                 fndecl = builtin_decl_explicit (BUILT_IN_BSWAP16);
+                 bswap_type = bswap16_type;
+               }
+             break;
            case 32:
              if (bswap32_p)
                {
@@ -1879,7 +1897,9 @@ execute_optimize_bswap (void)
            continue;
 
          changed = true;
-         if (type_size == 32)
+         if (type_size == 16)
+           bswap_stats.found_16bit++;
+         else if (type_size == 32)
            bswap_stats.found_32bit++;
          else
            bswap_stats.found_64bit++;
@@ -1924,6 +1944,8 @@ execute_optimize_bswap (void)
        }
     }
 
+  statistics_counter_event (cfun, "16-bit bswap implementations found",
+                           bswap_stats.found_16bit);
   statistics_counter_event (cfun, "32-bit bswap implementations found",
                            bswap_stats.found_32bit);
   statistics_counter_event (cfun, "64-bit bswap implementations found",
diff --git a/gcc/testsuite/gcc.target/arm/builtin-bswap16-1.c 
b/gcc/testsuite/gcc.target/arm/builtin-bswap16-1.c
new file mode 100644
index 0000000..6920f00
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/builtin-bswap16-1.c
@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+/* { dg-require-effective-target arm_arch_v6_ok } */
+/* { dg-add-options arm_arch_v6 } */
+/* { dg-final { scan-assembler-not "orr\[ \t\]" } } */
+
+unsigned short swapu16_1 (unsigned short x)
+{
+  return (x << 8) | (x >> 8);
+}
+
+unsigned short swapu16_2 (unsigned short x)
+{
+  return (x >> 8) | (x << 8);
+}

Reply via email to