On 20 September 2012 09:55, Christophe Lyon <christophe.l...@linaro.org> wrote: > On 20 September 2012 09:12, Eric Botcazou <ebotca...@adacore.com> wrote: >>> The attached patch catches C constructs: >>> (A << 8) | (A >> 8) >>> where A is unsigned 16 bits >>> and maps them to builtin_bswap16(A) which can provide more efficient >>> implementations on some targets. >> >> This belongs in tree-ssa-math-opts.c:execute_optimize_bswap instead. >> > OK I'll have a look at that. Actually I modified fold-const.c because > it's here that the similar 32 bits pattern is turned into a rotate. > >> When I implemented __builtin_bswap16, I didn't add this because I thought >> this >> would be overkill since the RTL combiner should be able to catch the pattern. >> Have you investigated on this front? But I don't have a strong opinion. >> > No I didn't. As I said above, I looked at where the 32 bits pattern > was handled and added the 16 bits one. > > Christophe.
Here is a new patch, modifying tree-ssa-math-opts.c as you suggested. It's indeed simpler :-) Validated with qemu-arm on target arm-none-linux-gnueabi. OK? Christophe.
2012-09-21 Christophe Lyon <christophe.l...@linaro.org> gcc/ * tree-ssa-math-opts.c (bswap_stats): Add found_16bit field. (execute_optimize_bswap): Add support for builtin_bswap16. gcc/testsuite/ * gcc.target/arm/builtin-bswap16-1.c: New testcase. diff --git a/gcc/tree-ssa-math-opts.c b/gcc/tree-ssa-math-opts.c index c3392fb..340f0df 100644 --- a/gcc/tree-ssa-math-opts.c +++ b/gcc/tree-ssa-math-opts.c @@ -154,6 +154,9 @@ static struct static struct { + /* Number of hand-written 16-bit bswaps found. */ + int found_16bit; + /* Number of hand-written 32-bit bswaps found. */ int found_32bit; @@ -1792,9 +1795,9 @@ static unsigned int execute_optimize_bswap (void) { basic_block bb; - bool bswap32_p, bswap64_p; + bool bswap16_p, bswap32_p, bswap64_p; bool changed = false; - tree bswap32_type = NULL_TREE, bswap64_type = NULL_TREE; + tree bswap16_type = NULL_TREE, bswap32_type = NULL_TREE, bswap64_type = NULL_TREE; if (BITS_PER_UNIT != 8) return 0; @@ -1802,17 +1805,25 @@ execute_optimize_bswap (void) if (sizeof (HOST_WIDEST_INT) < 8) return 0; + bswap16_p = (builtin_decl_explicit_p (BUILT_IN_BSWAP16) + && optab_handler (bswap_optab, HImode) != CODE_FOR_nothing); bswap32_p = (builtin_decl_explicit_p (BUILT_IN_BSWAP32) && optab_handler (bswap_optab, SImode) != CODE_FOR_nothing); bswap64_p = (builtin_decl_explicit_p (BUILT_IN_BSWAP64) && (optab_handler (bswap_optab, DImode) != CODE_FOR_nothing || (bswap32_p && word_mode == SImode))); - if (!bswap32_p && !bswap64_p) + if (!bswap16_p && !bswap32_p && !bswap64_p) return 0; /* Determine the argument type of the builtins. The code later on assumes that the return and argument type are the same. */ + if (bswap16_p) + { + tree fndecl = builtin_decl_explicit (BUILT_IN_BSWAP16); + bswap16_type = TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (fndecl))); + } + if (bswap32_p) { tree fndecl = builtin_decl_explicit (BUILT_IN_BSWAP32); @@ -1852,6 +1863,13 @@ execute_optimize_bswap (void) switch (type_size) { + case 16: + if (bswap16_p) + { + fndecl = builtin_decl_explicit (BUILT_IN_BSWAP16); + bswap_type = bswap16_type; + } + break; case 32: if (bswap32_p) { @@ -1879,7 +1897,9 @@ execute_optimize_bswap (void) continue; changed = true; - if (type_size == 32) + if (type_size == 16) + bswap_stats.found_16bit++; + else if (type_size == 32) bswap_stats.found_32bit++; else bswap_stats.found_64bit++; @@ -1924,6 +1944,8 @@ execute_optimize_bswap (void) } } + statistics_counter_event (cfun, "16-bit bswap implementations found", + bswap_stats.found_16bit); statistics_counter_event (cfun, "32-bit bswap implementations found", bswap_stats.found_32bit); statistics_counter_event (cfun, "64-bit bswap implementations found", diff --git a/gcc/testsuite/gcc.target/arm/builtin-bswap16-1.c b/gcc/testsuite/gcc.target/arm/builtin-bswap16-1.c new file mode 100644 index 0000000..6920f00 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/builtin-bswap16-1.c @@ -0,0 +1,15 @@ +/* { dg-do compile } */ +/* { dg-options "-O2" } */ +/* { dg-require-effective-target arm_arch_v6_ok } */ +/* { dg-add-options arm_arch_v6 } */ +/* { dg-final { scan-assembler-not "orr\[ \t\]" } } */ + +unsigned short swapu16_1 (unsigned short x) +{ + return (x << 8) | (x >> 8); +} + +unsigned short swapu16_2 (unsigned short x) +{ + return (x >> 8) | (x << 8); +}