Hi, The attached patch catches C constructs: (A << 8) | (A >> 8) where A is unsigned 16 bits and maps them to builtin_bswap16(A) which can provide more efficient implementations on some targets.
The construct above is equivalent to the default bswap16 implementation. I have added a testcase for ARM, and have found no regression with qemu-arm on arm-none-linux-gnueabi. OK? Christophe
2012-09-19 Christophe Lyon <christophe.l...@linaro.org> gcc/ * fold-const.c (fold_binary_loc): call builtin_bswap16 when the equivalent construct is detected. gcc/testsuite/ * gcc.target/arm/builtin-bswap-2.c: New testcase. diff --git a/gcc/fold-const.c b/gcc/fold-const.c index 2bf5179..0ff7e8b 100644 --- a/gcc/fold-const.c +++ b/gcc/fold-const.c @@ -10326,6 +10326,99 @@ fold_binary_loc (location_t loc, } } + /* Catch bswap16 construct: (A << 8) | (A >> 8) where A is + unsigned 16 bits. + This has been expanded into: + (ior:SI (lshift:SI (nop:SI A:HI) 8) + (nop:SI (rshift:HI A:HI 8))) + */ + { + enum tree_code code0, code1; + tree my_arg0 = arg0; + tree my_arg1= arg1; + tree rtype; + + code0 = TREE_CODE (arg0); + code1 = TREE_CODE (arg1); + if (code1 == NOP_EXPR) + { + my_arg1 = TREE_OPERAND (arg1, 0); + code1 = TREE_CODE (my_arg1); + } + else if (code0 == NOP_EXPR) + { + my_arg0 = TREE_OPERAND (arg0, 0); + code0 = TREE_CODE (my_arg0); + } + + /* Handle (A << C1) + (A >> C1). */ + if ((code1 == RSHIFT_EXPR && code0 == LSHIFT_EXPR) + && (TREE_CODE (TREE_OPERAND (my_arg0, 0)) == NOP_EXPR) + && operand_equal_p (TREE_OPERAND (TREE_OPERAND (my_arg0, 0), 0), + TREE_OPERAND (my_arg1, 0), 0) + && (rtype = TREE_TYPE (TREE_OPERAND (my_arg1, 0)), + TYPE_UNSIGNED (rtype))) + { + tree tree01, tree11; + enum tree_code code01, code11; + + tree01 = TREE_OPERAND (my_arg0, 1); + tree11 = TREE_OPERAND (my_arg1, 1); + STRIP_NOPS (tree01); + STRIP_NOPS (tree11); + code01 = TREE_CODE (tree01); + code11 = TREE_CODE (tree11); + + /* Check that shift amount is 8, and input 16 bits wide. */ + if (code01 == INTEGER_CST + && code11 == INTEGER_CST + && TREE_INT_CST_HIGH (tree01) == 0 + && TREE_INT_CST_HIGH (tree11) == 0 + && TREE_INT_CST_LOW (tree01) == TREE_INT_CST_LOW (tree11) + && TREE_INT_CST_LOW (tree01) == 8 + && TYPE_PRECISION (TREE_TYPE (TREE_OPERAND (my_arg1, 0))) == 16) + { + tree bswapfn = builtin_decl_explicit (BUILT_IN_BSWAP16); + return build_call_expr_loc (loc, bswapfn, 1, + TREE_OPERAND (my_arg1, 0)); + } + } + + /* Handle (A >> C1) + (A << C1). */ + else if ((code0 == RSHIFT_EXPR && code1 == LSHIFT_EXPR) + && (TREE_CODE (TREE_OPERAND (my_arg1, 0)) == NOP_EXPR) + && operand_equal_p (TREE_OPERAND (TREE_OPERAND (my_arg1, 0), + 0), + TREE_OPERAND (my_arg0, 0), 0) + && (rtype = TREE_TYPE (TREE_OPERAND (my_arg0, 0)), + TYPE_UNSIGNED (rtype))) + { + tree tree01, tree11; + enum tree_code code01, code11; + + tree01 = TREE_OPERAND (my_arg0, 1); + tree11 = TREE_OPERAND (my_arg1, 1); + STRIP_NOPS (tree01); + STRIP_NOPS (tree11); + code01 = TREE_CODE (tree01); + code11 = TREE_CODE (tree11); + + /* Check that shift amount is 8, and input 16 bits wide. */ + if (code01 == INTEGER_CST + && code11 == INTEGER_CST + && TREE_INT_CST_HIGH (tree01) == 0 + && TREE_INT_CST_HIGH (tree11) == 0 + && TREE_INT_CST_LOW (tree01) == TREE_INT_CST_LOW (tree11) + && TREE_INT_CST_LOW (tree01) == 8 + && TYPE_PRECISION (TREE_TYPE (TREE_OPERAND (my_arg0, 0))) == 16) + { + tree bswapfn = builtin_decl_explicit (BUILT_IN_BSWAP16); + return build_call_expr_loc (loc, bswapfn, 1, + TREE_OPERAND (my_arg0, 0)); + } + } + } + associate: /* In most languages, can't associate operations on floats through parentheses. Rather than remember where the parentheses were, we diff --git a/gcc/testsuite/gcc.target/arm/builtin-bswap-2.c b/gcc/testsuite/gcc.target/arm/builtin-bswap-2.c new file mode 100644 index 0000000..93dbb35 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/builtin-bswap-2.c @@ -0,0 +1,25 @@ +/* { dg-do compile } */ +/* { dg-options "-O2" } */ +/* { dg-require-effective-target arm_arch_v6_ok } */ +/* { dg-add-options arm_arch_v6 } */ +/* { dg-final { scan-assembler-not "orr\[ \t\]" } } */ + +unsigned short swapu16_1 (unsigned short x) +{ + return (x << 8) | (x >> 8); +} + +unsigned short swapu16_2 (unsigned short x) +{ + return (x >> 8) | (x << 8); +} + +unsigned int swapu32_1 (unsigned int x) +{ + return (x << 16) | (x >> 16); +} + +unsigned int swapu32_2 (unsigned int x) +{ + return (x >> 16) | (x << 16); +}