Hi all,This patch enables aarch64 to vectorise bswap[16,32,64] operations by using the AdvancedSIMD forms of the rev[16,32,64] instructions.
The TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION hook is extended to return the vectorised forms of __builtin_bswap* where possible and vector bswap patterns are added.
I've added the tests in vect.exp and a new effective target check (vect_bswap) that can be extended for other targets in the future if they can also vectorise these operations. Is that ok?
Bootstrapped and tested aarch64-none-linux-gnu. Ok for trunk? Thanks, Kyrill 2014-04-15 Kyrylo Tkachov <kyrylo.tkac...@arm.com> * config/aarch64/aarch64-builtins.c (aarch64_builtin_vectorized_function): Handle BUILT_IN_BSWAP16, BUILT_IN_BSWAP32, BUILT_IN_BSWAP64. * config/aarch64/aarch64-simd.md (bswap<mode>): New pattern. * config/aarch64/aarch64-simd-builtins.def: Define vector bswap builtins. * config/aarch64/iterator.md (VDQHSD): New mode iterator. (Vrevsuff): New mode attribute. 2014-04-15 Kyrylo Tkachov <kyrylo.tkac...@arm.com> * lib/target-supports.exp (check_effective_target_vect_bswap): New. * gcc.dg/vect/vect-bswap16: New test. * gcc.dg/vect/vect-bswap32: Likewise. * gcc.dg/vect/vect-bswap64: Likewise.
commit 0d6d820881443a7ce7f9bd51f35aff04866c5e57 Author: Kyrylo Tkachov <kyrylo.tkac...@arm.com> Date: Thu Apr 3 09:22:14 2014 +0100 [AArch64] vectorise bswap diff --git a/gcc/config/aarch64/aarch64-builtins.c b/gcc/config/aarch64/aarch64-builtins.c index 55cfe0a..d839a40 100644 --- a/gcc/config/aarch64/aarch64-builtins.c +++ b/gcc/config/aarch64/aarch64-builtins.c @@ -1086,7 +1086,29 @@ aarch64_builtin_vectorized_function (tree fndecl, tree type_out, tree type_in) return aarch64_builtin_decls[builtin]; } - + case BUILT_IN_BSWAP16: +#undef AARCH64_CHECK_BUILTIN_MODE +#define AARCH64_CHECK_BUILTIN_MODE(C, N) \ + (out_mode == N##Imode && out_n == C \ + && in_mode == N##Imode && in_n == C) + if (AARCH64_CHECK_BUILTIN_MODE (4, H)) + return aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_UNOPU_bswapv4hi]; + else if (AARCH64_CHECK_BUILTIN_MODE (8, H)) + return aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_UNOPU_bswapv8hi]; + else + return NULL_TREE; + case BUILT_IN_BSWAP32: + if (AARCH64_CHECK_BUILTIN_MODE (2, S)) + return aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_UNOPU_bswapv2si]; + else if (AARCH64_CHECK_BUILTIN_MODE (4, S)) + return aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_UNOPU_bswapv4si]; + else + return NULL_TREE; + case BUILT_IN_BSWAP64: + if (AARCH64_CHECK_BUILTIN_MODE (2, D)) + return aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_UNOPU_bswapv2di]; + else + return NULL_TREE; default: return NULL_TREE; } diff --git a/gcc/config/aarch64/aarch64-simd-builtins.def b/gcc/config/aarch64/aarch64-simd-builtins.def index c9b7570..e9736da 100644 --- a/gcc/config/aarch64/aarch64-simd-builtins.def +++ b/gcc/config/aarch64/aarch64-simd-builtins.def @@ -330,6 +330,8 @@ VAR1 (UNOP, floatunsv4si, 2, v4sf) VAR1 (UNOP, floatunsv2di, 2, v2df) + VAR5 (UNOPU, bswap, 10, v4hi, v8hi, v2si, v4si, v2di) + /* Implemented by aarch64_<PERMUTE:perm_insn><PERMUTE:perm_hilo><mode>. */ BUILTIN_VALL (BINOP, zip1, 0) diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md index 73aee2c..75db3e8 100644 --- a/gcc/config/aarch64/aarch64-simd.md +++ b/gcc/config/aarch64/aarch64-simd.md @@ -286,6 +286,14 @@ [(set_attr "type" "neon_mul_<Vetype><q>")] ) +(define_insn "bswap<mode>" + [(set (match_operand:VDQHSD 0 "register_operand" "=w") + (bswap:VDQHSD (match_operand:VDQHSD 1 "register_operand" "w")))] + "TARGET_SIMD" + "rev<Vrevsuff>\\t%0.<Vbtype>, %1.<Vbtype>" + [(set_attr "type" "neon_rev<q>")] +) + (define_insn "*aarch64_mul3_elt<mode>" [(set (match_operand:VMUL 0 "register_operand" "=w") (mult:VMUL diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md index f1339b8..2b5ebd1 100644 --- a/gcc/config/aarch64/iterators.md +++ b/gcc/config/aarch64/iterators.md @@ -150,6 +150,9 @@ ;; Vector modes for H and S types. (define_mode_iterator VDQHS [V4HI V8HI V2SI V4SI]) +;; Vector modes for H, S and D types. +(define_mode_iterator VDQHSD [V4HI V8HI V2SI V4SI V2DI]) + ;; Vector modes for Q, H and S types. (define_mode_iterator VDQQHS [V8QI V16QI V4HI V8HI V2SI V4SI]) @@ -352,6 +355,9 @@ (V2DI "2d") (V2SF "2s") (V4SF "4s") (V2DF "2d")]) +(define_mode_attr Vrevsuff [(V4HI "16") (V8HI "16") (V2SI "32") + (V4SI "32") (V2DI "64")]) + (define_mode_attr Vmtype [(V8QI ".8b") (V16QI ".16b") (V4HI ".4h") (V8HI ".8h") (V2SI ".2s") (V4SI ".4s") diff --git a/gcc/testsuite/gcc.dg/vect/vect-bswap16.c b/gcc/testsuite/gcc.dg/vect/vect-bswap16.c new file mode 100644 index 0000000..b452a29 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-bswap16.c @@ -0,0 +1,44 @@ +/* { dg-require-effective-target vect_bswap } */ + +#include "tree-vect.h" + +#define N 128 + +volatile int y = 0; + +static inline void +vfoo16 (unsigned short int* a) +{ + int i = 0; + for (i = 0; i < N; ++i) + a[i] = __builtin_bswap16 (a[i]); +} + +int +main (void) +{ + unsigned short arr[N]; + unsigned short expect[N]; + int i; + + for (i < 0; i < N; ++i) + { + arr[i] = i; + expect[i] = __builtin_bswap16 (i); + if (y) /* Avoid vectorisation. */ + abort (); + } + + vfoo16 (arr); + + for (i = 0; i < N; ++i) + { + if (arr[i] != expect[i]) + abort (); + } + + return 0; +} + +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */ +/* { dg-final { cleanup-tree-dump "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-bswap32.c b/gcc/testsuite/gcc.dg/vect/vect-bswap32.c new file mode 100644 index 0000000..f90f853 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-bswap32.c @@ -0,0 +1,44 @@ +/* { dg-require-effective-target vect_bswap } */ + +#include "tree-vect.h" + +#define N 128 + +volatile int y = 0; + +static inline void +vfoo32 (unsigned int* a) +{ + int i = 0; + for (i = 0; i < N; ++i) + a[i] = __builtin_bswap32 (a[i]); +} + +int +main (void) +{ + unsigned int arr[N]; + unsigned int expect[N]; + int i; + + for (i < 0; i < N; ++i) + { + arr[i] = i; + expect[i] = __builtin_bswap32 (i); + if (y) /* Avoid vectorisation. */ + abort (); + } + + vfoo32 (arr); + + for (i = 0; i < N; ++i) + { + if (arr[i] != expect[i]) + abort (); + } + + return 0; +} + +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */ +/* { dg-final { cleanup-tree-dump "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-bswap64.c b/gcc/testsuite/gcc.dg/vect/vect-bswap64.c new file mode 100644 index 0000000..319ab34 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-bswap64.c @@ -0,0 +1,44 @@ +/* { dg-require-effective-target vect_bswap } */ + +#include "tree-vect.h" + +#define N 128 + +volatile int y = 0; + +static inline void +vfoo64 (unsigned long long* a) +{ + int i = 0; + for (i = 0; i < N; ++i) + a[i] = __builtin_bswap64 (a[i]); +} + +int +main (void) +{ + unsigned long long arr[N]; + unsigned long long expect[N]; + int i; + + for (i < 0; i < N; ++i) + { + arr[i] = i; + expect[i] = __builtin_bswap64 (i); + if (y) /* Avoid vectorisation. */ + abort (); + } + + vfoo64 (arr); + + for (i = 0; i < N; ++i) + { + if (arr[i] != expect[i]) + abort (); + } + + return 0; +} + +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */ +/* { dg-final { cleanup-tree-dump "vect" } } */ diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp index 57b10d0..910b3dc 100644 --- a/gcc/testsuite/lib/target-supports.exp +++ b/gcc/testsuite/lib/target-supports.exp @@ -3271,6 +3271,24 @@ proc check_effective_target_vect_shift { } { return $et_vect_shift_saved } +# Return 1 if the target supports vector bswap operations. + +proc check_effective_target_vect_bswap { } { + global et_vect_bswap_saved + + if [info exists et_vect_bswap_saved] { + verbose "check_effective_target_vect_bswap: using cached result" 2 + } else { + set et_vect_bswap_saved 0 + if { [istarget aarch64*-*-*] } { + set et_vect_bswap_saved 1 + } + } + + verbose "check_effective_target_vect_bswap: returning $et_vect_bswap_saved" 2 + return $et_vect_bswap_saved +} + # Return 1 if the target supports hardware vector shift operation for char. proc check_effective_target_vect_shift_char { } {