This patch adds the ARMv8.2-A FP16 reduction vector intrinsics.
gcc/
2016-07-07  Jiong Wang  <jiong.w...@arm.com>

	* config/aarch64/arm_neon.h (vmaxv_f16): New.
	(vmaxvq_f16): Likewise.
	(vminv_f16): Likewise.
	(vminvq_f16): Likewise.
	(vmaxnmv_f16): Likewise.
	(vmaxnmvq_f16): Likewise.
	(vminnmv_f16): Likewise.
	(vminnmvq_f16): Likewise.
	* config/aarch64/iterators.md (vp): Support HF modes.
>From 514e5d195867d2f53fac50804748976626748f81 Mon Sep 17 00:00:00 2001 From: Jiong Wang <jiong.w...@arm.com> Date: Wed, 8 Jun 2016 10:23:17 +0100 Subject: [PATCH 06/14] [6/14] ARMv8.2 FP16 reduction vector intrinsics --- gcc/config/aarch64/aarch64-simd-builtins.def | 8 ++--- gcc/config/aarch64/aarch64-simd.md | 12 +++---- gcc/config/aarch64/arm_neon.h | 50 ++++++++++++++++++++++++++++ gcc/config/aarch64/iterators.md | 7 ++-- 4 files changed, 65 insertions(+), 12 deletions(-) diff --git a/gcc/config/aarch64/aarch64-simd-builtins.def b/gcc/config/aarch64/aarch64-simd-builtins.def index 6ff5063..64c5f86 100644 --- a/gcc/config/aarch64/aarch64-simd-builtins.def +++ b/gcc/config/aarch64/aarch64-simd-builtins.def @@ -234,12 +234,12 @@ BUILTIN_VALL (UNOP, reduc_plus_scal_, 10) /* Implemented by reduc_<maxmin_uns>_scal_<mode> (producing scalar). */ - BUILTIN_VDQIF (UNOP, reduc_smax_scal_, 10) - BUILTIN_VDQIF (UNOP, reduc_smin_scal_, 10) + BUILTIN_VDQIF_F16 (UNOP, reduc_smax_scal_, 10) + BUILTIN_VDQIF_F16 (UNOP, reduc_smin_scal_, 10) BUILTIN_VDQ_BHSI (UNOPU, reduc_umax_scal_, 10) BUILTIN_VDQ_BHSI (UNOPU, reduc_umin_scal_, 10) - BUILTIN_VDQF (UNOP, reduc_smax_nan_scal_, 10) - BUILTIN_VDQF (UNOP, reduc_smin_nan_scal_, 10) + BUILTIN_VHSDF (UNOP, reduc_smax_nan_scal_, 10) + BUILTIN_VHSDF (UNOP, reduc_smin_nan_scal_, 10) /* Implemented by <maxmin><mode>3. smax variants map to fmaxnm, diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md index c0600df..d5b25fa 100644 --- a/gcc/config/aarch64/aarch64-simd.md +++ b/gcc/config/aarch64/aarch64-simd.md @@ -2073,8 +2073,8 @@ ;; gimple_fold'd to the REDUC_(MAX|MIN)_EXPR tree code. (This is FP smax/smin). 
(define_expand "reduc_<maxmin_uns>_scal_<mode>" [(match_operand:<VEL> 0 "register_operand") - (unspec:VDQF [(match_operand:VDQF 1 "register_operand")] - FMAXMINV)] + (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand")] + FMAXMINV)] "TARGET_SIMD" { rtx elt = GEN_INT (ENDIAN_LANE_N (<MODE>mode, 0)); @@ -2121,12 +2121,12 @@ ) (define_insn "aarch64_reduc_<maxmin_uns>_internal<mode>" - [(set (match_operand:VDQF 0 "register_operand" "=w") - (unspec:VDQF [(match_operand:VDQF 1 "register_operand" "w")] - FMAXMINV))] + [(set (match_operand:VHSDF 0 "register_operand" "=w") + (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")] + FMAXMINV))] "TARGET_SIMD" "<maxmin_uns_op><vp>\\t%<Vetype>0, %1.<Vtype>" - [(set_attr "type" "neon_fp_reduc_minmax_<Vetype><q>")] + [(set_attr "type" "neon_fp_reduc_minmax_<stype><q>")] ) ;; aarch64_simd_bsl may compile to any of bsl/bif/bit depending on register diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h index b09a3a7..f3e5d0e 100644 --- a/gcc/config/aarch64/arm_neon.h +++ b/gcc/config/aarch64/arm_neon.h @@ -26638,6 +26638,56 @@ vmulxq_n_f16 (float16x8_t __a, float16_t __b) return vmulxq_f16 (__a, vdupq_n_f16 (__b)); } +/* ARMv8.2-A FP16 reduction vector intrinsics. 
*/ + +__extension__ static __inline float16_t __attribute__ ((__always_inline__)) +vmaxv_f16 (float16x4_t __a) +{ + return __builtin_aarch64_reduc_smax_nan_scal_v4hf (__a); +} + +__extension__ static __inline float16_t __attribute__ ((__always_inline__)) +vmaxvq_f16 (float16x8_t __a) +{ + return __builtin_aarch64_reduc_smax_nan_scal_v8hf (__a); +} + +__extension__ static __inline float16_t __attribute__ ((__always_inline__)) +vminv_f16 (float16x4_t __a) +{ + return __builtin_aarch64_reduc_smin_nan_scal_v4hf (__a); +} + +__extension__ static __inline float16_t __attribute__ ((__always_inline__)) +vminvq_f16 (float16x8_t __a) +{ + return __builtin_aarch64_reduc_smin_nan_scal_v8hf (__a); +} + +__extension__ static __inline float16_t __attribute__ ((__always_inline__)) +vmaxnmv_f16 (float16x4_t __a) +{ + return __builtin_aarch64_reduc_smax_scal_v4hf (__a); +} + +__extension__ static __inline float16_t __attribute__ ((__always_inline__)) +vmaxnmvq_f16 (float16x8_t __a) +{ + return __builtin_aarch64_reduc_smax_scal_v8hf (__a); +} + +__extension__ static __inline float16_t __attribute__ ((__always_inline__)) +vminnmv_f16 (float16x4_t __a) +{ + return __builtin_aarch64_reduc_smin_scal_v4hf (__a); +} + +__extension__ static __inline float16_t __attribute__ ((__always_inline__)) +vminnmvq_f16 (float16x8_t __a) +{ + return __builtin_aarch64_reduc_smin_scal_v8hf (__a); +} + #pragma GCC pop_options #undef __aarch64_vget_lane_any diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md index 8d4dc6c..011b937 100644 --- a/gcc/config/aarch64/iterators.md +++ b/gcc/config/aarch64/iterators.md @@ -159,6 +159,8 @@ ;; Vector modes except double int. (define_mode_iterator VDQIF [V8QI V16QI V4HI V8HI V2SI V4SI V2SF V4SF V2DF]) +(define_mode_iterator VDQIF_F16 [V8QI V16QI V4HI V8HI V2SI V4SI + V4HF V8HF V2SF V4SF V2DF]) ;; Vector modes for S type. 
(define_mode_iterator VDQ_SI [V2SI V4SI]) @@ -760,8 +762,9 @@ (define_mode_attr vp [(V8QI "v") (V16QI "v") (V4HI "v") (V8HI "v") (V2SI "p") (V4SI "v") - (V2DI "p") (V2DF "p") - (V2SF "p") (V4SF "v")]) + (V2DI "p") (V2DF "p") + (V2SF "p") (V4SF "v") + (V4HF "v") (V8HF "v")]) (define_mode_attr vsi2qi [(V2SI "v8qi") (V4SI "v16qi")]) (define_mode_attr VSI2QI [(V2SI "V8QI") (V4SI "V16QI")]) -- 2.5.0