This patch adds the ARMv8.2-A FP16 three-operand vector intrinsics.
The three-operand intrinsics consist only of fma and fms. 2016-07-07 Jiong Wang <jiong.w...@arm.com> gcc/ * config/aarch64/aarch64-simd-builtins.def: Register new builtins. * config/aarch64/aarch64-simd.md (fma<mode>4): Extend to HF modes. (fnma<mode>4): Likewise. * config/aarch64/arm_neon.h (vfma_f16): New. (vfmaq_f16): Likewise. (vfms_f16): Likewise. (vfmsq_f16): Likewise.
>From dc2121d586b759b864d9653e188a14d1f7296f25 Mon Sep 17 00:00:00 2001 From: Jiong Wang <jiong.w...@arm.com> Date: Wed, 8 Jun 2016 10:21:25 +0100 Subject: [PATCH 04/14] [4/14] ARMv8.2 FP16 three operands vector intrinsics --- gcc/config/aarch64/aarch64-simd-builtins.def | 4 +++- gcc/config/aarch64/aarch64-simd.md | 28 ++++++++++++++-------------- gcc/config/aarch64/arm_neon.h | 26 ++++++++++++++++++++++++++ 3 files changed, 43 insertions(+), 15 deletions(-) diff --git a/gcc/config/aarch64/aarch64-simd-builtins.def b/gcc/config/aarch64/aarch64-simd-builtins.def index fe17298..6ff5063 100644 --- a/gcc/config/aarch64/aarch64-simd-builtins.def +++ b/gcc/config/aarch64/aarch64-simd-builtins.def @@ -405,7 +405,9 @@ BUILTIN_VALL_F16 (STORE1, st1, 0) /* Implemented by fma<mode>4. */ - BUILTIN_VDQF (TERNOP, fma, 4) + BUILTIN_VHSDF (TERNOP, fma, 4) + /* Implemented by fnma<mode>4. */ + BUILTIN_VHSDF (TERNOP, fnma, 4) /* Implemented by aarch64_simd_bsl<mode>. */ BUILTIN_VDQQH (BSL_P, simd_bsl, 0) diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md index 0a80adb..576ad3c 100644 --- a/gcc/config/aarch64/aarch64-simd.md +++ b/gcc/config/aarch64/aarch64-simd.md @@ -1526,13 +1526,13 @@ ) (define_insn "fma<mode>4" - [(set (match_operand:VDQF 0 "register_operand" "=w") - (fma:VDQF (match_operand:VDQF 1 "register_operand" "w") - (match_operand:VDQF 2 "register_operand" "w") - (match_operand:VDQF 3 "register_operand" "0")))] + [(set (match_operand:VHSDF 0 "register_operand" "=w") + (fma:VHSDF (match_operand:VHSDF 1 "register_operand" "w") + (match_operand:VHSDF 2 "register_operand" "w") + (match_operand:VHSDF 3 "register_operand" "0")))] "TARGET_SIMD" "fmla\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>" - [(set_attr "type" "neon_fp_mla_<Vetype><q>")] + [(set_attr "type" "neon_fp_mla_<stype><q>")] ) (define_insn "*aarch64_fma4_elt<mode>" @@ -1599,15 +1599,15 @@ ) (define_insn "fnma<mode>4" - [(set (match_operand:VDQF 0 "register_operand" "=w") - (fma:VDQF - 
(match_operand:VDQF 1 "register_operand" "w") - (neg:VDQF - (match_operand:VDQF 2 "register_operand" "w")) - (match_operand:VDQF 3 "register_operand" "0")))] - "TARGET_SIMD" - "fmls\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>" - [(set_attr "type" "neon_fp_mla_<Vetype><q>")] + [(set (match_operand:VHSDF 0 "register_operand" "=w") + (fma:VHSDF + (match_operand:VHSDF 1 "register_operand" "w") + (neg:VHSDF + (match_operand:VHSDF 2 "register_operand" "w")) + (match_operand:VHSDF 3 "register_operand" "0")))] + "TARGET_SIMD" + "fmls\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>" + [(set_attr "type" "neon_fp_mla_<stype><q>")] ) (define_insn "*aarch64_fnma4_elt<mode>" diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h index e78ff43..ad5b6fa 100644 --- a/gcc/config/aarch64/arm_neon.h +++ b/gcc/config/aarch64/arm_neon.h @@ -26458,6 +26458,32 @@ vsubq_f16 (float16x8_t __a, float16x8_t __b) return __a - __b; } +/* ARMv8.2-A FP16 three operands vector intrinsics. */ + +__extension__ static __inline float16x4_t __attribute__ ((__always_inline__)) +vfma_f16 (float16x4_t __a, float16x4_t __b, float16x4_t __c) +{ + return __builtin_aarch64_fmav4hf (__b, __c, __a); +} + +__extension__ static __inline float16x8_t __attribute__ ((__always_inline__)) +vfmaq_f16 (float16x8_t __a, float16x8_t __b, float16x8_t __c) +{ + return __builtin_aarch64_fmav8hf (__b, __c, __a); +} + +__extension__ static __inline float16x4_t __attribute__ ((__always_inline__)) +vfms_f16 (float16x4_t __a, float16x4_t __b, float16x4_t __c) +{ + return __builtin_aarch64_fnmav4hf (__b, __c, __a); +} + +__extension__ static __inline float16x8_t __attribute__ ((__always_inline__)) +vfmsq_f16 (float16x8_t __a, float16x8_t __b, float16x8_t __c) +{ + return __builtin_aarch64_fnmav8hf (__b, __c, __a); +} + #pragma GCC pop_options #undef __aarch64_vget_lane_any -- 2.5.0