This patch adds ARMv8.2-A FP16 lane scalar intrinsics.
gcc/
2016-07-07  Jiong Wang  <jiong.w...@arm.com>

	* config/aarch64/arm_neon.h (vfmah_lane_f16): New.
	(vfmah_laneq_f16): Likewise.
	(vfmsh_lane_f16): Likewise.
	(vfmsh_laneq_f16): Likewise.
	(vmulh_lane_f16): Likewise.
	(vmulh_laneq_f16): Likewise.
	(vmulxh_lane_f16): Likewise.
	(vmulxh_laneq_f16): Likewise.
From bcbe5035746c5684a3b9f0b62310f6aa276db364 Mon Sep 17 00:00:00 2001
From: Jiong Wang <jiong.w...@arm.com>
Date: Thu, 9 Jun 2016 11:06:29 +0100
Subject: [PATCH 10/14] [10/14] ARMv8.2 FP16 lane scalar intrinsics

---
 gcc/config/aarch64/arm_neon.h | 52 +++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 52 insertions(+)

diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h
index e727ff1..09095d1 100644
--- a/gcc/config/aarch64/arm_neon.h
+++ b/gcc/config/aarch64/arm_neon.h
@@ -26488,6 +26488,20 @@ vfmsq_f16 (float16x8_t __a, float16x8_t __b, float16x8_t __c)
 
 /* ARMv8.2-A FP16 lane vector intrinsics.  */
 
+__extension__ static __inline float16_t __attribute__ ((__always_inline__))
+vfmah_lane_f16 (float16_t __a, float16_t __b,
+		float16x4_t __c, const int __lane)
+{
+  return vfmah_f16 (__a, __b, __aarch64_vget_lane_any (__c, __lane));
+}
+
+__extension__ static __inline float16_t __attribute__ ((__always_inline__))
+vfmah_laneq_f16 (float16_t __a, float16_t __b,
+		 float16x8_t __c, const int __lane)
+{
+  return vfmah_f16 (__a, __b, __aarch64_vget_lane_any (__c, __lane));
+}
+
 __extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
 vfma_lane_f16 (float16x4_t __a, float16x4_t __b,
	       float16x4_t __c, const int __lane)
@@ -26528,6 +26542,20 @@ vfmaq_n_f16 (float16x8_t __a, float16x8_t __b, float16_t __c)
   return vfmaq_f16 (__a, __b, vdupq_n_f16 (__c));
 }
 
+__extension__ static __inline float16_t __attribute__ ((__always_inline__))
+vfmsh_lane_f16 (float16_t __a, float16_t __b,
+		float16x4_t __c, const int __lane)
+{
+  return vfmsh_f16 (__a, __b, __aarch64_vget_lane_any (__c, __lane));
+}
+
+__extension__ static __inline float16_t __attribute__ ((__always_inline__))
+vfmsh_laneq_f16 (float16_t __a, float16_t __b,
+		 float16x8_t __c, const int __lane)
+{
+  return vfmsh_f16 (__a, __b, __aarch64_vget_lane_any (__c, __lane));
+}
+
 __extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
 vfms_lane_f16 (float16x4_t __a, float16x4_t __b,
	       float16x4_t __c, const int __lane)
@@ -26568,6 +26596,12 @@ vfmsq_n_f16 (float16x8_t __a, float16x8_t __b, float16_t __c)
   return vfmsq_f16 (__a, __b, vdupq_n_f16 (__c));
 }
 
+__extension__ static __inline float16_t __attribute__ ((__always_inline__))
+vmulh_lane_f16 (float16_t __a, float16x4_t __b, const int __lane)
+{
+  return __a * __aarch64_vget_lane_any (__b, __lane);
+}
+
 __extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
 vmul_lane_f16 (float16x4_t __a, float16x4_t __b, const int __lane)
 {
@@ -26580,6 +26614,12 @@ vmulq_lane_f16 (float16x8_t __a, float16x4_t __b, const int __lane)
   return vmulq_f16 (__a, vdupq_n_f16 (__aarch64_vget_lane_any (__b, __lane)));
 }
 
+__extension__ static __inline float16_t __attribute__ ((__always_inline__))
+vmulh_laneq_f16 (float16_t __a, float16x8_t __b, const int __lane)
+{
+  return __a * __aarch64_vget_lane_any (__b, __lane);
+}
+
 __extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
 vmul_laneq_f16 (float16x4_t __a, float16x8_t __b, const int __lane)
 {
@@ -26604,6 +26644,12 @@ vmulq_n_f16 (float16x8_t __a, float16_t __b)
   return vmulq_laneq_f16 (__a, vdupq_n_f16 (__b), 0);
 }
 
+__extension__ static __inline float16_t __attribute__ ((__always_inline__))
+vmulxh_lane_f16 (float16_t __a, float16x4_t __b, const int __lane)
+{
+  return vmulxh_f16 (__a, __aarch64_vget_lane_any (__b, __lane));
+}
+
 __extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
 vmulx_lane_f16 (float16x4_t __a, float16x4_t __b, const int __lane)
 {
@@ -26616,6 +26662,12 @@ vmulxq_lane_f16 (float16x8_t __a, float16x4_t __b, const int __lane)
   return vmulxq_f16 (__a, __aarch64_vdupq_lane_f16 (__b, __lane));
 }
 
+__extension__ static __inline float16_t __attribute__ ((__always_inline__))
+vmulxh_laneq_f16 (float16_t __a, float16x8_t __b, const int __lane)
+{
+  return vmulxh_f16 (__a, __aarch64_vget_lane_any (__b, __lane));
+}
+
 __extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
 vmulx_laneq_f16 (float16x4_t __a, float16x8_t __b, const int __lane)
 {
-- 
2.5.0