Implement vmlaldavq, vmlaldavxq, vmlsldavq, vmlsldavxq using the new MVE builtins framework.
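For context, the user-facing overloaded signatures are unchanged by this rework.
The snippet below is only an illustration (not part of the patch): the build
flags and the scalar description are my sketch of the documented ACLE
behaviour, where vmlaldavq performs a lane-wise widening multiply with a 64-bit
accumulation across the vector and vmlaldavq_p is its predicated form.

    /* Hypothetical usage example; build roughly with
       -march=armv8.1-m.main+mve.  */
    #include <arm_mve.h>

    int64_t
    dot_product_s16 (int16x8_t a, int16x8_t b, mve_pred16_t p)
    {
      /* Roughly: sum over all lanes of (int64_t) a[i] * b[i].  */
      int64_t all_lanes = vmlaldavq (a, b);

      /* Predicated variant: lanes masked out by P do not contribute.  */
      int64_t active_lanes = vmlaldavq_p (a, b, p);

      return all_lanes + active_lanes;
    }

As the new arm-mve-builtins-base.def entries below show, vmlaldavq keeps both
signed and unsigned 16/32-bit element forms, while vmlaldavxq, vmlsldavq and
vmlsldavxq are signed-only.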
2022-10-25  Christophe Lyon  <christophe.l...@arm.com>

gcc/
	* config/arm/arm-mve-builtins-base.cc (vmlaldavq, vmlaldavxq)
	(vmlsldavq, vmlsldavxq): New.
	* config/arm/arm-mve-builtins-base.def (vmlaldavq, vmlaldavxq)
	(vmlsldavq, vmlsldavxq): New.
	* config/arm/arm-mve-builtins-base.h (vmlaldavq, vmlaldavxq)
	(vmlsldavq, vmlsldavxq): New.
	* config/arm/arm_mve.h (vmlaldavq): Remove.
	(vmlsldavxq): Remove.
	(vmlsldavq): Remove.
	(vmlaldavxq): Remove.
	(vmlaldavq_p): Remove.
	(vmlaldavxq_p): Remove.
	(vmlsldavq_p): Remove.
	(vmlsldavxq_p): Remove.
	(vmlaldavq_u16): Remove.
	(vmlsldavxq_s16): Remove.
	(vmlsldavq_s16): Remove.
	(vmlaldavxq_s16): Remove.
	(vmlaldavq_s16): Remove.
	(vmlaldavq_u32): Remove.
	(vmlsldavxq_s32): Remove.
	(vmlsldavq_s32): Remove.
	(vmlaldavxq_s32): Remove.
	(vmlaldavq_s32): Remove.
	(vmlaldavq_p_s16): Remove.
	(vmlaldavxq_p_s16): Remove.
	(vmlsldavq_p_s16): Remove.
	(vmlsldavxq_p_s16): Remove.
	(vmlaldavq_p_u16): Remove.
	(vmlaldavq_p_s32): Remove.
	(vmlaldavxq_p_s32): Remove.
	(vmlsldavq_p_s32): Remove.
	(vmlsldavxq_p_s32): Remove.
	(vmlaldavq_p_u32): Remove.
	(__arm_vmlaldavq_u16): Remove.
	(__arm_vmlsldavxq_s16): Remove.
	(__arm_vmlsldavq_s16): Remove.
	(__arm_vmlaldavxq_s16): Remove.
	(__arm_vmlaldavq_s16): Remove.
	(__arm_vmlaldavq_u32): Remove.
	(__arm_vmlsldavxq_s32): Remove.
	(__arm_vmlsldavq_s32): Remove.
	(__arm_vmlaldavxq_s32): Remove.
	(__arm_vmlaldavq_s32): Remove.
	(__arm_vmlaldavq_p_s16): Remove.
	(__arm_vmlaldavxq_p_s16): Remove.
	(__arm_vmlsldavq_p_s16): Remove.
	(__arm_vmlsldavxq_p_s16): Remove.
	(__arm_vmlaldavq_p_u16): Remove.
	(__arm_vmlaldavq_p_s32): Remove.
	(__arm_vmlaldavxq_p_s32): Remove.
	(__arm_vmlsldavq_p_s32): Remove.
	(__arm_vmlsldavxq_p_s32): Remove.
	(__arm_vmlaldavq_p_u32): Remove.
	(__arm_vmlaldavq): Remove.
	(__arm_vmlsldavxq): Remove.
	(__arm_vmlsldavq): Remove.
	(__arm_vmlaldavxq): Remove.
	(__arm_vmlaldavq_p): Remove.
	(__arm_vmlaldavxq_p): Remove.
	(__arm_vmlsldavq_p): Remove.
	(__arm_vmlsldavxq_p): Remove.
---
 gcc/config/arm/arm-mve-builtins-base.cc  |   4 +
 gcc/config/arm/arm-mve-builtins-base.def |   4 +
 gcc/config/arm/arm-mve-builtins-base.h   |   4 +
 gcc/config/arm/arm_mve.h                 | 366 -----------------------
 4 files changed, 12 insertions(+), 366 deletions(-)

diff --git a/gcc/config/arm/arm-mve-builtins-base.cc b/gcc/config/arm/arm-mve-builtins-base.cc
index a81cf4cba5e..af1a2c9942a 100644
--- a/gcc/config/arm/arm-mve-builtins-base.cc
+++ b/gcc/config/arm/arm-mve-builtins-base.cc
@@ -285,10 +285,14 @@ FUNCTION_PRED_P_S (vmladavaxq, VMLADAVAXQ)
 FUNCTION_PRED_P_S_U (vmladavaq, VMLADAVAQ)
 FUNCTION_PRED_P_S_U (vmladavq, VMLADAVQ)
 FUNCTION_PRED_P_S (vmladavxq, VMLADAVXQ)
+FUNCTION_PRED_P_S_U (vmlaldavq, VMLALDAVQ)
+FUNCTION_PRED_P_S (vmlaldavxq, VMLALDAVXQ)
 FUNCTION_PRED_P_S (vmlsdavaq, VMLSDAVAQ)
 FUNCTION_PRED_P_S (vmlsdavaxq, VMLSDAVAXQ)
 FUNCTION_PRED_P_S (vmlsdavq, VMLSDAVQ)
 FUNCTION_PRED_P_S (vmlsdavxq, VMLSDAVXQ)
+FUNCTION_PRED_P_S (vmlsldavq, VMLSLDAVQ)
+FUNCTION_PRED_P_S (vmlsldavxq, VMLSLDAVXQ)
 FUNCTION_WITHOUT_N_NO_F (vmovlbq, VMOVLBQ)
 FUNCTION_WITHOUT_N_NO_F (vmovltq, VMOVLTQ)
 FUNCTION_WITHOUT_N_NO_F (vmovnbq, VMOVNBQ)
diff --git a/gcc/config/arm/arm-mve-builtins-base.def b/gcc/config/arm/arm-mve-builtins-base.def
index 934f45bc220..f7f353b34a7 100644
--- a/gcc/config/arm/arm-mve-builtins-base.def
+++ b/gcc/config/arm/arm-mve-builtins-base.def
@@ -54,10 +54,14 @@ DEF_MVE_FUNCTION (vmladavaq, binary_acca_int32, all_integer, p_or_none)
 DEF_MVE_FUNCTION (vmladavaxq, binary_acca_int32, all_signed, p_or_none)
 DEF_MVE_FUNCTION (vmladavq, binary_acc_int32, all_integer, p_or_none)
 DEF_MVE_FUNCTION (vmladavxq, binary_acc_int32, all_signed, p_or_none)
+DEF_MVE_FUNCTION (vmlaldavq, binary_acc_int64, integer_16_32, p_or_none)
+DEF_MVE_FUNCTION (vmlaldavxq, binary_acc_int64, signed_16_32, p_or_none)
 DEF_MVE_FUNCTION (vmlsdavaq, binary_acca_int32, all_signed, p_or_none)
 DEF_MVE_FUNCTION (vmlsdavaxq, binary_acca_int32, all_signed, p_or_none)
 DEF_MVE_FUNCTION (vmlsdavq, binary_acc_int32, all_integer, p_or_none)
 DEF_MVE_FUNCTION (vmlsdavxq, binary_acc_int32, all_signed, p_or_none)
+DEF_MVE_FUNCTION (vmlsldavq, binary_acc_int64, signed_16_32, p_or_none)
+DEF_MVE_FUNCTION (vmlsldavxq, binary_acc_int64, signed_16_32, p_or_none)
 DEF_MVE_FUNCTION (vmovlbq, unary_widen, integer_8_16, mx_or_none)
 DEF_MVE_FUNCTION (vmovltq, unary_widen, integer_8_16, mx_or_none)
 DEF_MVE_FUNCTION (vmovnbq, binary_move_narrow, integer_16_32, m_or_none)
diff --git a/gcc/config/arm/arm-mve-builtins-base.h b/gcc/config/arm/arm-mve-builtins-base.h
index 1d29a940200..08d07a7c6d5 100644
--- a/gcc/config/arm/arm-mve-builtins-base.h
+++ b/gcc/config/arm/arm-mve-builtins-base.h
@@ -67,10 +67,14 @@ extern const function_base *const vmladavaq;
 extern const function_base *const vmladavaxq;
 extern const function_base *const vmladavq;
 extern const function_base *const vmladavxq;
+extern const function_base *const vmlaldavq;
+extern const function_base *const vmlaldavxq;
 extern const function_base *const vmlsdavaq;
 extern const function_base *const vmlsdavaxq;
 extern const function_base *const vmlsdavq;
 extern const function_base *const vmlsdavxq;
+extern const function_base *const vmlsldavq;
+extern const function_base *const vmlsldavxq;
 extern const function_base *const vmovlbq;
 extern const function_base *const vmovltq;
 extern const function_base *const vmovnbq;
diff --git a/gcc/config/arm/arm_mve.h b/gcc/config/arm/arm_mve.h
index f8afe19e86e..50e9ecbfc85 100644
--- a/gcc/config/arm/arm_mve.h
+++ b/gcc/config/arm/arm_mve.h
@@ -55,12 +55,8 @@ #define
vhcaddq_rot270(__a, __b) __arm_vhcaddq_rot270(__a, __b) #define vmulltq_poly(__a, __b) __arm_vmulltq_poly(__a, __b) #define vmullbq_poly(__a, __b) __arm_vmullbq_poly(__a, __b) -#define vmlaldavq(__a, __b) __arm_vmlaldavq(__a, __b) #define vqdmulltq(__a, __b) __arm_vqdmulltq(__a, __b) #define vqdmullbq(__a, __b) __arm_vqdmullbq(__a, __b) -#define vmlsldavxq(__a, __b) __arm_vmlsldavxq(__a, __b) -#define vmlsldavq(__a, __b) __arm_vmlsldavq(__a, __b) -#define vmlaldavxq(__a, __b) __arm_vmlaldavxq(__a, __b) #define vrmlaldavhq(__a, __b) __arm_vrmlaldavhq(__a, __b) #define vrmlsldavhxq(__a, __b) __arm_vrmlsldavhxq(__a, __b) #define vrmlsldavhq(__a, __b) __arm_vrmlsldavhq(__a, __b) @@ -97,10 +93,6 @@ #define vmlaldavaxq(__a, __b, __c) __arm_vmlaldavaxq(__a, __b, __c) #define vmlsldavaq(__a, __b, __c) __arm_vmlsldavaq(__a, __b, __c) #define vmlsldavaxq(__a, __b, __c) __arm_vmlsldavaxq(__a, __b, __c) -#define vmlaldavq_p(__a, __b, __p) __arm_vmlaldavq_p(__a, __b, __p) -#define vmlaldavxq_p(__a, __b, __p) __arm_vmlaldavxq_p(__a, __b, __p) -#define vmlsldavq_p(__a, __b, __p) __arm_vmlsldavq_p(__a, __b, __p) -#define vmlsldavxq_p(__a, __b, __p) __arm_vmlsldavxq_p(__a, __b, __p) #define vsriq_m(__a, __b, __imm, __p) __arm_vsriq_m(__a, __b, __imm, __p) #define vqshluq_m(__inactive, __a, __imm, __p) __arm_vqshluq_m(__inactive, __a, __imm, __p) #define vbicq_m(__inactive, __a, __b, __p) __arm_vbicq_m(__inactive, __a, __b, __p) @@ -394,17 +386,12 @@ #define vbicq_s32(__a, __b) __arm_vbicq_s32(__a, __b) #define vmulltq_poly_p8(__a, __b) __arm_vmulltq_poly_p8(__a, __b) #define vmullbq_poly_p8(__a, __b) __arm_vmullbq_poly_p8(__a, __b) -#define vmlaldavq_u16(__a, __b) __arm_vmlaldavq_u16(__a, __b) #define vbicq_n_u16(__a, __imm) __arm_vbicq_n_u16(__a, __imm) #define vqdmulltq_s16(__a, __b) __arm_vqdmulltq_s16(__a, __b) #define vqdmulltq_n_s16(__a, __b) __arm_vqdmulltq_n_s16(__a, __b) #define vqdmullbq_s16(__a, __b) __arm_vqdmullbq_s16(__a, __b) #define vqdmullbq_n_s16(__a, __b) __arm_vqdmullbq_n_s16(__a, __b) #define vornq_f16(__a, __b) __arm_vornq_f16(__a, __b) -#define vmlsldavxq_s16(__a, __b) __arm_vmlsldavxq_s16(__a, __b) -#define vmlsldavq_s16(__a, __b) __arm_vmlsldavq_s16(__a, __b) -#define vmlaldavxq_s16(__a, __b) __arm_vmlaldavxq_s16(__a, __b) -#define vmlaldavq_s16(__a, __b) __arm_vmlaldavq_s16(__a, __b) #define vcmulq_rot90_f16(__a, __b) __arm_vcmulq_rot90_f16(__a, __b) #define vcmulq_rot270_f16(__a, __b) __arm_vcmulq_rot270_f16(__a, __b) #define vcmulq_rot180_f16(__a, __b) __arm_vcmulq_rot180_f16(__a, __b) @@ -415,17 +402,12 @@ #define vbicq_n_s16(__a, __imm) __arm_vbicq_n_s16(__a, __imm) #define vmulltq_poly_p16(__a, __b) __arm_vmulltq_poly_p16(__a, __b) #define vmullbq_poly_p16(__a, __b) __arm_vmullbq_poly_p16(__a, __b) -#define vmlaldavq_u32(__a, __b) __arm_vmlaldavq_u32(__a, __b) #define vbicq_n_u32(__a, __imm) __arm_vbicq_n_u32(__a, __imm) #define vqdmulltq_s32(__a, __b) __arm_vqdmulltq_s32(__a, __b) #define vqdmulltq_n_s32(__a, __b) __arm_vqdmulltq_n_s32(__a, __b) #define vqdmullbq_s32(__a, __b) __arm_vqdmullbq_s32(__a, __b) #define vqdmullbq_n_s32(__a, __b) __arm_vqdmullbq_n_s32(__a, __b) #define vornq_f32(__a, __b) __arm_vornq_f32(__a, __b) -#define vmlsldavxq_s32(__a, __b) __arm_vmlsldavxq_s32(__a, __b) -#define vmlsldavq_s32(__a, __b) __arm_vmlsldavq_s32(__a, __b) -#define vmlaldavxq_s32(__a, __b) __arm_vmlaldavxq_s32(__a, __b) -#define vmlaldavq_s32(__a, __b) __arm_vmlaldavq_s32(__a, __b) #define vcmulq_rot90_f32(__a, __b) __arm_vcmulq_rot90_f32(__a, __b) #define vcmulq_rot270_f32(__a, 
__b) __arm_vcmulq_rot270_f32(__a, __b) #define vcmulq_rot180_f32(__a, __b) __arm_vcmulq_rot180_f32(__a, __b) @@ -568,10 +550,6 @@ #define vcvtnq_m_s16_f16(__inactive, __a, __p) __arm_vcvtnq_m_s16_f16(__inactive, __a, __p) #define vcvtpq_m_s16_f16(__inactive, __a, __p) __arm_vcvtpq_m_s16_f16(__inactive, __a, __p) #define vcvtq_m_s16_f16(__inactive, __a, __p) __arm_vcvtq_m_s16_f16(__inactive, __a, __p) -#define vmlaldavq_p_s16(__a, __b, __p) __arm_vmlaldavq_p_s16(__a, __b, __p) -#define vmlaldavxq_p_s16(__a, __b, __p) __arm_vmlaldavxq_p_s16(__a, __b, __p) -#define vmlsldavq_p_s16(__a, __b, __p) __arm_vmlsldavq_p_s16(__a, __b, __p) -#define vmlsldavxq_p_s16(__a, __b, __p) __arm_vmlsldavxq_p_s16(__a, __b, __p) #define vpselq_f16(__a, __b, __p) __arm_vpselq_f16(__a, __b, __p) #define vmvnq_m_n_u16(__inactive, __imm, __p) __arm_vmvnq_m_n_u16(__inactive, __imm, __p) #define vcvtmq_m_u16_f16(__inactive, __a, __p) __arm_vcvtmq_m_u16_f16(__inactive, __a, __p) @@ -579,7 +557,6 @@ #define vcvtpq_m_u16_f16(__inactive, __a, __p) __arm_vcvtpq_m_u16_f16(__inactive, __a, __p) #define vcvtq_m_u16_f16(__inactive, __a, __p) __arm_vcvtq_m_u16_f16(__inactive, __a, __p) #define vmlaldavaq_u16(__a, __b, __c) __arm_vmlaldavaq_u16(__a, __b, __c) -#define vmlaldavq_p_u16(__a, __b, __p) __arm_vmlaldavq_p_u16(__a, __b, __p) #define vmvnq_m_n_s32(__inactive, __imm, __p) __arm_vmvnq_m_n_s32(__inactive, __imm, __p) #define vcmlaq_f32(__a, __b, __c) __arm_vcmlaq_f32(__a, __b, __c) #define vcmlaq_rot180_f32(__a, __b, __c) __arm_vcmlaq_rot180_f32(__a, __b, __c) @@ -597,10 +574,6 @@ #define vcvtnq_m_s32_f32(__inactive, __a, __p) __arm_vcvtnq_m_s32_f32(__inactive, __a, __p) #define vcvtpq_m_s32_f32(__inactive, __a, __p) __arm_vcvtpq_m_s32_f32(__inactive, __a, __p) #define vcvtq_m_s32_f32(__inactive, __a, __p) __arm_vcvtq_m_s32_f32(__inactive, __a, __p) -#define vmlaldavq_p_s32(__a, __b, __p) __arm_vmlaldavq_p_s32(__a, __b, __p) -#define vmlaldavxq_p_s32(__a, __b, __p) __arm_vmlaldavxq_p_s32(__a, __b, __p) -#define vmlsldavq_p_s32(__a, __b, __p) __arm_vmlsldavq_p_s32(__a, __b, __p) -#define vmlsldavxq_p_s32(__a, __b, __p) __arm_vmlsldavxq_p_s32(__a, __b, __p) #define vpselq_f32(__a, __b, __p) __arm_vpselq_f32(__a, __b, __p) #define vmvnq_m_n_u32(__inactive, __imm, __p) __arm_vmvnq_m_n_u32(__inactive, __imm, __p) #define vcvtmq_m_u32_f32(__inactive, __a, __p) __arm_vcvtmq_m_u32_f32(__inactive, __a, __p) @@ -608,7 +581,6 @@ #define vcvtpq_m_u32_f32(__inactive, __a, __p) __arm_vcvtpq_m_u32_f32(__inactive, __a, __p) #define vcvtq_m_u32_f32(__inactive, __a, __p) __arm_vcvtq_m_u32_f32(__inactive, __a, __p) #define vmlaldavaq_u32(__a, __b, __c) __arm_vmlaldavaq_u32(__a, __b, __c) -#define vmlaldavq_p_u32(__a, __b, __p) __arm_vmlaldavq_p_u32(__a, __b, __p) #define vsriq_m_n_s8(__a, __b, __imm, __p) __arm_vsriq_m_n_s8(__a, __b, __imm, __p) #define vcvtq_m_n_f16_u16(__inactive, __a, __imm6, __p) __arm_vcvtq_m_n_f16_u16(__inactive, __a, __imm6, __p) #define vqshluq_m_n_s8(__inactive, __a, __imm, __p) __arm_vqshluq_m_n_s8(__inactive, __a, __imm, __p) @@ -1849,13 +1821,6 @@ __arm_vmullbq_poly_p8 (uint8x16_t __a, uint8x16_t __b) return __builtin_mve_vmullbq_poly_pv16qi (__a, __b); } -__extension__ extern __inline uint64_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vmlaldavq_u16 (uint16x8_t __a, uint16x8_t __b) -{ - return __builtin_mve_vmlaldavq_uv8hi (__a, __b); -} - __extension__ extern __inline uint16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) __arm_vbicq_n_u16 (uint16x8_t 
__a, const int __imm) @@ -1891,34 +1856,6 @@ __arm_vqdmullbq_n_s16 (int16x8_t __a, int16_t __b) return __builtin_mve_vqdmullbq_n_sv8hi (__a, __b); } -__extension__ extern __inline int64_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vmlsldavxq_s16 (int16x8_t __a, int16x8_t __b) -{ - return __builtin_mve_vmlsldavxq_sv8hi (__a, __b); -} - -__extension__ extern __inline int64_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vmlsldavq_s16 (int16x8_t __a, int16x8_t __b) -{ - return __builtin_mve_vmlsldavq_sv8hi (__a, __b); -} - -__extension__ extern __inline int64_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vmlaldavxq_s16 (int16x8_t __a, int16x8_t __b) -{ - return __builtin_mve_vmlaldavxq_sv8hi (__a, __b); -} - -__extension__ extern __inline int64_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vmlaldavq_s16 (int16x8_t __a, int16x8_t __b) -{ - return __builtin_mve_vmlaldavq_sv8hi (__a, __b); -} - __extension__ extern __inline int16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) __arm_vbicq_n_s16 (int16x8_t __a, const int __imm) @@ -1940,13 +1877,6 @@ __arm_vmullbq_poly_p16 (uint16x8_t __a, uint16x8_t __b) return __builtin_mve_vmullbq_poly_pv8hi (__a, __b); } -__extension__ extern __inline uint64_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vmlaldavq_u32 (uint32x4_t __a, uint32x4_t __b) -{ - return __builtin_mve_vmlaldavq_uv4si (__a, __b); -} - __extension__ extern __inline uint32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) __arm_vbicq_n_u32 (uint32x4_t __a, const int __imm) @@ -1982,34 +1912,6 @@ __arm_vqdmullbq_n_s32 (int32x4_t __a, int32_t __b) return __builtin_mve_vqdmullbq_n_sv4si (__a, __b); } -__extension__ extern __inline int64_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vmlsldavxq_s32 (int32x4_t __a, int32x4_t __b) -{ - return __builtin_mve_vmlsldavxq_sv4si (__a, __b); -} - -__extension__ extern __inline int64_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vmlsldavq_s32 (int32x4_t __a, int32x4_t __b) -{ - return __builtin_mve_vmlsldavq_sv4si (__a, __b); -} - -__extension__ extern __inline int64_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vmlaldavxq_s32 (int32x4_t __a, int32x4_t __b) -{ - return __builtin_mve_vmlaldavxq_sv4si (__a, __b); -} - -__extension__ extern __inline int64_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vmlaldavq_s32 (int32x4_t __a, int32x4_t __b) -{ - return __builtin_mve_vmlaldavq_sv4si (__a, __b); -} - __extension__ extern __inline int32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) __arm_vbicq_n_s32 (int32x4_t __a, const int __imm) @@ -2785,34 +2687,6 @@ __arm_vmlsldavaxq_s16 (int64_t __a, int16x8_t __b, int16x8_t __c) return __builtin_mve_vmlsldavaxq_sv8hi (__a, __b, __c); } -__extension__ extern __inline int64_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vmlaldavq_p_s16 (int16x8_t __a, int16x8_t __b, mve_pred16_t __p) -{ - return __builtin_mve_vmlaldavq_p_sv8hi (__a, __b, __p); -} - -__extension__ extern __inline int64_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vmlaldavxq_p_s16 (int16x8_t __a, int16x8_t __b, mve_pred16_t __p) -{ - return __builtin_mve_vmlaldavxq_p_sv8hi (__a, __b, __p); -} - -__extension__ extern __inline int64_t 
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vmlsldavq_p_s16 (int16x8_t __a, int16x8_t __b, mve_pred16_t __p) -{ - return __builtin_mve_vmlsldavq_p_sv8hi (__a, __b, __p); -} - -__extension__ extern __inline int64_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vmlsldavxq_p_s16 (int16x8_t __a, int16x8_t __b, mve_pred16_t __p) -{ - return __builtin_mve_vmlsldavxq_p_sv8hi (__a, __b, __p); -} - __extension__ extern __inline uint16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) __arm_vmvnq_m_n_u16 (uint16x8_t __inactive, const int __imm, mve_pred16_t __p) @@ -2827,13 +2701,6 @@ __arm_vmlaldavaq_u16 (uint64_t __a, uint16x8_t __b, uint16x8_t __c) return __builtin_mve_vmlaldavaq_uv8hi (__a, __b, __c); } -__extension__ extern __inline uint64_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vmlaldavq_p_u16 (uint16x8_t __a, uint16x8_t __b, mve_pred16_t __p) -{ - return __builtin_mve_vmlaldavq_p_uv8hi (__a, __b, __p); -} - __extension__ extern __inline int32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) __arm_vmvnq_m_n_s32 (int32x4_t __inactive, const int __imm, mve_pred16_t __p) @@ -2869,34 +2736,6 @@ __arm_vmlsldavaxq_s32 (int64_t __a, int32x4_t __b, int32x4_t __c) return __builtin_mve_vmlsldavaxq_sv4si (__a, __b, __c); } -__extension__ extern __inline int64_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vmlaldavq_p_s32 (int32x4_t __a, int32x4_t __b, mve_pred16_t __p) -{ - return __builtin_mve_vmlaldavq_p_sv4si (__a, __b, __p); -} - -__extension__ extern __inline int64_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vmlaldavxq_p_s32 (int32x4_t __a, int32x4_t __b, mve_pred16_t __p) -{ - return __builtin_mve_vmlaldavxq_p_sv4si (__a, __b, __p); -} - -__extension__ extern __inline int64_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vmlsldavq_p_s32 (int32x4_t __a, int32x4_t __b, mve_pred16_t __p) -{ - return __builtin_mve_vmlsldavq_p_sv4si (__a, __b, __p); -} - -__extension__ extern __inline int64_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vmlsldavxq_p_s32 (int32x4_t __a, int32x4_t __b, mve_pred16_t __p) -{ - return __builtin_mve_vmlsldavxq_p_sv4si (__a, __b, __p); -} - __extension__ extern __inline uint32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) __arm_vmvnq_m_n_u32 (uint32x4_t __inactive, const int __imm, mve_pred16_t __p) @@ -2911,13 +2750,6 @@ __arm_vmlaldavaq_u32 (uint64_t __a, uint32x4_t __b, uint32x4_t __c) return __builtin_mve_vmlaldavaq_uv4si (__a, __b, __c); } -__extension__ extern __inline uint64_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vmlaldavq_p_u32 (uint32x4_t __a, uint32x4_t __b, mve_pred16_t __p) -{ - return __builtin_mve_vmlaldavq_p_uv4si (__a, __b, __p); -} - __extension__ extern __inline int8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) __arm_vsriq_m_n_s8 (int8x16_t __a, int8x16_t __b, const int __imm, mve_pred16_t __p) @@ -9139,13 +8971,6 @@ __arm_vmullbq_poly (uint8x16_t __a, uint8x16_t __b) return __arm_vmullbq_poly_p8 (__a, __b); } -__extension__ extern __inline uint64_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vmlaldavq (uint16x8_t __a, uint16x8_t __b) -{ - return __arm_vmlaldavq_u16 (__a, __b); -} - __extension__ extern __inline uint16x8_t __attribute__ ((__always_inline__, 
__gnu_inline__, __artificial__)) __arm_vbicq (uint16x8_t __a, const int __imm) @@ -9181,34 +9006,6 @@ __arm_vqdmullbq (int16x8_t __a, int16_t __b) return __arm_vqdmullbq_n_s16 (__a, __b); } -__extension__ extern __inline int64_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vmlsldavxq (int16x8_t __a, int16x8_t __b) -{ - return __arm_vmlsldavxq_s16 (__a, __b); -} - -__extension__ extern __inline int64_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vmlsldavq (int16x8_t __a, int16x8_t __b) -{ - return __arm_vmlsldavq_s16 (__a, __b); -} - -__extension__ extern __inline int64_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vmlaldavxq (int16x8_t __a, int16x8_t __b) -{ - return __arm_vmlaldavxq_s16 (__a, __b); -} - -__extension__ extern __inline int64_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vmlaldavq (int16x8_t __a, int16x8_t __b) -{ - return __arm_vmlaldavq_s16 (__a, __b); -} - __extension__ extern __inline int16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) __arm_vbicq (int16x8_t __a, const int __imm) @@ -9230,13 +9027,6 @@ __arm_vmullbq_poly (uint16x8_t __a, uint16x8_t __b) return __arm_vmullbq_poly_p16 (__a, __b); } -__extension__ extern __inline uint64_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vmlaldavq (uint32x4_t __a, uint32x4_t __b) -{ - return __arm_vmlaldavq_u32 (__a, __b); -} - __extension__ extern __inline uint32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) __arm_vbicq (uint32x4_t __a, const int __imm) @@ -9272,34 +9062,6 @@ __arm_vqdmullbq (int32x4_t __a, int32_t __b) return __arm_vqdmullbq_n_s32 (__a, __b); } -__extension__ extern __inline int64_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vmlsldavxq (int32x4_t __a, int32x4_t __b) -{ - return __arm_vmlsldavxq_s32 (__a, __b); -} - -__extension__ extern __inline int64_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vmlsldavq (int32x4_t __a, int32x4_t __b) -{ - return __arm_vmlsldavq_s32 (__a, __b); -} - -__extension__ extern __inline int64_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vmlaldavxq (int32x4_t __a, int32x4_t __b) -{ - return __arm_vmlaldavxq_s32 (__a, __b); -} - -__extension__ extern __inline int64_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vmlaldavq (int32x4_t __a, int32x4_t __b) -{ - return __arm_vmlaldavq_s32 (__a, __b); -} - __extension__ extern __inline int32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) __arm_vbicq (int32x4_t __a, const int __imm) @@ -10035,34 +9797,6 @@ __arm_vmlsldavaxq (int64_t __a, int16x8_t __b, int16x8_t __c) return __arm_vmlsldavaxq_s16 (__a, __b, __c); } -__extension__ extern __inline int64_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vmlaldavq_p (int16x8_t __a, int16x8_t __b, mve_pred16_t __p) -{ - return __arm_vmlaldavq_p_s16 (__a, __b, __p); -} - -__extension__ extern __inline int64_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vmlaldavxq_p (int16x8_t __a, int16x8_t __b, mve_pred16_t __p) -{ - return __arm_vmlaldavxq_p_s16 (__a, __b, __p); -} - -__extension__ extern __inline int64_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vmlsldavq_p (int16x8_t __a, int16x8_t __b, mve_pred16_t __p) -{ - return __arm_vmlsldavq_p_s16 (__a, 
__b, __p); -} - -__extension__ extern __inline int64_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vmlsldavxq_p (int16x8_t __a, int16x8_t __b, mve_pred16_t __p) -{ - return __arm_vmlsldavxq_p_s16 (__a, __b, __p); -} - __extension__ extern __inline uint16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) __arm_vmvnq_m (uint16x8_t __inactive, const int __imm, mve_pred16_t __p) @@ -10077,13 +9811,6 @@ __arm_vmlaldavaq (uint64_t __a, uint16x8_t __b, uint16x8_t __c) return __arm_vmlaldavaq_u16 (__a, __b, __c); } -__extension__ extern __inline uint64_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vmlaldavq_p (uint16x8_t __a, uint16x8_t __b, mve_pred16_t __p) -{ - return __arm_vmlaldavq_p_u16 (__a, __b, __p); -} - __extension__ extern __inline int32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) __arm_vmvnq_m (int32x4_t __inactive, const int __imm, mve_pred16_t __p) @@ -10119,34 +9846,6 @@ __arm_vmlsldavaxq (int64_t __a, int32x4_t __b, int32x4_t __c) return __arm_vmlsldavaxq_s32 (__a, __b, __c); } -__extension__ extern __inline int64_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vmlaldavq_p (int32x4_t __a, int32x4_t __b, mve_pred16_t __p) -{ - return __arm_vmlaldavq_p_s32 (__a, __b, __p); -} - -__extension__ extern __inline int64_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vmlaldavxq_p (int32x4_t __a, int32x4_t __b, mve_pred16_t __p) -{ - return __arm_vmlaldavxq_p_s32 (__a, __b, __p); -} - -__extension__ extern __inline int64_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vmlsldavq_p (int32x4_t __a, int32x4_t __b, mve_pred16_t __p) -{ - return __arm_vmlsldavq_p_s32 (__a, __b, __p); -} - -__extension__ extern __inline int64_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vmlsldavxq_p (int32x4_t __a, int32x4_t __b, mve_pred16_t __p) -{ - return __arm_vmlsldavxq_p_s32 (__a, __b, __p); -} - __extension__ extern __inline uint32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) __arm_vmvnq_m (uint32x4_t __inactive, const int __imm, mve_pred16_t __p) @@ -10161,13 +9860,6 @@ __arm_vmlaldavaq (uint64_t __a, uint32x4_t __b, uint32x4_t __c) return __arm_vmlaldavaq_u32 (__a, __b, __c); } -__extension__ extern __inline uint64_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vmlaldavq_p (uint32x4_t __a, uint32x4_t __b, mve_pred16_t __p) -{ - return __arm_vmlaldavq_p_u32 (__a, __b, __p); -} - __extension__ extern __inline int8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) __arm_vsriq_m (int8x16_t __a, int8x16_t __b, const int __imm, mve_pred16_t __p) @@ -15203,12 +14895,6 @@ extern void *__ARM_undef; int (*)[__ARM_mve_type_int16x8_t]: __arm_vqshluq_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), p1), \ int (*)[__ARM_mve_type_int32x4_t]: __arm_vqshluq_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), p1));}) -#define __arm_vmlaldavxq(p0,p1) ({ __typeof(p0) __p0 = (p0); \ - __typeof(p1) __p1 = (p1); \ - _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \ - int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vmlaldavxq_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t)), \ - int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vmlaldavxq_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t)));}) - #define __arm_vqdmulltq(p0,p1) 
({ __typeof(p0) __p0 = (p0); \ __typeof(p1) __p1 = (p1); \ _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \ @@ -16272,12 +15958,6 @@ extern void *__ARM_undef; int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t]: __arm_vmullbq_poly_p8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8x16_t)), \ int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vmullbq_poly_p16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t)));}) -#define __arm_vmlaldavxq(p0,p1) ({ __typeof(p0) __p0 = (p0); \ - __typeof(p1) __p1 = (p1); \ - _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \ - int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vmlaldavxq_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t)), \ - int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vmlaldavxq_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t)));}) - #define __arm_vqdmulltq(p0,p1) ({ __typeof(p0) __p0 = (p0); \ __typeof(p1) __p1 = (p1); \ _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \ @@ -17460,28 +17140,6 @@ extern void *__ARM_undef; int (*)[__ARM_mve_type_int_n][__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vmlaldavaxq_s16 (__ARM_mve_coerce3(p0, int), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16x8_t)), \ int (*)[__ARM_mve_type_int_n][__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vmlaldavaxq_s32 (__ARM_mve_coerce3(p0, int), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32x4_t)));}) -#define __arm_vmlaldavq(p0,p1) ({ __typeof(p0) __p0 = (p0); \ - __typeof(p1) __p1 = (p1); \ - _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \ - int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vmlaldavq_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t)), \ - int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vmlaldavq_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t)), \ - int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vmlaldavq_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t)), \ - int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vmlaldavq_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t)));}) - -#define __arm_vmlaldavq_p(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \ - __typeof(p1) __p1 = (p1); \ - _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \ - int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vmlaldavq_p_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), p2), \ - int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vmlaldavq_p_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), p2), \ - int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vmlaldavq_p_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t), p2), \ - int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vmlaldavq_p_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t), p2));}) - -#define __arm_vmlaldavxq_p(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \ - __typeof(p1) __p1 = (p1); \ - _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \ - int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vmlaldavxq_p_s16 (__ARM_mve_coerce(__p0, 
int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), p2), \ - int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vmlaldavxq_p_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), p2));}) - #define __arm_vmlsldavaq(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \ __typeof(p1) __p1 = (p1); \ __typeof(p2) __p2 = (p2); \ @@ -17496,30 +17154,6 @@ extern void *__ARM_undef; int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vmlsldavaxq_s16(__p0, __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16x8_t)), \ int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vmlsldavaxq_s32(__p0, __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32x4_t)));}) -#define __arm_vmlsldavq(p0,p1) ({ __typeof(p0) __p0 = (p0); \ - __typeof(p1) __p1 = (p1); \ - _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \ - int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vmlsldavq_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t)), \ - int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vmlsldavq_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t)));}) - -#define __arm_vmlsldavq_p(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \ - __typeof(p1) __p1 = (p1); \ - _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \ - int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vmlsldavq_p_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), p2), \ - int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vmlsldavq_p_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), p2));}) - -#define __arm_vmlsldavxq(p0,p1) ({ __typeof(p0) __p0 = (p0); \ - __typeof(p1) __p1 = (p1); \ - _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \ - int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vmlsldavxq_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t)), \ - int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vmlsldavxq_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t)));}) - -#define __arm_vmlsldavxq_p(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \ - __typeof(p1) __p1 = (p1); \ - _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \ - int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vmlsldavxq_p_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), p2), \ - int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vmlsldavxq_p_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), p2));}) - #define __arm_vmullbq_int_x(p1,p2,p3) ({ __typeof(p1) __p1 = (p1); \ __typeof(p2) __p2 = (p2); \ _Generic( (int (*)[__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \ -- 2.34.1