Hi All,

This patch adds NEON intrinsics and tests for the Armv8.3-A complex multiplication and addition instructions, which take a rotation along the Argand plane.
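For reviewers unfamiliar with the instructions, the intended usage pattern is roughly the following (an illustrative sketch, not part of the patch; compiled with -march=armv8.3-a): a full complex multiply-accumulate is obtained by issuing the #0 and #90 rotation forms back to back.

#include <arm_neon.h>

/* acc += a * b for complex numbers stored as { real, imag } pairs.
   The #0 rotation contributes the products with the real part of a,
   the #90 rotation those with the imaginary part of a.  */
float32x2_t
complex_mla (float32x2_t acc, float32x2_t a, float32x2_t b)
{
  acc = vcmla_f32 (acc, a, b);
  acc = vcmla_rot90_f32 (acc, a, b);
  return acc;
}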
The instructions are documented in the ArmARM [1] and the intrinsics specification will be published on the Arm website [2].

The lane versions of these instructions are special in that they always select a pair of lanes: index 0 selects lanes 0 and 1.  Because of this the range check for these intrinsics requires special handling (see the short example after the ChangeLog below).

[1] https://developer.arm.com/docs/ddi0487/latest/arm-architecture-reference-manual-armv8-for-armv8-a-architecture-profile
[2] https://developer.arm.com/docs/101028/latest

Bootstrapped and regtested on aarch64-none-linux-gnu with no issues.

Ok for trunk?

Thanks,
Tamar

gcc/ChangeLog:

2018-12-11  Tamar Christina  <tamar.christ...@arm.com>

	* config/aarch64/aarch64-builtins.c (enum aarch64_type_qualifiers):
	Add qualifier_lane_pair_index.
	(TYPES_QUADOP_LANE_PAIR): New.
	(aarch64_simd_expand_args): Use it.
	(aarch64_simd_expand_builtin): Likewise.
	* config/aarch64/aarch64-c.c (aarch64_update_cpp_builtins): Add
	__ARM_FEATURE_COMPLEX.
	* config/aarch64/aarch64-simd-builtins.def (fcadd90, fcadd270,
	fcmla0, fcmla90, fcmla180, fcmla270, fcmla_lane0, fcmla_lane90,
	fcmla_lane180, fcmla_lane270, fcmla_laneq0, fcmla_laneq90,
	fcmla_laneq180, fcmla_laneq270, fcmlaq_lane0, fcmlaq_lane90,
	fcmlaq_lane180, fcmlaq_lane270): New.
	* config/aarch64/aarch64-simd.md (aarch64_fcmla_lane<rot><mode>,
	aarch64_fcmla_laneq<rot><mode>, aarch64_fcmlaq_lane<rot><mode>): New.
	* config/aarch64/arm_neon.h (vcadd_rot90_f16): New.
	(vcaddq_rot90_f16): New.
	(vcadd_rot270_f16): New.
	(vcaddq_rot270_f16): New.
	(vcmla_f16): New.
	(vcmlaq_f16): New.
	(vcmla_lane_f16): New.
	(vcmla_laneq_f16): New.
	(vcmlaq_lane_f16): New.
	(vcmlaq_rot90_lane_f16): New.
	(vcmla_rot90_laneq_f16): New.
	(vcmla_rot90_lane_f16): New.
	(vcmlaq_rot90_f16): New.
	(vcmla_rot90_f16): New.
	(vcmlaq_laneq_f16): New.
	(vcmla_rot180_laneq_f16): New.
	(vcmla_rot180_lane_f16): New.
	(vcmlaq_rot180_f16): New.
	(vcmla_rot180_f16): New.
	(vcmlaq_rot90_laneq_f16): New.
	(vcmlaq_rot270_laneq_f16): New.
	(vcmlaq_rot270_lane_f16): New.
	(vcmla_rot270_laneq_f16): New.
	(vcmlaq_rot270_f16): New.
	(vcmla_rot270_f16): New.
	(vcmlaq_rot180_laneq_f16): New.
	(vcmlaq_rot180_lane_f16): New.
	(vcmla_rot270_lane_f16): New.
	(vcadd_rot90_f32): New.
	(vcaddq_rot90_f32): New.
	(vcaddq_rot90_f64): New.
	(vcadd_rot270_f32): New.
	(vcaddq_rot270_f32): New.
	(vcaddq_rot270_f64): New.
	(vcmla_f32): New.
	(vcmlaq_f32): New.
	(vcmlaq_f64): New.
	(vcmla_lane_f32): New.
	(vcmla_laneq_f32): New.
	(vcmlaq_lane_f32): New.
	(vcmlaq_laneq_f32): New.
	(vcmla_rot90_f32): New.
	(vcmlaq_rot90_f32): New.
	(vcmlaq_rot90_f64): New.
	(vcmla_rot90_lane_f32): New.
	(vcmla_rot90_laneq_f32): New.
	(vcmlaq_rot90_lane_f32): New.
	(vcmlaq_rot90_laneq_f32): New.
	(vcmla_rot180_f32): New.
	(vcmlaq_rot180_f32): New.
	(vcmlaq_rot180_f64): New.
	(vcmla_rot180_lane_f32): New.
	(vcmla_rot180_laneq_f32): New.
	(vcmlaq_rot180_lane_f32): New.
	(vcmlaq_rot180_laneq_f32): New.
	(vcmla_rot270_f32): New.
	(vcmlaq_rot270_f32): New.
	(vcmlaq_rot270_f64): New.
	(vcmla_rot270_lane_f32): New.
	(vcmla_rot270_laneq_f32): New.
	(vcmlaq_rot270_lane_f32): New.
	(vcmlaq_rot270_laneq_f32): New.

gcc/testsuite/ChangeLog:

2018-12-11  Tamar Christina  <tamar.christ...@arm.com>

	* gcc.target/aarch64/advsimd-intrinsics/vector-complex.c: New test.
	* gcc.target/aarch64/advsimd-intrinsics/vector-complex_f16.c: New test.

--
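To make the pair selection concrete, here is a small example (not part of the patch; the function name is just for illustration, compiled with -march=armv8.3-a+fp16) showing how the lane index maps onto element pairs and why the bounds check uses nunits / 2:

#include <arm_neon.h>

/* __b holds two complex values: elements {0,1} and {2,3}.  The lane
   index therefore ranges over nunits / 2, i.e. 0 or 1 here, and
   selects a real/imaginary pair rather than a single element.
   An index of 2 would be rejected by the adjusted range check.  */
float16x4_t
use_second_pair (float16x4_t __r, float16x4_t __a, float16x4_t __b)
{
  return vcmla_lane_f16 (__r, __a, __b, 1); /* Pair in elements 2 and 3.  */
}
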
diff --git a/gcc/config/aarch64/aarch64-builtins.c b/gcc/config/aarch64/aarch64-builtins.c index 8cced94567008e28b1761ec8771589a3925f2904..aaf18a909828b3eeac9d3b676f429923609972a3 100644 --- a/gcc/config/aarch64/aarch64-builtins.c +++ b/gcc/config/aarch64/aarch64-builtins.c @@ -102,7 +102,10 @@ enum aarch64_type_qualifiers /* Lane indices - must be in range, and flipped for bigendian. */ qualifier_lane_index = 0x200, /* Lane indices for single lane structure loads and stores. */ - qualifier_struct_load_store_lane_index = 0x400 + qualifier_struct_load_store_lane_index = 0x400, + /* Lane indices selected in pairs. - must be in range, and flipped for + bigendian. */ + qualifier_lane_pair_index = 0x800, }; typedef struct @@ -171,6 +174,11 @@ aarch64_types_ternopu_imm_qualifiers[SIMD_MAX_BUILTIN_ARGS] #define TYPES_TERNOPUI (aarch64_types_ternopu_imm_qualifiers) +static enum aarch64_type_qualifiers +aarch64_types_quadop_lane_pair_qualifiers[SIMD_MAX_BUILTIN_ARGS] + = { qualifier_none, qualifier_none, qualifier_none, + qualifier_none, qualifier_lane_pair_index }; +#define TYPES_QUADOP_LANE_PAIR (aarch64_types_quadop_lane_pair_qualifiers) static enum aarch64_type_qualifiers aarch64_types_quadop_lane_qualifiers[SIMD_MAX_BUILTIN_ARGS] = { qualifier_none, qualifier_none, qualifier_none, @@ -1031,6 +1039,7 @@ typedef enum SIMD_ARG_CONSTANT, SIMD_ARG_LANE_INDEX, SIMD_ARG_STRUCT_LOAD_STORE_LANE_INDEX, + SIMD_ARG_LANE_PAIR_INDEX, SIMD_ARG_STOP } builtin_simd_arg; @@ -1102,6 +1111,22 @@ aarch64_simd_expand_args (rtx target, int icode, int have_retval, /* Keep to GCC-vector-extension lane indices in the RTL. */ op[opc] = aarch64_endian_lane_rtx (vmode, INTVAL (op[opc])); } + /* If the lane index isn't a constant then error out. */ + goto constant_arg; + + case SIMD_ARG_LANE_PAIR_INDEX: + /* Must be a previous operand into which this is an index and + index is restricted to nunits / 2. */ + gcc_assert (opc > 0); + if (CONST_INT_P (op[opc])) + { + machine_mode vmode = insn_data[icode].operand[opc - 1].mode; + unsigned int nunits + = GET_MODE_NUNITS (vmode).to_constant (); + aarch64_simd_lane_bounds (op[opc], 0, nunits / 2, exp); + /* Keep to GCC-vector-extension lane indices in the RTL. */ + op[opc] = aarch64_endian_lane_rtx (vmode, INTVAL (op[opc])); + } /* Fall through - if the lane index isn't a constant then the next case will error. 
*/ /* FALLTHRU */ @@ -1215,6 +1240,8 @@ aarch64_simd_expand_builtin (int fcode, tree exp, rtx target) if (d->qualifiers[qualifiers_k] & qualifier_lane_index) args[k] = SIMD_ARG_LANE_INDEX; + else if (d->qualifiers[qualifiers_k] & qualifier_lane_pair_index) + args[k] = SIMD_ARG_LANE_PAIR_INDEX; else if (d->qualifiers[qualifiers_k] & qualifier_struct_load_store_lane_index) args[k] = SIMD_ARG_STRUCT_LOAD_STORE_LANE_INDEX; else if (d->qualifiers[qualifiers_k] & qualifier_immediate) diff --git a/gcc/config/aarch64/aarch64-c.c b/gcc/config/aarch64/aarch64-c.c index 40c738c7c3b0fc09378dd8058f09e4e4fff33a6a..39943e3a930972d5019162c0e8b7c26e059f71f6 100644 --- a/gcc/config/aarch64/aarch64-c.c +++ b/gcc/config/aarch64/aarch64-c.c @@ -109,6 +109,7 @@ aarch64_update_cpp_builtins (cpp_reader *pfile) aarch64_def_or_undef (TARGET_CRC32, "__ARM_FEATURE_CRC32", pfile); aarch64_def_or_undef (TARGET_DOTPROD, "__ARM_FEATURE_DOTPROD", pfile); + aarch64_def_or_undef (TARGET_COMPLEX, "__ARM_FEATURE_COMPLEX", pfile); cpp_undef (pfile, "__AARCH64_CMODEL_TINY__"); cpp_undef (pfile, "__AARCH64_CMODEL_SMALL__"); diff --git a/gcc/config/aarch64/aarch64-simd-builtins.def b/gcc/config/aarch64/aarch64-simd-builtins.def index 980c90351b36630d7bcf6b8f5c0ff11d081665e7..90c8372c01dc8119c117d4c2590117e320a5b788 100644 --- a/gcc/config/aarch64/aarch64-simd-builtins.def +++ b/gcc/config/aarch64/aarch64-simd-builtins.def @@ -217,6 +217,30 @@ BUILTIN_VB (QUADOP_LANE, sdot_laneq, 0) BUILTIN_VB (QUADOPU_LANE, udot_laneq, 0) + /* Implemented by aarch64_fcadd<rot><mode>. */ + BUILTIN_VHSDF (BINOP, fcadd90, 0) + BUILTIN_VHSDF (BINOP, fcadd270, 0) + + /* Implemented by aarch64_fcmla{_lane}{q}<rot><mode>. */ + BUILTIN_VHSDF (TERNOP, fcmla0, 0) + BUILTIN_VHSDF (TERNOP, fcmla90, 0) + BUILTIN_VHSDF (TERNOP, fcmla180, 0) + BUILTIN_VHSDF (TERNOP, fcmla270, 0) + BUILTIN_VHSDF (QUADOP_LANE_PAIR, fcmla_lane0, 0) + BUILTIN_VHSDF (QUADOP_LANE_PAIR, fcmla_lane90, 0) + BUILTIN_VHSDF (QUADOP_LANE_PAIR, fcmla_lane180, 0) + BUILTIN_VHSDF (QUADOP_LANE_PAIR, fcmla_lane270, 0) + + BUILTIN_VDF (QUADOP_LANE_PAIR, fcmla_laneq0, 0) + BUILTIN_VDF (QUADOP_LANE_PAIR, fcmla_laneq90, 0) + BUILTIN_VDF (QUADOP_LANE_PAIR, fcmla_laneq180, 0) + BUILTIN_VDF (QUADOP_LANE_PAIR, fcmla_laneq270, 0) + + BUILTIN_VQ_HSF (QUADOP_LANE_PAIR, fcmlaq_lane0, 0) + BUILTIN_VQ_HSF (QUADOP_LANE_PAIR, fcmlaq_lane90, 0) + BUILTIN_VQ_HSF (QUADOP_LANE_PAIR, fcmlaq_lane180, 0) + BUILTIN_VQ_HSF (QUADOP_LANE_PAIR, fcmlaq_lane270, 0) + BUILTIN_VDQ_I (SHIFTIMM, ashr, 3) VAR1 (SHIFTIMM, ashr_simd, 0, di) BUILTIN_VDQ_I (SHIFTIMM, lshr, 3) diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md index bd5fc199e4fc7b8452aa728333fc1d0e6117da51..7e118f95deb033e40548ad2980988be8668f1fa0 100644 --- a/gcc/config/aarch64/aarch64-simd.md +++ b/gcc/config/aarch64/aarch64-simd.md @@ -476,6 +476,42 @@ DONE; }) + +(define_insn "aarch64_fcmla_lane<rot><mode>" + [(set (match_operand:VHSDF 0 "register_operand" "=w") + (plus:VHSDF (match_operand:VHSDF 1 "register_operand" "0") + (unspec:VHSDF [(match_operand:VHSDF 2 "register_operand" "w") + (match_operand:VHSDF 3 "register_operand" "w") + (match_operand:SI 4 "const_int_operand" "n")] + FCMLA)))] + "TARGET_COMPLEX" + "fcmla\t%0.<Vtype>, %2.<Vtype>, %3.<Vetype>[%4], #<rot>" + [(set_attr "type" "neon_fcmla")] +) + +(define_insn "aarch64_fcmla_laneq<rot><mode>" + [(set (match_operand:VDF 0 "register_operand" "=w") + (plus:VDF (match_operand:VDF 1 "register_operand" "0") + (unspec:VDF [(match_operand:VDF 2 "register_operand" "w") + 
(match_operand:<VDBL> 3 "register_operand" "w") + (match_operand:SI 4 "const_int_operand" "n")] + FCMLA)))] + "TARGET_COMPLEX" + "fcmla\t%0.<Vtype>, %2.<Vtype>, %3.<Vetype>[%4], #<rot>" + [(set_attr "type" "neon_fcmla")] +) + +(define_insn "aarch64_fcmlaq_lane<rot><mode>" + [(set (match_operand:VQ_HSF 0 "register_operand" "=w") + (plus:VQ_HSF (match_operand:VQ_HSF 1 "register_operand" "0") + (unspec:VQ_HSF [(match_operand:VQ_HSF 2 "register_operand" "w") + (match_operand:<VHALF> 3 "register_operand" "w") + (match_operand:SI 4 "const_int_operand" "n")] + FCMLA)))] + "TARGET_COMPLEX" + "fcmla\t%0.<Vtype>, %2.<Vtype>, %3.<Vetype>[%4], #<rot>" + [(set_attr "type" "neon_fcmla")] +) ;; These instructions map to the __builtins for the Dot Product operations. (define_insn "aarch64_<sur>dot<vsi2qi>" [(set (match_operand:VS 0 "register_operand" "=w") diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h index fc734e1aa9e93c171c0670164e5a3a54209905d3..32e70e31c369184232502704d69b74029228e49c 100644 --- a/gcc/config/aarch64/arm_neon.h +++ b/gcc/config/aarch64/arm_neon.h @@ -33294,6 +33294,481 @@ vbcaxq_s64 (int64x2_t __a, int64x2_t __b, int64x2_t __c) return __builtin_aarch64_bcaxqv2di (__a, __b, __c); } +#pragma GCC pop_options + +/* AdvSIMD Complex numbers intrinsics. */ + +#pragma GCC push_options +#pragma GCC target(("arch=armv8.3-a")) + +#pragma GCC push_options +#pragma GCC target(("+fp16")) +__extension__ extern __inline float16x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vcadd_rot90_f16 (float16x4_t __a, float16x4_t __b) +{ + return __builtin_aarch64_fcadd90v4hf (__a, __b); +} + +__extension__ extern __inline float16x8_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vcaddq_rot90_f16 (float16x8_t __a, float16x8_t __b) +{ + return __builtin_aarch64_fcadd90v8hf (__a, __b); +} + +__extension__ extern __inline float16x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vcadd_rot270_f16 (float16x4_t __a, float16x4_t __b) +{ + return __builtin_aarch64_fcadd270v4hf (__a, __b); +} + +__extension__ extern __inline float16x8_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vcaddq_rot270_f16 (float16x8_t __a, float16x8_t __b) +{ + return __builtin_aarch64_fcadd270v8hf (__a, __b); +} + +__extension__ extern __inline float16x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vcmla_f16 (float16x4_t __r, float16x4_t __a, float16x4_t __b) +{ + return __builtin_aarch64_fcmla0v4hf (__r, __a, __b); +} + +__extension__ extern __inline float16x8_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vcmlaq_f16 (float16x8_t __r, float16x8_t __a, float16x8_t __b) +{ + return __builtin_aarch64_fcmla0v8hf (__r, __a, __b); +} + +__extension__ extern __inline float16x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vcmla_lane_f16 (float16x4_t __r, float16x4_t __a, float16x4_t __b, + const int __index) +{ + return __builtin_aarch64_fcmla_lane0v4hf (__r, __a, __b, __index); +} + +__extension__ extern __inline float16x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vcmla_laneq_f16 (float16x4_t __r, float16x4_t __a, float16x8_t __b, + const int __index) +{ + return __builtin_aarch64_fcmla_laneq0v4hf (__r, __a, __b, __index); +} + +__extension__ extern __inline float16x8_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vcmlaq_lane_f16 (float16x8_t __r, float16x8_t __a, float16x4_t __b, + const 
int __index) +{ + return __builtin_aarch64_fcmlaq_lane0v8hf (__r, __a, __b, __index); +} + +__extension__ extern __inline float16x8_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vcmlaq_rot90_lane_f16 (float16x8_t __r, float16x8_t __a, float16x4_t __b, + const int __index) +{ + return __builtin_aarch64_fcmlaq_lane90v8hf (__r, __a, __b, __index); +} + +__extension__ extern __inline float16x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vcmla_rot90_laneq_f16 (float16x4_t __r, float16x4_t __a, float16x8_t __b, + const int __index) +{ + return __builtin_aarch64_fcmla_laneq90v4hf (__r, __a, __b, __index); +} + +__extension__ extern __inline float16x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vcmla_rot90_lane_f16 (float16x4_t __r, float16x4_t __a, float16x4_t __b, + const int __index) +{ + return __builtin_aarch64_fcmla_lane90v4hf (__r, __a, __b, __index); +} + +__extension__ extern __inline float16x8_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vcmlaq_rot90_f16 (float16x8_t __r, float16x8_t __a, float16x8_t __b) +{ + return __builtin_aarch64_fcmla90v8hf (__r, __a, __b); +} + +__extension__ extern __inline float16x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vcmla_rot90_f16 (float16x4_t __r, float16x4_t __a, float16x4_t __b) +{ + return __builtin_aarch64_fcmla90v4hf (__r, __a, __b); +} + +__extension__ extern __inline float16x8_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vcmlaq_laneq_f16 (float16x8_t __r, float16x8_t __a, float16x8_t __b, + const int __index) +{ + return __builtin_aarch64_fcmla_lane0v8hf (__r, __a, __b, __index); +} + +__extension__ extern __inline float16x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vcmla_rot180_laneq_f16 (float16x4_t __r, float16x4_t __a, float16x8_t __b, + const int __index) +{ + return __builtin_aarch64_fcmla_laneq180v4hf (__r, __a, __b, __index); +} + +__extension__ extern __inline float16x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vcmla_rot180_lane_f16 (float16x4_t __r, float16x4_t __a, float16x4_t __b, + const int __index) +{ + return __builtin_aarch64_fcmla_lane180v4hf (__r, __a, __b, __index); +} + +__extension__ extern __inline float16x8_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vcmlaq_rot180_f16 (float16x8_t __r, float16x8_t __a, float16x8_t __b) +{ + return __builtin_aarch64_fcmla180v8hf (__r, __a, __b); +} + +__extension__ extern __inline float16x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vcmla_rot180_f16 (float16x4_t __r, float16x4_t __a, float16x4_t __b) +{ + return __builtin_aarch64_fcmla180v4hf (__r, __a, __b); +} + +__extension__ extern __inline float16x8_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vcmlaq_rot90_laneq_f16 (float16x8_t __r, float16x8_t __a, float16x8_t __b, + const int __index) +{ + return __builtin_aarch64_fcmla_lane90v8hf (__r, __a, __b, __index); +} + +__extension__ extern __inline float16x8_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vcmlaq_rot270_laneq_f16 (float16x8_t __r, float16x8_t __a, float16x8_t __b, + const int __index) +{ + return __builtin_aarch64_fcmla_lane270v8hf (__r, __a, __b, __index); +} + +__extension__ extern __inline float16x8_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vcmlaq_rot270_lane_f16 (float16x8_t __r, float16x8_t __a, float16x4_t 
__b, + const int __index) +{ + return __builtin_aarch64_fcmlaq_lane270v8hf (__r, __a, __b, __index); +} + +__extension__ extern __inline float16x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vcmla_rot270_laneq_f16 (float16x4_t __r, float16x4_t __a, float16x8_t __b, + const int __index) +{ + return __builtin_aarch64_fcmla_laneq270v4hf (__r, __a, __b, __index); +} + +__extension__ extern __inline float16x8_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vcmlaq_rot270_f16 (float16x8_t __r, float16x8_t __a, float16x8_t __b) +{ + return __builtin_aarch64_fcmla270v8hf (__r, __a, __b); +} + +__extension__ extern __inline float16x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vcmla_rot270_f16 (float16x4_t __r, float16x4_t __a, float16x4_t __b) +{ + return __builtin_aarch64_fcmla270v4hf (__r, __a, __b); +} + +__extension__ extern __inline float16x8_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vcmlaq_rot180_laneq_f16 (float16x8_t __r, float16x8_t __a, float16x8_t __b, + const int __index) +{ + return __builtin_aarch64_fcmla_lane180v8hf (__r, __a, __b, __index); +} + +__extension__ extern __inline float16x8_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vcmlaq_rot180_lane_f16 (float16x8_t __r, float16x8_t __a, float16x4_t __b, + const int __index) +{ + return __builtin_aarch64_fcmlaq_lane180v8hf (__r, __a, __b, __index); +} + +__extension__ extern __inline float16x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vcmla_rot270_lane_f16 (float16x4_t __r, float16x4_t __a, float16x4_t __b, + const int __index) +{ + return __builtin_aarch64_fcmla_lane270v4hf (__r, __a, __b, __index); +} +#pragma GCC pop_options + +__extension__ extern __inline float32x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vcadd_rot90_f32 (float32x2_t __a, float32x2_t __b) +{ + return __builtin_aarch64_fcadd90v2sf (__a, __b); +} + +__extension__ extern __inline float32x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vcaddq_rot90_f32 (float32x4_t __a, float32x4_t __b) +{ + return __builtin_aarch64_fcadd90v4sf (__a, __b); +} + +__extension__ extern __inline float64x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vcaddq_rot90_f64 (float64x2_t __a, float64x2_t __b) +{ + return __builtin_aarch64_fcadd90v2df (__a, __b); +} + +__extension__ extern __inline float32x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vcadd_rot270_f32 (float32x2_t __a, float32x2_t __b) +{ + return __builtin_aarch64_fcadd270v2sf (__a, __b); +} + +__extension__ extern __inline float32x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vcaddq_rot270_f32 (float32x4_t __a, float32x4_t __b) +{ + return __builtin_aarch64_fcadd270v4sf (__a, __b); +} + +__extension__ extern __inline float64x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vcaddq_rot270_f64 (float64x2_t __a, float64x2_t __b) +{ + return __builtin_aarch64_fcadd270v2df (__a, __b); +} + +__extension__ extern __inline float32x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vcmla_f32 (float32x2_t __r, float32x2_t __a, float32x2_t __b) +{ + return __builtin_aarch64_fcmla0v2sf (__r, __a, __b); +} + +__extension__ extern __inline float32x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vcmlaq_f32 (float32x4_t __r, float32x4_t __a, float32x4_t __b) +{ + return 
__builtin_aarch64_fcmla0v4sf (__r, __a, __b); +} + +__extension__ extern __inline float64x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vcmlaq_f64 (float64x2_t __r, float64x2_t __a, float64x2_t __b) +{ + return __builtin_aarch64_fcmla0v2df (__r, __a, __b); +} + +__extension__ extern __inline float32x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vcmla_lane_f32 (float32x2_t __r, float32x2_t __a, float32x2_t __b, + const int __index) +{ + return __builtin_aarch64_fcmla_lane0v2sf (__r, __a, __b, __index); +} + +__extension__ extern __inline float32x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vcmla_laneq_f32 (float32x2_t __r, float32x2_t __a, float32x4_t __b, + const int __index) +{ + return __builtin_aarch64_fcmla_laneq0v2sf (__r, __a, __b, __index); +} + +__extension__ extern __inline float32x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vcmlaq_lane_f32 (float32x4_t __r, float32x4_t __a, float32x2_t __b, + const int __index) +{ + return __builtin_aarch64_fcmlaq_lane0v4sf (__r, __a, __b, __index); +} + +__extension__ extern __inline float32x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vcmlaq_laneq_f32 (float32x4_t __r, float32x4_t __a, float32x4_t __b, + const int __index) +{ + return __builtin_aarch64_fcmla_lane0v4sf (__r, __a, __b, __index); +} + +__extension__ extern __inline float32x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vcmla_rot90_f32 (float32x2_t __r, float32x2_t __a, float32x2_t __b) +{ + return __builtin_aarch64_fcmla90v2sf (__r, __a, __b); +} + +__extension__ extern __inline float32x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vcmlaq_rot90_f32 (float32x4_t __r, float32x4_t __a, float32x4_t __b) +{ + return __builtin_aarch64_fcmla90v4sf (__r, __a, __b); +} + +__extension__ extern __inline float64x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vcmlaq_rot90_f64 (float64x2_t __r, float64x2_t __a, float64x2_t __b) +{ + return __builtin_aarch64_fcmla90v2df (__r, __a, __b); +} + +__extension__ extern __inline float32x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vcmla_rot90_lane_f32 (float32x2_t __r, float32x2_t __a, float32x2_t __b, + const int __index) +{ + return __builtin_aarch64_fcmla_lane90v2sf (__r, __a, __b, __index); +} + +__extension__ extern __inline float32x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vcmla_rot90_laneq_f32 (float32x2_t __r, float32x2_t __a, float32x4_t __b, + const int __index) +{ + return __builtin_aarch64_fcmla_laneq90v2sf (__r, __a, __b, __index); +} + +__extension__ extern __inline float32x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vcmlaq_rot90_lane_f32 (float32x4_t __r, float32x4_t __a, float32x2_t __b, + const int __index) +{ + return __builtin_aarch64_fcmlaq_lane90v4sf (__r, __a, __b, __index); +} + +__extension__ extern __inline float32x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vcmlaq_rot90_laneq_f32 (float32x4_t __r, float32x4_t __a, float32x4_t __b, + const int __index) +{ + return __builtin_aarch64_fcmla_lane90v4sf (__r, __a, __b, __index); +} + +__extension__ extern __inline float32x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vcmla_rot180_f32 (float32x2_t __r, float32x2_t __a, float32x2_t __b) +{ + return __builtin_aarch64_fcmla180v2sf (__r, __a, __b); +} + +__extension__ 
extern __inline float32x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vcmlaq_rot180_f32 (float32x4_t __r, float32x4_t __a, float32x4_t __b) +{ + return __builtin_aarch64_fcmla180v4sf (__r, __a, __b); +} + +__extension__ extern __inline float64x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vcmlaq_rot180_f64 (float64x2_t __r, float64x2_t __a, float64x2_t __b) +{ + return __builtin_aarch64_fcmla180v2df (__r, __a, __b); +} + +__extension__ extern __inline float32x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vcmla_rot180_lane_f32 (float32x2_t __r, float32x2_t __a, float32x2_t __b, + const int __index) +{ + return __builtin_aarch64_fcmla_lane180v2sf (__r, __a, __b, __index); +} + +__extension__ extern __inline float32x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vcmla_rot180_laneq_f32 (float32x2_t __r, float32x2_t __a, float32x4_t __b, + const int __index) +{ + return __builtin_aarch64_fcmla_laneq180v2sf (__r, __a, __b, __index); +} + +__extension__ extern __inline float32x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vcmlaq_rot180_lane_f32 (float32x4_t __r, float32x4_t __a, float32x2_t __b, + const int __index) +{ + return __builtin_aarch64_fcmlaq_lane180v4sf (__r, __a, __b, __index); +} + +__extension__ extern __inline float32x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vcmlaq_rot180_laneq_f32 (float32x4_t __r, float32x4_t __a, float32x4_t __b, + const int __index) +{ + return __builtin_aarch64_fcmla_lane180v4sf (__r, __a, __b, __index); +} + +__extension__ extern __inline float32x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vcmla_rot270_f32 (float32x2_t __r, float32x2_t __a, float32x2_t __b) +{ + return __builtin_aarch64_fcmla270v2sf (__r, __a, __b); +} + +__extension__ extern __inline float32x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vcmlaq_rot270_f32 (float32x4_t __r, float32x4_t __a, float32x4_t __b) +{ + return __builtin_aarch64_fcmla270v4sf (__r, __a, __b); +} + +__extension__ extern __inline float64x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vcmlaq_rot270_f64 (float64x2_t __r, float64x2_t __a, float64x2_t __b) +{ + return __builtin_aarch64_fcmla270v2df (__r, __a, __b); +} + +__extension__ extern __inline float32x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vcmla_rot270_lane_f32 (float32x2_t __r, float32x2_t __a, float32x2_t __b, + const int __index) +{ + return __builtin_aarch64_fcmla_lane270v2sf (__r, __a, __b, __index); +} + +__extension__ extern __inline float32x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vcmla_rot270_laneq_f32 (float32x2_t __r, float32x2_t __a, float32x4_t __b, + const int __index) +{ + return __builtin_aarch64_fcmla_laneq270v2sf (__r, __a, __b, __index); +} + +__extension__ extern __inline float32x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vcmlaq_rot270_lane_f32 (float32x4_t __r, float32x4_t __a, float32x2_t __b, + const int __index) +{ + return __builtin_aarch64_fcmlaq_lane270v4sf (__r, __a, __b, __index); +} + +__extension__ extern __inline float32x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vcmlaq_rot270_laneq_f32 (float32x4_t __r, float32x4_t __a, float32x4_t __b, + const int __index) +{ + return __builtin_aarch64_fcmla_lane270v4sf (__r, __a, __b, __index); +} #pragma GCC pop_options 
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vector-complex.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vector-complex.c new file mode 100644 index 0000000000000000000000000000000000000000..c5c6c905284214dfabdf289789e10e5d2ee2a1a9 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vector-complex.c @@ -0,0 +1,259 @@ +/* { dg-skip-if "" { arm-*-* } } */ +/* { dg-do compile } */ +/* { dg-require-effective-target arm_v8_3a_complex_neon_ok } */ +/* { dg-add-options arm_v8_3a_complex_neon } */ +/* { dg-additional-options "-O2" } */ + +#include <arm_neon.h> + +float32x2_t +test_vcadd_rot90_f32 (float32x2_t __a, float32x2_t __b) +{ + return vcadd_rot90_f32 (__a, __b); +} + +float32x4_t +test_vcaddq_rot90_f32 (float32x4_t __a, float32x4_t __b) +{ + return vcaddq_rot90_f32 (__a, __b); +} + +#ifdef __ARM_ARCH_ISA_A64 +float64x2_t +test_vcaddq_rot90_f64 (float64x2_t __a, float64x2_t __b) +{ + return vcaddq_rot90_f64 (__a, __b); +} +#endif + +float32x2_t +test_vcadd_rot270_f32 (float32x2_t __a, float32x2_t __b) +{ + return vcadd_rot270_f32 (__a, __b); +} + +float32x4_t +test_vcaddq_rot270_f32 (float32x4_t __a, float32x4_t __b) +{ + return vcaddq_rot270_f32 (__a, __b); +} + +#ifdef __ARM_ARCH_ISA_A64 +float64x2_t +test_vcaddq_rot270_f64 (float64x2_t __a, float64x2_t __b) +{ + return vcaddq_rot270_f64 (__a, __b); +} +#endif + +float32x2_t +test_vcmla_f32 (float32x2_t __r, float32x2_t __a, float32x2_t __b) +{ + return vcmla_f32 (__r, __a, __b); +} + +float32x4_t +test_vcmlaq_f32 (float32x4_t __r, float32x4_t __a, float32x4_t __b) +{ + return vcmlaq_f32 (__r, __a, __b); +} + +#ifdef __ARM_ARCH_ISA_A64 +float64x2_t +test_vcmlaq_f64 (float64x2_t __r, float64x2_t __a, float64x2_t __b) +{ + return vcmlaq_f64 (__r, __a, __b); +} +#endif + +float32x2_t +test_vcmla_lane_f32 (float32x2_t __r, float32x2_t __a, float32x2_t __b) +{ + return vcmla_lane_f32 (__r, __a, __b, 0); +} + +float32x2_t +test_vcmla_laneq_f32 (float32x2_t __r, float32x2_t __a, float32x4_t __b) +{ + return vcmla_laneq_f32 (__r, __a, __b, 1); +} + +float32x4_t +test_vcmlaq_lane_f32 (float32x4_t __r, float32x4_t __a, float32x2_t __b) +{ + return vcmlaq_lane_f32 (__r, __a, __b, 0); +} + +float32x4_t +test_vcmlaq_laneq_f32 (float32x4_t __r, float32x4_t __a, float32x4_t __b) +{ + return vcmlaq_laneq_f32 (__r, __a, __b, 1); +} + +float32x2_t +test_vcmla_rot90_f32 (float32x2_t __r, float32x2_t __a, float32x2_t __b) +{ + return vcmla_rot90_f32 (__r, __a, __b); +} + +float32x4_t +test_vcmlaq_rot90_f32 (float32x4_t __r, float32x4_t __a, float32x4_t __b) +{ + return vcmlaq_rot90_f32 (__r, __a, __b); +} + +#ifdef __ARM_ARCH_ISA_A64 +float64x2_t +test_vcmlaq_rot90_f64 (float64x2_t __r, float64x2_t __a, float64x2_t __b) +{ + return vcmlaq_rot90_f64 (__r, __a, __b); +} +#endif + +float32x2_t +test_vcmla_rot90_lane_f32 (float32x2_t __r, float32x2_t __a, float32x2_t __b) +{ + return vcmla_rot90_lane_f32 (__r, __a, __b, 0); +} + +float32x2_t +test_vcmla_rot90_laneq_f32 (float32x2_t __r, float32x2_t __a, float32x4_t __b) +{ + return vcmla_rot90_laneq_f32 (__r, __a, __b, 1); +} + +float32x4_t +test_vcmlaq_rot90_lane_f32 (float32x4_t __r, float32x4_t __a, float32x2_t __b) +{ + return vcmlaq_rot90_lane_f32 (__r, __a, __b, 0); +} + +float32x4_t +test_vcmlaq_rot90_laneq_f32 (float32x4_t __r, float32x4_t __a, float32x4_t __b) +{ + return vcmlaq_rot90_laneq_f32 (__r, __a, __b, 1); +} + +float32x2_t +test_vcmla_rot180_f32 (float32x2_t __r, float32x2_t __a, float32x2_t __b) +{ + return vcmla_rot180_f32 (__r, __a, __b); +} 
+ +float32x4_t +test_vcmlaq_rot180_f32 (float32x4_t __r, float32x4_t __a, float32x4_t __b) +{ + return vcmlaq_rot180_f32 (__r, __a, __b); +} + +#ifdef __ARM_ARCH_ISA_A64 +float64x2_t +test_vcmlaq_rot180_f64 (float64x2_t __r, float64x2_t __a, float64x2_t __b) +{ + return vcmlaq_rot180_f64 (__r, __a, __b); +} +#endif + +float32x2_t +test_vcmla_rot180_lane_f32 (float32x2_t __r, float32x2_t __a, float32x2_t __b) +{ + return vcmla_rot180_lane_f32 (__r, __a, __b, 0); +} + +float32x2_t +test_vcmla_rot180_laneq_f32 (float32x2_t __r, float32x2_t __a, float32x4_t __b) +{ + return vcmla_rot180_laneq_f32 (__r, __a, __b, 1); +} + +float32x4_t +test_vcmlaq_rot180_lane_f32 (float32x4_t __r, float32x4_t __a, float32x2_t __b) +{ + return vcmlaq_rot180_lane_f32 (__r, __a, __b, 0); +} + +float32x4_t +test_vcmlaq_rot180_laneq_f32 (float32x4_t __r, float32x4_t __a, float32x4_t __b) +{ + return vcmlaq_rot180_laneq_f32 (__r, __a, __b, 1); +} + +float32x2_t +test_vcmla_rot270_f32 (float32x2_t __r, float32x2_t __a, float32x2_t __b) +{ + return vcmla_rot270_f32 (__r, __a, __b); +} + +float32x4_t +test_vcmlaq_rot270_f32 (float32x4_t __r, float32x4_t __a, float32x4_t __b) +{ + return vcmlaq_rot270_f32 (__r, __a, __b); +} + +#ifdef __ARM_ARCH_ISA_A64 +float64x2_t +test_vcmlaq_rot270_f64 (float64x2_t __r, float64x2_t __a, float64x2_t __b) +{ + return vcmlaq_rot270_f64 (__r, __a, __b); +} +#endif + +float32x2_t +test_vcmla_rot270_lane_f32 (float32x2_t __r, float32x2_t __a, float32x2_t __b) +{ + return vcmla_rot270_lane_f32 (__r, __a, __b, 0); +} + +float32x2_t +test_vcmla_rot270_laneq_f32 (float32x2_t __r, float32x2_t __a, float32x4_t __b) +{ + return vcmla_rot270_laneq_f32 (__r, __a, __b, 1); +} + +float32x4_t +test_vcmlaq_rot270_lane_f32 (float32x4_t __r, float32x4_t __a, float32x2_t __b) +{ + return vcmlaq_rot270_lane_f32 (__r, __a, __b, 0); +} + +float32x4_t +test_vcmlaq_rot270_laneq_f32 (float32x4_t __r, float32x4_t __a, float32x4_t __b) +{ + return vcmlaq_rot270_laneq_f32 (__r, __a, __b, 1); +} + +/* { dg-final { scan-assembler-times {fcadd\tv[0-9]+\.2d, v[0-9]+\.2d, v[0-9]+\.2d, #270} 1 { target { aarch64*-*-* } } } } */ +/* { dg-final { scan-assembler-times {fcadd\tv[0-9]+\.2d, v[0-9]+\.2d, v[0-9]+\.2d, #90} 1 { target { aarch64*-*-* } } } } */ +/* { dg-final { scan-assembler-times {fcadd\tv[0-9]+\.2s, v[0-9]+\.2s, v[0-9]+\.2s, #270} 1 { target { aarch64*-*-* } } } } */ +/* { dg-final { scan-assembler-times {fcadd\tv[0-9]+\.2s, v[0-9]+\.2s, v[0-9]+\.2s, #90} 1 { target { aarch64*-*-* } } } } */ +/* { dg-final { scan-assembler-times {fcadd\tv[0-9]+\.4s, v[0-9]+\.4s, v[0-9]+\.4s, #270} 1 { target { aarch64*-*-* } } } } */ +/* { dg-final { scan-assembler-times {fcadd\tv[0-9]+\.4s, v[0-9]+\.4s, v[0-9]+\.4s, #90} 1 { target { aarch64*-*-* } } } } */ +/* { dg-final { scan-assembler-times {fcmla\tv[0-9]+\.2d, v[0-9]+\.2d, v[0-9]+\.2d, #0} 1 { target { aarch64*-*-* } } } } */ +/* { dg-final { scan-assembler-times {fcmla\tv[0-9]+\.2d, v[0-9]+\.2d, v[0-9]+\.2d, #180} 1 { target { aarch64*-*-* } } } } */ +/* { dg-final { scan-assembler-times {fcmla\tv[0-9]+\.2d, v[0-9]+\.2d, v[0-9]+\.2d, #270} 1 { target { aarch64*-*-* } } } } */ +/* { dg-final { scan-assembler-times {fcmla\tv[0-9]+\.2d, v[0-9]+\.2d, v[0-9]+\.2d, #90} 1 { target { aarch64*-*-* } } } } */ +/* { dg-final { scan-assembler-times {fcmla\tv[0-9]+\.2s, v[0-9]+\.2s, v[0-9]+\.2s, #0} 1 { target { aarch64*-*-* } } } } */ +/* { dg-final { scan-assembler-times {fcmla\tv[0-9]+\.2s, v[0-9]+\.2s, v[0-9]+\.2s, #180} 1 { target { aarch64*-*-* } } } } */ +/* { dg-final { 
scan-assembler-times {fcmla\tv[0-9]+\.2s, v[0-9]+\.2s, v[0-9]+\.2s, #270} 1 { target { aarch64*-*-* } } } } */ +/* { dg-final { scan-assembler-times {fcmla\tv[0-9]+\.2s, v[0-9]+\.2s, v[0-9]+\.2s, #90} 1 { target { aarch64*-*-* } } } } */ +/* { dg-final { scan-assembler-times {fcmla\tv[0-9]+\.2s, v[0-9]+\.2s, v[0-9]+\.s\[1\], #0} 1 { target { aarch64*-*-* } } } } */ +/* { dg-final { scan-assembler-times {fcmla\tv[0-9]+\.2s, v[0-9]+\.2s, v[0-9]+\.s\[1\], #180} 1 { target { aarch64*-*-* } } } } */ +/* { dg-final { scan-assembler-times {fcmla\tv[0-9]+\.2s, v[0-9]+\.2s, v[0-9]+\.s\[1\], #270} 1 { target { aarch64*-*-* } } } } */ +/* { dg-final { scan-assembler-times {fcmla\tv[0-9]+\.2s, v[0-9]+\.2s, v[0-9]+\.s\[1\], #90} 1 { target { aarch64*-*-* } } } } */ +/* { dg-final { scan-assembler-times {fcmla\tv[0-9]+\.2s, v[0-9]+\.2s, v[0-9]+\.s\[0\], #0} 1 { target { aarch64*-*-* } } } } */ +/* { dg-final { scan-assembler-times {fcmla\tv[0-9]+\.2s, v[0-9]+\.2s, v[0-9]+\.s\[0\], #180} 1 { target { aarch64*-*-* } } } } */ +/* { dg-final { scan-assembler-times {fcmla\tv[0-9]+\.2s, v[0-9]+\.2s, v[0-9]+\.s\[0\], #270} 1 { target { aarch64*-*-* } } } } */ +/* { dg-final { scan-assembler-times {fcmla\tv[0-9]+\.2s, v[0-9]+\.2s, v[0-9]+\.s\[0\], #90} 1 { target { aarch64*-*-* } } } } */ +/* { dg-final { scan-assembler-times {fcmla\tv[0-9]+\.4s, v[0-9]+\.4s, v[0-9]+\.4s, #0} 1 { target { aarch64*-*-* } } } } */ +/* { dg-final { scan-assembler-times {fcmla\tv[0-9]+\.4s, v[0-9]+\.4s, v[0-9]+\.4s, #180} 1 { target { aarch64*-*-* } } } } */ +/* { dg-final { scan-assembler-times {fcmla\tv[0-9]+\.4s, v[0-9]+\.4s, v[0-9]+\.4s, #270} 1 { target { aarch64*-*-* } } } } */ +/* { dg-final { scan-assembler-times {fcmla\tv[0-9]+\.4s, v[0-9]+\.4s, v[0-9]+\.4s, #90} 1 { target { aarch64*-*-* } } } } */ +/* { dg-final { scan-assembler-times {fcmla\tv[0-9]+\.4s, v[0-9]+\.4s, v[0-9]+\.s\[0\], #0} 1 { target { aarch64*-*-* } } } } */ +/* { dg-final { scan-assembler-times {fcmla\tv[0-9]+\.4s, v[0-9]+\.4s, v[0-9]+\.s\[0\], #180} 1 { target { aarch64*-*-* } } } } */ +/* { dg-final { scan-assembler-times {fcmla\tv[0-9]+\.4s, v[0-9]+\.4s, v[0-9]+\.s\[0\], #270} 1 { target { aarch64*-*-* } } } } */ +/* { dg-final { scan-assembler-times {fcmla\tv[0-9]+\.4s, v[0-9]+\.4s, v[0-9]+\.s\[0\], #90} 1 { target { aarch64*-*-* } } } } */ +/* { dg-final { scan-assembler-times {fcmla\tv[0-9]+\.4s, v[0-9]+\.4s, v[0-9]+\.s\[1\], #0} 1 { target { aarch64*-*-* } } } } */ +/* { dg-final { scan-assembler-times {fcmla\tv[0-9]+\.4s, v[0-9]+\.4s, v[0-9]+\.s\[1\], #180} 1 { target { aarch64*-*-* } } } } */ +/* { dg-final { scan-assembler-times {fcmla\tv[0-9]+\.4s, v[0-9]+\.4s, v[0-9]+\.s\[1\], #270} 1 { target { aarch64*-*-* } } } } */ +/* { dg-final { scan-assembler-times {fcmla\tv[0-9]+\.4s, v[0-9]+\.4s, v[0-9]+\.s\[1\], #90} 1 { target { aarch64*-*-* } } } } */ + diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vector-complex_f16.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vector-complex_f16.c new file mode 100644 index 0000000000000000000000000000000000000000..ab62f03a213f303f2a4427ce7254f05f077c1ab7 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vector-complex_f16.c @@ -0,0 +1,310 @@ +/* { dg-skip-if "" { arm-*-* } } */ +/* { dg-do compile } */ +/* { dg-require-effective-target arm_v8_3a_complex_neon_ok } */ +/* { dg-require-effective-target arm_v8_2a_fp16_scalar_ok } */ +/* { dg-add-options arm_v8_3a_complex_neon } */ +/* { dg-additional-options "-O2 -march=armv8.3-a+fp16" } */ + +#include 
<arm_neon.h> + +float16x4_t +test_vcadd_rot90_f16 (float16x4_t __a, float16x4_t __b) +{ + return vcadd_rot90_f16 (__a, __b); +} + +float16x8_t +test_vcaddq_rot90_f16 (float16x8_t __a, float16x8_t __b) +{ + return vcaddq_rot90_f16 (__a, __b); +} + +float16x4_t +test_vcadd_rot270_f16 (float16x4_t __a, float16x4_t __b) +{ + return vcadd_rot270_f16 (__a, __b); +} + +float16x8_t +test_vcaddq_rot270_f16 (float16x8_t __a, float16x8_t __b) +{ + return vcaddq_rot270_f16 (__a, __b); +} + +float16x4_t +test_vcmla_f16 (float16x4_t __r, float16x4_t __a, float16x4_t __b) +{ + return vcmla_f16 (__r, __a, __b); +} + +float16x8_t +test_vcmlaq_f16 (float16x8_t __r, float16x8_t __a, float16x8_t __b) +{ + return vcmlaq_f16 (__r, __a, __b); +} + +float16x4_t +test_vcmla_lane_f16 (float16x4_t __r, float16x4_t __a, float16x4_t __b) +{ + return vcmla_lane_f16 (__r, __a, __b, 0); +} + +float16x4_t +test_vcmla_laneq_f16 (float16x4_t __r, float16x4_t __a, float16x8_t __b) +{ + return vcmla_laneq_f16 (__r, __a, __b, 0); +} + +float16x8_t +test_vcmlaq_lane_f16 (float16x8_t __r, float16x8_t __a, float16x4_t __b) +{ + return vcmlaq_lane_f16 (__r, __a, __b, 0); +} + +float16x8_t +test_vcmlaq_laneq_f16 (float16x8_t __r, float16x8_t __a, float16x8_t __b) +{ + return vcmlaq_laneq_f16 (__r, __a, __b, 0); +} + +float16x4_t +test_vcmla_lane_f16_2 (float16x4_t __r, float16x4_t __a, float16x4_t __b) +{ + return vcmla_lane_f16 (__r, __a, __b, 1); +} + +float16x4_t +test_vcmla_laneq_f16_2 (float16x4_t __r, float16x4_t __a, float16x8_t __b) +{ + return vcmla_laneq_f16 (__r, __a, __b, 3); +} + +float16x8_t +test_vcmlaq_lane_f16_2 (float16x8_t __r, float16x8_t __a, float16x4_t __b) +{ + return vcmlaq_lane_f16 (__r, __a, __b, 1); +} + +float16x8_t +test_vcmlaq_laneq_f16_2 (float16x8_t __r, float16x8_t __a, float16x8_t __b) +{ + return vcmlaq_laneq_f16 (__r, __a, __b, 3); +} + +float16x4_t +test_vcmla_rot90_f16 (float16x4_t __r, float16x4_t __a, float16x4_t __b) +{ + return vcmla_rot90_f16 (__r, __a, __b); +} + +float16x8_t +test_vcmlaq_rot90_f16 (float16x8_t __r, float16x8_t __a, float16x8_t __b) +{ + return vcmlaq_rot90_f16 (__r, __a, __b); +} + +float16x4_t +test_vcmla_rot90_lane_f16 (float16x4_t __r, float16x4_t __a, float16x4_t __b) +{ + return vcmla_rot90_lane_f16 (__r, __a, __b, 0); +} + +float16x4_t +test_vcmla_rot90_laneq_f16 (float16x4_t __r, float16x4_t __a, float16x8_t __b) +{ + return vcmla_rot90_laneq_f16 (__r, __a, __b, 0); +} + +float16x8_t +test_vcmlaq_rot90_lane_f16 (float16x8_t __r, float16x8_t __a, float16x4_t __b) +{ + return vcmlaq_rot90_lane_f16 (__r, __a, __b, 0); +} + +float16x8_t +test_vcmlaq_rot90_laneq_f16 (float16x8_t __r, float16x8_t __a, float16x8_t __b) +{ + return vcmlaq_rot90_laneq_f16 (__r, __a, __b, 0); +} + +float16x4_t +test_vcmla_rot90_lane_f16_2 (float16x4_t __r, float16x4_t __a, float16x4_t __b) +{ + return vcmla_rot90_lane_f16 (__r, __a, __b, 1); +} + +float16x4_t +test_vcmla_rot90_laneq_f16_2 (float16x4_t __r, float16x4_t __a, float16x8_t __b) +{ + return vcmla_rot90_laneq_f16 (__r, __a, __b, 3); +} + +float16x8_t +test_vcmlaq_rot90_lane_f16_2 (float16x8_t __r, float16x8_t __a, float16x4_t __b) +{ + return vcmlaq_rot90_lane_f16 (__r, __a, __b, 1); +} + +float16x8_t +test_vcmlaq_rot90_laneq_f16_2 (float16x8_t __r, float16x8_t __a, float16x8_t __b) +{ + return vcmlaq_rot90_laneq_f16 (__r, __a, __b, 3); +} + +float16x4_t +test_vcmla_rot180_f16 (float16x4_t __r, float16x4_t __a, float16x4_t __b) +{ + return vcmla_rot180_f16 (__r, __a, __b); +} + +float16x8_t +test_vcmlaq_rot180_f16 (float16x8_t 
__r, float16x8_t __a, float16x8_t __b) +{ + return vcmlaq_rot180_f16 (__r, __a, __b); +} + +float16x4_t +test_vcmla_rot180_lane_f16 (float16x4_t __r, float16x4_t __a, float16x4_t __b) +{ + return vcmla_rot180_lane_f16 (__r, __a, __b, 0); +} + +float16x4_t +test_vcmla_rot180_laneq_f16 (float16x4_t __r, float16x4_t __a, float16x8_t __b) +{ + return vcmla_rot180_laneq_f16 (__r, __a, __b, 0); +} + +float16x8_t +test_vcmlaq_rot180_lane_f16 (float16x8_t __r, float16x8_t __a, float16x4_t __b) +{ + return vcmlaq_rot180_lane_f16 (__r, __a, __b, 0); +} + +float16x8_t +test_vcmlaq_rot180_laneq_f16 (float16x8_t __r, float16x8_t __a, float16x8_t __b) +{ + return vcmlaq_rot180_laneq_f16 (__r, __a, __b, 0); +} + +float16x4_t +test_vcmla_rot180_lane_f16_2 (float16x4_t __r, float16x4_t __a, float16x4_t __b) +{ + return vcmla_rot180_lane_f16 (__r, __a, __b, 1); +} + +float16x4_t +test_vcmla_rot180_laneq_f16_2 (float16x4_t __r, float16x4_t __a, float16x8_t __b) +{ + return vcmla_rot180_laneq_f16 (__r, __a, __b, 3); +} + +float16x8_t +test_vcmlaq_rot180_lane_f16_2 (float16x8_t __r, float16x8_t __a, float16x4_t __b) +{ + return vcmlaq_rot180_lane_f16 (__r, __a, __b, 1); +} + +float16x8_t +test_vcmlaq_rot180_laneq_f16_2 (float16x8_t __r, float16x8_t __a, float16x8_t __b) +{ + return vcmlaq_rot180_laneq_f16 (__r, __a, __b, 3); +} + +float16x4_t +test_vcmla_rot270_f16 (float16x4_t __r, float16x4_t __a, float16x4_t __b) +{ + return vcmla_rot270_f16 (__r, __a, __b); +} + +float16x8_t +test_vcmlaq_rot270_f16 (float16x8_t __r, float16x8_t __a, float16x8_t __b) +{ + return vcmlaq_rot270_f16 (__r, __a, __b); +} + +float16x4_t +test_vcmla_rot270_lane_f16 (float16x4_t __r, float16x4_t __a, float16x4_t __b) +{ + return vcmla_rot270_lane_f16 (__r, __a, __b, 0); +} + +float16x4_t +test_vcmla_rot270_laneq_f16 (float16x4_t __r, float16x4_t __a, float16x8_t __b) +{ + return vcmla_rot270_laneq_f16 (__r, __a, __b, 0); +} + +float16x8_t +test_vcmlaq_rot270_lane_f16 (float16x8_t __r, float16x8_t __a, float16x4_t __b) +{ + return vcmlaq_rot270_lane_f16 (__r, __a, __b, 0); +} + +float16x8_t +test_vcmlaq_rot270_laneq_f16 (float16x8_t __r, float16x8_t __a, float16x8_t __b) +{ + return vcmlaq_rot270_laneq_f16 (__r, __a, __b, 0); +} + +float16x4_t +test_vcmla_rot270_lane_f16_2 (float16x4_t __r, float16x4_t __a, float16x4_t __b) +{ + return vcmla_rot270_lane_f16 (__r, __a, __b, 1); +} + +float16x4_t +test_vcmla_rot270_laneq_f16_2 (float16x4_t __r, float16x4_t __a, float16x8_t __b) +{ + return vcmla_rot270_laneq_f16 (__r, __a, __b, 3); +} + +float16x8_t +test_vcmlaq_rot270_lane_f16_2 (float16x8_t __r, float16x8_t __a, float16x4_t __b) +{ + return vcmlaq_rot270_lane_f16 (__r, __a, __b, 1); +} + +float16x8_t +test_vcmlaq_rot270_laneq_f16_2 (float16x8_t __r, float16x8_t __a, float16x8_t __b) +{ + return vcmlaq_rot270_laneq_f16 (__r, __a, __b, 3); +} + +/* { dg-final { scan-assembler-times {fcadd\tv[0-9]+\.4h, v[0-9]+\.4h, v[0-9]+\.4h, #270} 1 { target { aarch64*-*-* } } } } */ +/* { dg-final { scan-assembler-times {fcadd\tv[0-9]+\.4h, v[0-9]+\.4h, v[0-9]+\.4h, #90} 1 { target { aarch64*-*-* } } } } */ +/* { dg-final { scan-assembler-times {fcadd\tv[0-9]+\.8h, v[0-9]+\.8h, v[0-9]+\.8h, #270} 1 { target { aarch64*-*-* } } } } */ +/* { dg-final { scan-assembler-times {fcadd\tv[0-9]+\.8h, v[0-9]+\.8h, v[0-9]+\.8h, #90} 1 { target { aarch64*-*-* } } } } */ +/* { dg-final { scan-assembler-times {fcmla\tv[0-9]+\.4h, v[0-9]+\.4h, v[0-9]+\.4h, #0} 1 { target { aarch64*-*-* } } } } */ +/* { dg-final { scan-assembler-times {fcmla\tv[0-9]+\.4h, 
v[0-9]+\.4h, v[0-9]+\.4h, #180} 1 { target { aarch64*-*-* } } } } */ +/* { dg-final { scan-assembler-times {fcmla\tv[0-9]+\.4h, v[0-9]+\.4h, v[0-9]+\.4h, #270} 1 { target { aarch64*-*-* } } } } */ +/* { dg-final { scan-assembler-times {fcmla\tv[0-9]+\.4h, v[0-9]+\.4h, v[0-9]+\.4h, #90} 1 { target { aarch64*-*-* } } } } */ +/* { dg-final { scan-assembler-times {fcmla\tv[0-9]+\.4h, v[0-9]+\.4h, v[0-9]+\.h\[0\], #0} 2 { target { aarch64*-*-* } } } } */ +/* { dg-final { scan-assembler-times {fcmla\tv[0-9]+\.4h, v[0-9]+\.4h, v[0-9]+\.h\[0\], #180} 2 { target { aarch64*-*-* } } } } */ +/* { dg-final { scan-assembler-times {fcmla\tv[0-9]+\.4h, v[0-9]+\.4h, v[0-9]+\.h\[0\], #270} 2 { target { aarch64*-*-* } } } } */ +/* { dg-final { scan-assembler-times {fcmla\tv[0-9]+\.4h, v[0-9]+\.4h, v[0-9]+\.h\[0\], #90} 2 { target { aarch64*-*-* } } } } */ +/* { dg-final { scan-assembler-times {fcmla\tv[0-9]+\.4h, v[0-9]+\.4h, v[0-9]+\.h\[1\], #0} 1 { target { aarch64*-*-* } } } } */ +/* { dg-final { scan-assembler-times {fcmla\tv[0-9]+\.4h, v[0-9]+\.4h, v[0-9]+\.h\[1\], #180} 1 { target { aarch64*-*-* } } } } */ +/* { dg-final { scan-assembler-times {fcmla\tv[0-9]+\.4h, v[0-9]+\.4h, v[0-9]+\.h\[1\], #270} 1 { target { aarch64*-*-* } } } } */ +/* { dg-final { scan-assembler-times {fcmla\tv[0-9]+\.4h, v[0-9]+\.4h, v[0-9]+\.h\[1\], #90} 1 { target { aarch64*-*-* } } } } */ +/* { dg-final { scan-assembler-times {fcmla\tv[0-9]+\.4h, v[0-9]+\.4h, v[0-9]+\.h\[3\], #0} 1 { target { aarch64*-*-* } } } } */ +/* { dg-final { scan-assembler-times {fcmla\tv[0-9]+\.4h, v[0-9]+\.4h, v[0-9]+\.h\[3\], #180} 1 { target { aarch64*-*-* } } } } */ +/* { dg-final { scan-assembler-times {fcmla\tv[0-9]+\.4h, v[0-9]+\.4h, v[0-9]+\.h\[3\], #270} 1 { target { aarch64*-*-* } } } } */ +/* { dg-final { scan-assembler-times {fcmla\tv[0-9]+\.4h, v[0-9]+\.4h, v[0-9]+\.h\[3\], #90} 1 { target { aarch64*-*-* } } } } */ +/* { dg-final { scan-assembler-times {fcmla\tv[0-9]+\.8h, v[0-9]+\.8h, v[0-9]+\.8h, #0} 1 { target { aarch64*-*-* } } } } */ +/* { dg-final { scan-assembler-times {fcmla\tv[0-9]+\.8h, v[0-9]+\.8h, v[0-9]+\.8h, #180} 1 { target { aarch64*-*-* } } } } */ +/* { dg-final { scan-assembler-times {fcmla\tv[0-9]+\.8h, v[0-9]+\.8h, v[0-9]+\.8h, #270} 1 { target { aarch64*-*-* } } } } */ +/* { dg-final { scan-assembler-times {fcmla\tv[0-9]+\.8h, v[0-9]+\.8h, v[0-9]+\.8h, #90} 1 { target { aarch64*-*-* } } } } */ +/* { dg-final { scan-assembler-times {fcmla\tv[0-9]+\.8h, v[0-9]+\.8h, v[0-9]+\.h\[0\], #0} 2 { target { aarch64*-*-* } } } } */ +/* { dg-final { scan-assembler-times {fcmla\tv[0-9]+\.8h, v[0-9]+\.8h, v[0-9]+\.h\[0\], #180} 2 { target { aarch64*-*-* } } } } */ +/* { dg-final { scan-assembler-times {fcmla\tv[0-9]+\.8h, v[0-9]+\.8h, v[0-9]+\.h\[0\], #270} 2 { target { aarch64*-*-* } } } } */ +/* { dg-final { scan-assembler-times {fcmla\tv[0-9]+\.8h, v[0-9]+\.8h, v[0-9]+\.h\[0\], #90} 2 { target { aarch64*-*-* } } } } */ +/* { dg-final { scan-assembler-times {fcmla\tv[0-9]+\.8h, v[0-9]+\.8h, v[0-9]+\.h\[1\], #0} 1 { target { aarch64*-*-* } } } } */ +/* { dg-final { scan-assembler-times {fcmla\tv[0-9]+\.8h, v[0-9]+\.8h, v[0-9]+\.h\[1\], #180} 1 { target { aarch64*-*-* } } } } */ +/* { dg-final { scan-assembler-times {fcmla\tv[0-9]+\.8h, v[0-9]+\.8h, v[0-9]+\.h\[1\], #270} 1 { target { aarch64*-*-* } } } } */ +/* { dg-final { scan-assembler-times {fcmla\tv[0-9]+\.8h, v[0-9]+\.8h, v[0-9]+\.h\[1\], #90} 1 { target { aarch64*-*-* } } } } */ +/* { dg-final { scan-assembler-times {fcmla\tv[0-9]+\.8h, v[0-9]+\.8h, v[0-9]+\.h\[3\], #0} 1 { 
target { aarch64*-*-* } } } } */ +/* { dg-final { scan-assembler-times {fcmla\tv[0-9]+\.8h, v[0-9]+\.8h, v[0-9]+\.h\[3\], #180} 1 { target { aarch64*-*-* } } } } */ +/* { dg-final { scan-assembler-times {fcmla\tv[0-9]+\.8h, v[0-9]+\.8h, v[0-9]+\.h\[3\], #270} 1 { target { aarch64*-*-* } } } } */ +/* { dg-final { scan-assembler-times {fcmla\tv[0-9]+\.8h, v[0-9]+\.8h, v[0-9]+\.h\[3\], #90} 1 { target { aarch64*-*-* } } } } */ +