Hi All,

This patch adds NEON intrinsics and tests for the Armv8.3-A complex multiplication and addition instructions, which take a rotation along the Argand plane.
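For reviewers unfamiliar with the instructions, the intended usage pattern is roughly the following (an illustrative sketch, not part of the patch; compiled with -march=armv8.3-a): a full complex multiply-accumulate is obtained by issuing the #0 and #90 rotation forms back to back.

#include <arm_neon.h>

/* acc += a * b for complex numbers stored as { real, imag } pairs.
   The #0 rotation contributes the products with the real part of a,
   the #90 rotation those with the imaginary part of a.  */
float32x2_t
complex_mla (float32x2_t acc, float32x2_t a, float32x2_t b)
{
  acc = vcmla_f32 (acc, a, b);
  acc = vcmla_rot90_f32 (acc, a, b);
  return acc;
}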
The instructions are documented in the ArmARM [1] and the intrinsics specification will be published on the Arm website [2].

The lane versions of these instructions are special in that they always select a pair of lanes: index 0 selects lanes 0 and 1.  Because of this the range check for these intrinsics requires special handling (see the short example after the ChangeLog below).

[1] https://developer.arm.com/docs/ddi0487/latest/arm-architecture-reference-manual-armv8-for-armv8-a-architecture-profile
[2] https://developer.arm.com/docs/101028/latest

Bootstrapped and regtested on aarch64-none-linux-gnu with no issues.

Ok for trunk?

Thanks,
Tamar

gcc/ChangeLog:

2018-12-11  Tamar Christina  <tamar.christ...@arm.com>

	* config/aarch64/aarch64-builtins.c (enum aarch64_type_qualifiers):
	Add qualifier_lane_pair_index.
	(TYPES_QUADOP_LANE_PAIR): New.
	(aarch64_simd_expand_args): Use it.
	(aarch64_simd_expand_builtin): Likewise.
	* config/aarch64/aarch64-c.c (aarch64_update_cpp_builtins): Add
	__ARM_FEATURE_COMPLEX.
	* config/aarch64/aarch64-simd-builtins.def (fcadd90, fcadd270,
	fcmla0, fcmla90, fcmla180, fcmla270, fcmla_lane0, fcmla_lane90,
	fcmla_lane180, fcmla_lane270, fcmla_laneq0, fcmla_laneq90,
	fcmla_laneq180, fcmla_laneq270, fcmlaq_lane0, fcmlaq_lane90,
	fcmlaq_lane180, fcmlaq_lane270): New.
	* config/aarch64/aarch64-simd.md (aarch64_fcmla_lane<rot><mode>,
	aarch64_fcmla_laneq<rot><mode>, aarch64_fcmlaq_lane<rot><mode>): New.
	* config/aarch64/arm_neon.h (vcadd_rot90_f16): New.
	(vcaddq_rot90_f16): New.
	(vcadd_rot270_f16): New.
	(vcaddq_rot270_f16): New.
	(vcmla_f16): New.
	(vcmlaq_f16): New.
	(vcmla_lane_f16): New.
	(vcmla_laneq_f16): New.
	(vcmlaq_lane_f16): New.
	(vcmlaq_rot90_lane_f16): New.
	(vcmla_rot90_laneq_f16): New.
	(vcmla_rot90_lane_f16): New.
	(vcmlaq_rot90_f16): New.
	(vcmla_rot90_f16): New.
	(vcmlaq_laneq_f16): New.
	(vcmla_rot180_laneq_f16): New.
	(vcmla_rot180_lane_f16): New.
	(vcmlaq_rot180_f16): New.
	(vcmla_rot180_f16): New.
	(vcmlaq_rot90_laneq_f16): New.
	(vcmlaq_rot270_laneq_f16): New.
	(vcmlaq_rot270_lane_f16): New.
	(vcmla_rot270_laneq_f16): New.
	(vcmlaq_rot270_f16): New.
	(vcmla_rot270_f16): New.
	(vcmlaq_rot180_laneq_f16): New.
	(vcmlaq_rot180_lane_f16): New.
	(vcmla_rot270_lane_f16): New.
	(vcadd_rot90_f32): New.
	(vcaddq_rot90_f32): New.
	(vcaddq_rot90_f64): New.
	(vcadd_rot270_f32): New.
	(vcaddq_rot270_f32): New.
	(vcaddq_rot270_f64): New.
	(vcmla_f32): New.
	(vcmlaq_f32): New.
	(vcmlaq_f64): New.
	(vcmla_lane_f32): New.
	(vcmla_laneq_f32): New.
	(vcmlaq_lane_f32): New.
	(vcmlaq_laneq_f32): New.
	(vcmla_rot90_f32): New.
	(vcmlaq_rot90_f32): New.
	(vcmlaq_rot90_f64): New.
	(vcmla_rot90_lane_f32): New.
	(vcmla_rot90_laneq_f32): New.
	(vcmlaq_rot90_lane_f32): New.
	(vcmlaq_rot90_laneq_f32): New.
	(vcmla_rot180_f32): New.
	(vcmlaq_rot180_f32): New.
	(vcmlaq_rot180_f64): New.
	(vcmla_rot180_lane_f32): New.
	(vcmla_rot180_laneq_f32): New.
	(vcmlaq_rot180_lane_f32): New.
	(vcmlaq_rot180_laneq_f32): New.
	(vcmla_rot270_f32): New.
	(vcmlaq_rot270_f32): New.
	(vcmlaq_rot270_f64): New.
	(vcmla_rot270_lane_f32): New.
	(vcmla_rot270_laneq_f32): New.
	(vcmlaq_rot270_lane_f32): New.
	(vcmlaq_rot270_laneq_f32): New.

gcc/testsuite/ChangeLog:

2018-12-11  Tamar Christina  <tamar.christ...@arm.com>

	* gcc.target/aarch64/advsimd-intrinsics/vector-complex.c: New test.
	* gcc.target/aarch64/advsimd-intrinsics/vector-complex_f16.c: New test.

--
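To make the pair selection concrete, here is a small example (not part of the patch; the function name is just for illustration, compiled with -march=armv8.3-a+fp16) showing how the lane index maps onto element pairs and why the bounds check uses nunits / 2:

#include <arm_neon.h>

/* __b holds two complex values: elements {0,1} and {2,3}.  The lane
   index therefore ranges over nunits / 2, i.e. 0 or 1 here, and
   selects a real/imaginary pair rather than a single element.
   An index of 2 would be rejected by the adjusted range check.  */
float16x4_t
use_second_pair (float16x4_t __r, float16x4_t __a, float16x4_t __b)
{
  return vcmla_lane_f16 (__r, __a, __b, 1); /* Pair in elements 2 and 3.  */
}
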
diff --git a/gcc/config/aarch64/aarch64-builtins.c b/gcc/config/aarch64/aarch64-builtins.c index 8cced94567008e28b1761ec8771589a3925f2904..aaf18a909828b3eeac9d3b676f429923609972a3 100644 --- a/gcc/config/aarch64/aarch64-builtins.c +++ b/gcc/config/aarch64/aarch64-builtins.c @@ -102,7 +102,10 @@ enum aarch64_type_qualifiers /* Lane indices - must be in range, and flipped for bigendian. */ qualifier_lane_index = 0x200, /* Lane indices for single lane structure loads and stores. */ - qualifier_struct_load_store_lane_index = 0x400 + qualifier_struct_load_store_lane_index = 0x400, + /* Lane indices selected in pairs. - must be in range, and flipped for + bigendian. */ + qualifier_lane_pair_index = 0x800, }; typedef struct @@ -171,6 +174,11 @@ aarch64_types_ternopu_imm_qualifiers[SIMD_MAX_BUILTIN_ARGS] #define TYPES_TERNOPUI (aarch64_types_ternopu_imm_qualifiers) +static enum aarch64_type_qualifiers +aarch64_types_quadop_lane_pair_qualifiers[SIMD_MAX_BUILTIN_ARGS] + = { qualifier_none, qualifier_none, qualifier_none, + qualifier_none, qualifier_lane_pair_index }; +#define TYPES_QUADOP_LANE_PAIR (aarch64_types_quadop_lane_pair_qualifiers) static enum aarch64_type_qualifiers aarch64_types_quadop_lane_qualifiers[SIMD_MAX_BUILTIN_ARGS] = { qualifier_none, qualifier_none, qualifier_none, @@ -1031,6 +1039,7 @@ typedef enum SIMD_ARG_CONSTANT, SIMD_ARG_LANE_INDEX, SIMD_ARG_STRUCT_LOAD_STORE_LANE_INDEX, + SIMD_ARG_LANE_PAIR_INDEX, SIMD_ARG_STOP } builtin_simd_arg; @@ -1102,6 +1111,22 @@ aarch64_simd_expand_args (rtx target, int icode, int have_retval, /* Keep to GCC-vector-extension lane indices in the RTL. */ op[opc] = aarch64_endian_lane_rtx (vmode, INTVAL (op[opc])); } + /* If the lane index isn't a constant then error out. */ + goto constant_arg; + + case SIMD_ARG_LANE_PAIR_INDEX: + /* Must be a previous operand into which this is an index and + index is restricted to nunits / 2. */ + gcc_assert (opc > 0); + if (CONST_INT_P (op[opc])) + { + machine_mode vmode = insn_data[icode].operand[opc - 1].mode; + unsigned int nunits + = GET_MODE_NUNITS (vmode).to_constant (); + aarch64_simd_lane_bounds (op[opc], 0, nunits / 2, exp); + /* Keep to GCC-vector-extension lane indices in the RTL. */ + op[opc] = aarch64_endian_lane_rtx (vmode, INTVAL (op[opc])); + } /* Fall through - if the lane index isn't a constant then the next case will error. 
*/ /* FALLTHRU */ @@ -1215,6 +1240,8 @@ aarch64_simd_expand_builtin (int fcode, tree exp, rtx target) if (d->qualifiers[qualifiers_k] & qualifier_lane_index) args[k] = SIMD_ARG_LANE_INDEX; + else if (d->qualifiers[qualifiers_k] & qualifier_lane_pair_index) + args[k] = SIMD_ARG_LANE_PAIR_INDEX; else if (d->qualifiers[qualifiers_k] & qualifier_struct_load_store_lane_index) args[k] = SIMD_ARG_STRUCT_LOAD_STORE_LANE_INDEX; else if (d->qualifiers[qualifiers_k] & qualifier_immediate) diff --git a/gcc/config/aarch64/aarch64-c.c b/gcc/config/aarch64/aarch64-c.c index 40c738c7c3b0fc09378dd8058f09e4e4fff33a6a..39943e3a930972d5019162c0e8b7c26e059f71f6 100644 --- a/gcc/config/aarch64/aarch64-c.c +++ b/gcc/config/aarch64/aarch64-c.c @@ -109,6 +109,7 @@ aarch64_update_cpp_builtins (cpp_reader *pfile) aarch64_def_or_undef (TARGET_CRC32, "__ARM_FEATURE_CRC32", pfile); aarch64_def_or_undef (TARGET_DOTPROD, "__ARM_FEATURE_DOTPROD", pfile); + aarch64_def_or_undef (TARGET_COMPLEX, "__ARM_FEATURE_COMPLEX", pfile); cpp_undef (pfile, "__AARCH64_CMODEL_TINY__"); cpp_undef (pfile, "__AARCH64_CMODEL_SMALL__"); diff --git a/gcc/config/aarch64/aarch64-simd-builtins.def b/gcc/config/aarch64/aarch64-simd-builtins.def index 980c90351b36630d7bcf6b8f5c0ff11d081665e7..90c8372c01dc8119c117d4c2590117e320a5b788 100644 --- a/gcc/config/aarch64/aarch64-simd-builtins.def +++ b/gcc/config/aarch64/aarch64-simd-builtins.def @@ -217,6 +217,30 @@ BUILTIN_VB (QUADOP_LANE, sdot_laneq, 0) BUILTIN_VB (QUADOPU_LANE, udot_laneq, 0) + /* Implemented by aarch64_fcadd<rot><mode>. */ + BUILTIN_VHSDF (BINOP, fcadd90, 0) + BUILTIN_VHSDF (BINOP, fcadd270, 0) + + /* Implemented by aarch64_fcmla{_lane}{q}<rot><mode>. */ + BUILTIN_VHSDF (TERNOP, fcmla0, 0) + BUILTIN_VHSDF (TERNOP, fcmla90, 0) + BUILTIN_VHSDF (TERNOP, fcmla180, 0) + BUILTIN_VHSDF (TERNOP, fcmla270, 0) + BUILTIN_VHSDF (QUADOP_LANE_PAIR, fcmla_lane0, 0) + BUILTIN_VHSDF (QUADOP_LANE_PAIR, fcmla_lane90, 0) + BUILTIN_VHSDF (QUADOP_LANE_PAIR, fcmla_lane180, 0) + BUILTIN_VHSDF (QUADOP_LANE_PAIR, fcmla_lane270, 0) + + BUILTIN_VDF (QUADOP_LANE_PAIR, fcmla_laneq0, 0) + BUILTIN_VDF (QUADOP_LANE_PAIR, fcmla_laneq90, 0) + BUILTIN_VDF (QUADOP_LANE_PAIR, fcmla_laneq180, 0) + BUILTIN_VDF (QUADOP_LANE_PAIR, fcmla_laneq270, 0) + + BUILTIN_VQ_HSF (QUADOP_LANE_PAIR, fcmlaq_lane0, 0) + BUILTIN_VQ_HSF (QUADOP_LANE_PAIR, fcmlaq_lane90, 0) + BUILTIN_VQ_HSF (QUADOP_LANE_PAIR, fcmlaq_lane180, 0) + BUILTIN_VQ_HSF (QUADOP_LANE_PAIR, fcmlaq_lane270, 0) + BUILTIN_VDQ_I (SHIFTIMM, ashr, 3) VAR1 (SHIFTIMM, ashr_simd, 0, di) BUILTIN_VDQ_I (SHIFTIMM, lshr, 3) diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md index bd5fc199e4fc7b8452aa728333fc1d0e6117da51..7e118f95deb033e40548ad2980988be8668f1fa0 100644 --- a/gcc/config/aarch64/aarch64-simd.md +++ b/gcc/config/aarch64/aarch64-simd.md @@ -476,6 +476,42 @@ DONE; }) + +(define_insn "aarch64_fcmla_lane<rot><mode>" + [(set (match_operand:VHSDF 0 "register_operand" "=w") + (plus:VHSDF (match_operand:VHSDF 1 "register_operand" "0") + (unspec:VHSDF [(match_operand:VHSDF 2 "register_operand" "w") + (match_operand:VHSDF 3 "register_operand" "w") + (match_operand:SI 4 "const_int_operand" "n")] + FCMLA)))] + "TARGET_COMPLEX" + "fcmla\t%0.<Vtype>, %2.<Vtype>, %3.<Vetype>[%4], #<rot>" + [(set_attr "type" "neon_fcmla")] +) + +(define_insn "aarch64_fcmla_laneq<rot><mode>" + [(set (match_operand:VDF 0 "register_operand" "=w") + (plus:VDF (match_operand:VDF 1 "register_operand" "0") + (unspec:VDF [(match_operand:VDF 2 "register_operand" "w") + 
(match_operand:<VDBL> 3 "register_operand" "w") + (match_operand:SI 4 "const_int_operand" "n")] + FCMLA)))] + "TARGET_COMPLEX" + "fcmla\t%0.<Vtype>, %2.<Vtype>, %3.<Vetype>[%4], #<rot>" + [(set_attr "type" "neon_fcmla")] +) + +(define_insn "aarch64_fcmlaq_lane<rot><mode>" + [(set (match_operand:VQ_HSF 0 "register_operand" "=w") + (plus:VQ_HSF (match_operand:VQ_HSF 1 "register_operand" "0") + (unspec:VQ_HSF [(match_operand:VQ_HSF 2 "register_operand" "w") + (match_operand:<VHALF> 3 "register_operand" "w") + (match_operand:SI 4 "const_int_operand" "n")] + FCMLA)))] + "TARGET_COMPLEX" + "fcmla\t%0.<Vtype>, %2.<Vtype>, %3.<Vetype>[%4], #<rot>" + [(set_attr "type" "neon_fcmla")] +) ;; These instructions map to the __builtins for the Dot Product operations. (define_insn "aarch64_<sur>dot<vsi2qi>" [(set (match_operand:VS 0 "register_operand" "=w") diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h index fc734e1aa9e93c171c0670164e5a3a54209905d3..32e70e31c369184232502704d69b74029228e49c 100644 --- a/gcc/config/aarch64/arm_neon.h +++ b/gcc/config/aarch64/arm_neon.h @@ -33294,6 +33294,481 @@ vbcaxq_s64 (int64x2_t __a, int64x2_t __b, int64x2_t __c) return __builtin_aarch64_bcaxqv2di (__a, __b, __c); } +#pragma GCC pop_options + +/* AdvSIMD Complex numbers intrinsics. */ + +#pragma GCC push_options +#pragma GCC target(("arch=armv8.3-a")) + +#pragma GCC push_options +#pragma GCC target(("+fp16")) +__extension__ extern __inline float16x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vcadd_rot90_f16 (float16x4_t __a, float16x4_t __b) +{ + return __builtin_aarch64_fcadd90v4hf (__a, __b); +} + +__extension__ extern __inline float16x8_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vcaddq_rot90_f16 (float16x8_t __a, float16x8_t __b) +{ + return __builtin_aarch64_fcadd90v8hf (__a, __b); +} + +__extension__ extern __inline float16x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vcadd_rot270_f16 (float16x4_t __a, float16x4_t __b) +{ + return __builtin_aarch64_fcadd270v4hf (__a, __b); +} + +__extension__ extern __inline float16x8_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vcaddq_rot270_f16 (float16x8_t __a, float16x8_t __b) +{ + return __builtin_aarch64_fcadd270v8hf (__a, __b); +} + +__extension__ extern __inline float16x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vcmla_f16 (float16x4_t __r, float16x4_t __a, float16x4_t __b) +{ + return __builtin_aarch64_fcmla0v4hf (__r, __a, __b); +} + +__extension__ extern __inline float16x8_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vcmlaq_f16 (float16x8_t __r, float16x8_t __a, float16x8_t __b) +{ + return __builtin_aarch64_fcmla0v8hf (__r, __a, __b); +} + +__extension__ extern __inline float16x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vcmla_lane_f16 (float16x4_t __r, float16x4_t __a, float16x4_t __b, + const int __index) +{ + return __builtin_aarch64_fcmla_lane0v4hf (__r, __a, __b, __index); +} + +__extension__ extern __inline float16x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vcmla_laneq_f16 (float16x4_t __r, float16x4_t __a, float16x8_t __b, + const int __index) +{ + return __builtin_aarch64_fcmla_laneq0v4hf (__r, __a, __b, __index); +} + +__extension__ extern __inline float16x8_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vcmlaq_lane_f16 (float16x8_t __r, float16x8_t __a, float16x4_t __b, + const 
int __index) +{ + return __builtin_aarch64_fcmlaq_lane0v8hf (__r, __a, __b, __index); +} + +__extension__ extern __inline float16x8_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vcmlaq_rot90_lane_f16 (float16x8_t __r, float16x8_t __a, float16x4_t __b, + const int __index) +{ + return __builtin_aarch64_fcmlaq_lane90v8hf (__r, __a, __b, __index); +} + +__extension__ extern __inline float16x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vcmla_rot90_laneq_f16 (float16x4_t __r, float16x4_t __a, float16x8_t __b, + const int __index) +{ + return __builtin_aarch64_fcmla_laneq90v4hf (__r, __a, __b, __index); +} + +__extension__ extern __inline float16x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vcmla_rot90_lane_f16 (float16x4_t __r, float16x4_t __a, float16x4_t __b, + const int __index) +{ + return __builtin_aarch64_fcmla_lane90v4hf (__r, __a, __b, __index); +} + +__extension__ extern __inline float16x8_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vcmlaq_rot90_f16 (float16x8_t __r, float16x8_t __a, float16x8_t __b) +{ + return __builtin_aarch64_fcmla90v8hf (__r, __a, __b); +} + +__extension__ extern __inline float16x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vcmla_rot90_f16 (float16x4_t __r, float16x4_t __a, float16x4_t __b) +{ + return __builtin_aarch64_fcmla90v4hf (__r, __a, __b); +} + +__extension__ extern __inline float16x8_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vcmlaq_laneq_f16 (float16x8_t __r, float16x8_t __a, float16x8_t __b, + const int __index) +{ + return __builtin_aarch64_fcmla_lane0v8hf (__r, __a, __b, __index); +} + +__extension__ extern __inline float16x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vcmla_rot180_laneq_f16 (float16x4_t __r, float16x4_t __a, float16x8_t __b, + const int __index) +{ + return __builtin_aarch64_fcmla_laneq180v4hf (__r, __a, __b, __index); +} + +__extension__ extern __inline float16x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vcmla_rot180_lane_f16 (float16x4_t __r, float16x4_t __a, float16x4_t __b, + const int __index) +{ + return __builtin_aarch64_fcmla_lane180v4hf (__r, __a, __b, __index); +} + +__extension__ extern __inline float16x8_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vcmlaq_rot180_f16 (float16x8_t __r, float16x8_t __a, float16x8_t __b) +{ + return __builtin_aarch64_fcmla180v8hf (__r, __a, __b); +} + +__extension__ extern __inline float16x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vcmla_rot180_f16 (float16x4_t __r, float16x4_t __a, float16x4_t __b) +{ + return __builtin_aarch64_fcmla180v4hf (__r, __a, __b); +} + +__extension__ extern __inline float16x8_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vcmlaq_rot90_laneq_f16 (float16x8_t __r, float16x8_t __a, float16x8_t __b, + const int __index) +{ + return __builtin_aarch64_fcmla_lane90v8hf (__r, __a, __b, __index); +} + +__extension__ extern __inline float16x8_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vcmlaq_rot270_laneq_f16 (float16x8_t __r, float16x8_t __a, float16x8_t __b, + const int __index) +{ + return __builtin_aarch64_fcmla_lane270v8hf (__r, __a, __b, __index); +} + +__extension__ extern __inline float16x8_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vcmlaq_rot270_lane_f16 (float16x8_t __r, float16x8_t __a, float16x4_t 
__b, + const int __index) +{ + return __builtin_aarch64_fcmlaq_lane270v8hf (__r, __a, __b, __index); +} + +__extension__ extern __inline float16x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vcmla_rot270_laneq_f16 (float16x4_t __r, float16x4_t __a, float16x8_t __b, + const int __index) +{ + return __builtin_aarch64_fcmla_laneq270v4hf (__r, __a, __b, __index); +} + +__extension__ extern __inline float16x8_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vcmlaq_rot270_f16 (float16x8_t __r, float16x8_t __a, float16x8_t __b) +{ + return __builtin_aarch64_fcmla270v8hf (__r, __a, __b); +} + +__extension__ extern __inline float16x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vcmla_rot270_f16 (float16x4_t __r, float16x4_t __a, float16x4_t __b) +{ + return __builtin_aarch64_fcmla270v4hf (__r, __a, __b); +} + +__extension__ extern __inline float16x8_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vcmlaq_rot180_laneq_f16 (float16x8_t __r, float16x8_t __a, float16x8_t __b, + const int __index) +{ + return __builtin_aarch64_fcmla_lane180v8hf (__r, __a, __b, __index); +} + +__extension__ extern __inline float16x8_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vcmlaq_rot180_lane_f16 (float16x8_t __r, float16x8_t __a, float16x4_t __b, + const int __index) +{ + return __builtin_aarch64_fcmlaq_lane180v8hf (__r, __a, __b, __index); +} + +__extension__ extern __inline float16x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vcmla_rot270_lane_f16 (float16x4_t __r, float16x4_t __a, float16x4_t __b, + const int __index) +{ + return __builtin_aarch64_fcmla_lane270v4hf (__r, __a, __b, __index); +} +#pragma GCC pop_options + +__extension__ extern __inline float32x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vcadd_rot90_f32 (float32x2_t __a, float32x2_t __b) +{ + return __builtin_aarch64_fcadd90v2sf (__a, __b); +} + +__extension__ extern __inline float32x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vcaddq_rot90_f32 (float32x4_t __a, float32x4_t __b) +{ + return __builtin_aarch64_fcadd90v4sf (__a, __b); +} + +__extension__ extern __inline float64x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vcaddq_rot90_f64 (float64x2_t __a, float64x2_t __b) +{ + return __builtin_aarch64_fcadd90v2df (__a, __b); +} + +__extension__ extern __inline float32x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vcadd_rot270_f32 (float32x2_t __a, float32x2_t __b) +{ + return __builtin_aarch64_fcadd270v2sf (__a, __b); +} + +__extension__ extern __inline float32x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vcaddq_rot270_f32 (float32x4_t __a, float32x4_t __b) +{ + return __builtin_aarch64_fcadd270v4sf (__a, __b); +} + +__extension__ extern __inline float64x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vcaddq_rot270_f64 (float64x2_t __a, float64x2_t __b) +{ + return __builtin_aarch64_fcadd270v2df (__a, __b); +} + +__extension__ extern __inline float32x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vcmla_f32 (float32x2_t __r, float32x2_t __a, float32x2_t __b) +{ + return __builtin_aarch64_fcmla0v2sf (__r, __a, __b); +} + +__extension__ extern __inline float32x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vcmlaq_f32 (float32x4_t __r, float32x4_t __a, float32x4_t __b) +{ + return 
__builtin_aarch64_fcmla0v4sf (__r, __a, __b); +} + +__extension__ extern __inline float64x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vcmlaq_f64 (float64x2_t __r, float64x2_t __a, float64x2_t __b) +{ + return __builtin_aarch64_fcmla0v2df (__r, __a, __b); +} + +__extension__ extern __inline float32x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vcmla_lane_f32 (float32x2_t __r, float32x2_t __a, float32x2_t __b, + const int __index) +{ + return __builtin_aarch64_fcmla_lane0v2sf (__r, __a, __b, __index); +} + +__extension__ extern __inline float32x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vcmla_laneq_f32 (float32x2_t __r, float32x2_t __a, float32x4_t __b, + const int __index) +{ + return __builtin_aarch64_fcmla_laneq0v2sf (__r, __a, __b, __index); +} + +__extension__ extern __inline float32x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vcmlaq_lane_f32 (float32x4_t __r, float32x4_t __a, float32x2_t __b, + const int __index) +{ + return __builtin_aarch64_fcmlaq_lane0v4sf (__r, __a, __b, __index); +} + +__extension__ extern __inline float32x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vcmlaq_laneq_f32 (float32x4_t __r, float32x4_t __a, float32x4_t __b, + const int __index) +{ + return __builtin_aarch64_fcmla_lane0v4sf (__r, __a, __b, __index); +} + +__extension__ extern __inline float32x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vcmla_rot90_f32 (float32x2_t __r, float32x2_t __a, float32x2_t __b) +{ + return __builtin_aarch64_fcmla90v2sf (__r, __a, __b); +} + +__extension__ extern __inline float32x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vcmlaq_rot90_f32 (float32x4_t __r, float32x4_t __a, float32x4_t __b) +{ + return __builtin_aarch64_fcmla90v4sf (__r, __a, __b); +} + +__extension__ extern __inline float64x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vcmlaq_rot90_f64 (float64x2_t __r, float64x2_t __a, float64x2_t __b) +{ + return __builtin_aarch64_fcmla90v2df (__r, __a, __b); +} + +__extension__ extern __inline float32x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vcmla_rot90_lane_f32 (float32x2_t __r, float32x2_t __a, float32x2_t __b, + const int __index) +{ + return __builtin_aarch64_fcmla_lane90v2sf (__r, __a, __b, __index); +} + +__extension__ extern __inline float32x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vcmla_rot90_laneq_f32 (float32x2_t __r, float32x2_t __a, float32x4_t __b, + const int __index) +{ + return __builtin_aarch64_fcmla_laneq90v2sf (__r, __a, __b, __index); +} + +__extension__ extern __inline float32x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vcmlaq_rot90_lane_f32 (float32x4_t __r, float32x4_t __a, float32x2_t __b, + const int __index) +{ + return __builtin_aarch64_fcmlaq_lane90v4sf (__r, __a, __b, __index); +} + +__extension__ extern __inline float32x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vcmlaq_rot90_laneq_f32 (float32x4_t __r, float32x4_t __a, float32x4_t __b, + const int __index) +{ + return __builtin_aarch64_fcmla_lane90v4sf (__r, __a, __b, __index); +} + +__extension__ extern __inline float32x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vcmla_rot180_f32 (float32x2_t __r, float32x2_t __a, float32x2_t __b) +{ + return __builtin_aarch64_fcmla180v2sf (__r, __a, __b); +} + +__extension__ 
extern __inline float32x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vcmlaq_rot180_f32 (float32x4_t __r, float32x4_t __a, float32x4_t __b) +{ + return __builtin_aarch64_fcmla180v4sf (__r, __a, __b); +} + +__extension__ extern __inline float64x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vcmlaq_rot180_f64 (float64x2_t __r, float64x2_t __a, float64x2_t __b) +{ + return __builtin_aarch64_fcmla180v2df (__r, __a, __b); +} + +__extension__ extern __inline float32x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vcmla_rot180_lane_f32 (float32x2_t __r, float32x2_t __a, float32x2_t __b, + const int __index) +{ + return __builtin_aarch64_fcmla_lane180v2sf (__r, __a, __b, __index); +} + +__extension__ extern __inline float32x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vcmla_rot180_laneq_f32 (float32x2_t __r, float32x2_t __a, float32x4_t __b, + const int __index) +{ + return __builtin_aarch64_fcmla_laneq180v2sf (__r, __a, __b, __index); +} + +__extension__ extern __inline float32x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vcmlaq_rot180_lane_f32 (float32x4_t __r, float32x4_t __a, float32x2_t __b, + const int __index) +{ + return __builtin_aarch64_fcmlaq_lane180v4sf (__r, __a, __b, __index); +} + +__extension__ extern __inline float32x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vcmlaq_rot180_laneq_f32 (float32x4_t __r, float32x4_t __a, float32x4_t __b, + const int __index) +{ + return __builtin_aarch64_fcmla_lane180v4sf (__r, __a, __b, __index); +} + +__extension__ extern __inline float32x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vcmla_rot270_f32 (float32x2_t __r, float32x2_t __a, float32x2_t __b) +{ + return __builtin_aarch64_fcmla270v2sf (__r, __a, __b); +} + +__extension__ extern __inline float32x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vcmlaq_rot270_f32 (float32x4_t __r, float32x4_t __a, float32x4_t __b) +{ + return __builtin_aarch64_fcmla270v4sf (__r, __a, __b); +} + +__extension__ extern __inline float64x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vcmlaq_rot270_f64 (float64x2_t __r, float64x2_t __a, float64x2_t __b) +{ + return __builtin_aarch64_fcmla270v2df (__r, __a, __b); +} + +__extension__ extern __inline float32x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vcmla_rot270_lane_f32 (float32x2_t __r, float32x2_t __a, float32x2_t __b, + const int __index) +{ + return __builtin_aarch64_fcmla_lane270v2sf (__r, __a, __b, __index); +} + +__extension__ extern __inline float32x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vcmla_rot270_laneq_f32 (float32x2_t __r, float32x2_t __a, float32x4_t __b, + const int __index) +{ + return __builtin_aarch64_fcmla_laneq270v2sf (__r, __a, __b, __index); +} + +__extension__ extern __inline float32x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vcmlaq_rot270_lane_f32 (float32x4_t __r, float32x4_t __a, float32x2_t __b, + const int __index) +{ + return __builtin_aarch64_fcmlaq_lane270v4sf (__r, __a, __b, __index); +} + +__extension__ extern __inline float32x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vcmlaq_rot270_laneq_f32 (float32x4_t __r, float32x4_t __a, float32x4_t __b, + const int __index) +{ + return __builtin_aarch64_fcmla_lane270v4sf (__r, __a, __b, __index); +} #pragma GCC pop_options 
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vector-complex.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vector-complex.c new file mode 100644 index 0000000000000000000000000000000000000000..c5c6c905284214dfabdf289789e10e5d2ee2a1a9 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vector-complex.c @@ -0,0 +1,259 @@ +/* { dg-skip-if "" { arm-*-* } } */ +/* { dg-do compile } */ +/* { dg-require-effective-target arm_v8_3a_complex_neon_ok } */ +/* { dg-add-options arm_v8_3a_complex_neon } */ +/* { dg-additional-options "-O2" } */ + +#include <arm_neon.h> + +float32x2_t +test_vcadd_rot90_f32 (float32x2_t __a, float32x2_t __b) +{ + return vcadd_rot90_f32 (__a, __b); +} + +float32x4_t +test_vcaddq_rot90_f32 (float32x4_t __a, float32x4_t __b) +{ + return vcaddq_rot90_f32 (__a, __b); +} + +#ifdef __ARM_ARCH_ISA_A64 +float64x2_t +test_vcaddq_rot90_f64 (float64x2_t __a, float64x2_t __b) +{ + return vcaddq_rot90_f64 (__a, __b); +} +#endif + +float32x2_t +test_vcadd_rot270_f32 (float32x2_t __a, float32x2_t __b) +{ + return vcadd_rot270_f32 (__a, __b); +} + +float32x4_t +test_vcaddq_rot270_f32 (float32x4_t __a, float32x4_t __b) +{ + return vcaddq_rot270_f32 (__a, __b); +} + +#ifdef __ARM_ARCH_ISA_A64 +float64x2_t +test_vcaddq_rot270_f64 (float64x2_t __a, float64x2_t __b) +{ + return vcaddq_rot270_f64 (__a, __b); +} +#endif + +float32x2_t +test_vcmla_f32 (float32x2_t __r, float32x2_t __a, float32x2_t __b) +{ + return vcmla_f32 (__r, __a, __b); +} + +float32x4_t +test_vcmlaq_f32 (float32x4_t __r, float32x4_t __a, float32x4_t __b) +{ + return vcmlaq_f32 (__r, __a, __b); +} + +#ifdef __ARM_ARCH_ISA_A64 +float64x2_t +test_vcmlaq_f64 (float64x2_t __r, float64x2_t __a, float64x2_t __b) +{ + return vcmlaq_f64 (__r, __a, __b); +} +#endif + +float32x2_t +test_vcmla_lane_f32 (float32x2_t __r, float32x2_t __a, float32x2_t __b) +{ + return vcmla_lane_f32 (__r, __a, __b, 0); +} + +float32x2_t +test_vcmla_laneq_f32 (float32x2_t __r, float32x2_t __a, float32x4_t __b) +{ + return vcmla_laneq_f32 (__r, __a, __b, 1); +} + +float32x4_t +test_vcmlaq_lane_f32 (float32x4_t __r, float32x4_t __a, float32x2_t __b) +{ + return vcmlaq_lane_f32 (__r, __a, __b, 0); +} + +float32x4_t +test_vcmlaq_laneq_f32 (float32x4_t __r, float32x4_t __a, float32x4_t __b) +{ + return vcmlaq_laneq_f32 (__r, __a, __b, 1); +} + +float32x2_t +test_vcmla_rot90_f32 (float32x2_t __r, float32x2_t __a, float32x2_t __b) +{ + return vcmla_rot90_f32 (__r, __a, __b); +} + +float32x4_t +test_vcmlaq_rot90_f32 (float32x4_t __r, float32x4_t __a, float32x4_t __b) +{ + return vcmlaq_rot90_f32 (__r, __a, __b); +} + +#ifdef __ARM_ARCH_ISA_A64 +float64x2_t +test_vcmlaq_rot90_f64 (float64x2_t __r, float64x2_t __a, float64x2_t __b) +{ + return vcmlaq_rot90_f64 (__r, __a, __b); +} +#endif + +float32x2_t +test_vcmla_rot90_lane_f32 (float32x2_t __r, float32x2_t __a, float32x2_t __b) +{ + return vcmla_rot90_lane_f32 (__r, __a, __b, 0); +} + +float32x2_t +test_vcmla_rot90_laneq_f32 (float32x2_t __r, float32x2_t __a, float32x4_t __b) +{ + return vcmla_rot90_laneq_f32 (__r, __a, __b, 1); +} + +float32x4_t +test_vcmlaq_rot90_lane_f32 (float32x4_t __r, float32x4_t __a, float32x2_t __b) +{ + return vcmlaq_rot90_lane_f32 (__r, __a, __b, 0); +} + +float32x4_t +test_vcmlaq_rot90_laneq_f32 (float32x4_t __r, float32x4_t __a, float32x4_t __b) +{ + return vcmlaq_rot90_laneq_f32 (__r, __a, __b, 1); +} + +float32x2_t +test_vcmla_rot180_f32 (float32x2_t __r, float32x2_t __a, float32x2_t __b) +{ + return vcmla_rot180_f32 (__r, __a, __b); +} 
+ +float32x4_t +test_vcmlaq_rot180_f32 (float32x4_t __r, float32x4_t __a, float32x4_t __b) +{ + return vcmlaq_rot180_f32 (__r, __a, __b); +} + +#ifdef __ARM_ARCH_ISA_A64 +float64x2_t +test_vcmlaq_rot180_f64 (float64x2_t __r, float64x2_t __a, float64x2_t __b) +{ + return vcmlaq_rot180_f64 (__r, __a, __b); +} +#endif + +float32x2_t +test_vcmla_rot180_lane_f32 (float32x2_t __r, float32x2_t __a, float32x2_t __b) +{ + return vcmla_rot180_lane_f32 (__r, __a, __b, 0); +} + +float32x2_t +test_vcmla_rot180_laneq_f32 (float32x2_t __r, float32x2_t __a, float32x4_t __b) +{ + return vcmla_rot180_laneq_f32 (__r, __a, __b, 1); +} + +float32x4_t +test_vcmlaq_rot180_lane_f32 (float32x4_t __r, float32x4_t __a, float32x2_t __b) +{ + return vcmlaq_rot180_lane_f32 (__r, __a, __b, 0); +} + +float32x4_t +test_vcmlaq_rot180_laneq_f32 (float32x4_t __r, float32x4_t __a, float32x4_t __b) +{ + return vcmlaq_rot180_laneq_f32 (__r, __a, __b, 1); +} + +float32x2_t +test_vcmla_rot270_f32 (float32x2_t __r, float32x2_t __a, float32x2_t __b) +{ + return vcmla_rot270_f32 (__r, __a, __b); +} + +float32x4_t +test_vcmlaq_rot270_f32 (float32x4_t __r, float32x4_t __a, float32x4_t __b) +{ + return vcmlaq_rot270_f32 (__r, __a, __b); +} + +#ifdef __ARM_ARCH_ISA_A64 +float64x2_t +test_vcmlaq_rot270_f64 (float64x2_t __r, float64x2_t __a, float64x2_t __b) +{ + return vcmlaq_rot270_f64 (__r, __a, __b); +} +#endif + +float32x2_t +test_vcmla_rot270_lane_f32 (float32x2_t __r, float32x2_t __a, float32x2_t __b) +{ + return vcmla_rot270_lane_f32 (__r, __a, __b, 0); +} + +float32x2_t +test_vcmla_rot270_laneq_f32 (float32x2_t __r, float32x2_t __a, float32x4_t __b) +{ + return vcmla_rot270_laneq_f32 (__r, __a, __b, 1); +} + +float32x4_t +test_vcmlaq_rot270_lane_f32 (float32x4_t __r, float32x4_t __a, float32x2_t __b) +{ + return vcmlaq_rot270_lane_f32 (__r, __a, __b, 0); +} + +float32x4_t +test_vcmlaq_rot270_laneq_f32 (float32x4_t __r, float32x4_t __a, float32x4_t __b) +{ + return vcmlaq_rot270_laneq_f32 (__r, __a, __b, 1); +} + +/* { dg-final { scan-assembler-times {fcadd\tv[0-9]+\.2d, v[0-9]+\.2d, v[0-9]+\.2d, #270} 1 { target { aarch64*-*-* } } } } */ +/* { dg-final { scan-assembler-times {fcadd\tv[0-9]+\.2d, v[0-9]+\.2d, v[0-9]+\.2d, #90} 1 { target { aarch64*-*-* } } } } */ +/* { dg-final { scan-assembler-times {fcadd\tv[0-9]+\.2s, v[0-9]+\.2s, v[0-9]+\.2s, #270} 1 { target { aarch64*-*-* } } } } */ +/* { dg-final { scan-assembler-times {fcadd\tv[0-9]+\.2s, v[0-9]+\.2s, v[0-9]+\.2s, #90} 1 { target { aarch64*-*-* } } } } */ +/* { dg-final { scan-assembler-times {fcadd\tv[0-9]+\.4s, v[0-9]+\.4s, v[0-9]+\.4s, #270} 1 { target { aarch64*-*-* } } } } */ +/* { dg-final { scan-assembler-times {fcadd\tv[0-9]+\.4s, v[0-9]+\.4s, v[0-9]+\.4s, #90} 1 { target { aarch64*-*-* } } } } */ +/* { dg-final { scan-assembler-times {fcmla\tv[0-9]+\.2d, v[0-9]+\.2d, v[0-9]+\.2d, #0} 1 { target { aarch64*-*-* } } } } */ +/* { dg-final { scan-assembler-times {fcmla\tv[0-9]+\.2d, v[0-9]+\.2d, v[0-9]+\.2d, #180} 1 { target { aarch64*-*-* } } } } */ +/* { dg-final { scan-assembler-times {fcmla\tv[0-9]+\.2d, v[0-9]+\.2d, v[0-9]+\.2d, #270} 1 { target { aarch64*-*-* } } } } */ +/* { dg-final { scan-assembler-times {fcmla\tv[0-9]+\.2d, v[0-9]+\.2d, v[0-9]+\.2d, #90} 1 { target { aarch64*-*-* } } } } */ +/* { dg-final { scan-assembler-times {fcmla\tv[0-9]+\.2s, v[0-9]+\.2s, v[0-9]+\.2s, #0} 1 { target { aarch64*-*-* } } } } */ +/* { dg-final { scan-assembler-times {fcmla\tv[0-9]+\.2s, v[0-9]+\.2s, v[0-9]+\.2s, #180} 1 { target { aarch64*-*-* } } } } */ +/* { dg-final { 
scan-assembler-times {fcmla\tv[0-9]+\.2s, v[0-9]+\.2s, v[0-9]+\.2s, #270} 1 { target { aarch64*-*-* } } } } */ +/* { dg-final { scan-assembler-times {fcmla\tv[0-9]+\.2s, v[0-9]+\.2s, v[0-9]+\.2s, #90} 1 { target { aarch64*-*-* } } } } */ +/* { dg-final { scan-assembler-times {fcmla\tv[0-9]+\.2s, v[0-9]+\.2s, v[0-9]+\.s\[1\], #0} 1 { target { aarch64*-*-* } } } } */ +/* { dg-final { scan-assembler-times {fcmla\tv[0-9]+\.2s, v[0-9]+\.2s, v[0-9]+\.s\[1\], #180} 1 { target { aarch64*-*-* } } } } */ +/* { dg-final { scan-assembler-times {fcmla\tv[0-9]+\.2s, v[0-9]+\.2s, v[0-9]+\.s\[1\], #270} 1 { target { aarch64*-*-* } } } } */ +/* { dg-final { scan-assembler-times {fcmla\tv[0-9]+\.2s, v[0-9]+\.2s, v[0-9]+\.s\[1\], #90} 1 { target { aarch64*-*-* } } } } */ +/* { dg-final { scan-assembler-times {fcmla\tv[0-9]+\.2s, v[0-9]+\.2s, v[0-9]+\.s\[0\], #0} 1 { target { aarch64*-*-* } } } } */ +/* { dg-final { scan-assembler-times {fcmla\tv[0-9]+\.2s, v[0-9]+\.2s, v[0-9]+\.s\[0\], #180} 1 { target { aarch64*-*-* } } } } */ +/* { dg-final { scan-assembler-times {fcmla\tv[0-9]+\.2s, v[0-9]+\.2s, v[0-9]+\.s\[0\], #270} 1 { target { aarch64*-*-* } } } } */ +/* { dg-final { scan-assembler-times {fcmla\tv[0-9]+\.2s, v[0-9]+\.2s, v[0-9]+\.s\[0\], #90} 1 { target { aarch64*-*-* } } } } */ +/* { dg-final { scan-assembler-times {fcmla\tv[0-9]+\.4s, v[0-9]+\.4s, v[0-9]+\.4s, #0} 1 { target { aarch64*-*-* } } } } */ +/* { dg-final { scan-assembler-times {fcmla\tv[0-9]+\.4s, v[0-9]+\.4s, v[0-9]+\.4s, #180} 1 { target { aarch64*-*-* } } } } */ +/* { dg-final { scan-assembler-times {fcmla\tv[0-9]+\.4s, v[0-9]+\.4s, v[0-9]+\.4s, #270} 1 { target { aarch64*-*-* } } } } */ +/* { dg-final { scan-assembler-times {fcmla\tv[0-9]+\.4s, v[0-9]+\.4s, v[0-9]+\.4s, #90} 1 { target { aarch64*-*-* } } } } */ +/* { dg-final { scan-assembler-times {fcmla\tv[0-9]+\.4s, v[0-9]+\.4s, v[0-9]+\.s\[0\], #0} 1 { target { aarch64*-*-* } } } } */ +/* { dg-final { scan-assembler-times {fcmla\tv[0-9]+\.4s, v[0-9]+\.4s, v[0-9]+\.s\[0\], #180} 1 { target { aarch64*-*-* } } } } */ +/* { dg-final { scan-assembler-times {fcmla\tv[0-9]+\.4s, v[0-9]+\.4s, v[0-9]+\.s\[0\], #270} 1 { target { aarch64*-*-* } } } } */ +/* { dg-final { scan-assembler-times {fcmla\tv[0-9]+\.4s, v[0-9]+\.4s, v[0-9]+\.s\[0\], #90} 1 { target { aarch64*-*-* } } } } */ +/* { dg-final { scan-assembler-times {fcmla\tv[0-9]+\.4s, v[0-9]+\.4s, v[0-9]+\.s\[1\], #0} 1 { target { aarch64*-*-* } } } } */ +/* { dg-final { scan-assembler-times {fcmla\tv[0-9]+\.4s, v[0-9]+\.4s, v[0-9]+\.s\[1\], #180} 1 { target { aarch64*-*-* } } } } */ +/* { dg-final { scan-assembler-times {fcmla\tv[0-9]+\.4s, v[0-9]+\.4s, v[0-9]+\.s\[1\], #270} 1 { target { aarch64*-*-* } } } } */ +/* { dg-final { scan-assembler-times {fcmla\tv[0-9]+\.4s, v[0-9]+\.4s, v[0-9]+\.s\[1\], #90} 1 { target { aarch64*-*-* } } } } */ + diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vector-complex_f16.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vector-complex_f16.c new file mode 100644 index 0000000000000000000000000000000000000000..ab62f03a213f303f2a4427ce7254f05f077c1ab7 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vector-complex_f16.c @@ -0,0 +1,310 @@ +/* { dg-skip-if "" { arm-*-* } } */ +/* { dg-do compile } */ +/* { dg-require-effective-target arm_v8_3a_complex_neon_ok } */ +/* { dg-require-effective-target arm_v8_2a_fp16_scalar_ok } */ +/* { dg-add-options arm_v8_3a_complex_neon } */ +/* { dg-additional-options "-O2 -march=armv8.3-a+fp16" } */ + +#include 
<arm_neon.h> + +float16x4_t +test_vcadd_rot90_f16 (float16x4_t __a, float16x4_t __b) +{ + return vcadd_rot90_f16 (__a, __b); +} + +float16x8_t +test_vcaddq_rot90_f16 (float16x8_t __a, float16x8_t __b) +{ + return vcaddq_rot90_f16 (__a, __b); +} + +float16x4_t +test_vcadd_rot270_f16 (float16x4_t __a, float16x4_t __b) +{ + return vcadd_rot270_f16 (__a, __b); +} + +float16x8_t +test_vcaddq_rot270_f16 (float16x8_t __a, float16x8_t __b) +{ + return vcaddq_rot270_f16 (__a, __b); +} + +float16x4_t +test_vcmla_f16 (float16x4_t __r, float16x4_t __a, float16x4_t __b) +{ + return vcmla_f16 (__r, __a, __b); +} + +float16x8_t +test_vcmlaq_f16 (float16x8_t __r, float16x8_t __a, float16x8_t __b) +{ + return vcmlaq_f16 (__r, __a, __b); +} + +float16x4_t +test_vcmla_lane_f16 (float16x4_t __r, float16x4_t __a, float16x4_t __b) +{ + return vcmla_lane_f16 (__r, __a, __b, 0); +} + +float16x4_t +test_vcmla_laneq_f16 (float16x4_t __r, float16x4_t __a, float16x8_t __b) +{ + return vcmla_laneq_f16 (__r, __a, __b, 0); +} + +float16x8_t +test_vcmlaq_lane_f16 (float16x8_t __r, float16x8_t __a, float16x4_t __b) +{ + return vcmlaq_lane_f16 (__r, __a, __b, 0); +} + +float16x8_t +test_vcmlaq_laneq_f16 (float16x8_t __r, float16x8_t __a, float16x8_t __b) +{ + return vcmlaq_laneq_f16 (__r, __a, __b, 0); +} + +float16x4_t +test_vcmla_lane_f16_2 (float16x4_t __r, float16x4_t __a, float16x4_t __b) +{ + return vcmla_lane_f16 (__r, __a, __b, 1); +} + +float16x4_t +test_vcmla_laneq_f16_2 (float16x4_t __r, float16x4_t __a, float16x8_t __b) +{ + return vcmla_laneq_f16 (__r, __a, __b, 3); +} + +float16x8_t +test_vcmlaq_lane_f16_2 (float16x8_t __r, float16x8_t __a, float16x4_t __b) +{ + return vcmlaq_lane_f16 (__r, __a, __b, 1); +} + +float16x8_t +test_vcmlaq_laneq_f16_2 (float16x8_t __r, float16x8_t __a, float16x8_t __b) +{ + return vcmlaq_laneq_f16 (__r, __a, __b, 3); +} + +float16x4_t +test_vcmla_rot90_f16 (float16x4_t __r, float16x4_t __a, float16x4_t __b) +{ + return vcmla_rot90_f16 (__r, __a, __b); +} + +float16x8_t +test_vcmlaq_rot90_f16 (float16x8_t __r, float16x8_t __a, float16x8_t __b) +{ + return vcmlaq_rot90_f16 (__r, __a, __b); +} + +float16x4_t +test_vcmla_rot90_lane_f16 (float16x4_t __r, float16x4_t __a, float16x4_t __b) +{ + return vcmla_rot90_lane_f16 (__r, __a, __b, 0); +} + +float16x4_t +test_vcmla_rot90_laneq_f16 (float16x4_t __r, float16x4_t __a, float16x8_t __b) +{ + return vcmla_rot90_laneq_f16 (__r, __a, __b, 0); +} + +float16x8_t +test_vcmlaq_rot90_lane_f16 (float16x8_t __r, float16x8_t __a, float16x4_t __b) +{ + return vcmlaq_rot90_lane_f16 (__r, __a, __b, 0); +} + +float16x8_t +test_vcmlaq_rot90_laneq_f16 (float16x8_t __r, float16x8_t __a, float16x8_t __b) +{ + return vcmlaq_rot90_laneq_f16 (__r, __a, __b, 0); +} + +float16x4_t +test_vcmla_rot90_lane_f16_2 (float16x4_t __r, float16x4_t __a, float16x4_t __b) +{ + return vcmla_rot90_lane_f16 (__r, __a, __b, 1); +} + +float16x4_t +test_vcmla_rot90_laneq_f16_2 (float16x4_t __r, float16x4_t __a, float16x8_t __b) +{ + return vcmla_rot90_laneq_f16 (__r, __a, __b, 3); +} + +float16x8_t +test_vcmlaq_rot90_lane_f16_2 (float16x8_t __r, float16x8_t __a, float16x4_t __b) +{ + return vcmlaq_rot90_lane_f16 (__r, __a, __b, 1); +} + +float16x8_t +test_vcmlaq_rot90_laneq_f16_2 (float16x8_t __r, float16x8_t __a, float16x8_t __b) +{ + return vcmlaq_rot90_laneq_f16 (__r, __a, __b, 3); +} + +float16x4_t +test_vcmla_rot180_f16 (float16x4_t __r, float16x4_t __a, float16x4_t __b) +{ + return vcmla_rot180_f16 (__r, __a, __b); +} + +float16x8_t +test_vcmlaq_rot180_f16 (float16x8_t 
__r, float16x8_t __a, float16x8_t __b) +{ + return vcmlaq_rot180_f16 (__r, __a, __b); +} + +float16x4_t +test_vcmla_rot180_lane_f16 (float16x4_t __r, float16x4_t __a, float16x4_t __b) +{ + return vcmla_rot180_lane_f16 (__r, __a, __b, 0); +} + +float16x4_t +test_vcmla_rot180_laneq_f16 (float16x4_t __r, float16x4_t __a, float16x8_t __b) +{ + return vcmla_rot180_laneq_f16 (__r, __a, __b, 0); +} + +float16x8_t +test_vcmlaq_rot180_lane_f16 (float16x8_t __r, float16x8_t __a, float16x4_t __b) +{ + return vcmlaq_rot180_lane_f16 (__r, __a, __b, 0); +} + +float16x8_t +test_vcmlaq_rot180_laneq_f16 (float16x8_t __r, float16x8_t __a, float16x8_t __b) +{ + return vcmlaq_rot180_laneq_f16 (__r, __a, __b, 0); +} + +float16x4_t +test_vcmla_rot180_lane_f16_2 (float16x4_t __r, float16x4_t __a, float16x4_t __b) +{ + return vcmla_rot180_lane_f16 (__r, __a, __b, 1); +} + +float16x4_t +test_vcmla_rot180_laneq_f16_2 (float16x4_t __r, float16x4_t __a, float16x8_t __b) +{ + return vcmla_rot180_laneq_f16 (__r, __a, __b, 3); +} + +float16x8_t +test_vcmlaq_rot180_lane_f16_2 (float16x8_t __r, float16x8_t __a, float16x4_t __b) +{ + return vcmlaq_rot180_lane_f16 (__r, __a, __b, 1); +} + +float16x8_t +test_vcmlaq_rot180_laneq_f16_2 (float16x8_t __r, float16x8_t __a, float16x8_t __b) +{ + return vcmlaq_rot180_laneq_f16 (__r, __a, __b, 3); +} + +float16x4_t +test_vcmla_rot270_f16 (float16x4_t __r, float16x4_t __a, float16x4_t __b) +{ + return vcmla_rot270_f16 (__r, __a, __b); +} + +float16x8_t +test_vcmlaq_rot270_f16 (float16x8_t __r, float16x8_t __a, float16x8_t __b) +{ + return vcmlaq_rot270_f16 (__r, __a, __b); +} + +float16x4_t +test_vcmla_rot270_lane_f16 (float16x4_t __r, float16x4_t __a, float16x4_t __b) +{ + return vcmla_rot270_lane_f16 (__r, __a, __b, 0); +} + +float16x4_t +test_vcmla_rot270_laneq_f16 (float16x4_t __r, float16x4_t __a, float16x8_t __b) +{ + return vcmla_rot270_laneq_f16 (__r, __a, __b, 0); +} + +float16x8_t +test_vcmlaq_rot270_lane_f16 (float16x8_t __r, float16x8_t __a, float16x4_t __b) +{ + return vcmlaq_rot270_lane_f16 (__r, __a, __b, 0); +} + +float16x8_t +test_vcmlaq_rot270_laneq_f16 (float16x8_t __r, float16x8_t __a, float16x8_t __b) +{ + return vcmlaq_rot270_laneq_f16 (__r, __a, __b, 0); +} + +float16x4_t +test_vcmla_rot270_lane_f16_2 (float16x4_t __r, float16x4_t __a, float16x4_t __b) +{ + return vcmla_rot270_lane_f16 (__r, __a, __b, 1); +} + +float16x4_t +test_vcmla_rot270_laneq_f16_2 (float16x4_t __r, float16x4_t __a, float16x8_t __b) +{ + return vcmla_rot270_laneq_f16 (__r, __a, __b, 3); +} + +float16x8_t +test_vcmlaq_rot270_lane_f16_2 (float16x8_t __r, float16x8_t __a, float16x4_t __b) +{ + return vcmlaq_rot270_lane_f16 (__r, __a, __b, 1); +} + +float16x8_t +test_vcmlaq_rot270_laneq_f16_2 (float16x8_t __r, float16x8_t __a, float16x8_t __b) +{ + return vcmlaq_rot270_laneq_f16 (__r, __a, __b, 3); +} + +/* { dg-final { scan-assembler-times {fcadd\tv[0-9]+\.4h, v[0-9]+\.4h, v[0-9]+\.4h, #270} 1 { target { aarch64*-*-* } } } } */ +/* { dg-final { scan-assembler-times {fcadd\tv[0-9]+\.4h, v[0-9]+\.4h, v[0-9]+\.4h, #90} 1 { target { aarch64*-*-* } } } } */ +/* { dg-final { scan-assembler-times {fcadd\tv[0-9]+\.8h, v[0-9]+\.8h, v[0-9]+\.8h, #270} 1 { target { aarch64*-*-* } } } } */ +/* { dg-final { scan-assembler-times {fcadd\tv[0-9]+\.8h, v[0-9]+\.8h, v[0-9]+\.8h, #90} 1 { target { aarch64*-*-* } } } } */ +/* { dg-final { scan-assembler-times {fcmla\tv[0-9]+\.4h, v[0-9]+\.4h, v[0-9]+\.4h, #0} 1 { target { aarch64*-*-* } } } } */ +/* { dg-final { scan-assembler-times {fcmla\tv[0-9]+\.4h, 
v[0-9]+\.4h, v[0-9]+\.4h, #180} 1 { target { aarch64*-*-* } } } } */ +/* { dg-final { scan-assembler-times {fcmla\tv[0-9]+\.4h, v[0-9]+\.4h, v[0-9]+\.4h, #270} 1 { target { aarch64*-*-* } } } } */ +/* { dg-final { scan-assembler-times {fcmla\tv[0-9]+\.4h, v[0-9]+\.4h, v[0-9]+\.4h, #90} 1 { target { aarch64*-*-* } } } } */ +/* { dg-final { scan-assembler-times {fcmla\tv[0-9]+\.4h, v[0-9]+\.4h, v[0-9]+\.h\[0\], #0} 2 { target { aarch64*-*-* } } } } */ +/* { dg-final { scan-assembler-times {fcmla\tv[0-9]+\.4h, v[0-9]+\.4h, v[0-9]+\.h\[0\], #180} 2 { target { aarch64*-*-* } } } } */ +/* { dg-final { scan-assembler-times {fcmla\tv[0-9]+\.4h, v[0-9]+\.4h, v[0-9]+\.h\[0\], #270} 2 { target { aarch64*-*-* } } } } */ +/* { dg-final { scan-assembler-times {fcmla\tv[0-9]+\.4h, v[0-9]+\.4h, v[0-9]+\.h\[0\], #90} 2 { target { aarch64*-*-* } } } } */ +/* { dg-final { scan-assembler-times {fcmla\tv[0-9]+\.4h, v[0-9]+\.4h, v[0-9]+\.h\[1\], #0} 1 { target { aarch64*-*-* } } } } */ +/* { dg-final { scan-assembler-times {fcmla\tv[0-9]+\.4h, v[0-9]+\.4h, v[0-9]+\.h\[1\], #180} 1 { target { aarch64*-*-* } } } } */ +/* { dg-final { scan-assembler-times {fcmla\tv[0-9]+\.4h, v[0-9]+\.4h, v[0-9]+\.h\[1\], #270} 1 { target { aarch64*-*-* } } } } */ +/* { dg-final { scan-assembler-times {fcmla\tv[0-9]+\.4h, v[0-9]+\.4h, v[0-9]+\.h\[1\], #90} 1 { target { aarch64*-*-* } } } } */ +/* { dg-final { scan-assembler-times {fcmla\tv[0-9]+\.4h, v[0-9]+\.4h, v[0-9]+\.h\[3\], #0} 1 { target { aarch64*-*-* } } } } */ +/* { dg-final { scan-assembler-times {fcmla\tv[0-9]+\.4h, v[0-9]+\.4h, v[0-9]+\.h\[3\], #180} 1 { target { aarch64*-*-* } } } } */ +/* { dg-final { scan-assembler-times {fcmla\tv[0-9]+\.4h, v[0-9]+\.4h, v[0-9]+\.h\[3\], #270} 1 { target { aarch64*-*-* } } } } */ +/* { dg-final { scan-assembler-times {fcmla\tv[0-9]+\.4h, v[0-9]+\.4h, v[0-9]+\.h\[3\], #90} 1 { target { aarch64*-*-* } } } } */ +/* { dg-final { scan-assembler-times {fcmla\tv[0-9]+\.8h, v[0-9]+\.8h, v[0-9]+\.8h, #0} 1 { target { aarch64*-*-* } } } } */ +/* { dg-final { scan-assembler-times {fcmla\tv[0-9]+\.8h, v[0-9]+\.8h, v[0-9]+\.8h, #180} 1 { target { aarch64*-*-* } } } } */ +/* { dg-final { scan-assembler-times {fcmla\tv[0-9]+\.8h, v[0-9]+\.8h, v[0-9]+\.8h, #270} 1 { target { aarch64*-*-* } } } } */ +/* { dg-final { scan-assembler-times {fcmla\tv[0-9]+\.8h, v[0-9]+\.8h, v[0-9]+\.8h, #90} 1 { target { aarch64*-*-* } } } } */ +/* { dg-final { scan-assembler-times {fcmla\tv[0-9]+\.8h, v[0-9]+\.8h, v[0-9]+\.h\[0\], #0} 2 { target { aarch64*-*-* } } } } */ +/* { dg-final { scan-assembler-times {fcmla\tv[0-9]+\.8h, v[0-9]+\.8h, v[0-9]+\.h\[0\], #180} 2 { target { aarch64*-*-* } } } } */ +/* { dg-final { scan-assembler-times {fcmla\tv[0-9]+\.8h, v[0-9]+\.8h, v[0-9]+\.h\[0\], #270} 2 { target { aarch64*-*-* } } } } */ +/* { dg-final { scan-assembler-times {fcmla\tv[0-9]+\.8h, v[0-9]+\.8h, v[0-9]+\.h\[0\], #90} 2 { target { aarch64*-*-* } } } } */ +/* { dg-final { scan-assembler-times {fcmla\tv[0-9]+\.8h, v[0-9]+\.8h, v[0-9]+\.h\[1\], #0} 1 { target { aarch64*-*-* } } } } */ +/* { dg-final { scan-assembler-times {fcmla\tv[0-9]+\.8h, v[0-9]+\.8h, v[0-9]+\.h\[1\], #180} 1 { target { aarch64*-*-* } } } } */ +/* { dg-final { scan-assembler-times {fcmla\tv[0-9]+\.8h, v[0-9]+\.8h, v[0-9]+\.h\[1\], #270} 1 { target { aarch64*-*-* } } } } */ +/* { dg-final { scan-assembler-times {fcmla\tv[0-9]+\.8h, v[0-9]+\.8h, v[0-9]+\.h\[1\], #90} 1 { target { aarch64*-*-* } } } } */ +/* { dg-final { scan-assembler-times {fcmla\tv[0-9]+\.8h, v[0-9]+\.8h, v[0-9]+\.h\[3\], #0} 1 { 
target { aarch64*-*-* } } } } */ +/* { dg-final { scan-assembler-times {fcmla\tv[0-9]+\.8h, v[0-9]+\.8h, v[0-9]+\.h\[3\], #180} 1 { target { aarch64*-*-* } } } } */ +/* { dg-final { scan-assembler-times {fcmla\tv[0-9]+\.8h, v[0-9]+\.8h, v[0-9]+\.h\[3\], #270} 1 { target { aarch64*-*-* } } } } */ +/* { dg-final { scan-assembler-times {fcmla\tv[0-9]+\.8h, v[0-9]+\.8h, v[0-9]+\.h\[3\], #90} 1 { target { aarch64*-*-* } } } } */ +