This patch optimizes certain vector permute expansions with the FMOV instruction when one of the input vectors is a vector of all zeros and the result of the vector permute is as if the upper lanes of the non-zero input vector were set to zero and the lower lanes remained unchanged.
Note that the patch also propagates zero_op0_p and zero_op1_p during re-encode now. They will be used by aarch64_evpc_fmov to check if the input vectors are valid candidates. PR target/100165 gcc/ChangeLog: * config/aarch64/aarch64-protos.h (aarch64_lane0_mask_p): New. * config/aarch64/aarch64-simd.md (@aarch64_simd_vec_set_zero_fmov<mode>): New define_insn. * config/aarch64/aarch64.cc (aarch64_lane0_mask_p): New. (aarch64_evpc_reencode): Copy zero_op0_p and zero_op1_p. (aarch64_evpc_fmov): New. (aarch64_expand_vec_perm_const_1): Add call to aarch64_evpc_fmov. * config/aarch64/iterators.md (VALL_F16_NO_QI): New mode iterator. gcc/testsuite/ChangeLog: * gcc.target/aarch64/vec-set-zero.c: Update test accordingly. * gcc.target/aarch64/fmov-1.c: New test. * gcc.target/aarch64/fmov-2.c: New test. * gcc.target/aarch64/fmov-3.c: New test. * gcc.target/aarch64/fmov-be-1.c: New test. * gcc.target/aarch64/fmov-be-2.c: New test. * gcc.target/aarch64/fmov-be-3.c: New test. Signed-off-by: Pengxuan Zheng <quic_pzh...@quicinc.com> --- gcc/config/aarch64/aarch64-protos.h | 2 +- gcc/config/aarch64/aarch64-simd.md | 13 ++ gcc/config/aarch64/aarch64.cc | 96 ++++++++++- gcc/config/aarch64/iterators.md | 9 + gcc/testsuite/gcc.target/aarch64/fmov-1.c | 158 ++++++++++++++++++ gcc/testsuite/gcc.target/aarch64/fmov-2.c | 52 ++++++ gcc/testsuite/gcc.target/aarch64/fmov-3.c | 144 ++++++++++++++++ gcc/testsuite/gcc.target/aarch64/fmov-be-1.c | 144 ++++++++++++++++ gcc/testsuite/gcc.target/aarch64/fmov-be-2.c | 52 ++++++ gcc/testsuite/gcc.target/aarch64/fmov-be-3.c | 144 ++++++++++++++++ .../gcc.target/aarch64/vec-set-zero.c | 6 +- 11 files changed, 816 insertions(+), 4 deletions(-) create mode 100644 gcc/testsuite/gcc.target/aarch64/fmov-1.c create mode 100644 gcc/testsuite/gcc.target/aarch64/fmov-2.c create mode 100644 gcc/testsuite/gcc.target/aarch64/fmov-3.c create mode 100644 gcc/testsuite/gcc.target/aarch64/fmov-be-1.c create mode 100644 gcc/testsuite/gcc.target/aarch64/fmov-be-2.c 
create mode 100644 gcc/testsuite/gcc.target/aarch64/fmov-be-3.c diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h index 4235f4a0ca5..cba94914903 100644 --- a/gcc/config/aarch64/aarch64-protos.h +++ b/gcc/config/aarch64/aarch64-protos.h @@ -1051,7 +1051,7 @@ void aarch64_subvti_scratch_regs (rtx, rtx, rtx *, rtx *, rtx *, rtx *); void aarch64_expand_subvti (rtx, rtx, rtx, rtx, rtx, rtx, rtx, bool); - +bool aarch64_lane0_mask_p (unsigned int, rtx); /* Initialize builtins for SIMD intrinsics. */ void init_aarch64_simd_builtins (void); diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md index e2afe87e513..6ddc27c223e 100644 --- a/gcc/config/aarch64/aarch64-simd.md +++ b/gcc/config/aarch64/aarch64-simd.md @@ -1190,6 +1190,19 @@ (define_insn "@aarch64_simd_vec_set<mode>" [(set_attr "type" "neon_ins<q>, neon_from_gp<q>, neon_load1_one_lane<q>")] ) +(define_insn "@aarch64_simd_vec_set_zero_fmov<mode>" + [(set (match_operand:VALL_F16_NO_QI 0 "register_operand" "=w") + (vec_merge:VALL_F16_NO_QI + (match_operand:VALL_F16_NO_QI 1 "register_operand" "w") + (match_operand:VALL_F16_NO_QI 2 "aarch64_simd_imm_zero" "Dz") + (match_operand:SI 3 "immediate_operand" "i")))] + "TARGET_SIMD && aarch64_lane0_mask_p (<nunits>, operands[3])" + { + return "fmov\\t%<Vetype>0, %<Vetype>1"; + } + [(set_attr "type" "fmov")] +) + (define_insn "aarch64_simd_vec_set_zero<mode>" [(set (match_operand:VALL_F16 0 "register_operand" "=w") (vec_merge:VALL_F16 diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc index f5f23f6ff4b..c29a43f2553 100644 --- a/gcc/config/aarch64/aarch64.cc +++ b/gcc/config/aarch64/aarch64.cc @@ -23682,6 +23682,15 @@ aarch64_strided_registers_p (rtx *operands, unsigned int num_operands, return true; } +/* Return TRUE if OP is a valid vec_merge bit mask for lane 0. 
*/ + +bool +aarch64_lane0_mask_p (unsigned int nelts, rtx op) +{ + return exact_log2 (INTVAL (op)) >= 0 + && (ENDIAN_LANE_N (nelts, exact_log2 (INTVAL (op))) == 0); +} + /* Bounds-check lanes. Ensure OPERAND lies between LOW (inclusive) and HIGH (exclusive). */ void @@ -26058,6 +26067,8 @@ aarch64_evpc_reencode (struct expand_vec_perm_d *d) newd.target = d->target ? gen_lowpart (new_mode, d->target) : NULL; newd.op0 = d->op0 ? gen_lowpart (new_mode, d->op0) : NULL; newd.op1 = d->op1 ? gen_lowpart (new_mode, d->op1) : NULL; + newd.zero_op0_p = d->zero_op0_p; + newd.zero_op1_p = d->zero_op1_p; newd.testing_p = d->testing_p; newd.one_vector_p = d->one_vector_p; @@ -26643,6 +26654,87 @@ aarch64_evpc_ins (struct expand_vec_perm_d *d) return true; } +/* Recognize patterns suitable for the FMOV instructions. */ +static bool +aarch64_evpc_fmov (struct expand_vec_perm_d *d) +{ + if (d->vec_flags != VEC_ADVSIMD) + return false; + + /* Either d->op0 or d->op1 should be a vector of all zeros. */ + if (d->one_vector_p || (!d->zero_op0_p && !d->zero_op1_p)) + return false; + + HOST_WIDE_INT nelt, elt; + + /* to_constant is safe since this routine is specific to Advanced SIMD + vectors. */ + nelt = d->perm.length ().to_constant (); + + if (!BYTES_BIG_ENDIAN) + { + if (!d->perm[0].is_constant (&elt)) + return false; + + /* Lane 0 of the output vector should be chosen from the non-zero + vector. */ + if (elt != (d->zero_op0_p ? nelt : 0)) + return false; + + for (HOST_WIDE_INT i = 1; i < nelt; i++) + { + if (!d->perm[i].is_constant (&elt)) + return false; + + /* All lanes except lane 0 of the output vector should be chosen from + the zero vector. */ + if (d->zero_op0_p && elt >= nelt) + return false; + + if (!d->zero_op0_p && elt < nelt) + return false; + } + } + else + { + if (!d->perm[nelt-1].is_constant (&elt)) + return false; + + /* Lane NELT-1 of the output vector should be chosen from the non-zero + vector. */ + if (elt != (d->zero_op0_p ? 
2 * nelt - 1 : nelt - 1)) + return false; + + for (HOST_WIDE_INT i = 0; i < nelt - 1; i++) + { + if (!d->perm[i].is_constant (&elt)) + return false; + + /* All lanes except lane NELT-1 of the output vector should be chosen + from the zero vector. */ + if (d->zero_op0_p && elt >= nelt) + return false; + + if (!d->zero_op0_p && elt < nelt) + return false; + } + } + + if (d->testing_p) + return true; + + machine_mode mode = d->vmode; + insn_code icode = code_for_aarch64_simd_vec_set_zero_fmov (mode); + expand_operand ops[4]; + create_output_operand (&ops[0], d->target, mode); + create_input_operand (&ops[1], d->zero_op0_p ? d->op1 : d->op0, mode); + create_input_operand (&ops[2], CONST0_RTX (mode), mode); + create_integer_operand (&ops[3], BYTES_BIG_ENDIAN ? 1 << (nelt - 1) : 1); + expand_insn (icode, 4, ops); + + return true; +} + static bool aarch64_expand_vec_perm_const_1 (struct expand_vec_perm_d *d) { @@ -26666,7 +26758,9 @@ aarch64_expand_vec_perm_const_1 (struct expand_vec_perm_d *d) { if (d->vmode == d->op_mode) { - if (aarch64_evpc_rev_local (d)) + if (aarch64_evpc_fmov (d)) + return true; + else if (aarch64_evpc_rev_local (d)) return true; else if (aarch64_evpc_rev_global (d)) return true; diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md index 5bfd6e7d362..17bb6f00abc 100644 --- a/gcc/config/aarch64/iterators.md +++ b/gcc/config/aarch64/iterators.md @@ -225,6 +225,15 @@ (define_mode_iterator VALL_F16 [V8QI V16QI V4HI V8HI V2SI V4SI V2DI (define_mode_iterator VALL_F16_NO_V2Q [V8QI V16QI V4HI V8HI V2SI V4SI V4HF V8HF V2SF V4SF]) +;; The VALL_F16 modes except the QI ones. +(define_mode_iterator VALL_F16_NO_QI [(V4HI "TARGET_SIMD_F16INST") + (V8HI "TARGET_SIMD_F16INST") + (V4HF "TARGET_SIMD_F16INST") + (V8HF "TARGET_SIMD_F16INST") + (V4BF "TARGET_SIMD_F16INST") + (V8BF "TARGET_SIMD_F16INST") + V2SI V4SI V2DI V2SF V4SF V2DF]) + ;; All Advanced SIMD modes barring HF modes, plus DI.
(define_mode_iterator VALLDI [V8QI V16QI V4HI V8HI V2SI V4SI V2DI V2SF V4SF V2DF DI]) diff --git a/gcc/testsuite/gcc.target/aarch64/fmov-1.c b/gcc/testsuite/gcc.target/aarch64/fmov-1.c new file mode 100644 index 00000000000..b87e2b4ca64 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/fmov-1.c @@ -0,0 +1,158 @@ +/* { dg-do compile } */ +/* { dg-options "-O2" } */ +/* { dg-final { check-function-bodies "**" "" "" } } */ + +typedef int v2si __attribute__ ((vector_size (8))); +typedef float v2sf __attribute__ ((vector_size (8))); +typedef short v4hi __attribute__ ((vector_size (8))); +typedef long v2di __attribute__ ((vector_size (16))); +typedef double v2df __attribute__ ((vector_size (16))); +typedef int v4si __attribute__ ((vector_size (16))); +typedef float v4sf __attribute__ ((vector_size (16))); +typedef short v8hi __attribute__ ((vector_size (16))); + +/* +** f_v4hi: +** fmov s0, s0 +** ret +*/ +v4hi +f_v4hi (v4hi x) +{ + return __builtin_shuffle (x, (v4hi){ 0, 0, 0, 0 }, (v4hi){ 0, 1, 4, 5 }); +} + +/* +** g_v4hi: +** uzp1 v([0-9]+).2d, v0.2d, v0.2d +** adrp x([0-9]+), .LC0 +** ldr d([0-9]+), \[x\2, #:lo12:.LC0\] +** tbl v0.8b, {v\1.16b}, v\3.8b +** ret +*/ +v4hi +g_v4hi (v4hi x) +{ + return __builtin_shuffle (x, (v4hi){ 0, 0, 0, 0 }, (v4hi){ 3, 1, 4, 2 }); +} + +/* +** f_v8hi: +** fmov s0, s0 +** ret +*/ +v8hi +f_v8hi (v8hi x) +{ + return __builtin_shuffle (x, (v8hi){ 0, 0, 0, 0, 0, 0, 0, 0 }, (v8hi){ 0, 1, 8, 9, 10, 11, 12, 13 }); +} + +/* +** f_v2si: +** fmov s0, s0 +** ret +*/ +v2si +f_v2si (v2si x) +{ + return __builtin_shuffle (x, (v2si){ 0, 0 }, (v2si){ 0, 3 }); +} + +/* +** g_v2si: +** fmov s0, s0 +** ret +*/ +v2si +g_v2si (v2si x) +{ + return __builtin_shuffle ((v2si){ 0, 0 }, x, (v2si){ 2, 0 }); +} + +/* +** f_v2sf: +** fmov s0, s0 +** ret +*/ +v2sf +f_v2sf (v2sf x) +{ + return __builtin_shuffle (x, (v2sf){ 0, 0 }, (v2si){ 0, 2 }); +} + +/* +** f_v2di: +** fmov d0, d0 +** ret +*/ +v2di +f_v2di (v2di x) +{ + return __builtin_shuffle (x, (v2di){ 0, 
0 }, (v2di){ 0, 3 }); +} + +/* +** g_v2di: +** fmov d0, d0 +** ret +*/ +v2di +g_v2di (v2di x) +{ + return __builtin_shuffle ((v2di){ 0, 0 }, x, (v2di){ 2, 1 }); +} + +/* +** f_v2df: +** fmov d0, d0 +** ret +*/ +v2df +f_v2df (v2df x) +{ + return __builtin_shuffle (x, (v2df){ 0, 0 }, (v2di){ 0, 2 }); +} + +/* +** f_v4si: +** fmov d0, d0 +** ret +*/ +v4si +f_v4si (v4si x) +{ + return __builtin_shuffle (x, (v4si){ 0, 0, 0, 0 }, (v4si){ 0, 1, 4, 5 }); +} + +/* +** g_v4si: +** fmov d0, d0 +** ret +*/ +v4si +g_v4si (v4si x) +{ + return __builtin_shuffle ((v4si){ 0, 0, 0, 0 }, x, (v4si){ 4, 5, 2, 3 }); +} + +/* +** h_v4si: +** fmov s0, s0 +** ret +*/ +v4si +h_v4si (v4si x) +{ + return __builtin_shuffle (x, (v4si){ 0, 0, 0, 0 }, (v4si){ 0, 4, 5, 6 }); +} + +/* +** f_v4sf: +** fmov d0, d0 +** ret +*/ +v4sf +f_v4sf (v4sf x) +{ + return __builtin_shuffle (x, (v4sf){ 0, 0, 0, 0 }, (v4si){ 0, 1, 6, 7 }); +} diff --git a/gcc/testsuite/gcc.target/aarch64/fmov-2.c b/gcc/testsuite/gcc.target/aarch64/fmov-2.c new file mode 100644 index 00000000000..e0f1b6d05fb --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/fmov-2.c @@ -0,0 +1,52 @@ +/* { dg-do compile } */ +/* { dg-options "-O2" } */ +/* { dg-final { check-function-bodies "**" "" "" } } */ + +#pragma GCC target ("arch=armv8.2-a+fp16") + +typedef short v4hi __attribute__ ((vector_size (8))); +typedef short v8hi __attribute__ ((vector_size (16))); + +/* +** f_v4hi: +** fmov h0, h0 +** ret +*/ +v4hi +f_v4hi (v4hi x) +{ + return __builtin_shuffle (x, (v4hi){ 0, 0, 0, 0 }, (v4hi){ 0, 4, 5, 6 }); +} + +/* +** g_v4hi: +** fmov h0, h0 +** ret +*/ +v4hi +g_v4hi (v4hi x) +{ + return __builtin_shuffle ((v4hi){ 0, 0, 0, 0 }, x, (v4hi){ 4, 0, 1, 2 }); +} + +/* +** f_v8hi: +** fmov h0, h0 +** ret +*/ +v8hi +f_v8hi (v8hi x) +{ + return __builtin_shuffle (x, (v8hi){ 0, 0, 0, 0, 0, 0, 0, 0 }, (v8hi){ 0, 8, 9, 10, 11, 12, 13, 14 }); +} + +/* +** g_v8hi: +** fmov h0, h0 +** ret +*/ +v8hi +g_v8hi (v8hi x) +{ + return __builtin_shuffle ((v8hi){ 0, 
0, 0, 0, 0, 0, 0, 0 }, x, (v8hi){ 8, 0, 1, 2, 3, 4, 5, 6 }); +} diff --git a/gcc/testsuite/gcc.target/aarch64/fmov-3.c b/gcc/testsuite/gcc.target/aarch64/fmov-3.c new file mode 100644 index 00000000000..ebef6515722 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/fmov-3.c @@ -0,0 +1,144 @@ +/* { dg-do compile } */ +/* { dg-options "-O2" } */ +/* { dg-final { check-function-bodies "**" "" "" } } */ + +#pragma GCC target ("arch=armv8.2-a+fp16") + +typedef __fp16 v4hf __attribute__ ((vector_size (8))); +typedef __fp16 v8hf __attribute__ ((vector_size (16))); +typedef __bf16 v4bf __attribute__ ((vector_size (8))); +typedef __bf16 v8bf __attribute__ ((vector_size (16))); +typedef short v4hi __attribute__ ((vector_size (8))); +typedef short v8hi __attribute__ ((vector_size (16))); + +/* +** f_v4hf: +** fmov h0, h0 +** ret +*/ +v4hf +f_v4hf (v4hf x) +{ + return __builtin_shuffle (x, (v4hf){ 0, 0, 0, 0 }, (v4hi){ 0, 4, 5, 6 }); +} + +/* +** g_v4hf: +** fmov h0, h0 +** ret +*/ +v4hf +g_v4hf (v4hf x) +{ + return __builtin_shuffle ((v4hf){ 0, 0, 0, 0 }, x, (v4hi){ 4, 0, 1, 2 }); +} + +/* +** h_v4hf: +** fmov s0, s0 +** ret +*/ +v4hf +h_v4hf (v4hf x) +{ + return __builtin_shuffle (x, (v4hf){ 0, 0, 0, 0 }, (v4hi){ 0, 1, 4, 5 }); +} + +/* +** f_v8hf: +** fmov h0, h0 +** ret +*/ +v8hf +f_v8hf (v8hf x) +{ + return __builtin_shuffle (x, (v8hf){ 0, 0, 0, 0, 0, 0, 0, 0 }, (v8hi){ 0, 8, 9, 10, 11, 12, 13, 14 }); +} + +/* +** g_v8hf: +** fmov h0, h0 +** ret +*/ +v8hf +g_v8hf (v8hf x) +{ + return __builtin_shuffle ((v8hf){ 0, 0, 0, 0, 0, 0, 0, 0 }, x, (v8hi){ 8, 0, 1, 2, 3, 4, 5, 6 }); +} + +/* +** h_v8hf: +** fmov s0, s0 +** ret +*/ +v8hf +h_v8hf (v8hf x) +{ + return __builtin_shuffle (x, (v8hf){ 0, 0, 0, 0, 0, 0, 0, 0 }, (v8hi){ 0, 1, 8, 9, 10, 11, 12, 13 }); +} + +/* +** f_v4bf: +** fmov h0, h0 +** ret +*/ +v4bf +f_v4bf (v4bf x) +{ + return __builtin_shuffle (x, (v4bf){ 0, 0, 0, 0 }, (v4hi){ 0, 4, 5, 6 }); +} + +/* +** g_v4bf: +** fmov h0, h0 +** ret +*/ +v4bf +g_v4bf (v4bf x) +{ 
+ return __builtin_shuffle ((v4bf){ 0, 0, 0, 0 }, x, (v4hi){ 4, 0, 1, 2 }); +} + +/* +** h_v4bf: +** fmov s0, s0 +** ret +*/ +v4bf +h_v4bf (v4bf x) +{ + return __builtin_shuffle (x, (v4bf){ 0, 0, 0, 0 }, (v4hi){ 0, 1, 4, 5 }); +} + +/* +** f_v8bf: +** fmov h0, h0 +** ret +*/ +v8bf +f_v8bf (v8bf x) +{ + return __builtin_shuffle (x, (v8bf){ 0, 0, 0, 0, 0, 0, 0, 0 }, (v8hi){ 0, 8, 9, 10, 11, 12, 13, 14 }); +} + +/* +** g_v8bf: +** fmov h0, h0 +** ret +*/ +v8bf +g_v8bf (v8bf x) +{ + return __builtin_shuffle ((v8bf){ 0, 0, 0, 0, 0, 0, 0, 0 }, x, (v8hi){ 8, 0, 1, 2, 3, 4, 5, 6 }); +} + +/* +** h_v8bf: +** fmov s0, s0 +** ret +*/ +v8bf +h_v8bf (v8bf x) +{ + return __builtin_shuffle (x, (v8bf){ 0, 0, 0, 0, 0, 0, 0, 0 }, (v8hi){ 0, 1, 8, 9, 10, 11, 12, 13 }); +} diff --git a/gcc/testsuite/gcc.target/aarch64/fmov-be-1.c b/gcc/testsuite/gcc.target/aarch64/fmov-be-1.c new file mode 100644 index 00000000000..1f070dc4800 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/fmov-be-1.c @@ -0,0 +1,144 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -mbig-endian" } */ +/* { dg-final { check-function-bodies "**" "" "" } } */ + +typedef int v2si __attribute__ ((vector_size (8))); +typedef float v2sf __attribute__ ((vector_size (8))); +typedef short v4hi __attribute__ ((vector_size (8))); +typedef long v2di __attribute__ ((vector_size (16))); +typedef double v2df __attribute__ ((vector_size (16))); +typedef int v4si __attribute__ ((vector_size (16))); +typedef float v4sf __attribute__ ((vector_size (16))); +typedef short v8hi __attribute__ ((vector_size (16))); + +/* +** f_v4hi: +** fmov s0, s0 +** ret +*/ +v4hi +f_v4hi (v4hi x) +{ + return __builtin_shuffle (x, (v4hi){ 0, 0, 0, 0 }, (v4hi){ 4, 5, 2, 3 }); +} + +/* +** f_v8hi: +** fmov s0, s0 +** ret +*/ +v8hi +f_v8hi (v8hi x) +{ + return __builtin_shuffle (x, (v8hi){ 0, 0, 0, 0, 0, 0, 0, 0 }, (v8hi){ 8, 9, 10, 11, 12, 13, 6, 7 }); +} + +/* +** f_v2si: +** fmov s0, s0 +** ret +*/ +v2si +f_v2si (v2si x) +{ + return __builtin_shuffle 
(x, (v2si){ 0, 0 }, (v2si){ 3, 1 }); +} + +/* +** g_v2si: +** fmov s0, s0 +** ret +*/ +v2si +g_v2si (v2si x) +{ + return __builtin_shuffle ((v2si){ 0, 0 }, x, (v2si){ 0, 3 }); +} + +/* +** f_v2sf: +** fmov s0, s0 +** ret +*/ +v2sf +f_v2sf (v2sf x) +{ + return __builtin_shuffle (x, (v2sf){ 0, 0 }, (v2si){ 2, 1 }); +} + +/* +** f_v2di: +** fmov d0, d0 +** ret +*/ +v2di +f_v2di (v2di x) +{ + return __builtin_shuffle (x, (v2di){ 0, 0 }, (v2di){ 2, 1 }); +} + +/* +** g_v2di: +** fmov d0, d0 +** ret +*/ +v2di +g_v2di (v2di x) +{ + return __builtin_shuffle ((v2di){ 0, 0 }, x, (v2di){ 0, 3 }); +} + +/* +** f_v2df: +** fmov d0, d0 +** ret +*/ +v2df +f_v2df (v2df x) +{ + return __builtin_shuffle (x, (v2df){ 0, 0 }, (v2di){ 2, 1 }); +} + +/* +** f_v4si: +** fmov d0, d0 +** ret +*/ +v4si +f_v4si (v4si x) +{ + return __builtin_shuffle (x, (v4si){ 0, 0, 0, 0 }, (v4si){ 6, 7, 2, 3 }); +} + +/* +** g_v4si: +** fmov d0, d0 +** ret +*/ +v4si +g_v4si (v4si x) +{ + return __builtin_shuffle ((v4si){ 0, 0, 0, 0 }, x, (v4si){ 2, 3, 6, 7 }); +} + +/* +** h_v4si: +** fmov s0, s0 +** ret +*/ +v4si +h_v4si (v4si x) +{ + return __builtin_shuffle (x, (v4si){ 0, 0, 0, 0 }, (v4si){ 4, 5, 6, 3 }); +} + +/* +** f_v4sf: +** fmov d0, d0 +** ret +*/ +v4sf +f_v4sf (v4sf x) +{ + return __builtin_shuffle (x, (v4sf){ 0, 0, 0, 0 }, (v4si){ 6, 7, 2, 3 }); +} diff --git a/gcc/testsuite/gcc.target/aarch64/fmov-be-2.c b/gcc/testsuite/gcc.target/aarch64/fmov-be-2.c new file mode 100644 index 00000000000..a7764019994 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/fmov-be-2.c @@ -0,0 +1,52 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -mbig-endian" } */ +/* { dg-final { check-function-bodies "**" "" "" } } */ + +#pragma GCC target ("arch=armv8.2-a+fp16") + +typedef short v4hi __attribute__ ((vector_size (8))); +typedef short v8hi __attribute__ ((vector_size (16))); + +/* +** f_v4hi: +** fmov h0, h0 +** ret +*/ +v4hi +f_v4hi (v4hi x) +{ + return __builtin_shuffle (x, (v4hi){ 0, 0, 0, 0 }, (v4hi){ 4, 5, 
6, 3 }); +} + +/* +** g_v4hi: +** fmov h0, h0 +** ret +*/ +v4hi +g_v4hi (v4hi x) +{ + return __builtin_shuffle ((v4hi){ 0, 0, 0, 0 }, x, (v4hi){ 0, 1, 2, 7 }); +} + +/* +** f_v8hi: +** fmov h0, h0 +** ret +*/ +v8hi +f_v8hi (v8hi x) +{ + return __builtin_shuffle (x, (v8hi){ 0, 0, 0, 0, 0, 0, 0, 0 }, (v8hi){ 8, 9, 10, 11, 12, 13, 14, 7 }); +} + +/* +** g_v8hi: +** fmov h0, h0 +** ret +*/ +v8hi +g_v8hi (v8hi x) +{ + return __builtin_shuffle ((v8hi){ 0, 0, 0, 0, 0, 0, 0, 0 }, x, (v8hi){ 0, 1, 2, 3, 4, 5, 6, 15 }); +} diff --git a/gcc/testsuite/gcc.target/aarch64/fmov-be-3.c b/gcc/testsuite/gcc.target/aarch64/fmov-be-3.c new file mode 100644 index 00000000000..de9f927da0c --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/fmov-be-3.c @@ -0,0 +1,144 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -mbig-endian" } */ +/* { dg-final { check-function-bodies "**" "" "" } } */ + +#pragma GCC target ("arch=armv8.2-a+fp16") + +typedef __fp16 v4hf __attribute__ ((vector_size (8))); +typedef __fp16 v8hf __attribute__ ((vector_size (16))); +typedef __bf16 v4bf __attribute__ ((vector_size (8))); +typedef __bf16 v8bf __attribute__ ((vector_size (16))); +typedef short v4hi __attribute__ ((vector_size (8))); +typedef short v8hi __attribute__ ((vector_size (16))); + +/* +** f_v4hf: +** fmov h0, h0 +** ret +*/ +v4hf +f_v4hf (v4hf x) +{ + return __builtin_shuffle (x, (v4hf){ 0, 0, 0, 0 }, (v4hi){ 4, 5, 6, 3 }); +} + +/* +** g_v4hf: +** fmov h0, h0 +** ret +*/ +v4hf +g_v4hf (v4hf x) +{ + return __builtin_shuffle ((v4hf){ 0, 0, 0, 0 }, x, (v4hi){ 0, 1, 2, 7 }); +} + +/* +** h_v4hf: +** fmov s0, s0 +** ret +*/ +v4hf +h_v4hf (v4hf x) +{ + return __builtin_shuffle (x, (v4hf){ 0, 0, 0, 0 }, (v4hi){ 4, 5, 2, 3 }); +} + +/* +** f_v8hf: +** fmov h0, h0 +** ret +*/ +v8hf +f_v8hf (v8hf x) +{ + return __builtin_shuffle (x, (v8hf){ 0, 0, 0, 0, 0, 0, 0, 0 }, (v8hi){ 8, 9, 10, 11, 12, 13, 14, 7 }); +} + +/* +** g_v8hf: +** fmov h0, h0 +** ret +*/ +v8hf +g_v8hf (v8hf x) +{ + return __builtin_shuffle 
((v8hf){ 0, 0, 0, 0, 0, 0, 0, 0 }, x, (v8hi){ 0, 1, 2, 3, 4, 5, 6, 15 }); +} + +/* +** h_v8hf: +** fmov s0, s0 +** ret +*/ +v8hf +h_v8hf (v8hf x) +{ + return __builtin_shuffle (x, (v8hf){ 0, 0, 0, 0, 0, 0, 0, 0 }, (v8hi){ 8, 9, 10, 11, 12, 13, 6, 7 }); +} + +/* +** f_v4bf: +** fmov h0, h0 +** ret +*/ +v4bf +f_v4bf (v4bf x) +{ + return __builtin_shuffle (x, (v4bf){ 0, 0, 0, 0 }, (v4hi){ 4, 5, 6, 3 }); +} + +/* +** g_v4bf: +** fmov h0, h0 +** ret +*/ +v4bf +g_v4bf (v4bf x) +{ + return __builtin_shuffle ((v4bf){ 0, 0, 0, 0 }, x, (v4hi){ 0, 1, 2, 7 }); +} + +/* +** h_v4bf: +** fmov s0, s0 +** ret +*/ +v4bf +h_v4bf (v4bf x) +{ + return __builtin_shuffle (x, (v4bf){ 0, 0, 0, 0 }, (v4hi){ 4, 5, 2, 3 }); +} + +/* +** f_v8bf: +** fmov h0, h0 +** ret +*/ +v8bf +f_v8bf (v8bf x) +{ + return __builtin_shuffle (x, (v8bf){ 0, 0, 0, 0, 0, 0, 0, 0 }, (v8hi){ 8, 9, 10, 11, 12, 13, 14, 7 }); +} + +/* +** g_v8bf: +** fmov h0, h0 +** ret +*/ +v8bf +g_v8bf (v8bf x) +{ + return __builtin_shuffle ((v8bf){ 0, 0, 0, 0, 0, 0, 0, 0 }, x, (v8hi){ 0, 1, 2, 3, 4, 5, 6, 15 }); +} + +/* +** h_v8bf: +** fmov s0, s0 +** ret +*/ +v8bf +h_v8bf (v8bf x) +{ + return __builtin_shuffle (x, (v8bf){ 0, 0, 0, 0, 0, 0, 0, 0 }, (v8hi){ 8, 9, 10, 11, 12, 13, 6, 7 }); +} diff --git a/gcc/testsuite/gcc.target/aarch64/vec-set-zero.c b/gcc/testsuite/gcc.target/aarch64/vec-set-zero.c index b34b902cf27..025350500c6 100644 --- a/gcc/testsuite/gcc.target/aarch64/vec-set-zero.c +++ b/gcc/testsuite/gcc.target/aarch64/vec-set-zero.c @@ -28,8 +28,10 @@ FOO(float64x2_t) /* { dg-final { scan-assembler-times {ins\tv[0-9]+\.b\[1\], wzr} 2 { target aarch64_little_endian } } } */ /* { dg-final { scan-assembler-times {ins\tv[0-9]+\.h\[1\], wzr} 4 { target aarch64_little_endian } } } */ -/* { dg-final { scan-assembler-times {ins\tv[0-9]+\.s\[1\], wzr} 4 { target aarch64_little_endian } } } */ -/* { dg-final { scan-assembler-times {ins\tv[0-9]+\.d\[1\], xzr} 2 { target aarch64_little_endian } } } */ +/* { dg-final { 
scan-assembler-times {ins\tv[0-9]+\.s\[1\], wzr} 2 { target aarch64_little_endian } } } */ +/* { dg-final { scan-assembler-not {ins\tv[0-9]+\.d\[1\], xzr} { target aarch64_little_endian } } } */ +/* { dg-final { scan-assembler-times {fmov\ts0, s0} 2 { target aarch64_little_endian } } } */ +/* { dg-final { scan-assembler-times {fmov\td0, d0} 2 { target aarch64_little_endian } } } */ /* { dg-final { scan-assembler-times {ins\tv[0-9]+\.b\[6\], wzr} 1 { target aarch64_big_endian } } } */ /* { dg-final { scan-assembler-times {ins\tv[0-9]+\.b\[14\], wzr} 1 { target aarch64_big_endian } } } */ -- 2.17.1