https://gcc.gnu.org/g:226d5fd59dc8ff0ba8cb07926d9a0e9ac277bad8
commit r16-6381-g226d5fd59dc8ff0ba8cb07926d9a0e9ac277bad8
Author: Claudio Bantaloukas <[email protected]>
Date:   Wed Dec 24 11:41:26 2025 +0000

aarch64: add multi-vector floating-point adjust exponent intrinsics

This patch adds the following intrinsics (all __arm_streaming only),
along with asm tests for them:

- FSCALE (multiple and single vector)
  - svfloat16x2_t svscale[_single_f16_x2](svfloat16x2_t zd, svint16_t zm)
  - svfloat32x2_t svscale[_single_f32_x2](svfloat32x2_t zd, svint32_t zm)
  - svfloat64x2_t svscale[_single_f64_x2](svfloat64x2_t zd, svint64_t zm)
  - svfloat16x4_t svscale[_single_f16_x4](svfloat16x4_t zd, svint16_t zm)
  - svfloat32x4_t svscale[_single_f32_x4](svfloat32x4_t zd, svint32_t zm)
  - svfloat64x4_t svscale[_single_f64_x4](svfloat64x4_t zd, svint64_t zm)
- FSCALE (multiple vectors)
  - svfloat16x2_t svscale[_f16_x2](svfloat16x2_t zd, svint16x2_t zm)
  - svfloat32x2_t svscale[_f32_x2](svfloat32x2_t zd, svint32x2_t zm)
  - svfloat64x2_t svscale[_f64_x2](svfloat64x2_t zd, svint64x2_t zm)
  - svfloat16x4_t svscale[_f16_x4](svfloat16x4_t zd, svint16x4_t zm)
  - svfloat32x4_t svscale[_f32_x4](svfloat32x4_t zd, svint32x4_t zm)
  - svfloat64x4_t svscale[_f64_x4](svfloat64x4_t zd, svint64x4_t zm)

The test structure is based on the urshl tests, which treat their
arguments in a similar way.

gcc/
	* config/aarch64/aarch64-sve-builtins-base.cc (svscale_impl):
	Added new class for dealing with all svscale functions
	(including SVE).
	(svscale): Updated FUNCTION macro call to make use of the new class.
	* config/aarch64/aarch64-sve-builtins-sve2.def (svscale): Added new
	DEF_SVE_FUNCTION_GS call to enable recognition of the new variant.
	* config/aarch64/aarch64-sve2.md (@aarch64_sve_fscale<mode>): Added
	new define_insn.
	(@aarch64_sve_single_fscale<mode>): Likewise.
	* config/aarch64/iterators.md (SVE_Fx24_NOBF): Added new iterator,
	similar to SVE_Fx24 but without the bfloat16 modes.
	(SVE_Fx24): Updated to make use of SVE_Fx24_NOBF.
	(SVSCALE_SINGLE_INTARG): Added new mode_attr.
	(SVSCALE_INTARG): Likewise.

gcc/testsuite/
	* gcc.target/aarch64/sme2/acle-asm/scale_f16_x2.c: Added test file.
	* gcc.target/aarch64/sme2/acle-asm/scale_f16_x4.c: Likewise.
	* gcc.target/aarch64/sme2/acle-asm/scale_f32_x2.c: Added test file.
	* gcc.target/aarch64/sme2/acle-asm/scale_f32_x4.c: Likewise.
	* gcc.target/aarch64/sme2/acle-asm/scale_f64_x2.c: Added test file.
	* gcc.target/aarch64/sme2/acle-asm/scale_f64_x4.c: Likewise.
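As a usage illustration (not part of the commit): a minimal sketch of how the
new intrinsics might be called from streaming code, assuming <arm_sme.h> and a
toolchain with SME2 and FP8 enabled (the tests below select FP8 via
#pragma GCC target "+fp8").  The function names here are hypothetical; only
the svscale* intrinsics come from the list above.

/* Each result lane is the input lane with its exponent adjusted by the
   corresponding signed integer lane, i.e. scaled by a power of two.  */
#include <arm_sme.h>

svfloat32x2_t
scale_pair (svfloat32x2_t vals, svint32x2_t exps) __arm_streaming
{
  return svscale_f32_x2 (vals, exps);          /* multiple-vectors form */
}

svfloat32x2_t
scale_pair_uniform (svfloat32x2_t vals, svint32_t exps) __arm_streaming
{
  return svscale_single_f32_x2 (vals, exps);   /* multiple-and-single form */
}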
Diff:
---
 gcc/config/aarch64/aarch64-sve-builtins-base.cc  |  21 +-
 gcc/config/aarch64/aarch64-sve-builtins-sve2.def |   1 +
 gcc/config/aarch64/aarch64-sve2.md               |  28 +++
 gcc/config/aarch64/iterators.md                  |  24 ++-
 .../aarch64/sme2/acle-asm/scale_f16_x2.c         | 192 +++++++++++++++++
 .../aarch64/sme2/acle-asm/scale_f16_x4.c         | 229 +++++++++++++++++++++
 .../aarch64/sme2/acle-asm/scale_f32_x2.c         | 208 +++++++++++++++++++
 .../aarch64/sme2/acle-asm/scale_f32_x4.c         | 229 +++++++++++++++++++++
 .../aarch64/sme2/acle-asm/scale_f64_x2.c         | 208 +++++++++++++++++++
 .../aarch64/sme2/acle-asm/scale_f64_x4.c         | 229 +++++++++++++++++++++
 10 files changed, 1366 insertions(+), 3 deletions(-)

diff --git a/gcc/config/aarch64/aarch64-sve-builtins-base.cc b/gcc/config/aarch64/aarch64-sve-builtins-base.cc
index 622485effb38..ca6e16578e8b 100644
--- a/gcc/config/aarch64/aarch64-sve-builtins-base.cc
+++ b/gcc/config/aarch64/aarch64-sve-builtins-base.cc
@@ -3465,6 +3465,25 @@ public:
   unsigned int m_base;
 };
 
+class svscale_impl : public function_base
+{
+public:
+  rtx
+  expand (function_expander &e) const override
+  {
+    if (vectors_per_tuple (e) == 1)
+      return e.map_to_unspecs (-1, -1, UNSPEC_COND_FSCALE);
+    else
+      {
+        machine_mode mode = GET_MODE (e.args[0]);
+        insn_code code = (e.mode_suffix_id == MODE_single
+                          ? code_for_aarch64_sve_single_fscale (mode)
+                          : code_for_aarch64_sve_fscale (mode));
+        return e.use_exact_insn (code);
+      }
+  }
+};
+
 } /* end anonymous namespace */
 
 namespace aarch64_sve {
@@ -3706,7 +3725,7 @@ FUNCTION (svrintx, svrint_impl, (rint_optab, UNSPEC_COND_FRINTX))
 FUNCTION (svrintz, svrint_impl, (btrunc_optab, UNSPEC_COND_FRINTZ))
 FUNCTION (svrsqrte, unspec_based_function, (-1, UNSPEC_RSQRTE, UNSPEC_RSQRTE))
 FUNCTION (svrsqrts, unspec_based_function, (-1, -1, UNSPEC_RSQRTS))
-FUNCTION (svscale, unspec_based_function, (-1, -1, UNSPEC_COND_FSCALE))
+FUNCTION (svscale, svscale_impl,)
 FUNCTION (svsel, svsel_impl,)
 FUNCTION (svset2, svset_impl, (2))
 FUNCTION (svset3, svset_impl, (3))
diff --git a/gcc/config/aarch64/aarch64-sve-builtins-sve2.def b/gcc/config/aarch64/aarch64-sve-builtins-sve2.def
index 869e006ffdeb..bbee42343883 100644
--- a/gcc/config/aarch64/aarch64-sve-builtins-sve2.def
+++ b/gcc/config/aarch64/aarch64-sve-builtins-sve2.def
@@ -428,4 +428,5 @@ DEF_SVE_FUNCTION_GS_FPM (svcvt1, unary_convert, cvt_mf8, x2, none, set)
 DEF_SVE_FUNCTION_GS_FPM (svcvt2, unary_convert, cvt_mf8, x2, none, set)
 DEF_SVE_FUNCTION_GS_FPM (svcvtl1, unary_convert, cvt_mf8, x2, none, set)
 DEF_SVE_FUNCTION_GS_FPM (svcvtl2, unary_convert, cvt_mf8, x2, none, set)
+DEF_SVE_FUNCTION_GS (svscale, binary_int_opt_single_n, all_float, x24, none)
 #undef REQUIRED_EXTENSIONS
diff --git a/gcc/config/aarch64/aarch64-sve2.md b/gcc/config/aarch64/aarch64-sve2.md
index d1981b9b833d..407f1698c41a 100644
--- a/gcc/config/aarch64/aarch64-sve2.md
+++ b/gcc/config/aarch64/aarch64-sve2.md
@@ -58,6 +58,7 @@
 ;; ---- [INT] Saturating left shifts
 ;; ---- [FP] Non-widening bfloat16 arithmetic
 ;; ---- [FP] Clamp to minimum/maximum
+;; ---- [FP] Scaling by powers of two
 ;;
 ;; == Uniform ternary arithmnetic
 ;; ---- [INT] General ternary arithmetic that maps to unspecs
@@ -1481,6 +1482,33 @@
   [(set_attr "sve_type" "sve_fp_arith")]
 )
 
+;; -------------------------------------------------------------------------
+;; ---- [FP] Scaling by powers of two
+;; -------------------------------------------------------------------------
+;; Includes the multiple and single vector and multiple vectors forms of
+;; - FSCALE
+;; -------------------------------------------------------------------------
+
+(define_insn "@aarch64_sve_fscale<mode>"
+  [(set (match_operand:SVE_Fx24_NOBF 0 "register_operand" "=Uw<vector_count>")
+        (unspec:SVE_Fx24_NOBF
+          [(match_operand:SVE_Fx24_NOBF 1 "register_operand" "0")
+           (match_operand:<SVSCALE_INTARG> 2 "register_operand" "Uw<vector_count>")]
+          UNSPEC_FSCALE))]
+  "TARGET_STREAMING_SME2 && TARGET_FP8"
+  "fscale\t%0, %1, %2"
+)
+
+(define_insn "@aarch64_sve_single_fscale<mode>"
+  [(set (match_operand:SVE_Fx24_NOBF 0 "register_operand" "=Uw<vector_count>")
+        (unspec:SVE_Fx24_NOBF
+          [(match_operand:SVE_Fx24_NOBF 1 "register_operand" "0")
+           (match_operand:<SVSCALE_SINGLE_INTARG> 2 "register_operand" "x")]
+          UNSPEC_FSCALE))]
+  "TARGET_STREAMING_SME2 && TARGET_FP8"
+  "fscale\t%0, %1, %2.<Vetype>"
+)
+
 ;; =========================================================================
 ;; == Uniform ternary arithmnetic
 ;; =========================================================================
diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
index 7ec731d2d6b4..c8a54b80e3b2 100644
--- a/gcc/config/aarch64/iterators.md
+++ b/gcc/config/aarch64/iterators.md
@@ -735,10 +735,12 @@
 (define_mode_iterator SVE_Ix24 [VNx32QI VNx16HI VNx8SI VNx4DI
                                 VNx64QI VNx32HI VNx16SI VNx8DI])
 
+(define_mode_iterator SVE_Fx24_NOBF [VNx16HF VNx8SF VNx4DF
+                                     VNx32HF VNx16SF VNx8DF])
+
 (define_mode_iterator SVE_Fx24 [(VNx16BF "TARGET_SSVE_B16B16")
                                 (VNx32BF "TARGET_SSVE_B16B16")
-                                VNx16HF VNx8SF VNx4DF
-                                VNx32HF VNx16SF VNx8DF])
+                                SVE_Fx24_NOBF])
 
 (define_mode_iterator SVE_SFx24 [VNx8SF VNx16SF])
 
@@ -2790,6 +2792,24 @@
 (define_mode_attr LD1_EXTENDQ_MEM [(VNx4SI "VNx1SI") (VNx4SF "VNx1SI")
                                    (VNx2DI "VNx1DI") (VNx2DF "VNx1DI")])
 
+;; Maps the output type of svscale to the corresponding int vector type in the
+;; second argument.
+(define_mode_attr SVSCALE_SINGLE_INTARG [(VNx16HF "VNx8HI") ;; f16_x2 -> s16
+                                         (VNx32HF "VNx8HI") ;; f16_x4 -> s16
+                                         (VNx8SF "VNx4SI") ;; f32_x2 -> s32
+                                         (VNx16SF "VNx4SI") ;; f32_x4 -> s32
+                                         (VNx4DF "VNx2DI") ;; f64_x2 -> s64
+                                         (VNx8DF "VNx2DI") ;; f64_x4 -> s64
+])
+
+(define_mode_attr SVSCALE_INTARG [(VNx16HF "VNx16HI") ;; f16_x2 -> s16x2
+                                  (VNx32HF "VNx32HI") ;; f16_x4 -> s16x4
+                                  (VNx8SF "VNx8SI") ;; f32_x2 -> s32_x2
+                                  (VNx16SF "VNx16SI") ;; f32_x4 -> s32_x4
+                                  (VNx4DF "VNx4DI") ;; f64_x2 -> s64_x2
+                                  (VNx8DF "VNx8DI") ;; f64_x4 -> s64_x4
+])
+
 ;; -------------------------------------------------------------------
 ;; Code Iterators
 ;; -------------------------------------------------------------------
diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/scale_f16_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/scale_f16_x2.c
new file mode 100644
index 000000000000..4535a94c1bf3
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/scale_f16_x2.c
@@ -0,0 +1,192 @@
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+#pragma GCC target "+fp8"
+
+/*
+** scale_z0_z0_z4:
+** fscale {z0\.h - z1\.h}, {z0\.h - z1\.h}, {z4\.h - z5\.h}
+** ret
+*/
+TEST_DUAL_XN (scale_z0_z0_z4, svfloat16x2_t, svint16x2_t, z0,
+              svscale_f16_x2 (z0, z4),
+              svscale (z0, z4))
+
+/*
+** scale_z4_z4_z0:
+** fscale {z4\.h - z5\.h}, {z4\.h - z5\.h}, {z0\.h - z1\.h}
+** ret
+*/
+TEST_DUAL_XN (scale_z4_z4_z0, svint16x2_t, svfloat16x2_t, z4,
+              svscale_f16_x2 (z4, z0),
+              svscale (z4, z0))
+
+/*
+** scale_z18_z18_z4:
+** fscale {z18\.h - z19\.h}, {z18\.h - z19\.h}, {z4\.h - z5\.h}
+** ret
+*/
+TEST_DUAL_XN (scale_z18_z18_z4, svfloat16x2_t, svint16x2_t, z18,
+              svscale_f16_x2 (z18, z4),
+              svscale (z18, z4))
+
+/*
+** scale_z23_z23_z18:
+** mov [^\n]+
+** mov [^\n]+
+** fscale [^\n]+, {z18\.h - z19\.h}
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_DUAL_XN (scale_z23_z23_z18, svint16x2_t, svfloat16x2_t, z23,
+              svscale_f16_x2 (z23, z18),
+              svscale (z23, z18))
+
+
+/*
+** scale_z28_z28_z4:
+** fscale {z28\.h - z29\.h}, {z28\.h - z29\.h}, {z4\.h - z5\.h}
+** ret
+*/
+TEST_DUAL_XN (scale_z28_z28_z4, svfloat16x2_t, svint16x2_t, z28,
+              svscale_f16_x2 (z28, z4),
+              svscale (z28, z4))
+
+/*
+** scale_z4_z4_z18:
+** fscale {z4\.h - z5\.h}, {z4\.h - z5\.h}, {z18\.h - z19\.h}
+** ret
+*/
+TEST_DUAL_XN (scale_z4_z4_z18, svint16x2_t, svfloat16x2_t, z4,
+              svscale_f16_x2 (z4, z18),
+              svscale (z4, z18))
+
+/*
+** scale_z28_28_z23:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** fscale {z28\.h - z29\.h}, {z28\.h - z29\.h}, [^\n]+
+** |
+** fscale {z28\.h - z29\.h}, {z28\.h - z29\.h}, [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_DUAL_XN (scale_z28_28_z23, svfloat16x2_t, svint16x2_t, z28,
+              svscale_f16_x2 (z28, z23),
+              svscale (z28, z23))
+
+/*
+** scale_single_z24_z24_z0:
+** fscale {z24\.h - z25\.h}, {z24\.h - z25\.h}, z0\.h
+** ret
+*/
+TEST_XN_SINGLE (scale_single_z24_z24_z0, svfloat16x2_t, svint16_t, z24,
+                svscale_single_f16_x2 (z24, z0),
+                svscale (z24, z0))
+
+/*
+** scale_single_z24_z28_z0:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** fscale {z24\.h - z25\.h}, {z24\.h - z25\.h}, z0\.h
+** |
+** fscale {z28\.h - z29\.h}, {z28\.h - z29\.h}, z0\.h
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN_SINGLE (scale_single_z24_z28_z0, svfloat16x2_t, svint16_t, z24,
+                svscale_single_f16_x2 (z28, z0),
+                svscale (z28, z0))
+
+/*
+** scale_single_z24_z1_z0:
+** (
+** mov z24\.d, z1\.d
+** mov z25\.d, z2\.d
+** |
+** mov z25\.d, z2\.d
+** mov z24\.d, z1\.d
+** )
+** fscale {z24\.h - z25\.h}, {z24\.h - z25\.h}, z0\.h
+** ret
+*/
+TEST_XN_SINGLE (scale_single_z24_z1_z0, svfloat16x2_t, svint16_t, z24,
+                svscale_single_f16_x2 (z1, z0),
+                svscale (z1, z0))
+
+/*
+** scale_single_z1_z24_z0:
+** fscale {z24\.h - z25\.h}, {z24\.h - z25\.h}, z0\.h
+** (
+** mov z1\.d, z24\.d
+** mov z2\.d, z25\.d
+** |
+** mov z2\.d, z25\.d
+** mov z1\.d, z24\.d
+** )
+** ret
+*/
+TEST_XN_SINGLE (scale_single_z1_z24_z0, svfloat16x2_t, svint16_t, z1,
+                svscale_single_f16_x2 (z24, z0),
+                svscale (z24, z0))
+
+/*
+** scale_single_z1_z1_z0:
+** mov [^\n]+
+** mov [^\n]+
+** fscale ({z[0-9]+\.h - z[0-9]+\.h}), \1, z0\.h
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (scale_single_z1_z1_z0, svfloat16x2_t, svint16_t, z1,
+                svscale_single_f16_x2 (z1, z0),
+                svscale (z1, z0))
+
+/*
+** scale_single_z18_z18_z0:
+** fscale {z18\.h - z19\.h}, {z18\.h - z19\.h}, z0\.h
+** ret
+*/
+TEST_XN_SINGLE (scale_single_z18_z18_z0, svfloat16x2_t, svint16_t, z18,
+                svscale_single_f16_x2 (z18, z0),
+                svscale (z18, z0))
+
+/*
+** scale_single_awkward:
+** ...
+** fscale ({z[0-9]+\.h - z[0-9]+\.h}), \1, z[0-9]+\.h
+** ...
+** ret
+*/
+TEST_XN_SINGLE_AWKWARD (scale_single_awkward, svfloat16x2_t, svint16_t,
+                        z0_res = svscale_single_f16_x2 (z1, z0),
+                        z0_res = svscale (z1, z0))
+
+/*
+** scale_single_z0_z0_z15:
+** ...
+** fscale {z0\.h - z1\.h}, {z0\.h - z1\.h}, z15\.h
+** ...
+** ret
+*/
+TEST_XN_SINGLE_Z15 (scale_single_z0_z0_z15, svfloat16x2_t, svint16_t,
+                    z0 = svscale_single_f16_x2 (z0, z15),
+                    z0 = svscale (z0, z15))
+
+/*
+** scale_single_z24_z24_z16:
+** mov (z[0-7])\.d, z16\.d
+** fscale {z24\.h - z25\.h}, {z24\.h - z25\.h}, \1\.h
+** ret
+*/
+TEST_XN_SINGLE (scale_single_z24_z24_z16, svfloat16x2_t, svint16_t, z24,
+                svscale_single_f16_x2 (z24, z16),
+                svscale (z24, z16))
diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/scale_f16_x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/scale_f16_x4.c
new file mode 100644
index 000000000000..b3c5a482052f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/scale_f16_x4.c
@@ -0,0 +1,229 @@
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+#pragma GCC target "+fp8"
+
+/*
+** fscale_z0_z0_z4:
+** fscale {z0\.h - z3\.h}, {z0\.h - z3\.h}, {z4\.h - z7\.h}
+** ret
+*/
+TEST_DUAL_XN (fscale_z0_z0_z4, svfloat16x4_t, svint16x4_t, z0,
+              svscale_f16_x4 (z0, z4),
+              svscale (z0, z4))
+
+/*
+** fscale_z4_z4_z0:
+** fscale {z4\.h - z7\.h}, {z4\.h - z7\.h}, {z0\.h - z3\.h}
+** ret
+*/
+TEST_DUAL_XN (fscale_z4_z4_z0, svint16x4_t, svfloat16x4_t, z4,
+              svscale_f16_x4 (z4, z0),
+              svscale (z4, z0))
+
+/*
+** fscale_z18_z18_z4:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** fscale [^\n]+, {z4\.h - z7\.h}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_DUAL_XN (fscale_z18_z18_z4, svfloat16x4_t, svint16x4_t, z18,
+              svscale_f16_x4 (z18, z4),
+              svscale (z18, z4))
+
+/*
+** fscale_z23_z23_z28:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** fscale [^\n]+, {z28\.h - z31\.h}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_DUAL_XN (fscale_z23_z23_z28, svint16x4_t, svfloat16x4_t, z23,
+              svscale_f16_x4 (z23, z28),
+              svscale (z23, z28))
+
+/*
+** fscale_z28_z28_z4:
+** fscale {z28\.h - z31\.h}, {z28\.h - z31\.h}, {z4\.h - z7\.h}
+** ret
+*/
+TEST_DUAL_XN (fscale_z28_z28_z4, svfloat16x4_t, svint16x4_t, z28,
+              svscale_f16_x4 (z28, z4),
+              svscale (z28, z4))
+
+/*
+** fscale_z4_z4_z18:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** fscale {z4\.h - z7\.h}, {z4\.h - z7\.h}, [^\n]+
+** |
+** fscale {z4\.h - z7\.h}, {z4\.h - z7\.h}, [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_DUAL_XN (fscale_z4_z4_z18, svint16x4_t, svfloat16x4_t, z4,
+              svscale_f16_x4 (z4, z18),
+              svscale (z4, z18))
+
+/*
+** fscale_z0_z0_z23:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** fscale {z0\.h - z3\.h}, {z0\.h - z3\.h}, [^\n]+
+** |
+** fscale {z0\.h - z3\.h}, {z0\.h - z3\.h}, [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_DUAL_XN (fscale_z0_z0_z23, svfloat16x4_t, svint16x4_t, z0,
+              svscale_f16_x4 (z0, z23),
+              svscale (z0, z23))
+
+/*
+** fscale_single_z24_z24_z0:
+** fscale {z24\.h - z27\.h}, {z24\.h - z27\.h}, z0\.h
+** ret
+*/
+TEST_XN_SINGLE (fscale_single_z24_z24_z0, svfloat16x4_t, svint16_t, z24,
+                svscale_single_f16_x4 (z24, z0),
+                svscale (z24, z0))
+
+/*
+** fscale_single_z24_z28_z0:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** fscale {z24\.h - z27\.h}, {z24\.h - z27\.h}, z0\.h
+** |
+** fscale {z28\.h - z31\.h}, {z28\.h - z31\.h}, z0\.h
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN_SINGLE (fscale_single_z24_z28_z0, svfloat16x4_t, svint16_t, z24,
+                svscale_single_f16_x4 (z28, z0),
+                svscale (z28, z0))
+
+/*
+** fscale_single_z24_z1_z0:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** fscale {z24\.h - z27\.h}, {z24\.h - z27\.h}, z0\.h
+** ret
+*/
+TEST_XN_SINGLE (fscale_single_z24_z1_z0, svfloat16x4_t, svint16_t, z24,
+                svscale_single_f16_x4 (z1, z0),
+                svscale (z1, z0))
+
+/*
+** fscale_single_z1_z24_z0:
+** fscale {z24\.h - z27\.h}, {z24\.h - z27\.h}, z0\.h
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (fscale_single_z1_z24_z0, svfloat16x4_t, svint16_t, z1,
+                svscale_single_f16_x4 (z24, z0),
+                svscale (z24, z0))
+
+/*
+** fscale_single_z1_z1_z0:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** fscale ({z[0-9]+\.h - z[0-9]+\.h}), \1, z0\.h
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (fscale_single_z1_z1_z0, svfloat16x4_t, svint16_t, z1,
+                svscale_single_f16_x4 (z1, z0),
+                svscale (z1, z0))
+
+/*
+** fscale_single_z18_z18_z0:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** fscale [^\n]+, z0\.h
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (fscale_single_z18_z18_z0, svfloat16x4_t, svint16_t, z18,
+                svscale_single_f16_x4 (z18, z0),
+                svscale (z18, z0))
+
+/*
+** fscale_single_awkward:
+** ...
+** fscale ({z[0-9]+\.h - z[0-9]+\.h}), \1, z[0-9]+\.h
+** ...
+** ret
+*/
+TEST_XN_SINGLE_AWKWARD (fscale_single_awkward, svfloat16x4_t, svint16_t,
+                        z0_res = svscale_single_f16_x4 (z1, z0),
+                        z0_res = svscale (z1, z0))
+
+/*
+** fscale_single_z0_z0_z15:
+** ...
+** fscale {z0\.h - z3\.h}, {z0\.h - z3\.h}, z15\.h
+** ...
+** ret
+*/
+TEST_XN_SINGLE_Z15 (fscale_single_z0_z0_z15, svfloat16x4_t, svint16_t,
+                    z0 = svscale_single_f16_x4 (z0, z15),
+                    z0 = svscale (z0, z15))
+
+/*
+** fscale_single_z24_z24_z16:
+** mov (z[0-7])\.d, z16\.d
+** fscale {z24\.h - z27\.h}, {z24\.h - z27\.h}, \1\.h
+** ret
+*/
+TEST_XN_SINGLE (fscale_single_z24_z24_z16, svfloat16x4_t, svint16_t, z24,
+                svscale_single_f16_x4 (z24, z16),
+                svscale (z24, z16))
diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/scale_f32_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/scale_f32_x2.c
new file mode 100644
index 000000000000..2375ea605967
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/scale_f32_x2.c
@@ -0,0 +1,208 @@
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+#pragma GCC target "+fp8"
+
+/*
+** svscale_z0_z0_z4:
+** fscale {z0\.s - z1\.s}, {z0\.s - z1\.s}, {z4\.s - z5\.s}
+** ret
+*/
+TEST_DUAL_XN (svscale_z0_z0_z4, svfloat32x2_t, svint32x2_t, z0,
+              svscale_f32_x2 (z0, z4),
+              svscale (z0, z4))
+
+/*
+** svscale_z4_z4_z0:
+** fscale {z4\.s - z5\.s}, {z4\.s - z5\.s}, {z0\.s - z1\.s}
+** ret
+*/
+TEST_DUAL_XN (svscale_z4_z4_z0, svint32x2_t, svfloat32x2_t, z4,
+              svscale_f32_x2 (z4, z0),
+              svscale (z4, z0))
+
+/*
+** svscale_z0_z28_z4:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** fscale [^\n]+, {z4\.s - z5\.s}
+** |
+** fscale [^\n]+, {z4\.s - z5\.s}
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_DUAL_XN (svscale_z0_z28_z4, svfloat32x2_t, svint32x2_t, z0,
+              svscale_f32_x2 (z28, z4),
+              svscale (z28, z4))
+
+/*
+** svscale_z18_z18_z4:
+** fscale {z18\.s - z19\.s}, {z18\.s - z19\.s}, {z4\.s - z5\.s}
+** ret
+*/
+TEST_DUAL_XN (svscale_z18_z18_z4, svfloat32x2_t, svint32x2_t, z18,
+              svscale_f32_x2 (z18, z4),
+              svscale (z18, z4))
+
+/*
+** svscale_z23_z23_z18:
+** mov [^\n]+
+** mov [^\n]+
+** fscale [^\n]+, {z18\.s - z19\.s}
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_DUAL_XN (svscale_z23_z23_z18, svint32x2_t, svfloat32x2_t, z23,
+              svscale_f32_x2 (z23, z18),
+              svscale (z23, z18))
+
+/*
+** svscale_z28_z28_z4:
+** fscale {z28\.s - z29\.s}, {z28\.s - z29\.s}, {z4\.s - z5\.s}
+** ret
+*/
+TEST_DUAL_XN (svscale_z28_z28_z4, svfloat32x2_t, svint32x2_t, z28,
+              svscale_f32_x2 (z28, z4),
+              svscale (z28, z4))
+
+/*
+** svscale_z4_z4_z18:
+** fscale {z4\.s - z5\.s}, {z4\.s - z5\.s}, {z18\.s - z19\.s}
+** ret
+*/
+TEST_DUAL_XN (svscale_z4_z4_z18, svint32x2_t, svfloat32x2_t, z4,
+              svscale_f32_x2 (z4, z18),
+              svscale (z4, z18))
+
+/*
+** svscale_z28_z28_z23:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** fscale {z28\.s - z29\.s}, {z28\.s - z29\.s}, [^\n]+
+** |
+** fscale {z28\.s - z29\.s}, {z28\.s - z29\.s}, [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_DUAL_XN (svscale_z28_z28_z23, svfloat32x2_t, svint32x2_t, z28,
+              svscale_f32_x2 (z28, z23),
+              svscale (z28, z23))
+
+/*
+** svscale_single_z24_z24_z0:
+** fscale {z24\.s - z25\.s}, {z24\.s - z25\.s}, z0\.s
+** ret
+*/
+TEST_XN_SINGLE (svscale_single_z24_z24_z0, svfloat32x2_t, svint32_t, z24,
+                svscale_single_f32_x2 (z24, z0),
+                svscale (z24, z0))
+
+/*
+** svscale_single_z24_z28_z0:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** fscale {z24\.s - z25\.s}, {z24\.s - z25\.s}, z0\.s
+** |
+** fscale {z28\.s - z29\.s}, {z28\.s - z29\.s}, z0\.s
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN_SINGLE (svscale_single_z24_z28_z0, svfloat32x2_t, svint32_t, z24,
+                svscale_single_f32_x2 (z28, z0),
+                svscale (z28, z0))
+
+/*
+** svscale_single_z24_z1_z0:
+** (
+** mov z24\.d, z1\.d
+** mov z25\.d, z2\.d
+** |
+** mov z25\.d, z2\.d
+** mov z24\.d, z1\.d
+** )
+** fscale {z24\.s - z25\.s}, {z24\.s - z25\.s}, z0\.s
+** ret
+*/
+TEST_XN_SINGLE (svscale_single_z24_z1_z0, svfloat32x2_t, svint32_t, z24,
+                svscale_single_f32_x2 (z1, z0),
+                svscale (z1, z0))
+
+/*
+** svscale_single_z1_z24_z0:
+** fscale {z24\.s - z25\.s}, {z24\.s - z25\.s}, z0\.s
+** (
+** mov z1\.d, z24\.d
+** mov z2\.d, z25\.d
+** |
+** mov z2\.d, z25\.d
+** mov z1\.d, z24\.d
+** )
+** ret
+*/
+TEST_XN_SINGLE (svscale_single_z1_z24_z0, svfloat32x2_t, svint32_t, z1,
+                svscale_single_f32_x2 (z24, z0),
+                svscale (z24, z0))
+
+/*
+** svscale_single_z1_z1_z0:
+** mov [^\n]+
+** mov [^\n]+
+** fscale ({z[0-9]+\.s - z[0-9]+\.s}), \1, z0\.s
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (svscale_single_z1_z1_z0, svfloat32x2_t, svint32_t, z1,
+                svscale_single_f32_x2 (z1, z0),
+                svscale (z1, z0))
+
+/*
+** svscale_single_z18_z18_z0:
+** fscale {z18\.s - z19\.s}, {z18\.s - z19\.s}, z0\.s
+** ret
+*/
+TEST_XN_SINGLE (svscale_single_z18_z18_z0, svfloat32x2_t, svint32_t, z18,
+                svscale_single_f32_x2 (z18, z0),
+                svscale (z18, z0))
+
+/*
+** svscale_single_awkward:
+** ...
+** fscale ({z[0-9]+\.s - z[0-9]+\.s}), \1, z[0-9]+\.s
+** ...
+** ret
+*/
+TEST_XN_SINGLE_AWKWARD (svscale_single_awkward, svfloat32x2_t, svint32_t,
+                        z0_res = svscale_single_f32_x2 (z1, z0),
+                        z0_res = svscale (z1, z0))
+
+/*
+** svscale_single_z0_z0_z15:
+** ...
+** fscale {z0\.s - z1\.s}, {z0\.s - z1\.s}, z15\.s
+** ...
+** ret
+*/
+TEST_XN_SINGLE_Z15 (svscale_single_z0_z0_z15, svfloat32x2_t, svint32_t,
+                    z0 = svscale_single_f32_x2 (z0, z15),
+                    z0 = svscale (z0, z15))
+
+/*
+** svscale_single_z24_z24_z16:
+** mov (z[0-7])\.d, z16\.d
+** fscale {z24\.s - z25\.s}, {z24\.s - z25\.s}, \1\.s
+** ret
+*/
+TEST_XN_SINGLE (svscale_single_z24_z24_z16, svfloat32x2_t, svint32_t, z24,
+                svscale_single_f32_x2 (z24, z16),
+                svscale (z24, z16))
diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/scale_f32_x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/scale_f32_x4.c
new file mode 100644
index 000000000000..fc50de86ed37
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/scale_f32_x4.c
@@ -0,0 +1,229 @@
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+#pragma GCC target "+fp8"
+
+/*
+** svscale_z0_z0_z4:
+** fscale {z0\.s - z3\.s}, {z0\.s - z3\.s}, {z4\.s - z7\.s}
+** ret
+*/
+TEST_DUAL_XN (svscale_z0_z0_z4, svfloat32x4_t, svint32x4_t, z0,
+              svscale_f32_x4 (z0, z4),
+              svscale (z0, z4))
+
+/*
+** svscale_z4_z4_z0:
+** fscale {z4\.s - z7\.s}, {z4\.s - z7\.s}, {z0\.s - z3\.s}
+** ret
+*/
+TEST_DUAL_XN (svscale_z4_z4_z0, svint32x4_t, svfloat32x4_t, z4,
+              svscale_f32_x4 (z4, z0),
+              svscale (z4, z0))
+
+/*
+** svscale_z18_z18_z4:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** fscale [^\n]+, {z4\.s - z7\.s}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_DUAL_XN (svscale_z18_z18_z4, svfloat32x4_t, svint32x4_t, z18,
+              svscale_f32_x4 (z18, z4),
+              svscale (z18, z4))
+
+/*
+** svscale_z23_z23_z28:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** fscale [^\n]+, {z28\.s - z31\.s}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_DUAL_XN (svscale_z23_z23_z28, svint32x4_t, svfloat32x4_t, z23,
+              svscale_f32_x4 (z23, z28),
+              svscale (z23, z28))
+
+/*
+** svscale_z28_z28_z4:
+** fscale {z28\.s - z31\.s}, {z28\.s - z31\.s}, {z4\.s - z7\.s}
+** ret
+*/
+TEST_DUAL_XN (svscale_z28_z28_z4, svfloat32x4_t, svint32x4_t, z28,
+              svscale_f32_x4 (z28, z4),
+              svscale (z28, z4))
+
+/*
+** svscale_z4_z4_z18:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** fscale {z4\.s - z7\.s}, {z4\.s - z7\.s}, [^\n]+
+** |
+** fscale {z4\.s - z7\.s}, {z4\.s - z7\.s}, [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_DUAL_XN (svscale_z4_z4_z18, svint32x4_t, svfloat32x4_t, z4,
+              svscale_f32_x4 (z4, z18),
+              svscale (z4, z18))
+
+/*
+** svscale_z0_z0_z23:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** fscale {z0\.s - z3\.s}, {z0\.s - z3\.s}, [^\n]+
+** |
+** fscale {z0\.s - z3\.s}, {z0\.s - z3\.s}, [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_DUAL_XN (svscale_z0_z0_z23, svfloat32x4_t, svint32x4_t, z0,
+              svscale_f32_x4 (z0, z23),
+              svscale (z0, z23))
+
+/*
+** svscale_single_z24_z24_z0:
+** fscale {z24\.s - z27\.s}, {z24\.s - z27\.s}, z0\.s
+** ret
+*/
+TEST_XN_SINGLE (svscale_single_z24_z24_z0, svfloat32x4_t, svint32_t, z24,
+                svscale_single_f32_x4 (z24, z0),
+                svscale (z24, z0))
+
+/*
+** svscale_single_z24_z28_z0:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** fscale {z24\.s - z27\.s}, {z24\.s - z27\.s}, z0\.s
+** |
+** fscale {z28\.s - z31\.s}, {z28\.s - z31\.s}, z0\.s
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN_SINGLE (svscale_single_z24_z28_z0, svfloat32x4_t, svint32_t, z24,
+                svscale_single_f32_x4 (z28, z0),
+                svscale (z28, z0))
+
+/*
+** svscale_single_z24_z1_z0:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** fscale {z24\.s - z27\.s}, {z24\.s - z27\.s}, z0\.s
+** ret
+*/
+TEST_XN_SINGLE (svscale_single_z24_z1_z0, svfloat32x4_t, svint32_t, z24,
+                svscale_single_f32_x4 (z1, z0),
+                svscale (z1, z0))
+
+/*
+** svscale_single_z1_z24_z0:
+** fscale {z24\.s - z27\.s}, {z24\.s - z27\.s}, z0\.s
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (svscale_single_z1_z24_z0, svfloat32x4_t, svint32_t, z1,
+                svscale_single_f32_x4 (z24, z0),
+                svscale (z24, z0))
+
+/*
+** svscale_single_z1_z1_z0:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** fscale ({z[0-9]+\.s - z[0-9]+\.s}), \1, z0\.s
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (svscale_single_z1_z1_z0, svfloat32x4_t, svint32_t, z1,
+                svscale_single_f32_x4 (z1, z0),
+                svscale (z1, z0))
+
+/*
+** svscale_single_z18_z18_z0:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** fscale [^\n]+, z0\.s
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (svscale_single_z18_z18_z0, svfloat32x4_t, svint32_t, z18,
+                svscale_single_f32_x4 (z18, z0),
+                svscale (z18, z0))
+
+/*
+** svscale_single_awkward:
+** ...
+** fscale ({z[0-9]+\.s - z[0-9]+\.s}), \1, z[0-9]+\.s
+** ...
+** ret
+*/
+TEST_XN_SINGLE_AWKWARD (svscale_single_awkward, svfloat32x4_t, svint32_t,
+                        z0_res = svscale_single_f32_x4 (z1, z0),
+                        z0_res = svscale (z1, z0))
+
+/*
+** svscale_single_z0_z0_z15:
+** ...
+** fscale {z0\.s - z3\.s}, {z0\.s - z3\.s}, z15\.s
+** ...
+** ret
+*/
+TEST_XN_SINGLE_Z15 (svscale_single_z0_z0_z15, svfloat32x4_t, svint32_t,
+                    z0 = svscale_single_f32_x4 (z0, z15),
+                    z0 = svscale (z0, z15))
+
+/*
+** svscale_single_z24_z24_z16:
+** mov (z[0-7])\.d, z16\.d
+** fscale {z24\.s - z27\.s}, {z24\.s - z27\.s}, \1\.s
+** ret
+*/
+TEST_XN_SINGLE (svscale_single_z24_z24_z16, svfloat32x4_t, svint32_t, z24,
+                svscale_single_f32_x4 (z24, z16),
+                svscale (z24, z16))
diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/scale_f64_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/scale_f64_x2.c
new file mode 100644
index 000000000000..cedd918ed358
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/scale_f64_x2.c
@@ -0,0 +1,208 @@
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+#pragma GCC target "+fp8"
+
+/*
+** svscale_z0_z0_z4:
+** fscale {z0\.d - z1\.d}, {z0\.d - z1\.d}, {z4\.d - z5\.d}
+** ret
+*/
+TEST_DUAL_XN (svscale_z0_z0_z4, svfloat64x2_t, svint64x2_t, z0,
+              svscale_f64_x2 (z0, z4),
+              svscale (z0, z4))
+
+/*
+** svscale_z4_z4_z0:
+** fscale {z4\.d - z5\.d}, {z4\.d - z5\.d}, {z0\.d - z1\.d}
+** ret
+*/
+TEST_DUAL_XN (svscale_z4_z4_z0, svint64x2_t, svfloat64x2_t, z4,
+              svscale_f64_x2 (z4, z0),
+              svscale (z4, z0))
+
+/*
+** svscale_z0_z28_z4:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** fscale [^\n]+, {z4\.d - z5\.d}
+** |
+** fscale [^\n]+, {z4\.d - z5\.d}
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_DUAL_XN (svscale_z0_z28_z4, svfloat64x2_t, svint64x2_t, z0,
+              svscale_f64_x2 (z28, z4),
+              svscale (z28, z4))
+
+/*
+** svscale_z18_z18_z4:
+** fscale {z18\.d - z19\.d}, {z18\.d - z19\.d}, {z4\.d - z5\.d}
+** ret
+*/
+TEST_DUAL_XN (svscale_z18_z18_z4, svfloat64x2_t, svint64x2_t, z18,
+              svscale_f64_x2 (z18, z4),
+              svscale (z18, z4))
+
+/*
+** svscale_z23_z23_z18:
+** mov [^\n]+
+** mov [^\n]+
+** fscale [^\n]+, {z18\.d - z19\.d}
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_DUAL_XN (svscale_z23_z23_z18, svint64x2_t, svfloat64x2_t, z23,
+              svscale_f64_x2 (z23, z18),
+              svscale (z23, z18))
+
+/*
+** svscale_z28_z28_z4:
+** fscale {z28\.d - z29\.d}, {z28\.d - z29\.d}, {z4\.d - z5\.d}
+** ret
+*/
+TEST_DUAL_XN (svscale_z28_z28_z4, svfloat64x2_t, svint64x2_t, z28,
+              svscale_f64_x2 (z28, z4),
+              svscale (z28, z4))
+
+/*
+** svscale_z4_z4_z18:
+** fscale {z4\.d - z5\.d}, {z4\.d - z5\.d}, {z18\.d - z19\.d}
+** ret
+*/
+TEST_DUAL_XN (svscale_z4_z4_z18, svint64x2_t, svfloat64x2_t, z4,
+              svscale_f64_x2 (z4, z18),
+              svscale (z4, z18))
+
+/*
+** svscale_z28_z28_z23:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** fscale {z28\.d - z29\.d}, {z28\.d - z29\.d}, [^\n]+
+** |
+** fscale {z28\.d - z29\.d}, {z28\.d - z29\.d}, [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_DUAL_XN (svscale_z28_z28_z23, svfloat64x2_t, svint64x2_t, z28,
+              svscale_f64_x2 (z28, z23),
+              svscale (z28, z23))
+
+/*
+** svscale_single_z24_z24_z0:
+** fscale {z24\.d - z25\.d}, {z24\.d - z25\.d}, z0\.d
+** ret
+*/
+TEST_XN_SINGLE (svscale_single_z24_z24_z0, svfloat64x2_t, svint64_t, z24,
+                svscale_single_f64_x2 (z24, z0),
+                svscale (z24, z0))
+
+/*
+** svscale_single_z24_z28_z0:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** fscale {z24\.d - z25\.d}, {z24\.d - z25\.d}, z0\.d
+** |
+** fscale {z28\.d - z29\.d}, {z28\.d - z29\.d}, z0\.d
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN_SINGLE (svscale_single_z24_z28_z0, svfloat64x2_t, svint64_t, z24,
+                svscale_single_f64_x2 (z28, z0),
+                svscale (z28, z0))
+
+/*
+** svscale_single_z24_z1_z0:
+** (
+** mov z24\.d, z1\.d
+** mov z25\.d, z2\.d
+** |
+** mov z25\.d, z2\.d
+** mov z24\.d, z1\.d
+** )
+** fscale {z24\.d - z25\.d}, {z24\.d - z25\.d}, z0\.d
+** ret
+*/
+TEST_XN_SINGLE (svscale_single_z24_z1_z0, svfloat64x2_t, svint64_t, z24,
+                svscale_single_f64_x2 (z1, z0),
+                svscale (z1, z0))
+
+/*
+** svscale_single_z1_z24_z0:
+** fscale {z24\.d - z25\.d}, {z24\.d - z25\.d}, z0\.d
+** (
+** mov z1\.d, z24\.d
+** mov z2\.d, z25\.d
+** |
+** mov z2\.d, z25\.d
+** mov z1\.d, z24\.d
+** )
+** ret
+*/
+TEST_XN_SINGLE (svscale_single_z1_z24_z0, svfloat64x2_t, svint64_t, z1,
+                svscale_single_f64_x2 (z24, z0),
+                svscale (z24, z0))
+
+/*
+** svscale_single_z1_z1_z0:
+** mov [^\n]+
+** mov [^\n]+
+** fscale ({z[0-9]+\.d - z[0-9]+\.d}), \1, z0\.d
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (svscale_single_z1_z1_z0, svfloat64x2_t, svint64_t, z1,
+                svscale_single_f64_x2 (z1, z0),
+                svscale (z1, z0))
+
+/*
+** svscale_single_z18_z18_z0:
+** fscale {z18\.d - z19\.d}, {z18\.d - z19\.d}, z0\.d
+** ret
+*/
+TEST_XN_SINGLE (svscale_single_z18_z18_z0, svfloat64x2_t, svint64_t, z18,
+                svscale_single_f64_x2 (z18, z0),
+                svscale (z18, z0))
+
+/*
+** svscale_single_awkward:
+** ...
+** fscale ({z[0-9]+\.d - z[0-9]+\.d}), \1, z[0-9]+\.d
+** ...
+** ret
+*/
+TEST_XN_SINGLE_AWKWARD (svscale_single_awkward, svfloat64x2_t, svint64_t,
+                        z0_res = svscale_single_f64_x2 (z1, z0),
+                        z0_res = svscale (z1, z0))
+
+/*
+** svscale_single_z0_z0_z15:
+** ...
+** fscale {z0\.d - z1\.d}, {z0\.d - z1\.d}, z15\.d
+** ...
+** ret
+*/
+TEST_XN_SINGLE_Z15 (svscale_single_z0_z0_z15, svfloat64x2_t, svint64_t,
+                    z0 = svscale_single_f64_x2 (z0, z15),
+                    z0 = svscale (z0, z15))
+
+/*
+** svscale_single_z24_z24_z16:
+** mov (z[0-7])\.d, z16\.d
+** fscale {z24\.d - z25\.d}, {z24\.d - z25\.d}, \1\.d
+** ret
+*/
+TEST_XN_SINGLE (svscale_single_z24_z24_z16, svfloat64x2_t, svint64_t, z24,
+                svscale_single_f64_x2 (z24, z16),
+                svscale (z24, z16))
diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/scale_f64_x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/scale_f64_x4.c
new file mode 100644
index 000000000000..13eeb1a2e024
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/scale_f64_x4.c
@@ -0,0 +1,229 @@
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+#pragma GCC target "+fp8"
+
+/*
+** svscale_z0_z0_z4:
+** fscale {z0\.d - z3\.d}, {z0\.d - z3\.d}, {z4\.d - z7\.d}
+** ret
+*/
+TEST_DUAL_XN (svscale_z0_z0_z4, svfloat64x4_t, svint64x4_t, z0,
+              svscale_f64_x4 (z0, z4),
+              svscale (z0, z4))
+
+/*
+** svscale_z4_z4_z0:
+** fscale {z4\.d - z7\.d}, {z4\.d - z7\.d}, {z0\.d - z3\.d}
+** ret
+*/
+TEST_DUAL_XN (svscale_z4_z4_z0, svint64x4_t, svfloat64x4_t, z4,
+              svscale_f64_x4 (z4, z0),
+              svscale (z4, z0))
+
+/*
+** svscale_z18_z18_z4:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** fscale [^\n]+, {z4\.d - z7\.d}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_DUAL_XN (svscale_z18_z18_z4, svfloat64x4_t, svint64x4_t, z18,
+              svscale_f64_x4 (z18, z4),
+              svscale (z18, z4))
+
+/*
+** svscale_z23_z23_z28:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** fscale [^\n]+, {z28\.d - z31\.d}
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_DUAL_XN (svscale_z23_z23_z28, svint64x4_t, svfloat64x4_t, z23,
+              svscale_f64_x4 (z23, z28),
+              svscale (z23, z28))
+
+/*
+** svscale_z28_z28_z4:
+** fscale {z28\.d - z31\.d}, {z28\.d - z31\.d}, {z4\.d - z7\.d}
+** ret
+*/
+TEST_DUAL_XN (svscale_z28_z28_z4, svfloat64x4_t, svint64x4_t, z28,
+              svscale_f64_x4 (z28, z4),
+              svscale (z28, z4))
+
+/*
+** svscale_z4_z4_z18:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** fscale {z4\.d - z7\.d}, {z4\.d - z7\.d}, [^\n]+
+** |
+** fscale {z4\.d - z7\.d}, {z4\.d - z7\.d}, [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_DUAL_XN (svscale_z4_z4_z18, svint64x4_t, svfloat64x4_t, z4,
+              svscale_f64_x4 (z4, z18),
+              svscale (z4, z18))
+
+/*
+** svscale_z0_z0_z23:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** fscale {z0\.d - z3\.d}, {z0\.d - z3\.d}, [^\n]+
+** |
+** fscale {z0\.d - z3\.d}, {z0\.d - z3\.d}, [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_DUAL_XN (svscale_z0_z0_z23, svfloat64x4_t, svint64x4_t, z0,
+              svscale_f64_x4 (z0, z23),
+              svscale (z0, z23))
+
+/*
+** svscale_single_z24_z24_z0:
+** fscale {z24\.d - z27\.d}, {z24\.d - z27\.d}, z0\.d
+** ret
+*/
+TEST_XN_SINGLE (svscale_single_z24_z24_z0, svfloat64x4_t, svint64_t, z24,
+                svscale_single_f64_x4 (z24, z0),
+                svscale (z24, z0))
+
+/*
+** svscale_single_z24_z28_z0:
+** (
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** fscale {z24\.d - z27\.d}, {z24\.d - z27\.d}, z0\.d
+** |
+** fscale {z28\.d - z31\.d}, {z28\.d - z31\.d}, z0\.d
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** )
+** ret
+*/
+TEST_XN_SINGLE (svscale_single_z24_z28_z0, svfloat64x4_t, svint64_t, z24,
+                svscale_single_f64_x4 (z28, z0),
+                svscale (z28, z0))
+
+/*
+** svscale_single_z24_z1_z0:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** fscale {z24\.d - z27\.d}, {z24\.d - z27\.d}, z0\.d
+** ret
+*/
+TEST_XN_SINGLE (svscale_single_z24_z1_z0, svfloat64x4_t, svint64_t, z24,
+                svscale_single_f64_x4 (z1, z0),
+                svscale (z1, z0))
+
+/*
+** svscale_single_z1_z24_z0:
+** fscale {z24\.d - z27\.d}, {z24\.d - z27\.d}, z0\.d
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (svscale_single_z1_z24_z0, svfloat64x4_t, svint64_t, z1,
+                svscale_single_f64_x4 (z24, z0),
+                svscale (z24, z0))
+
+/*
+** svscale_single_z1_z1_z0:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** fscale ({z[0-9]+\.d - z[0-9]+\.d}), \1, z0\.d
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (svscale_single_z1_z1_z0, svfloat64x4_t, svint64_t, z1,
+                svscale_single_f64_x4 (z1, z0),
+                svscale (z1, z0))
+
+/*
+** svscale_single_z18_z18_z0:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** fscale [^\n]+, z0\.d
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_XN_SINGLE (svscale_single_z18_z18_z0, svfloat64x4_t, svint64_t, z18,
+                svscale_single_f64_x4 (z18, z0),
+                svscale (z18, z0))
+
+/*
+** svscale_single_awkward:
+** ...
+** fscale ({z[0-9]+\.d - z[0-9]+\.d}), \1, z[0-9]+\.d
+** ...
+** ret
+*/
+TEST_XN_SINGLE_AWKWARD (svscale_single_awkward, svfloat64x4_t, svint64_t,
+                        z0_res = svscale_single_f64_x4 (z1, z0),
+                        z0_res = svscale (z1, z0))
+
+/*
+** svscale_single_z0_z0_z15:
+** ...
+** fscale {z0\.d - z3\.d}, {z0\.d - z3\.d}, z15\.d
+** ...
+** ret
+*/
+TEST_XN_SINGLE_Z15 (svscale_single_z0_z0_z15, svfloat64x4_t, svint64_t,
+                    z0 = svscale_single_f64_x4 (z0, z15),
+                    z0 = svscale (z0, z15))
+
+/*
+** svscale_single_z24_z24_z16:
+** mov (z[0-7])\.d, z16\.d
+** fscale {z24\.d - z27\.d}, {z24\.d - z27\.d}, \1\.d
+** ret
+*/
+TEST_XN_SINGLE (svscale_single_z24_z24_z16, svfloat64x4_t, svint64_t, z24,
+                svscale_single_f64_x4 (z24, z16),
+                svscale (z24, z16))
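For reference on the "adjust exponent" / "scaling by powers of two" naming
used above, here is a rough scalar model of what one FSCALE lane computes
(my illustration, not part of the patch, and it ignores any architected
edge-case behaviour of the instruction):

/* Per-lane model: adjust the exponent of X by N, i.e. compute X * 2^N,
   as ldexpf does for scalars.  */
#include <math.h>

static inline float
fscale_lane_model (float x, int n)
{
  return ldexpf (x, n);
}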
