https://gcc.gnu.org/g:538204079b2fc9145e0cae61aacda493e1037327
commit r15-5788-g538204079b2fc9145e0cae61aacda493e1037327 Author: Claudio Bantaloukas <claudio.bantalou...@arm.com> Date: Fri Nov 29 12:52:44 2024 +0000 aarch64: add SVE2 FP8 multiply accumulate intrinsics This patch adds support for the following intrinsics: - svmlalb[_f16_mf8]_fpm - svmlalb[_n_f16_mf8]_fpm - svmlalt[_f16_mf8]_fpm - svmlalt[_n_f16_mf8]_fpm - svmlalb_lane[_f16_mf8]_fpm - svmlalt_lane[_f16_mf8]_fpm - svmlallbb[_f32_mf8]_fpm - svmlallbb[_n_f32_mf8]_fpm - svmlallbt[_f32_mf8]_fpm - svmlallbt[_n_f32_mf8]_fpm - svmlalltb[_f32_mf8]_fpm - svmlalltb[_n_f32_mf8]_fpm - svmlalltt[_f32_mf8]_fpm - svmlalltt[_n_f32_mf8]_fpm - svmlallbb_lane[_f32_mf8]_fpm - svmlallbt_lane[_f32_mf8]_fpm - svmlalltb_lane[_f32_mf8]_fpm - svmlalltt_lane[_f32_mf8]_fpm These are available under a combination of the FP8FMA and SVE2 features. Alternatively under the SSVE_FP8FMA feature under streaming mode. gcc/ * config/aarch64/aarch64-option-extensions.def (fp8fma, ssve-fp8fma): Add new options. * config/aarch64/aarch64-sve-builtins-functions.h (unspec_based_function_base): Add unspec_for_mfp8. (unspec_for): Return unspec_for_mfp8 on fpm-using cases. (sme_1mode_function): Fix call to parent ctor. (sme_2mode_function_t): Likewise. (unspec_based_mla_function, unspec_based_mla_lane_function): Handle fpm-using cases. * config/aarch64/aarch64-sve-builtins-shapes.cc (parse_element_type): Treat M as TYPE_SUFFIX_mf8 (ternary_mfloat8_lane_def): Add new class. (ternary_mfloat8_opt_n_def): Likewise. (ternary_mfloat8_lane): Add new shape. (ternary_mfloat8_opt_n): Likewise. * config/aarch64/aarch64-sve-builtins-shapes.h (ternary_mfloat8_lane, ternary_mfloat8_opt_n): Declare. * config/aarch64/aarch64-sve-builtins-sve2.cc (svmlalb_lane, svmlalb, svmlalt_lane, svmlalt): Update definitions with mfloat8_t unspec in ctor. (svmlallbb_lane, svmlallbb, svmlallbt_lane, svmlallbt, svmlalltb_lane, svmlalltb, svmlalltt_lane, svmlalltt, svmlal_impl): Add new FUNCTIONs. 
(svqrshr, svqrshrn, svqrshru, svqrshrun): Update definitions with nop mfloat8 unspec in ctor. * config/aarch64/aarch64-sve-builtins-sve2.def (svmlalb, svmlalt, svmlalb_lane, svmlalt_lane, svmlallbb, svmlallbt, svmlalltb, svmlalltt, svmlalltt_lane, svmlallbb_lane, svmlallbt_lane, svmlalltb_lane): Add new DEF_SVE_FUNCTION_GS_FPMs. * config/aarch64/aarch64-sve-builtins-sve2.h (svmlallbb_lane, svmlallbb, svmlallbt_lane, svmlallbt, svmlalltb_lane, svmlalltb, svmlalltt_lane, svmlalltt): Declare. * config/aarch64/aarch64-sve-builtins.cc (TYPES_h_float_mf8, TYPES_s_float_mf8): Add new types. (h_float_mf8, s_float_mf8): Add new SVE_TYPES_ARRAY. * config/aarch64/aarch64-sve2.md (@aarch64_sve_add_<sve2_fp8_fma_op_vnx8hf><mode>): Add new. (@aarch64_sve_add_<sve2_fp8_fma_op_vnx4sf><mode>): Add new. (@aarch64_sve_add_lane_<sve2_fp8_fma_op_vnx8hf><mode>): Likewise. (@aarch64_sve_add_lane_<sve2_fp8_fma_op_vnx4sf><mode>): Likewise. * config/aarch64/aarch64.h (TARGET_FP8FMA, TARGET_SSVE_FP8FMA): Likewise. * config/aarch64/iterators.md (VNx8HF_ONLY): Add new. (UNSPEC_FMLALB_FP8, UNSPEC_FMLALLBB_FP8, UNSPEC_FMLALLBT_FP8, UNSPEC_FMLALLTB_FP8, UNSPEC_FMLALLTT_FP8, UNSPEC_FMLALT_FP8): Likewise. (SVE2_FP8_TERNARY_VNX8HF, SVE2_FP8_TERNARY_VNX4SF): Likewise. (SVE2_FP8_TERNARY_LANE_VNX8HF, SVE2_FP8_TERNARY_LANE_VNX4SF): Likewise. (sve2_fp8_fma_op_vnx8hf, sve2_fp8_fma_op_vnx4sf): Likewise. * doc/invoke.texi: Document fp8fma and ssve-fp8fma extensions. gcc/testsuite/ * gcc.target/aarch64/sve/acle/asm/test_sve_acle.h (TEST_DUAL_Z_REV, TEST_DUAL_LANE_REG, TEST_DUAL_ZD): Add fpm0 argument. * gcc.target/aarch64/sve/acle/general-c/ternary_mfloat8_opt_n_1.c: Add new shape test. * gcc.target/aarch64/sve/acle/general-c/ternary_mfloat8_lane_1.c: Likewise. * gcc.target/aarch64/sve2/acle/asm/mlalb_lane_mf8.c: Add new test. * gcc.target/aarch64/sve2/acle/asm/mlalb_mf8.c: Likewise. * gcc.target/aarch64/sve2/acle/asm/mlallbb_lane_mf8.c: Likewise. * gcc.target/aarch64/sve2/acle/asm/mlallbb_mf8.c: Likewise. 
* gcc.target/aarch64/sve2/acle/asm/mlallbt_lane_mf8.c: Likewise. * gcc.target/aarch64/sve2/acle/asm/mlallbt_mf8.c: Likewise. * gcc.target/aarch64/sve2/acle/asm/mlalltb_lane_mf8.c: Likewise. * gcc.target/aarch64/sve2/acle/asm/mlalltb_mf8.c: Likewise. * gcc.target/aarch64/sve2/acle/asm/mlalltt_lane_mf8.c: Likewise. * gcc.target/aarch64/sve2/acle/asm/mlalltt_mf8.c: Likewise. * gcc.target/aarch64/sve2/acle/asm/mlalt_lane_mf8.c: Likewise. * gcc.target/aarch64/sve2/acle/asm/mlalt_mf8.c: Likewise. * lib/target-supports.exp: Add check_effective_target for fp8fma and ssve-fp8fma Diff: --- gcc/config/aarch64/aarch64-option-extensions.def | 4 + .../aarch64/aarch64-sve-builtins-functions.h | 16 +++- gcc/config/aarch64/aarch64-sve-builtins-shapes.cc | 78 +++++++++++++++++++ gcc/config/aarch64/aarch64-sve-builtins-shapes.h | 2 + gcc/config/aarch64/aarch64-sve-builtins-sve2.cc | 46 +++++++---- gcc/config/aarch64/aarch64-sve-builtins-sve2.def | 17 ++++ gcc/config/aarch64/aarch64-sve-builtins-sve2.h | 8 ++ gcc/config/aarch64/aarch64-sve-builtins.cc | 10 +++ gcc/config/aarch64/aarch64-sve2.md | 81 +++++++++++++++++++ gcc/config/aarch64/aarch64.h | 9 +++ gcc/config/aarch64/iterators.md | 37 +++++++++ gcc/doc/invoke.texi | 5 ++ .../aarch64/sve/acle/asm/test_sve_acle.h | 6 +- .../sve/acle/general-c/ternary_mfloat8_lane_1.c | 84 ++++++++++++++++++++ .../sve/acle/general-c/ternary_mfloat8_opt_n_1.c | 60 ++++++++++++++ .../aarch64/sve2/acle/asm/mlalb_lane_mf8.c | 91 ++++++++++++++++++++++ .../gcc.target/aarch64/sve2/acle/asm/mlalb_mf8.c | 78 +++++++++++++++++++ .../aarch64/sve2/acle/asm/mlallbb_lane_mf8.c | 91 ++++++++++++++++++++++ .../gcc.target/aarch64/sve2/acle/asm/mlallbb_mf8.c | 78 +++++++++++++++++++ .../aarch64/sve2/acle/asm/mlallbt_lane_mf8.c | 91 ++++++++++++++++++++++ .../gcc.target/aarch64/sve2/acle/asm/mlallbt_mf8.c | 78 +++++++++++++++++++ .../aarch64/sve2/acle/asm/mlalltb_lane_mf8.c | 91 ++++++++++++++++++++++ .../gcc.target/aarch64/sve2/acle/asm/mlalltb_mf8.c | 78 
+++++++++++++++++++ .../aarch64/sve2/acle/asm/mlalltt_lane_mf8.c | 91 ++++++++++++++++++++++ .../gcc.target/aarch64/sve2/acle/asm/mlalltt_mf8.c | 78 +++++++++++++++++++ .../aarch64/sve2/acle/asm/mlalt_lane_mf8.c | 91 ++++++++++++++++++++++ .../gcc.target/aarch64/sve2/acle/asm/mlalt_mf8.c | 78 +++++++++++++++++++ gcc/testsuite/lib/target-supports.exp | 3 +- 28 files changed, 1458 insertions(+), 22 deletions(-) diff --git a/gcc/config/aarch64/aarch64-option-extensions.def b/gcc/config/aarch64/aarch64-option-extensions.def index f4cf66182387..002d5ab562d4 100644 --- a/gcc/config/aarch64/aarch64-option-extensions.def +++ b/gcc/config/aarch64/aarch64-option-extensions.def @@ -245,6 +245,10 @@ AARCH64_OPT_EXTENSION("gcs", GCS, (), (), (), "gcs") AARCH64_OPT_EXTENSION("fp8", FP8, (SIMD), (), (), "fp8") +AARCH64_OPT_EXTENSION("fp8fma", FP8FMA, (FP8), (), (), "fp8fma") + +AARCH64_OPT_EXTENSION("ssve-fp8fma", SSVE_FP8FMA, (SME2,FP8), (), (), "ssve-fp8fma") + AARCH64_OPT_EXTENSION("faminmax", FAMINMAX, (SIMD), (), (), "faminmax") #undef AARCH64_OPT_FMV_EXTENSION diff --git a/gcc/config/aarch64/aarch64-sve-builtins-functions.h b/gcc/config/aarch64/aarch64-sve-builtins-functions.h index 409062ca3ddd..93fdd10ebc0f 100644 --- a/gcc/config/aarch64/aarch64-sve-builtins-functions.h +++ b/gcc/config/aarch64/aarch64-sve-builtins-functions.h @@ -270,10 +270,12 @@ public: CONSTEXPR unspec_based_function_base (int unspec_for_sint, int unspec_for_uint, int unspec_for_fp, + int unspec_for_mfp8 = -1, unsigned int suffix_index = 0) : m_unspec_for_sint (unspec_for_sint), m_unspec_for_uint (unspec_for_uint), m_unspec_for_fp (unspec_for_fp), + m_unspec_for_mfp8 (unspec_for_mfp8), m_suffix_index (suffix_index) {} @@ -281,6 +283,9 @@ public: int unspec_for (const function_instance &instance) const { + if (instance.fpm_mode == FPM_set) + return m_unspec_for_mfp8; + auto &suffix = instance.type_suffix (m_suffix_index); return (!suffix.integer_p ? m_unspec_for_fp : suffix.unsigned_p ? 
m_unspec_for_uint @@ -292,6 +297,7 @@ public: int m_unspec_for_sint; int m_unspec_for_uint; int m_unspec_for_fp; + int m_unspec_for_mfp8; /* Which type suffix is used to choose between the unspecs. */ unsigned int m_suffix_index; @@ -427,7 +433,7 @@ public: CONSTEXPR sme_1mode_function (int unspec_for_sint, int unspec_for_uint, int unspec_for_fp) - : parent (unspec_for_sint, unspec_for_uint, unspec_for_fp, 1) + : parent (unspec_for_sint, unspec_for_uint, unspec_for_fp, -1, 1) {} rtx @@ -457,7 +463,7 @@ public: CONSTEXPR sme_2mode_function_t (int unspec_for_sint, int unspec_for_uint, int unspec_for_fp) - : parent (unspec_for_sint, unspec_for_uint, unspec_for_fp, 1) + : parent (unspec_for_sint, unspec_for_uint, unspec_for_fp, -1, 1) {} rtx @@ -496,7 +502,8 @@ public: { int unspec = unspec_for (e); insn_code icode; - if (e.type_suffix (m_suffix_index).float_p) + if (e.type_suffix (m_suffix_index).float_p + && e.fpm_mode != FPM_set) { /* Put the operands in the normal (fma ...) order, with the accumulator last. This fits naturally since that's also the unprinted operand @@ -526,7 +533,8 @@ public: { int unspec = unspec_for (e); insn_code icode; - if (e.type_suffix (m_suffix_index).float_p) + if (e.type_suffix (m_suffix_index).float_p + && e.fpm_mode != FPM_set) { /* Put the operands in the normal (fma ...) order, with the accumulator last. 
This fits naturally since that's also the unprinted operand diff --git a/gcc/config/aarch64/aarch64-sve-builtins-shapes.cc b/gcc/config/aarch64/aarch64-sve-builtins-shapes.cc index 62831b3c1e29..94f4da8ce319 100644 --- a/gcc/config/aarch64/aarch64-sve-builtins-shapes.cc +++ b/gcc/config/aarch64/aarch64-sve-builtins-shapes.cc @@ -96,6 +96,7 @@ apply_predication (const function_instance &instance, tree return_type, B - bfloat16_t c - a predicate-as-counter h<elt> - a half-sized version of <elt> + M - mfloat8_t p - a predicate (represented as TYPE_SUFFIX_b) q<elt> - a quarter-sized version of <elt> s<bits> - a signed type with the given number of bits @@ -140,6 +141,9 @@ parse_element_type (const function_instance &instance, const char *&format) if (ch == 'B') return TYPE_SUFFIX_bf16; + if (ch == 'M') + return TYPE_SUFFIX_mf8; + if (ch == 'q') { type_suffix_index suffix = parse_element_type (instance, format); @@ -4015,6 +4019,44 @@ SHAPE (ternary_bfloat_lane) typedef ternary_bfloat_lane_base<2> ternary_bfloat_lanex2_def; SHAPE (ternary_bfloat_lanex2) +/* sv<t0>_t svfoo[_t0](sv<t0>_t, svmfloat8_t, svmfloat8_t, uint64_t) + + where the final argument is an integer constant expression in the range + [0, 15]. 
*/ +struct ternary_mfloat8_lane_def + : public ternary_resize2_lane_base<8, TYPE_mfloat, TYPE_mfloat> +{ + void + build (function_builder &b, const function_group_info &group) const override + { + gcc_assert (group.fpm_mode == FPM_set); + b.add_overloaded_functions (group, MODE_none); + build_all (b, "v0,v0,vM,vM,su64", group, MODE_none); + } + + bool + check (function_checker &c) const override + { + return c.require_immediate_lane_index (3, 2, 1); + } + + tree + resolve (function_resolver &r) const override + { + type_suffix_index type; + if (!r.check_num_arguments (5) + || (type = r.infer_vector_type (0)) == NUM_TYPE_SUFFIXES + || !r.require_vector_type (1, VECTOR_TYPE_svmfloat8_t) + || !r.require_vector_type (2, VECTOR_TYPE_svmfloat8_t) + || !r.require_integer_immediate (3) + || !r.require_scalar_type (4, "uint64_t")) + return error_mark_node; + + return r.resolve_to (r.mode_suffix_id, type, TYPE_SUFFIX_mf8, GROUP_none); + } +}; +SHAPE (ternary_mfloat8_lane) + /* sv<t0>_t svfoo[_t0](sv<t0>_t, svbfloatt16_t, svbfloat16_t) sv<t0>_t svfoo[_n_t0](sv<t0>_t, svbfloat16_t, bfloat16_t). */ struct ternary_bfloat_opt_n_def @@ -4030,6 +4072,42 @@ }; SHAPE (ternary_bfloat_opt_n) +/* sv<t0>_t svfoo[_t0](sv<t0>_t, svmfloat8_t, svmfloat8_t) + sv<t0>_t svfoo[_n_t0](sv<t0>_t, svmfloat8_t, mfloat8_t). 
*/ +struct ternary_mfloat8_opt_n_def + : public ternary_resize2_opt_n_base<8, TYPE_mfloat, TYPE_mfloat> +{ + void + build (function_builder &b, const function_group_info &group) const override + { + gcc_assert (group.fpm_mode == FPM_set); + b.add_overloaded_functions (group, MODE_none); + build_all (b, "v0,v0,vM,vM", group, MODE_none); + build_all (b, "v0,v0,vM,sM", group, MODE_n); + } + + tree + resolve (function_resolver &r) const override + { + type_suffix_index type; + if (!r.check_num_arguments (4) + || (type = r.infer_vector_type (0)) == NUM_TYPE_SUFFIXES + || !r.require_vector_type (1, VECTOR_TYPE_svmfloat8_t) + || !r.require_vector_or_scalar_type (2) + || !r.require_scalar_type (3, "uint64_t")) + return error_mark_node; + + auto mode = r.mode_suffix_id; + if (r.scalar_argument_p (2)) + mode = MODE_n; + else if (!r.require_vector_type (2, VECTOR_TYPE_svmfloat8_t)) + return error_mark_node; + + return r.resolve_to (mode, type, TYPE_SUFFIX_mf8, GROUP_none); + } +}; +SHAPE (ternary_mfloat8_opt_n) + /* sv<t0>_t svfoo[_t0](sv<t0>_t, sv<t0:int:quarter>_t, sv<t0:uint:quarter>_t, uint64_t) diff --git a/gcc/config/aarch64/aarch64-sve-builtins-shapes.h b/gcc/config/aarch64/aarch64-sve-builtins-shapes.h index dc3d45572883..1c8937ae027d 100644 --- a/gcc/config/aarch64/aarch64-sve-builtins-shapes.h +++ b/gcc/config/aarch64/aarch64-sve-builtins-shapes.h @@ -213,6 +213,8 @@ namespace aarch64_sve extern const function_shape *const ternary_lane_rotate; extern const function_shape *const ternary_long_lane; extern const function_shape *const ternary_long_opt_n; + extern const function_shape *const ternary_mfloat8_lane; + extern const function_shape *const ternary_mfloat8_opt_n; extern const function_shape *const ternary_opt_n; extern const function_shape *const ternary_qq_or_011_lane; extern const function_shape *const ternary_qq_lane_rotate; diff --git a/gcc/config/aarch64/aarch64-sve-builtins-sve2.cc b/gcc/config/aarch64/aarch64-sve-builtins-sve2.cc index 
6259017a087f..0eda53d52700 100644 --- a/gcc/config/aarch64/aarch64-sve-builtins-sve2.cc +++ b/gcc/config/aarch64/aarch64-sve-builtins-sve2.cc @@ -990,16 +990,34 @@ FUNCTION (svminnmqv, reduction, (-1, -1, UNSPEC_FMINNMQV)) FUNCTION (svminp, unspec_based_pred_function, (UNSPEC_SMINP, UNSPEC_UMINP, UNSPEC_FMINP)) FUNCTION (svminqv, reduction, (UNSPEC_SMINQV, UNSPEC_UMINQV, UNSPEC_FMINQV)) -FUNCTION (svmlalb, unspec_based_mla_function, (UNSPEC_SMULLB, - UNSPEC_UMULLB, UNSPEC_FMLALB)) -FUNCTION (svmlalb_lane, unspec_based_mla_lane_function, (UNSPEC_SMULLB, - UNSPEC_UMULLB, - UNSPEC_FMLALB)) -FUNCTION (svmlalt, unspec_based_mla_function, (UNSPEC_SMULLT, - UNSPEC_UMULLT, UNSPEC_FMLALT)) -FUNCTION (svmlalt_lane, unspec_based_mla_lane_function, (UNSPEC_SMULLT, - UNSPEC_UMULLT, - UNSPEC_FMLALT)) +FUNCTION (svmlalb_lane, unspec_based_mla_lane_function, + (UNSPEC_SMULLB, UNSPEC_UMULLB, UNSPEC_FMLALB, + UNSPEC_FMLALB_FP8)) +FUNCTION (svmlalb, unspec_based_mla_function, + (UNSPEC_SMULLB, UNSPEC_UMULLB, UNSPEC_FMLALB, + UNSPEC_FMLALB_FP8)) +FUNCTION (svmlallbb_lane, unspec_based_mla_lane_function, + (-1, -1, -1, UNSPEC_FMLALLBB_FP8)) +FUNCTION (svmlallbb, unspec_based_mla_function, + (-1, -1, -1, UNSPEC_FMLALLBB_FP8)) +FUNCTION (svmlallbt_lane, unspec_based_mla_lane_function, + (-1, -1, -1, UNSPEC_FMLALLBT_FP8)) +FUNCTION (svmlallbt, unspec_based_mla_function, + (-1, -1, -1, UNSPEC_FMLALLBT_FP8)) +FUNCTION (svmlalltb_lane, unspec_based_mla_lane_function, + (-1, -1, -1, UNSPEC_FMLALLTB_FP8)) +FUNCTION (svmlalltb, unspec_based_mla_function, + (-1, -1, -1, UNSPEC_FMLALLTB_FP8)) +FUNCTION (svmlalltt_lane, unspec_based_mla_lane_function, + (-1, -1, -1, UNSPEC_FMLALLTT_FP8)) +FUNCTION (svmlalltt, unspec_based_mla_function, + (-1, -1, -1, UNSPEC_FMLALLTT_FP8)) +FUNCTION (svmlalt_lane, unspec_based_mla_lane_function, + (UNSPEC_SMULLT, UNSPEC_UMULLT, UNSPEC_FMLALT, + UNSPEC_FMLALT_FP8)) +FUNCTION (svmlalt, unspec_based_mla_function, + (UNSPEC_SMULLT, UNSPEC_UMULLT, UNSPEC_FMLALT, + 
UNSPEC_FMLALT_FP8)) FUNCTION (svmlslb, unspec_based_mls_function, (UNSPEC_SMULLB, UNSPEC_UMULLB, UNSPEC_FMLSLB)) FUNCTION (svmlslb_lane, unspec_based_mls_lane_function, (UNSPEC_SMULLB, @@ -1072,15 +1090,15 @@ FUNCTION (svqrdmulh_lane, unspec_based_lane_function, (UNSPEC_SQRDMULH, -1, -1)) FUNCTION (svqrshl, svqrshl_impl,) FUNCTION (svqrshr, unspec_based_uncond_function, (UNSPEC_SQRSHR, - UNSPEC_UQRSHR, -1, 1)) + UNSPEC_UQRSHR, -1, -1, 1)) FUNCTION (svqrshrn, unspec_based_uncond_function, (UNSPEC_SQRSHRN, - UNSPEC_UQRSHRN, -1, 1)) + UNSPEC_UQRSHRN, -1, -1, 1)) FUNCTION (svqrshrnb, unspec_based_function, (UNSPEC_SQRSHRNB, UNSPEC_UQRSHRNB, -1)) FUNCTION (svqrshrnt, unspec_based_function, (UNSPEC_SQRSHRNT, UNSPEC_UQRSHRNT, -1)) -FUNCTION (svqrshru, unspec_based_uncond_function, (UNSPEC_SQRSHRU, -1, -1, 1)) -FUNCTION (svqrshrun, unspec_based_uncond_function, (UNSPEC_SQRSHRUN, -1, -1, 1)) +FUNCTION (svqrshru, unspec_based_uncond_function, (UNSPEC_SQRSHRU, -1, -1, -1, 1)) +FUNCTION (svqrshrun, unspec_based_uncond_function, (UNSPEC_SQRSHRUN, -1, -1, -1, 1)) FUNCTION (svqrshrunb, unspec_based_function, (UNSPEC_SQRSHRUNB, -1, -1)) FUNCTION (svqrshrunt, unspec_based_function, (UNSPEC_SQRSHRUNT, -1, -1)) FUNCTION (svqshl, svqshl_impl,) diff --git a/gcc/config/aarch64/aarch64-sve-builtins-sve2.def b/gcc/config/aarch64/aarch64-sve-builtins-sve2.def index 8a63998fcc6d..b489e8fad2fb 100644 --- a/gcc/config/aarch64/aarch64-sve-builtins-sve2.def +++ b/gcc/config/aarch64/aarch64-sve-builtins-sve2.def @@ -379,3 +379,20 @@ DEF_SVE_FUNCTION_GS_FPM (svcvtn, unary_convertxn_narrow, cvtn_mf8, x2, none, set DEF_SVE_FUNCTION_GS_FPM (svcvtnb, unary_convertxn_narrow, cvtnx_mf8, x2, none, set) DEF_SVE_FUNCTION_GS_FPM (svcvtnt, unary_convertxn_narrowt, cvtnx_mf8, x2, none, set) #undef REQUIRED_EXTENSIONS + +#define REQUIRED_EXTENSIONS \ + streaming_compatible (AARCH64_FL_SVE2 | AARCH64_FL_FP8FMA, \ + AARCH64_FL_SSVE_FP8FMA) +DEF_SVE_FUNCTION_GS_FPM (svmlalb, ternary_mfloat8_opt_n, h_float_mf8, 
none, none, set) +DEF_SVE_FUNCTION_GS_FPM (svmlalt, ternary_mfloat8_opt_n, h_float_mf8, none, none, set) +DEF_SVE_FUNCTION_GS_FPM (svmlalb_lane, ternary_mfloat8_lane, h_float_mf8, none, none, set) +DEF_SVE_FUNCTION_GS_FPM (svmlalt_lane, ternary_mfloat8_lane, h_float_mf8, none, none, set) +DEF_SVE_FUNCTION_GS_FPM (svmlallbb, ternary_mfloat8_opt_n, s_float_mf8, none, none, set) +DEF_SVE_FUNCTION_GS_FPM (svmlallbt, ternary_mfloat8_opt_n, s_float_mf8, none, none, set) +DEF_SVE_FUNCTION_GS_FPM (svmlalltb, ternary_mfloat8_opt_n, s_float_mf8, none, none, set) +DEF_SVE_FUNCTION_GS_FPM (svmlalltt, ternary_mfloat8_opt_n, s_float_mf8, none, none, set) +DEF_SVE_FUNCTION_GS_FPM (svmlalltt_lane, ternary_mfloat8_lane, s_float_mf8, none, none, set) +DEF_SVE_FUNCTION_GS_FPM (svmlallbb_lane, ternary_mfloat8_lane, s_float_mf8, none, none, set) +DEF_SVE_FUNCTION_GS_FPM (svmlallbt_lane, ternary_mfloat8_lane, s_float_mf8, none, none, set) +DEF_SVE_FUNCTION_GS_FPM (svmlalltb_lane, ternary_mfloat8_lane, s_float_mf8, none, none, set) +#undef REQUIRED_EXTENSIONS diff --git a/gcc/config/aarch64/aarch64-sve-builtins-sve2.h b/gcc/config/aarch64/aarch64-sve-builtins-sve2.h index 6b3a96bb34fb..abeaee6916d3 100644 --- a/gcc/config/aarch64/aarch64-sve-builtins-sve2.h +++ b/gcc/config/aarch64/aarch64-sve-builtins-sve2.h @@ -108,6 +108,14 @@ namespace aarch64_sve extern const function_base *const svminqv; extern const function_base *const svmlalb; extern const function_base *const svmlalb_lane; + extern const function_base *const svmlallbb_lane; + extern const function_base *const svmlallbb; + extern const function_base *const svmlallbt_lane; + extern const function_base *const svmlallbt; + extern const function_base *const svmlalltb_lane; + extern const function_base *const svmlalltb; + extern const function_base *const svmlalltt_lane; + extern const function_base *const svmlalltt; extern const function_base *const svmlalt; extern const function_base *const svmlalt_lane; extern const function_base 
*const svmlslb; diff --git a/gcc/config/aarch64/aarch64-sve-builtins.cc b/gcc/config/aarch64/aarch64-sve-builtins.cc index 2518a33361bf..79dc81fcbb73 100644 --- a/gcc/config/aarch64/aarch64-sve-builtins.cc +++ b/gcc/config/aarch64/aarch64-sve-builtins.cc @@ -347,10 +347,18 @@ CONSTEXPR const group_suffix_info group_suffixes[] = { TYPES_s_data (S, D), \ TYPES_d_data (S, D) +/* _f16_mf8. */ +#define TYPES_h_float_mf8(S, D) \ + D (f16, mf8) + /* _f32. */ #define TYPES_s_float(S, D) \ S (f32) +/* _f32_mf8. */ +#define TYPES_s_float_mf8(S, D) \ + D (f32, mf8) + /* _f32 _s16 _s32 _s64 _u16 _u32 _u64. */ @@ -777,6 +785,7 @@ DEF_SVE_TYPES_ARRAY (bhs_widen); DEF_SVE_TYPES_ARRAY (c); DEF_SVE_TYPES_ARRAY (h_bfloat); DEF_SVE_TYPES_ARRAY (h_float); +DEF_SVE_TYPES_ARRAY (h_float_mf8); DEF_SVE_TYPES_ARRAY (h_integer); DEF_SVE_TYPES_ARRAY (hs_signed); DEF_SVE_TYPES_ARRAY (hs_integer); @@ -788,6 +797,7 @@ DEF_SVE_TYPES_ARRAY (hsd_integer); DEF_SVE_TYPES_ARRAY (hsd_data); DEF_SVE_TYPES_ARRAY (s_float); DEF_SVE_TYPES_ARRAY (s_float_hsd_integer); +DEF_SVE_TYPES_ARRAY (s_float_mf8); DEF_SVE_TYPES_ARRAY (s_float_sd_integer); DEF_SVE_TYPES_ARRAY (s_signed); DEF_SVE_TYPES_ARRAY (s_unsigned); diff --git a/gcc/config/aarch64/aarch64-sve2.md b/gcc/config/aarch64/aarch64-sve2.md index e5bd2861b480..5498eac0b038 100644 --- a/gcc/config/aarch64/aarch64-sve2.md +++ b/gcc/config/aarch64/aarch64-sve2.md @@ -67,6 +67,7 @@ ;; ---- [INT] Shift-and-accumulate operations ;; ---- [INT] Shift-and-insert operations ;; ---- [INT] Sum of absolute differences +;; ---- [FP] Mfloat8 Multiply-and-accumulate operations ;; ;; == Extending arithmetic ;; ---- [INT] Multi-register widening conversions @@ -1993,6 +1994,86 @@ } ) +;; ------------------------------------------------------------------------- +;; ---- [FP] Mfloat8 Multiply-and-accumulate operations +;; ------------------------------------------------------------------------- +;; Includes: +;; - FMLALB (vectors, FP8 to FP16) +;; - FMLALT (vectors, FP8 to 
FP16) +;; - FMLALB (indexed, FP8 to FP16) +;; - FMLALT (indexed, FP8 to FP16) +;; - FMLALLBB (vectors) +;; - FMLALLBB (indexed) +;; - FMLALLBT (vectors) +;; - FMLALLBT (indexed) +;; - FMLALLTB (vectors) +;; - FMLALLTB (indexed) +;; - FMLALLTT (vectors) +;; - FMLALLTT (indexed) +;; ------------------------------------------------------------------------- + +(define_insn "@aarch64_sve_add_<sve2_fp8_fma_op_vnx8hf><mode>" + [(set (match_operand:VNx8HF_ONLY 0 "register_operand") + (unspec:VNx8HF_ONLY + [(match_operand:VNx8HF 1 "register_operand") + (match_operand:VNx16QI 2 "register_operand") + (match_operand:VNx16QI 3 "register_operand") + (reg:DI FPM_REGNUM)] + SVE2_FP8_TERNARY_VNX8HF))] + "TARGET_SSVE_FP8FMA" + {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ] + [ w , 0 , w , w ; * ] <sve2_fp8_fma_op_vnx8hf>\t%0.h, %2.b, %3.b + [ ?&w , w , w , w ; yes ] movprfx\t%0, %1\;<sve2_fp8_fma_op_vnx8hf>\t%0.h, %2.b, %3.b + } +) + +(define_insn "@aarch64_sve_add_<sve2_fp8_fma_op_vnx4sf><mode>" + [(set (match_operand:VNx4SF_ONLY 0 "register_operand") + (unspec:VNx4SF_ONLY + [(match_operand:VNx4SF 1 "register_operand") + (match_operand:VNx16QI 2 "register_operand") + (match_operand:VNx16QI 3 "register_operand") + (reg:DI FPM_REGNUM)] + SVE2_FP8_TERNARY_VNX4SF))] + "TARGET_SSVE_FP8FMA" + {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ] + [ w , 0 , w , w ; * ] <sve2_fp8_fma_op_vnx4sf>\t%0.s, %2.b, %3.b + [ ?&w , w , w , w ; yes ] movprfx\t%0, %1\;<sve2_fp8_fma_op_vnx4sf>\t%0.s, %2.b, %3.b + } +) + +(define_insn "@aarch64_sve_add_lane_<sve2_fp8_fma_op_vnx8hf><mode>" + [(set (match_operand:VNx8HF_ONLY 0 "register_operand") + (unspec:VNx8HF_ONLY + [(match_operand:VNx8HF 1 "register_operand") + (match_operand:VNx16QI 2 "register_operand") + (match_operand:VNx16QI 3 "register_operand") + (match_operand:SI 4 "const_int_operand") + (reg:DI FPM_REGNUM)] + SVE2_FP8_TERNARY_LANE_VNX8HF))] + "TARGET_SSVE_FP8FMA" + {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ] + [ w , 0 , w , y ; * ] 
<sve2_fp8_fma_op_vnx8hf>\t%0.h, %2.b, %3.b[%4] + [ ?&w , w , w , y ; yes ] movprfx\t%0, %1\;<sve2_fp8_fma_op_vnx8hf>\t%0.h, %2.b, %3.b[%4] + } +) + +(define_insn "@aarch64_sve_add_lane_<sve2_fp8_fma_op_vnx4sf><mode>" + [(set (match_operand:VNx4SF_ONLY 0 "register_operand") + (unspec:VNx4SF_ONLY + [(match_operand:VNx4SF 1 "register_operand") + (match_operand:VNx16QI 2 "register_operand") + (match_operand:VNx16QI 3 "register_operand") + (match_operand:SI 4 "const_int_operand") + (reg:DI FPM_REGNUM)] + SVE2_FP8_TERNARY_LANE_VNX4SF))] + "TARGET_SSVE_FP8FMA" + {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ] + [ w , 0 , w , y ; * ] <sve2_fp8_fma_op_vnx4sf>\t%0.s, %2.b, %3.b[%4] + [ ?&w , w , w , y ; yes ] movprfx\t%0, %1\;<sve2_fp8_fma_op_vnx4sf>\t%0.s, %2.b, %3.b[%4] + } +) + ;; ========================================================================= ;; == Extending arithmetic ;; ========================================================================= diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h index f43b1659db6d..80a1fa407097 100644 --- a/gcc/config/aarch64/aarch64.h +++ b/gcc/config/aarch64/aarch64.h @@ -518,6 +518,15 @@ constexpr auto AARCH64_FL_DEFAULT_ISA_MODE ATTRIBUTE_UNUSED && (TARGET_SVE2 || TARGET_STREAMING) \ && (TARGET_SME2 || TARGET_NON_STREAMING)) +/* fp8 multiply-accumulate instructions are enabled through +fp8fma. */ +#define TARGET_FP8FMA AARCH64_HAVE_ISA (FP8FMA) + +/* SVE2 versions of fp8 multiply-accumulate instructions are enabled for + non-streaming mode by +fp8fma and for streaming mode by +ssve-fp8fma. */ +#define TARGET_SSVE_FP8FMA \ + (((TARGET_SVE2 && TARGET_FP8FMA) || TARGET_STREAMING) \ + && (AARCH64_HAVE_ISA (SSVE_FP8FMA) || TARGET_NON_STREAMING)) + /* Standard register usage. 
*/ /* 31 64-bit general purpose registers R0-R30: diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md index 26716d593de8..4b265a73d9a3 100644 --- a/gcc/config/aarch64/iterators.md +++ b/gcc/config/aarch64/iterators.md @@ -430,6 +430,7 @@ (define_mode_iterator VNx16QI_ONLY [VNx16QI]) (define_mode_iterator VNx16SI_ONLY [VNx16SI]) (define_mode_iterator VNx8HI_ONLY [VNx8HI]) +(define_mode_iterator VNx8HF_ONLY [VNx8HF]) (define_mode_iterator VNx8BF_ONLY [VNx8BF]) (define_mode_iterator VNx8SI_ONLY [VNx8SI]) (define_mode_iterator VNx8SF_ONLY [VNx8SF]) @@ -975,7 +976,13 @@ UNSPEC_FMINNMP ; Used in aarch64-sve2.md. UNSPEC_FMINP ; Used in aarch64-sve2.md. UNSPEC_FMLALB ; Used in aarch64-sve2.md. + UNSPEC_FMLALB_FP8 ; Used in aarch64-sve2.md. + UNSPEC_FMLALLBB_FP8 ; Used in aarch64-sve2.md. + UNSPEC_FMLALLBT_FP8 ; Used in aarch64-sve2.md. + UNSPEC_FMLALLTB_FP8 ; Used in aarch64-sve2.md. + UNSPEC_FMLALLTT_FP8 ; Used in aarch64-sve2.md. UNSPEC_FMLALT ; Used in aarch64-sve2.md. + UNSPEC_FMLALT_FP8 ; Used in aarch64-sve2.md. UNSPEC_FMLSLB ; Used in aarch64-sve2.md. UNSPEC_FMLSLT ; Used in aarch64-sve2.md. UNSPEC_FP8FCVTN ; Used in aarch64-sve2.md. 
@@ -4755,3 +4762,33 @@ (UNSPEC_F2CVT "f2cvt") (UNSPEC_F1CVTLT "f1cvtlt") (UNSPEC_F2CVTLT "f2cvtlt")]) + +(define_int_iterator SVE2_FP8_TERNARY_VNX8HF + [UNSPEC_FMLALB_FP8 + UNSPEC_FMLALT_FP8]) + +(define_int_iterator SVE2_FP8_TERNARY_VNX4SF + [UNSPEC_FMLALLBB_FP8 + UNSPEC_FMLALLBT_FP8 + UNSPEC_FMLALLTB_FP8 + UNSPEC_FMLALLTT_FP8]) + +(define_int_iterator SVE2_FP8_TERNARY_LANE_VNX8HF + [UNSPEC_FMLALB_FP8 + UNSPEC_FMLALT_FP8]) + +(define_int_iterator SVE2_FP8_TERNARY_LANE_VNX4SF + [UNSPEC_FMLALLBB_FP8 + UNSPEC_FMLALLBT_FP8 + UNSPEC_FMLALLTB_FP8 + UNSPEC_FMLALLTT_FP8]) + +(define_int_attr sve2_fp8_fma_op_vnx8hf + [(UNSPEC_FMLALB_FP8 "fmlalb") + (UNSPEC_FMLALT_FP8 "fmlalt")]) + +(define_int_attr sve2_fp8_fma_op_vnx4sf + [(UNSPEC_FMLALLBB_FP8 "fmlallbb") + (UNSPEC_FMLALLBT_FP8 "fmlallbt") + (UNSPEC_FMLALLTB_FP8 "fmlalltb") + (UNSPEC_FMLALLTT_FP8 "fmlalltt")]) diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi index 1b7b712085f0..2a4f016e2dff 100644 --- a/gcc/doc/invoke.texi +++ b/gcc/doc/invoke.texi @@ -21952,6 +21952,11 @@ Enable support for Armv8.9-a/9.4-a translation hardening extension. Enable the RCpc3 (Release Consistency) extension. @item fp8 Enable the fp8 (8-bit floating point) extension. +@item fp8fma +Enable the fp8 (8-bit floating point) multiply accumulate extension. +@item ssve-fp8fma +Enable the fp8 (8-bit floating point) multiply accumulate extension in streaming +mode. @item faminmax Enable the Floating Point Absolute Maximum/Minimum extension. 
@item sve-b16b16 diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/test_sve_acle.h b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/test_sve_acle.h index 4a146c3e1576..d3ae707ac490 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/test_sve_acle.h +++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/test_sve_acle.h @@ -84,7 +84,7 @@ #define TEST_DUAL_Z_REV(NAME, TYPE1, TYPE2, CODE1, CODE2) \ PROTO (NAME, TYPE1, (TYPE2 z0, TYPE2 z1, TYPE2 z2, TYPE2 z3, \ TYPE1 z4, TYPE1 z5, TYPE1 z6, TYPE1 z7, \ - svbool_t p0, svbool_t p1)) \ + svbool_t p0, svbool_t p1, fpm_t fpm0)) \ { \ TYPE1 z0_res; \ INVOKE (CODE1, CODE2); \ @@ -136,7 +136,7 @@ } #define TEST_DUAL_LANE_REG(NAME, ZTYPE1, ZTYPE2, REG, CODE1, CODE2) \ - PROTO (NAME, void, (void)) \ + PROTO (NAME, void, (fpm_t fpm0)) \ { \ register ZTYPE1 z0 __asm ("z0"); \ register ZTYPE2 z1 __asm ("z1"); \ @@ -194,7 +194,7 @@ PROTO (NAME, ZTYPE1, (ZTYPE1 z0, ZTYPE1 z1, ZTYPE1 z2, \ ZTYPE1 z3, ZTYPE2 z4, ZTYPE2 z5, \ ZTYPE2 z6, STYPE d7, svbool_t p0, \ - svbool_t p1)) \ + svbool_t p1, fpm_t fpm0)) \ { \ INVOKE (CODE1, CODE2); \ return z0; \ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_mfloat8_lane_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_mfloat8_lane_1.c new file mode 100644 index 000000000000..6bdd3c06dc2b --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_mfloat8_lane_1.c @@ -0,0 +1,84 @@ +/* { dg-do compile } */ + +#include <arm_sve.h> + +#pragma GCC target ("arch=armv8.2-a+ssve-fp8fma") + +void +f1 (svfloat16_t f16, svmfloat8_t f8, fpm_t fpm, + svbool_t pg, svuint8_t u8, svuint16_t u16, svint32_t s32, + svbfloat16_t bf16, svfloat32_t f32, svfloat64_t f64, mfloat8_t f, int i) + __arm_streaming +{ + svmlalb_lane_fpm (f16, f8, f8, 0, fpm); + svmlalb_lane_fpm (f16, f8, f8, 7, fpm); + svmlalb_lane_fpm (f16, f8, f8, 8, fpm); + svmlalb_lane_fpm (f16, f8, f8, 15, fpm); + + svmlalb_lane_fpm (f16); /* { dg-error {too few arguments to function 
'svmlalb_lane_fpm'} } */ + svmlalb_lane_fpm (f16, f8); /* { dg-error {too few arguments to function 'svmlalb_lane_fpm'} } */ + svmlalb_lane_fpm (f16, f8, f8); /* { dg-error {too few arguments to function 'svmlalb_lane_fpm'} } */ + svmlalb_lane_fpm (f16, f8, f8, 0); /* { dg-error {too few arguments to function 'svmlalb_lane_fpm'} } */ + svmlalb_lane_fpm (f16, f8, f8, fpm); /* { dg-error {too few arguments to function 'svmlalb_lane_fpm'} } */ + svmlalb_lane_fpm (f16, f8, 15, fpm); /* { dg-error {too few arguments to function 'svmlalb_lane_fpm'} } */ + svmlalb_lane_fpm (f8, f8, 15, fpm); /* { dg-error {too few arguments to function 'svmlalb_lane_fpm'} } */ + + svmlalb_lane_fpm (f16, f8, f8, 15, 0, fpm); /* { dg-error {too many arguments to function 'svmlalb_lane_fpm'} } */ + svmlalb_lane_fpm (f16, f8, f8, 15, fpm, fpm); /* { dg-error {too many arguments to function 'svmlalb_lane_fpm'} } */ + svmlalb_lane_fpm (f16, f8, f8, f8, 15, fpm); /* { dg-error {too many arguments to function 'svmlalb_lane_fpm'} } */ + svmlalb_lane_fpm (f16, f16, f8, f8, 15, fpm); /* { dg-error {too many arguments to function 'svmlalb_lane_fpm'} } */ + + svmlalb_lane_fpm (f32, bf16, bf16, 0, fpm); /* { dg-error {passing 'svbfloat16_t' to argument 2 of 'svmlalb_lane_fpm', which expects 'svmfloat8_t'} } */ + svmlalb_lane_fpm (0, f8, f8, 0, fpm); /* { dg-error {passing 'int' to argument 1 of 'svmlalb_lane_fpm', which expects an SVE type rather than a scalar} } */ + svmlalb_lane_fpm (pg, f8, f8, 0, fpm); /* { dg-error {'svmlalb_lane_fpm' has no form that takes 'svbool_t' and 'svmfloat8_t' arguments} } */ + svmlalb_lane_fpm (u8, f8, f8, 0, fpm); /* { dg-error {'svmlalb_lane_fpm' has no form that takes 'svuint8_t' and 'svmfloat8_t' arguments} } */ + svmlalb_lane_fpm (u16, f8, f8, 0, fpm); /* { dg-error {'svmlalb_lane_fpm' has no form that takes 'svuint16_t' and 'svmfloat8_t' arguments} } */ + svmlalb_lane_fpm (f32, f8, f8, 0, fpm); /* { dg-error {'svmlalb_lane_fpm' has no form that takes 'svfloat32_t' 
and 'svmfloat8_t' arguments} } */ + svmlalb_lane_fpm (f64, f8, f8, 0, fpm); /* { dg-error {'svmlalb_lane_fpm' has no form that takes 'svfloat64_t' and 'svmfloat8_t' arguments} } */ + svmlalb_lane_fpm (f16, 0, f8, 0, fpm); /* { dg-error {passing 'int' to argument 2 of 'svmlalb_lane_fpm', which expects 'svmfloat8_t'} } */ + svmlalb_lane_fpm (f16, f32, f8, 0, fpm); /* { dg-error {passing 'svfloat32_t' to argument 2 of 'svmlalb_lane_fpm', which expects 'svmfloat8_t'} } */ + svmlalb_lane_fpm (f16, f8, 0, 0, fpm); /* { dg-error {passing 'int' to argument 3 of 'svmlalb_lane_fpm', which expects 'svmfloat8_t'} } */ + svmlalb_lane_fpm (f16, f8, f32, 0, fpm); /* { dg-error {passing 'svfloat32_t' to argument 3 of 'svmlalb_lane_fpm', which expects 'svmfloat8_t'} } */ + + svmlalb_lane_fpm (f16, f8, f8, s32, fpm); /* { dg-error {argument 4 of 'svmlalb_lane_fpm' must be an integer constant expression} } */ + svmlalb_lane_fpm (f16, f8, f8, i, fpm); /* { dg-error {argument 4 of 'svmlalb_lane_fpm' must be an integer constant expression} } */ + svmlalb_lane_fpm (f16, f8, f8, 16, fpm); /* { dg-error {passing 16 to argument 4 of 'svmlalb_lane_fpm', which expects a value in the range \[0, 15\]} } */ + svmlalb_lane_fpm (f16, f8, f8, -1, fpm); /* { dg-error {passing -1 to argument 4 of 'svmlalb_lane_fpm', which expects a value in the range \[0, 15\]} } */ + svmlalb_lane_fpm (f16, f8, f8, 15, f8); /* { dg-error {passing 'svmfloat8_t' to argument 5 of 'svmlalb_lane_fpm', which expects 'uint64_t'} } */ + + + svmlallbb_lane_fpm (f32, f8, f8, 0, fpm); + svmlallbb_lane_fpm (f32, f8, f8, 7, fpm); + svmlallbb_lane_fpm (f32, f8, f8, 8, fpm); + svmlallbb_lane_fpm (f32, f8, f8, 15, fpm); + + svmlallbb_lane_fpm (f32); /* { dg-error {too few arguments to function 'svmlallbb_lane_fpm'} } */ + svmlallbb_lane_fpm (f32, f8); /* { dg-error {too few arguments to function 'svmlallbb_lane_fpm'} } */ + svmlallbb_lane_fpm (f32, f8, f8); /* { dg-error {too few arguments to function 'svmlallbb_lane_fpm'} } */ + 
svmlallbb_lane_fpm (f32, f8, f8, 0); /* { dg-error {too few arguments to function 'svmlallbb_lane_fpm'} } */ + svmlallbb_lane_fpm (f32, f8, f8, fpm); /* { dg-error {too few arguments to function 'svmlallbb_lane_fpm'} } */ + svmlallbb_lane_fpm (f32, f8, 15, fpm); /* { dg-error {too few arguments to function 'svmlallbb_lane_fpm'} } */ + svmlallbb_lane_fpm (f8, f8, 15, fpm); /* { dg-error {too few arguments to function 'svmlallbb_lane_fpm'} } */ + + svmlallbb_lane_fpm (f32, f8, f8, 15, 0, fpm); /* { dg-error {too many arguments to function 'svmlallbb_lane_fpm'} } */ + svmlallbb_lane_fpm (f32, f8, f8, 15, fpm, fpm); /* { dg-error {too many arguments to function 'svmlallbb_lane_fpm'} } */ + svmlallbb_lane_fpm (f32, f8, f8, f8, 15, fpm); /* { dg-error {too many arguments to function 'svmlallbb_lane_fpm'} } */ + svmlallbb_lane_fpm (f32, f16, f8, f8, 15, fpm); /* { dg-error {too many arguments to function 'svmlallbb_lane_fpm'} } */ + + svmlallbb_lane_fpm (f32, bf16, bf16, 0, fpm); /* { dg-error {passing 'svbfloat16_t' to argument 2 of 'svmlallbb_lane_fpm', which expects 'svmfloat8_t'} } */ + svmlallbb_lane_fpm (0, f8, f8, 0, fpm); /* { dg-error {passing 'int' to argument 1 of 'svmlallbb_lane_fpm', which expects an SVE type rather than a scalar} } */ + svmlallbb_lane_fpm (pg, f8, f8, 0, fpm); /* { dg-error {'svmlallbb_lane_fpm' has no form that takes 'svbool_t' and 'svmfloat8_t' arguments} } */ + svmlallbb_lane_fpm (u8, f8, f8, 0, fpm); /* { dg-error {'svmlallbb_lane_fpm' has no form that takes 'svuint8_t' and 'svmfloat8_t' arguments} } */ + svmlallbb_lane_fpm (u16, f8, f8, 0, fpm); /* { dg-error {'svmlallbb_lane_fpm' has no form that takes 'svuint16_t' and 'svmfloat8_t' arguments} } */ + svmlallbb_lane_fpm (f16, f8, f8, 0, fpm); /* { dg-error {'svmlallbb_lane_fpm' has no form that takes 'svfloat16_t' and 'svmfloat8_t' arguments} } */ + svmlallbb_lane_fpm (f64, f8, f8, 0, fpm); /* { dg-error {'svmlallbb_lane_fpm' has no form that takes 'svfloat64_t' and 'svmfloat8_t' 
arguments} } */ + svmlallbb_lane_fpm (f32, 0, f8, 0, fpm); /* { dg-error {passing 'int' to argument 2 of 'svmlallbb_lane_fpm', which expects 'svmfloat8_t'} } */ + svmlallbb_lane_fpm (f32, f32, f8, 0, fpm); /* { dg-error {passing 'svfloat32_t' to argument 2 of 'svmlallbb_lane_fpm', which expects 'svmfloat8_t'} } */ + svmlallbb_lane_fpm (f32, f8, 0, 0, fpm); /* { dg-error {passing 'int' to argument 3 of 'svmlallbb_lane_fpm', which expects 'svmfloat8_t'} } */ + svmlallbb_lane_fpm (f32, f8, f32, 0, fpm); /* { dg-error {passing 'svfloat32_t' to argument 3 of 'svmlallbb_lane_fpm', which expects 'svmfloat8_t'} } */ + + svmlallbb_lane_fpm (f32, f8, f8, s32, fpm); /* { dg-error {argument 4 of 'svmlallbb_lane_fpm' must be an integer constant expression} } */ + svmlallbb_lane_fpm (f32, f8, f8, i, fpm); /* { dg-error {argument 4 of 'svmlallbb_lane_fpm' must be an integer constant expression} } */ + svmlallbb_lane_fpm (f32, f8, f8, 16, fpm); /* { dg-error {passing 16 to argument 4 of 'svmlallbb_lane_fpm', which expects a value in the range \[0, 15\]} } */ + svmlallbb_lane_fpm (f32, f8, f8, -1, fpm); /* { dg-error {passing -1 to argument 4 of 'svmlallbb_lane_fpm', which expects a value in the range \[0, 15\]} } */ +} diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_mfloat8_opt_n_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_mfloat8_opt_n_1.c new file mode 100644 index 000000000000..1b6ff882e685 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_mfloat8_opt_n_1.c @@ -0,0 +1,60 @@ +/* { dg-do compile } */ + +#include <arm_sve.h> + +#pragma GCC target ("arch=armv8.2-a+sve2+fp8fma") + +void +test (svfloat16_t f16, svmfloat8_t f8, fpm_t fpm, + svbool_t pg, svuint8_t u8, svuint16_t u16, svint32_t s32, + svbfloat16_t bf16, svfloat32_t f32, svfloat64_t f64, mfloat8_t f) +{ + svmlalb_fpm (f16, f8, f8, fpm); + svmlalt_fpm (f16, f8, f8, fpm); + svmlalb_fpm (f16, f8, f, fpm); + + svmlalb_fpm (f16); /* { dg-error {too 
few arguments to function 'svmlalb_fpm'} } */ + svmlalb_fpm (f16, f8); /* { dg-error {too few arguments to function 'svmlalb_fpm'} } */ + svmlalb_fpm (f16, f8, f8); /* { dg-error {too few arguments to function 'svmlalb_fpm'} } */ + svmlalb_fpm (f8, f8, fpm); /* { dg-error {too few arguments to function 'svmlalb_fpm'} } */ + svmlalb_fpm (f16, f8, fpm); /* { dg-error {too few arguments to function 'svmlalb_fpm'} } */ + svmlalb_fpm (f16, f8, f8, fpm, 0); /* { dg-error {too many arguments to function 'svmlalb_fpm'} } */ + + svmlalt_fpm (f32, f8, f8, fpm); /* { dg-error {'svmlalt_fpm' has no form that takes 'svfloat32_t' and 'svmfloat8_t' arguments} } */ + svmlalb_fpm (0, f8, f8, fpm); /* { dg-error {passing 'int' to argument 1 of 'svmlalb_fpm', which expects an SVE type rather than a scalar} } */ + svmlalb_fpm (pg, f8, f8, fpm); /* { dg-error {'svmlalb_fpm' has no form that takes 'svbool_t' and 'svmfloat8_t' arguments} } */ + svmlalb_fpm (u8, f8, f8, fpm); /* { dg-error {'svmlalb_fpm' has no form that takes 'svuint8_t' and 'svmfloat8_t' arguments} } */ + svmlalb_fpm (u16, f8, f8, fpm); /* { dg-error {'svmlalb_fpm' has no form that takes 'svuint16_t' and 'svmfloat8_t' arguments} } */ + svmlalb_fpm (f64, f8, f8, fpm); /* { dg-error {'svmlalb_fpm' has no form that takes 'svfloat64_t' and 'svmfloat8_t' arguments} } */ + svmlalb_fpm (f16, 0, f8, fpm); /* { dg-error {passing 'int' to argument 2 of 'svmlalb_fpm', which expects 'svmfloat8_t'} } */ + svmlalb_fpm (f16, f16, f8, fpm); /* { dg-error {passing 'svfloat16_t' to argument 2 of 'svmlalb_fpm', which expects 'svmfloat8_t'} } */ + svmlalb_fpm (f16, f8, 0, fpm); /* { dg-error {invalid conversion to type 'mfloat8_t'} } */ + svmlalb_fpm (f16, f8, f16, fpm); /* { dg-error {passing 'svfloat16_t' to argument 3 of 'svmlalb_fpm', which expects 'svmfloat8_t'} } */ + svmlalb_fpm (f16, f8, f8, f8); /* { dg-error {passing 'svmfloat8_t' to argument 4 of 'svmlalb_fpm', which expects 'uint64_t'} } */ + + + svmlallbb_fpm (f32, f8, f8, 
fpm); + svmlallbt_fpm (f32, f8, f8, fpm); + svmlalltb_fpm (f32, f8, f8, fpm); + svmlalltt_fpm (f32, f8, f8, fpm); + svmlallbb_fpm (f32, f8, f, fpm); + + svmlallbb_fpm (f16, f8, f8, fpm); /* { dg-error {'svmlallbb_fpm' has no form that takes 'svfloat16_t' and 'svmfloat8_t' arguments} } */ + svmlallbb_fpm (f32); /* { dg-error {too few arguments to function 'svmlallbb_fpm'} } */ + svmlallbb_fpm (f32, f8); /* { dg-error {too few arguments to function 'svmlallbb_fpm'} } */ + svmlallbb_fpm (f32, f8, f8); /* { dg-error {too few arguments to function 'svmlallbb_fpm'} } */ + svmlallbb_fpm (f8, f8, fpm); /* { dg-error {too few arguments to function 'svmlallbb_fpm'} } */ + svmlallbb_fpm (f32, f8, fpm); /* { dg-error {too few arguments to function 'svmlallbb_fpm'} } */ + svmlallbb_fpm (f32, f8, f8, fpm, 0); /* { dg-error {too many arguments to function 'svmlallbb_fpm'} } */ + svmlallbb_fpm (0, f8, f8, fpm); /* { dg-error {passing 'int' to argument 1 of 'svmlallbb_fpm', which expects an SVE type rather than a scalar} } */ + svmlallbb_fpm (pg, f8, f8, fpm); /* { dg-error {'svmlallbb_fpm' has no form that takes 'svbool_t' and 'svmfloat8_t' arguments} } */ + svmlallbb_fpm (u8, f8, f8, fpm); /* { dg-error {'svmlallbb_fpm' has no form that takes 'svuint8_t' and 'svmfloat8_t' arguments} } */ + svmlallbb_fpm (u16, f8, f8, fpm); /* { dg-error {'svmlallbb_fpm' has no form that takes 'svuint16_t' and 'svmfloat8_t' arguments} } */ + svmlallbb_fpm (f64, f8, f8, fpm); /* { dg-error {'svmlallbb_fpm' has no form that takes 'svfloat64_t' and 'svmfloat8_t' arguments} } */ + svmlallbb_fpm (f32, 0, f8, fpm); /* { dg-error {passing 'int' to argument 2 of 'svmlallbb_fpm', which expects 'svmfloat8_t'} } */ + svmlallbb_fpm (f32, f16, f8, fpm); /* { dg-error {passing 'svfloat16_t' to argument 2 of 'svmlallbb_fpm', which expects 'svmfloat8_t'} } */ + svmlallbb_fpm (f32, f8, 0, fpm); /* { dg-error {invalid conversion to type 'mfloat8_t'} } */ + svmlallbb_fpm (f32, f8, f16, fpm); /* { dg-error {passing 
'svfloat16_t' to argument 3 of 'svmlallbb_fpm', which expects 'svmfloat8_t'} } */ + svmlallbb_fpm (f32, f8, f8, f8); /* { dg-error {passing 'svmfloat8_t' to argument 4 of 'svmlallbb_fpm', which expects 'uint64_t'} } */ + +} diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/mlalb_lane_mf8.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/mlalb_lane_mf8.c new file mode 100644 index 000000000000..e7af1b6dcc69 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/mlalb_lane_mf8.c @@ -0,0 +1,91 @@ +/* { dg-do assemble { target aarch64_asm_fp8fma_ok } } */ +/* { dg-do compile { target { ! aarch64_asm_fp8fma_ok } } } */ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sve_acle.h" + +#pragma GCC target "+fp8fma" +#ifdef STREAMING_COMPATIBLE +#pragma GCC target "+ssve-fp8fma" +#endif + +/* +** mlalb_lane_0_f16_tied1: +** msr fpmr, x0 +** fmlalb z0\.h, z4\.b, z5\.b\[0\] +** ret +*/ +TEST_DUAL_Z (mlalb_lane_0_f16_tied1, svfloat16_t, svmfloat8_t, + z0 = svmlalb_lane_f16_mf8_fpm (z0, z4, z5, 0, fpm0), + z0 = svmlalb_lane_fpm (z0, z4, z5, 0, fpm0)) + +/* +** mlalb_lane_0_f16_tied2: +** msr fpmr, x0 +** mov (z[0-9]+)\.d, z0\.d +** movprfx z0, z4 +** fmlalb z0\.h, \1\.b, z1\.b\[0\] +** ret +*/ +TEST_DUAL_Z_REV (mlalb_lane_0_f16_tied2, svfloat16_t, svmfloat8_t, + z0_res = svmlalb_lane_f16_mf8_fpm (z4, z0, z1, 0, fpm0), + z0_res = svmlalb_lane_fpm (z4, z0, z1, 0, fpm0)) + +/* +** mlalb_lane_0_f16_tied3: +** msr fpmr, x0 +** mov (z[0-9]+)\.d, z0\.d +** movprfx z0, z4 +** fmlalb z0\.h, z1\.b, \1\.b\[0\] +** ret +*/ +TEST_DUAL_Z_REV (mlalb_lane_0_f16_tied3, svfloat16_t, svmfloat8_t, + z0_res = svmlalb_lane_f16_mf8_fpm (z4, z1, z0, 0, fpm0), + z0_res = svmlalb_lane_fpm (z4, z1, z0, 0, fpm0)) + +/* +** mlalb_lane_0_f16_untied: +** msr fpmr, x0 +** movprfx z0, z1 +** fmlalb z0\.h, z4\.b, z5\.b\[0\] +** ret +*/ +TEST_DUAL_Z (mlalb_lane_0_f16_untied, svfloat16_t, svmfloat8_t, + z0 = svmlalb_lane_f16_mf8_fpm (z1, z4, z5, 0, fpm0), 
+ z0 = svmlalb_lane_fpm (z1, z4, z5, 0, fpm0)) + +/* +** mlalb_lane_1_f16: +** msr fpmr, x0 +** fmlalb z0\.h, z4\.b, z5\.b\[1\] +** ret +*/ +TEST_DUAL_Z (mlalb_lane_1_f16, svfloat16_t, svmfloat8_t, + z0 = svmlalb_lane_f16_mf8_fpm (z0, z4, z5, 1, fpm0), + z0 = svmlalb_lane_fpm (z0, z4, z5, 1, fpm0)) + +/* +** mlalb_lane_z8_f16: +** ... +** msr fpmr, x0 +** mov (z[0-7])\.d, z8\.d +** fmlalb z0\.h, z1\.b, \1\.b\[1\] +** ldr d8, \[sp\], 32 +** ret +*/ +TEST_DUAL_LANE_REG (mlalb_lane_z8_f16, svfloat16_t, svmfloat8_t, z8, + z0 = svmlalb_lane_f16_mf8_fpm (z0, z1, z8, 1, fpm0), + z0 = svmlalb_lane_fpm (z0, z1, z8, 1, fpm0)) + +/* +** mlalb_lane_z16_f16: +** ... +** msr fpmr, x0 +** mov (z[0-7])\.d, z16\.d +** fmlalb z0\.h, z1\.b, \1\.b\[15\] +** ... +** ret +*/ +TEST_DUAL_LANE_REG (mlalb_lane_z16_f16, svfloat16_t, svmfloat8_t, z16, + z0 = svmlalb_lane_f16_mf8_fpm (z0, z1, z16, 15, fpm0), + z0 = svmlalb_lane_fpm (z0, z1, z16, 15, fpm0)) diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/mlalb_mf8.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/mlalb_mf8.c new file mode 100644 index 000000000000..424640031fb6 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/mlalb_mf8.c @@ -0,0 +1,78 @@ +/* { dg-do assemble { target aarch64_asm_fp8fma_ok } } */ +/* { dg-do compile { target { ! 
aarch64_asm_fp8fma_ok } } } */ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sve_acle.h" + +#pragma GCC target "+fp8fma" +#ifdef STREAMING_COMPATIBLE +#pragma GCC target "+ssve-fp8fma" +#endif + +/* +** mlalb_f16_mf8_tied1: +** msr fpmr, x0 +** fmlalb z0\.h, z4\.b, z5\.b +** ret +*/ +TEST_DUAL_Z (mlalb_f16_mf8_tied1, svfloat16_t, svmfloat8_t, + z0 = svmlalb_f16_mf8_fpm (z0, z4, z5, fpm0), + z0 = svmlalb_fpm (z0, z4, z5, fpm0)) + +/* +** mlalb_f16_mf8_tied2: +** msr fpmr, x0 +** mov (z[0-9]+)\.d, z0\.d +** movprfx z0, z4 +** fmlalb z0\.h, \1\.b, z1\.b +** ret +*/ +TEST_DUAL_Z_REV (mlalb_f16_mf8_tied2, svfloat16_t, svmfloat8_t, + z0_res = svmlalb_f16_mf8_fpm (z4, z0, z1, fpm0), + z0_res = svmlalb_fpm (z4, z0, z1, fpm0)) + +/* +** mlalb_f16_mf8_tied3: +** msr fpmr, x0 +** mov (z[0-9]+)\.d, z0\.d +** movprfx z0, z4 +** fmlalb z0\.h, z1\.b, \1\.b +** ret +*/ +TEST_DUAL_Z_REV (mlalb_f16_mf8_tied3, svfloat16_t, svmfloat8_t, + z0_res = svmlalb_f16_mf8_fpm (z4, z1, z0, fpm0), + z0_res = svmlalb_fpm (z4, z1, z0, fpm0)) + +/* +** mlalb_f16_mf8_untied: +** msr fpmr, x0 +** movprfx z0, z1 +** fmlalb z0\.h, z4\.b, z5\.b +** ret +*/ +TEST_DUAL_Z (mlalb_f16_mf8_untied, svfloat16_t, svmfloat8_t, + z0 = svmlalb_f16_mf8_fpm (z1, z4, z5, fpm0), + z0 = svmlalb_fpm (z1, z4, z5, fpm0)) + +/* +** mlalb_h7_f16_tied1: +** msr fpmr, x0 +** mov (z[0-9]+\.b), b7 +** fmlalb z0\.h, z4\.b, \1 +** ret +*/ +TEST_DUAL_ZD (mlalb_h7_f16_tied1, svfloat16_t, svmfloat8_t, mfloat8_t, + z0 = svmlalb_n_f16_mf8_fpm (z0, z4, d7, fpm0), + z0 = svmlalb_fpm (z0, z4, d7, fpm0)) + +/* +** mlalb_h7_f16_untied: +** msr fpmr, x0 +** mov (z[0-9]+\.b), b7 +** movprfx z0, z1 +** fmlalb z0\.h, z4\.b, \1 +** ret +*/ +TEST_DUAL_ZD (mlalb_h7_f16_untied, svfloat16_t, svmfloat8_t, mfloat8_t, + z0 = svmlalb_n_f16_mf8_fpm (z1, z4, d7, fpm0), + z0 = svmlalb_fpm (z1, z4, d7, fpm0)) diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/mlallbb_lane_mf8.c 
b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/mlallbb_lane_mf8.c new file mode 100644 index 000000000000..07a529d8dc9b --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/mlallbb_lane_mf8.c @@ -0,0 +1,91 @@ +/* { dg-do assemble { target aarch64_asm_fp8fma_ok } } */ +/* { dg-do compile { target { ! aarch64_asm_fp8fma_ok } } } */ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sve_acle.h" + +#pragma GCC target "+fp8fma" +#ifdef STREAMING_COMPATIBLE +#pragma GCC target "+ssve-fp8fma" +#endif + +/* +** mlallbb_lane_0_f16_tied1: +** msr fpmr, x0 +** fmlallbb z0\.s, z4\.b, z5\.b\[0\] +** ret +*/ +TEST_DUAL_Z (mlallbb_lane_0_f16_tied1, svfloat32_t, svmfloat8_t, + z0 = svmlallbb_lane_f32_mf8_fpm (z0, z4, z5, 0, fpm0), + z0 = svmlallbb_lane_fpm (z0, z4, z5, 0, fpm0)) + +/* +** mlallbb_lane_0_f32_tied2: +** msr fpmr, x0 +** mov (z[0-9]+)\.d, z0\.d +** movprfx z0, z4 +** fmlallbb z0\.s, \1\.b, z1\.b\[0\] +** ret +*/ +TEST_DUAL_Z_REV (mlallbb_lane_0_f32_tied2, svfloat32_t, svmfloat8_t, + z0_res = svmlallbb_lane_f32_mf8_fpm (z4, z0, z1, 0, fpm0), + z0_res = svmlallbb_lane_fpm (z4, z0, z1, 0, fpm0)) + +/* +** mlallbb_lane_0_f32_tied3: +** msr fpmr, x0 +** mov (z[0-9]+)\.d, z0\.d +** movprfx z0, z4 +** fmlallbb z0\.s, z1\.b, \1\.b\[0\] +** ret +*/ +TEST_DUAL_Z_REV (mlallbb_lane_0_f32_tied3, svfloat32_t, svmfloat8_t, + z0_res = svmlallbb_lane_f32_mf8_fpm (z4, z1, z0, 0, fpm0), + z0_res = svmlallbb_lane_fpm (z4, z1, z0, 0, fpm0)) + +/* +** mlallbb_lane_0_f32_untied: +** msr fpmr, x0 +** movprfx z0, z1 +** fmlallbb z0\.s, z4\.b, z5\.b\[0\] +** ret +*/ +TEST_DUAL_Z (mlallbb_lane_0_f32_untied, svfloat32_t, svmfloat8_t, + z0 = svmlallbb_lane_f32_mf8_fpm (z1, z4, z5, 0, fpm0), + z0 = svmlallbb_lane_fpm (z1, z4, z5, 0, fpm0)) + +/* +** mlallbb_lane_1_f32: +** msr fpmr, x0 +** fmlallbb z0\.s, z4\.b, z5\.b\[1\] +** ret +*/ +TEST_DUAL_Z (mlallbb_lane_1_f32, svfloat32_t, svmfloat8_t, + z0 = svmlallbb_lane_f32_mf8_fpm (z0, z4, z5, 1, 
fpm0), + z0 = svmlallbb_lane_fpm (z0, z4, z5, 1, fpm0)) + +/* +** mlallbb_lane_z8_f32: +** ... +** msr fpmr, x0 +** mov (z[0-7])\.d, z8\.d +** fmlallbb z0\.s, z1\.b, \1\.b\[1\] +** ldr d8, \[sp\], 32 +** ret +*/ +TEST_DUAL_LANE_REG (mlallbb_lane_z8_f32, svfloat32_t, svmfloat8_t, z8, + z0 = svmlallbb_lane_f32_mf8_fpm (z0, z1, z8, 1, fpm0), + z0 = svmlallbb_lane_fpm (z0, z1, z8, 1, fpm0)) + +/* +** mlallbb_lane_z16_f32: +** ... +** msr fpmr, x0 +** mov (z[0-7])\.d, z16\.d +** fmlallbb z0\.s, z1\.b, \1\.b\[15\] +** ... +** ret +*/ +TEST_DUAL_LANE_REG (mlallbb_lane_z16_f32, svfloat32_t, svmfloat8_t, z16, + z0 = svmlallbb_lane_f32_mf8_fpm (z0, z1, z16, 15, fpm0), + z0 = svmlallbb_lane_fpm (z0, z1, z16, 15, fpm0)) diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/mlallbb_mf8.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/mlallbb_mf8.c new file mode 100644 index 000000000000..543cd9030d54 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/mlallbb_mf8.c @@ -0,0 +1,78 @@ +/* { dg-do assemble { target aarch64_asm_fp8fma_ok } } */ +/* { dg-do compile { target { ! 
aarch64_asm_fp8fma_ok } } } */ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sve_acle.h" + +#pragma GCC target "+fp8fma" +#ifdef STREAMING_COMPATIBLE +#pragma GCC target "+ssve-fp8fma" +#endif + +/* +** mlallbb_f32_mf8_tied1: +** msr fpmr, x0 +** fmlallbb z0\.s, z4\.b, z5\.b +** ret +*/ +TEST_DUAL_Z (mlallbb_f32_mf8_tied1, svfloat32_t, svmfloat8_t, + z0 = svmlallbb_f32_mf8_fpm (z0, z4, z5, fpm0), + z0 = svmlallbb_fpm (z0, z4, z5, fpm0)) + +/* +** mlallbb_f32_mf8_tied2: +** msr fpmr, x0 +** mov (z[0-9]+)\.d, z0\.d +** movprfx z0, z4 +** fmlallbb z0\.s, \1\.b, z1\.b +** ret +*/ +TEST_DUAL_Z_REV (mlallbb_f32_mf8_tied2, svfloat32_t, svmfloat8_t, + z0_res = svmlallbb_f32_mf8_fpm (z4, z0, z1, fpm0), + z0_res = svmlallbb_fpm (z4, z0, z1, fpm0)) + +/* +** mlallbb_f32_mf8_tied3: +** msr fpmr, x0 +** mov (z[0-9]+)\.d, z0\.d +** movprfx z0, z4 +** fmlallbb z0\.s, z1\.b, \1\.b +** ret +*/ +TEST_DUAL_Z_REV (mlallbb_f32_mf8_tied3, svfloat32_t, svmfloat8_t, + z0_res = svmlallbb_f32_mf8_fpm (z4, z1, z0, fpm0), + z0_res = svmlallbb_fpm (z4, z1, z0, fpm0)) + +/* +** mlallbb_f32_mf8_untied: +** msr fpmr, x0 +** movprfx z0, z1 +** fmlallbb z0\.s, z4\.b, z5\.b +** ret +*/ +TEST_DUAL_Z (mlallbb_f32_mf8_untied, svfloat32_t, svmfloat8_t, + z0 = svmlallbb_f32_mf8_fpm (z1, z4, z5, fpm0), + z0 = svmlallbb_fpm (z1, z4, z5, fpm0)) + +/* +** mlalb_h7_f16_tied1: +** msr fpmr, x0 +** mov (z[0-9]+\.b), b7 +** fmlallbb z0\.s, z4\.b, \1 +** ret +*/ +TEST_DUAL_ZD (mlalb_h7_f16_tied1, svfloat32_t, svmfloat8_t, mfloat8_t, + z0 = svmlallbb_n_f32_mf8_fpm (z0, z4, d7, fpm0), + z0 = svmlallbb_fpm (z0, z4, d7, fpm0)) + +/* +** mlalb_h7_f16_untied: +** msr fpmr, x0 +** mov (z[0-9]+\.b), b7 +** movprfx z0, z1 +** fmlallbb z0\.s, z4\.b, \1 +** ret +*/ +TEST_DUAL_ZD (mlalb_h7_f16_untied, svfloat32_t, svmfloat8_t, mfloat8_t, + z0 = svmlallbb_n_f32_mf8_fpm (z1, z4, d7, fpm0), + z0 = svmlallbb_fpm (z1, z4, d7, fpm0)) diff --git 
a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/mlallbt_lane_mf8.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/mlallbt_lane_mf8.c new file mode 100644 index 000000000000..9da29fbfb0b7 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/mlallbt_lane_mf8.c @@ -0,0 +1,91 @@ +/* { dg-do assemble { target aarch64_asm_fp8fma_ok } } */ +/* { dg-do compile { target { ! aarch64_asm_fp8fma_ok } } } */ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sve_acle.h" + +#pragma GCC target "+fp8fma" +#ifdef STREAMING_COMPATIBLE +#pragma GCC target "+ssve-fp8fma" +#endif + +/* +** mlallbt_lane_0_f16_tied1: +** msr fpmr, x0 +** fmlallbt z0\.s, z4\.b, z5\.b\[0\] +** ret +*/ +TEST_DUAL_Z (mlallbt_lane_0_f16_tied1, svfloat32_t, svmfloat8_t, + z0 = svmlallbt_lane_f32_mf8_fpm (z0, z4, z5, 0, fpm0), + z0 = svmlallbt_lane_fpm (z0, z4, z5, 0, fpm0)) + +/* +** mlallbt_lane_0_f32_tied2: +** msr fpmr, x0 +** mov (z[0-9]+)\.d, z0\.d +** movprfx z0, z4 +** fmlallbt z0\.s, \1\.b, z1\.b\[0\] +** ret +*/ +TEST_DUAL_Z_REV (mlallbt_lane_0_f32_tied2, svfloat32_t, svmfloat8_t, + z0_res = svmlallbt_lane_f32_mf8_fpm (z4, z0, z1, 0, fpm0), + z0_res = svmlallbt_lane_fpm (z4, z0, z1, 0, fpm0)) + +/* +** mlallbt_lane_0_f32_tied3: +** msr fpmr, x0 +** mov (z[0-9]+)\.d, z0\.d +** movprfx z0, z4 +** fmlallbt z0\.s, z1\.b, \1\.b\[0\] +** ret +*/ +TEST_DUAL_Z_REV (mlallbt_lane_0_f32_tied3, svfloat32_t, svmfloat8_t, + z0_res = svmlallbt_lane_f32_mf8_fpm (z4, z1, z0, 0, fpm0), + z0_res = svmlallbt_lane_fpm (z4, z1, z0, 0, fpm0)) + +/* +** mlallbt_lane_0_f32_untied: +** msr fpmr, x0 +** movprfx z0, z1 +** fmlallbt z0\.s, z4\.b, z5\.b\[0\] +** ret +*/ +TEST_DUAL_Z (mlallbt_lane_0_f32_untied, svfloat32_t, svmfloat8_t, + z0 = svmlallbt_lane_f32_mf8_fpm (z1, z4, z5, 0, fpm0), + z0 = svmlallbt_lane_fpm (z1, z4, z5, 0, fpm0)) + +/* +** mlallbt_lane_1_f32: +** msr fpmr, x0 +** fmlallbt z0\.s, z4\.b, z5\.b\[1\] +** ret +*/ +TEST_DUAL_Z (mlallbt_lane_1_f32, 
svfloat32_t, svmfloat8_t, + z0 = svmlallbt_lane_f32_mf8_fpm (z0, z4, z5, 1, fpm0), + z0 = svmlallbt_lane_fpm (z0, z4, z5, 1, fpm0)) + +/* +** mlallbt_lane_z8_f32: +** ... +** msr fpmr, x0 +** mov (z[0-7])\.d, z8\.d +** fmlallbt z0\.s, z1\.b, \1\.b\[1\] +** ldr d8, \[sp\], 32 +** ret +*/ +TEST_DUAL_LANE_REG (mlallbt_lane_z8_f32, svfloat32_t, svmfloat8_t, z8, + z0 = svmlallbt_lane_f32_mf8_fpm (z0, z1, z8, 1, fpm0), + z0 = svmlallbt_lane_fpm (z0, z1, z8, 1, fpm0)) + +/* +** mlallbt_lane_z16_f32: +** ... +** msr fpmr, x0 +** mov (z[0-7])\.d, z16\.d +** fmlallbt z0\.s, z1\.b, \1\.b\[15\] +** ... +** ret +*/ +TEST_DUAL_LANE_REG (mlallbt_lane_z16_f32, svfloat32_t, svmfloat8_t, z16, + z0 = svmlallbt_lane_f32_mf8_fpm (z0, z1, z16, 15, fpm0), + z0 = svmlallbt_lane_fpm (z0, z1, z16, 15, fpm0)) diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/mlallbt_mf8.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/mlallbt_mf8.c new file mode 100644 index 000000000000..aa8299c66b38 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/mlallbt_mf8.c @@ -0,0 +1,78 @@ +/* { dg-do assemble { target aarch64_asm_fp8fma_ok } } */ +/* { dg-do compile { target { ! 
aarch64_asm_fp8fma_ok } } } */ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sve_acle.h" + +#pragma GCC target "+fp8fma" +#ifdef STREAMING_COMPATIBLE +#pragma GCC target "+ssve-fp8fma" +#endif + +/* +** mlallbt_f32_mf8_tied1: +** msr fpmr, x0 +** fmlallbt z0\.s, z4\.b, z5\.b +** ret +*/ +TEST_DUAL_Z (mlallbt_f32_mf8_tied1, svfloat32_t, svmfloat8_t, + z0 = svmlallbt_f32_mf8_fpm (z0, z4, z5, fpm0), + z0 = svmlallbt_fpm (z0, z4, z5, fpm0)) + +/* +** mlallbt_f32_mf8_tied2: +** msr fpmr, x0 +** mov (z[0-9]+)\.d, z0\.d +** movprfx z0, z4 +** fmlallbt z0\.s, \1\.b, z1\.b +** ret +*/ +TEST_DUAL_Z_REV (mlallbt_f32_mf8_tied2, svfloat32_t, svmfloat8_t, + z0_res = svmlallbt_f32_mf8_fpm (z4, z0, z1, fpm0), + z0_res = svmlallbt_fpm (z4, z0, z1, fpm0)) + +/* +** mlallbt_f32_mf8_tied3: +** msr fpmr, x0 +** mov (z[0-9]+)\.d, z0\.d +** movprfx z0, z4 +** fmlallbt z0\.s, z1\.b, \1\.b +** ret +*/ +TEST_DUAL_Z_REV (mlallbt_f32_mf8_tied3, svfloat32_t, svmfloat8_t, + z0_res = svmlallbt_f32_mf8_fpm (z4, z1, z0, fpm0), + z0_res = svmlallbt_fpm (z4, z1, z0, fpm0)) + +/* +** mlallbt_f32_mf8_untied: +** msr fpmr, x0 +** movprfx z0, z1 +** fmlallbt z0\.s, z4\.b, z5\.b +** ret +*/ +TEST_DUAL_Z (mlallbt_f32_mf8_untied, svfloat32_t, svmfloat8_t, + z0 = svmlallbt_f32_mf8_fpm (z1, z4, z5, fpm0), + z0 = svmlallbt_fpm (z1, z4, z5, fpm0)) + +/* +** mlalb_h7_f16_tied1: +** msr fpmr, x0 +** mov (z[0-9]+\.b), b7 +** fmlallbt z0\.s, z4\.b, \1 +** ret +*/ +TEST_DUAL_ZD (mlalb_h7_f16_tied1, svfloat32_t, svmfloat8_t, mfloat8_t, + z0 = svmlallbt_n_f32_mf8_fpm (z0, z4, d7, fpm0), + z0 = svmlallbt_fpm (z0, z4, d7, fpm0)) + +/* +** mlalb_h7_f16_untied: +** msr fpmr, x0 +** mov (z[0-9]+\.b), b7 +** movprfx z0, z1 +** fmlallbt z0\.s, z4\.b, \1 +** ret +*/ +TEST_DUAL_ZD (mlalb_h7_f16_untied, svfloat32_t, svmfloat8_t, mfloat8_t, + z0 = svmlallbt_n_f32_mf8_fpm (z1, z4, d7, fpm0), + z0 = svmlallbt_fpm (z1, z4, d7, fpm0)) diff --git 
a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/mlalltb_lane_mf8.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/mlalltb_lane_mf8.c new file mode 100644 index 000000000000..cbe297c188b7 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/mlalltb_lane_mf8.c @@ -0,0 +1,91 @@ +/* { dg-do assemble { target aarch64_asm_fp8fma_ok } } */ +/* { dg-do compile { target { ! aarch64_asm_fp8fma_ok } } } */ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sve_acle.h" + +#pragma GCC target "+fp8fma" +#ifdef STREAMING_COMPATIBLE +#pragma GCC target "+ssve-fp8fma" +#endif + +/* +** mlalltb_lane_0_f16_tied1: +** msr fpmr, x0 +** fmlalltb z0\.s, z4\.b, z5\.b\[0\] +** ret +*/ +TEST_DUAL_Z (mlalltb_lane_0_f16_tied1, svfloat32_t, svmfloat8_t, + z0 = svmlalltb_lane_f32_mf8_fpm (z0, z4, z5, 0, fpm0), + z0 = svmlalltb_lane_fpm (z0, z4, z5, 0, fpm0)) + +/* +** mlalltb_lane_0_f32_tied2: +** msr fpmr, x0 +** mov (z[0-9]+)\.d, z0\.d +** movprfx z0, z4 +** fmlalltb z0\.s, \1\.b, z1\.b\[0\] +** ret +*/ +TEST_DUAL_Z_REV (mlalltb_lane_0_f32_tied2, svfloat32_t, svmfloat8_t, + z0_res = svmlalltb_lane_f32_mf8_fpm (z4, z0, z1, 0, fpm0), + z0_res = svmlalltb_lane_fpm (z4, z0, z1, 0, fpm0)) + +/* +** mlalltb_lane_0_f32_tied3: +** msr fpmr, x0 +** mov (z[0-9]+)\.d, z0\.d +** movprfx z0, z4 +** fmlalltb z0\.s, z1\.b, \1\.b\[0\] +** ret +*/ +TEST_DUAL_Z_REV (mlalltb_lane_0_f32_tied3, svfloat32_t, svmfloat8_t, + z0_res = svmlalltb_lane_f32_mf8_fpm (z4, z1, z0, 0, fpm0), + z0_res = svmlalltb_lane_fpm (z4, z1, z0, 0, fpm0)) + +/* +** mlalltb_lane_0_f32_untied: +** msr fpmr, x0 +** movprfx z0, z1 +** fmlalltb z0\.s, z4\.b, z5\.b\[0\] +** ret +*/ +TEST_DUAL_Z (mlalltb_lane_0_f32_untied, svfloat32_t, svmfloat8_t, + z0 = svmlalltb_lane_f32_mf8_fpm (z1, z4, z5, 0, fpm0), + z0 = svmlalltb_lane_fpm (z1, z4, z5, 0, fpm0)) + +/* +** mlalltb_lane_1_f32: +** msr fpmr, x0 +** fmlalltb z0\.s, z4\.b, z5\.b\[1\] +** ret +*/ +TEST_DUAL_Z (mlalltb_lane_1_f32, 
svfloat32_t, svmfloat8_t, + z0 = svmlalltb_lane_f32_mf8_fpm (z0, z4, z5, 1, fpm0), + z0 = svmlalltb_lane_fpm (z0, z4, z5, 1, fpm0)) + +/* +** mlalltb_lane_z8_f32: +** ... +** msr fpmr, x0 +** mov (z[0-7])\.d, z8\.d +** fmlalltb z0\.s, z1\.b, \1\.b\[1\] +** ldr d8, \[sp\], 32 +** ret +*/ +TEST_DUAL_LANE_REG (mlalltb_lane_z8_f32, svfloat32_t, svmfloat8_t, z8, + z0 = svmlalltb_lane_f32_mf8_fpm (z0, z1, z8, 1, fpm0), + z0 = svmlalltb_lane_fpm (z0, z1, z8, 1, fpm0)) + +/* +** mlalltb_lane_z16_f32: +** ... +** msr fpmr, x0 +** mov (z[0-7])\.d, z16\.d +** fmlalltb z0\.s, z1\.b, \1\.b\[15\] +** ... +** ret +*/ +TEST_DUAL_LANE_REG (mlalltb_lane_z16_f32, svfloat32_t, svmfloat8_t, z16, + z0 = svmlalltb_lane_f32_mf8_fpm (z0, z1, z16, 15, fpm0), + z0 = svmlalltb_lane_fpm (z0, z1, z16, 15, fpm0)) diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/mlalltb_mf8.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/mlalltb_mf8.c new file mode 100644 index 000000000000..a921dbd18816 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/mlalltb_mf8.c @@ -0,0 +1,78 @@ +/* { dg-do assemble { target aarch64_asm_fp8fma_ok } } */ +/* { dg-do compile { target { ! 
+/* +** mlalltb_h7_f32_tied1: +** msr fpmr, x0 +** mov (z[0-9]+\.b), b7 +** fmlalltb z0\.s, z4\.b, \1 +** ret +*/ +TEST_DUAL_ZD (mlalltb_h7_f32_tied1, svfloat32_t, svmfloat8_t, mfloat8_t, + z0 = svmlalltb_n_f32_mf8_fpm (z0, z4, d7, fpm0), + z0 = svmlalltb_fpm (z0, z4, d7, fpm0)) + +/* +** mlalltb_h7_f32_untied: +** msr fpmr, x0 +** mov (z[0-9]+\.b), b7 +** movprfx z0, z1 +** fmlalltb z0\.s, z4\.b, \1 +** ret +*/ +TEST_DUAL_ZD (mlalltb_h7_f32_untied, svfloat32_t, svmfloat8_t, mfloat8_t, + z0 = svmlalltb_n_f32_mf8_fpm (z1, z4, d7, fpm0), + z0 = svmlalltb_fpm (z1, z4, d7, fpm0))
+/* +** mlalltt_lane_0_f32_tied1: +** msr fpmr, x0 +** fmlalltt z0\.s, z4\.b, z5\.b\[0\] +** ret +*/ +TEST_DUAL_Z (mlalltt_lane_0_f32_tied1, svfloat32_t, svmfloat8_t, + z0 = svmlalltt_lane_f32_mf8_fpm (z0, z4, z5, 0, fpm0), + z0 = svmlalltt_lane_fpm (z0, z4, z5, 0, fpm0))
svfloat32_t, svmfloat8_t, + z0 = svmlalltt_lane_f32_mf8_fpm (z0, z4, z5, 1, fpm0), + z0 = svmlalltt_lane_fpm (z0, z4, z5, 1, fpm0)) + +/* +** mlalltt_lane_z8_f32: +** ... +** msr fpmr, x0 +** mov (z[0-7])\.d, z8\.d +** fmlalltt z0\.s, z1\.b, \1\.b\[1\] +** ldr d8, \[sp\], 32 +** ret +*/ +TEST_DUAL_LANE_REG (mlalltt_lane_z8_f32, svfloat32_t, svmfloat8_t, z8, + z0 = svmlalltt_lane_f32_mf8_fpm (z0, z1, z8, 1, fpm0), + z0 = svmlalltt_lane_fpm (z0, z1, z8, 1, fpm0)) + +/* +** mlalltt_lane_z16_f32: +** ... +** msr fpmr, x0 +** mov (z[0-7])\.d, z16\.d +** fmlalltt z0\.s, z1\.b, \1\.b\[15\] +** ... +** ret +*/ +TEST_DUAL_LANE_REG (mlalltt_lane_z16_f32, svfloat32_t, svmfloat8_t, z16, + z0 = svmlalltt_lane_f32_mf8_fpm (z0, z1, z16, 15, fpm0), + z0 = svmlalltt_lane_fpm (z0, z1, z16, 15, fpm0)) diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/mlalltt_mf8.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/mlalltt_mf8.c new file mode 100644 index 000000000000..5cd6beb348ac --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/mlalltt_mf8.c @@ -0,0 +1,78 @@ +/* { dg-do assemble { target aarch64_asm_fp8fma_ok } } */ +/* { dg-do compile { target { ! 
+/* +** mlalltt_h7_f32_tied1: +** msr fpmr, x0 +** mov (z[0-9]+\.b), b7 +** fmlalltt z0\.s, z4\.b, \1 +** ret +*/ +TEST_DUAL_ZD (mlalltt_h7_f32_tied1, svfloat32_t, svmfloat8_t, mfloat8_t, + z0 = svmlalltt_n_f32_mf8_fpm (z0, z4, d7, fpm0), + z0 = svmlalltt_fpm (z0, z4, d7, fpm0)) + +/* +** mlalltt_h7_f32_untied: +** msr fpmr, x0 +** mov (z[0-9]+\.b), b7 +** movprfx z0, z1 +** fmlalltt z0\.s, z4\.b, \1 +** ret +*/ +TEST_DUAL_ZD (mlalltt_h7_f32_untied, svfloat32_t, svmfloat8_t, mfloat8_t, + z0 = svmlalltt_n_f32_mf8_fpm (z1, z4, d7, fpm0), + z0 = svmlalltt_fpm (z1, z4, d7, fpm0))
a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/mlalt_lane_mf8.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/mlalt_lane_mf8.c new file mode 100644 index 000000000000..4f5a1045420a --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/mlalt_lane_mf8.c @@ -0,0 +1,91 @@ +/* { dg-do assemble { target aarch64_asm_fp8fma_ok } } */ +/* { dg-do compile { target { ! aarch64_asm_fp8fma_ok } } } */ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sve_acle.h" + +#pragma GCC target "+fp8fma" +#ifdef STREAMING_COMPATIBLE +#pragma GCC target "+ssve-fp8fma" +#endif + +/* +** mlalt_lane_0_f16_tied1: +** msr fpmr, x0 +** fmlalt z0\.h, z4\.b, z5\.b\[0\] +** ret +*/ +TEST_DUAL_Z (mlalt_lane_0_f16_tied1, svfloat16_t, svmfloat8_t, + z0 = svmlalt_lane_f16_mf8_fpm (z0, z4, z5, 0, fpm0), + z0 = svmlalt_lane_fpm (z0, z4, z5, 0, fpm0)) + +/* +** mlalt_lane_0_f16_tied2: +** msr fpmr, x0 +** mov (z[0-9]+)\.d, z0\.d +** movprfx z0, z4 +** fmlalt z0\.h, \1\.b, z1\.b\[0\] +** ret +*/ +TEST_DUAL_Z_REV (mlalt_lane_0_f16_tied2, svfloat16_t, svmfloat8_t, + z0_res = svmlalt_lane_f16_mf8_fpm (z4, z0, z1, 0, fpm0), + z0_res = svmlalt_lane_fpm (z4, z0, z1, 0, fpm0)) + +/* +** mlalt_lane_0_f16_tied3: +** msr fpmr, x0 +** mov (z[0-9]+)\.d, z0\.d +** movprfx z0, z4 +** fmlalt z0\.h, z1\.b, \1\.b\[0\] +** ret +*/ +TEST_DUAL_Z_REV (mlalt_lane_0_f16_tied3, svfloat16_t, svmfloat8_t, + z0_res = svmlalt_lane_f16_mf8_fpm (z4, z1, z0, 0, fpm0), + z0_res = svmlalt_lane_fpm (z4, z1, z0, 0, fpm0)) + +/* +** mlalt_lane_0_f16_untied: +** msr fpmr, x0 +** movprfx z0, z1 +** fmlalt z0\.h, z4\.b, z5\.b\[0\] +** ret +*/ +TEST_DUAL_Z (mlalt_lane_0_f16_untied, svfloat16_t, svmfloat8_t, + z0 = svmlalt_lane_f16_mf8_fpm (z1, z4, z5, 0, fpm0), + z0 = svmlalt_lane_fpm (z1, z4, z5, 0, fpm0)) + +/* +** mlalt_lane_1_f16: +** msr fpmr, x0 +** fmlalt z0\.h, z4\.b, z5\.b\[1\] +** ret +*/ +TEST_DUAL_Z (mlalt_lane_1_f16, svfloat16_t, svmfloat8_t, + z0 = svmlalt_lane_f16_mf8_fpm (z0, 
z4, z5, 1, fpm0), + z0 = svmlalt_lane_fpm (z0, z4, z5, 1, fpm0)) + +/* +** mlalt_lane_z8_f16: +** ... +** msr fpmr, x0 +** mov (z[0-7])\.d, z8\.d +** fmlalt z0\.h, z1\.b, \1\.b\[1\] +** ldr d8, \[sp\], 32 +** ret +*/ +TEST_DUAL_LANE_REG (mlalt_lane_z8_f16, svfloat16_t, svmfloat8_t, z8, + z0 = svmlalt_lane_f16_mf8_fpm (z0, z1, z8, 1, fpm0), + z0 = svmlalt_lane_fpm (z0, z1, z8, 1, fpm0)) + +/* +** mlalt_lane_z16_f16: +** ... +** msr fpmr, x0 +** mov (z[0-7])\.d, z16\.d +** fmlalt z0\.h, z1\.b, \1\.b\[15\] +** ... +** ret +*/ +TEST_DUAL_LANE_REG (mlalt_lane_z16_f16, svfloat16_t, svmfloat8_t, z16, + z0 = svmlalt_lane_f16_mf8_fpm (z0, z1, z16, 15, fpm0), + z0 = svmlalt_lane_fpm (z0, z1, z16, 15, fpm0)) diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/mlalt_mf8.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/mlalt_mf8.c new file mode 100644 index 000000000000..3a305d31cb8d --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/mlalt_mf8.c @@ -0,0 +1,78 @@ +/* { dg-do assemble { target aarch64_asm_fp8fma_ok } } */ +/* { dg-do compile { target { ! 
aarch64_asm_fp8fma_ok } } } */ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sve_acle.h" + +#pragma GCC target "+fp8fma" +#ifdef STREAMING_COMPATIBLE +#pragma GCC target "+ssve-fp8fma" +#endif + +/* +** mlalt_f16_mf8_tied1: +** msr fpmr, x0 +** fmlalt z0\.h, z4\.b, z5\.b +** ret +*/ +TEST_DUAL_Z (mlalt_f16_mf8_tied1, svfloat16_t, svmfloat8_t, + z0 = svmlalt_f16_mf8_fpm (z0, z4, z5, fpm0), + z0 = svmlalt_fpm (z0, z4, z5, fpm0)) + +/* +** mlalt_f16_mf8_tied2: +** msr fpmr, x0 +** mov (z[0-9]+)\.d, z0\.d +** movprfx z0, z4 +** fmlalt z0\.h, \1\.b, z1\.b +** ret +*/ +TEST_DUAL_Z_REV (mlalt_f16_mf8_tied2, svfloat16_t, svmfloat8_t, + z0_res = svmlalt_f16_mf8_fpm (z4, z0, z1, fpm0), + z0_res = svmlalt_fpm (z4, z0, z1, fpm0)) + +/* +** mlalt_f16_mf8_tied3: +** msr fpmr, x0 +** mov (z[0-9]+)\.d, z0\.d +** movprfx z0, z4 +** fmlalt z0\.h, z1\.b, \1\.b +** ret +*/ +TEST_DUAL_Z_REV (mlalt_f16_mf8_tied3, svfloat16_t, svmfloat8_t, + z0_res = svmlalt_f16_mf8_fpm (z4, z1, z0, fpm0), + z0_res = svmlalt_fpm (z4, z1, z0, fpm0)) + +/* +** mlalt_f16_mf8_untied: +** msr fpmr, x0 +** movprfx z0, z1 +** fmlalt z0\.h, z4\.b, z5\.b +** ret +*/ +TEST_DUAL_Z (mlalt_f16_mf8_untied, svfloat16_t, svmfloat8_t, + z0 = svmlalt_f16_mf8_fpm (z1, z4, z5, fpm0), + z0 = svmlalt_fpm (z1, z4, z5, fpm0)) + +/* +** mlalt_h7_f16_tied1: +** msr fpmr, x0 +** mov (z[0-9]+\.b), b7 +** fmlalt z0\.h, z4\.b, \1 +** ret +*/ +TEST_DUAL_ZD (mlalt_h7_f16_tied1, svfloat16_t, svmfloat8_t, mfloat8_t, + z0 = svmlalt_n_f16_mf8_fpm (z0, z4, d7, fpm0), + z0 = svmlalt_fpm (z0, z4, d7, fpm0)) + +/* +** mlalt_h7_f16_untied: +** msr fpmr, x0 +** mov (z[0-9]+\.b), b7 +** movprfx z0, z1 +** fmlalt z0\.h, z4\.b, \1 +** ret +*/ +TEST_DUAL_ZD (mlalt_h7_f16_untied, svfloat16_t, svmfloat8_t, mfloat8_t, + z0 = svmlalt_n_f16_mf8_fpm (z1, z4, d7, fpm0), + z0 = svmlalt_fpm (z1, z4, d7, fpm0)) diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp index 
a3edccf1fda4..a122178bd21f 100644 --- a/gcc/testsuite/lib/target-supports.exp +++ b/gcc/testsuite/lib/target-supports.exp @@ -12140,7 +12140,8 @@ proc check_effective_target_aarch64_tiny { } { foreach { aarch64_ext } { "fp" "simd" "crypto" "crc" "lse" "dotprod" "sve" "i8mm" "f32mm" "f64mm" "bf16" "sb" "sve2" "ls64" "sme" "sme-i16i64" "sme2" "sve-b16b16" - "sme-b16b16" "sme-f16f16" "sme2p1" "fp8" } { + "sme-b16b16" "sme-f16f16" "sme2p1" "fp8" "fp8fma" + "ssve-fp8fma" } { eval [string map [list FUNC $aarch64_ext] { proc check_effective_target_aarch64_asm_FUNC_ok { } { if { [istarget aarch64*-*-*] } {