The AArch64 FEAT_FAMINMAX extension introduces instructions for computing the element-wise floating-point absolute maximum and minimum of two vectors.
This patch introduces SVE2 faminmax intrinsics.

The intrinsics of this extension are implemented as the following
builtin functions:
* sva[max|min]_[m|x|z]
* sva[max|min]_[f16|f32|f64]_[m|x|z]
* sva[max|min]_n_[f16|f32|f64]_[m|x|z]

gcc/ChangeLog:

	* config/aarch64/aarch64-sve-builtins-base.cc
	(svamax): Absolute maximum declaration.
	(svamin): Absolute minimum declaration.
	* config/aarch64/aarch64-sve-builtins-base.def
	(REQUIRED_EXTENSIONS): Add faminmax intrinsics behind a flag.
	(svamax): Absolute maximum declaration.
	(svamin): Absolute minimum declaration.
	* config/aarch64/aarch64-sve-builtins-base.h: Declare function
	bases for the new intrinsics.
	* config/aarch64/aarch64.h (TARGET_SVE_FAMINMAX): New flag for SVE2
	faminmax.
	* config/aarch64/iterators.md: New unspecs, iterators, and attrs
	for the new intrinsics.

gcc/testsuite/ChangeLog:

	* gcc.target/aarch64/sve2/acle/asm/amax_f16.c: New test.
	* gcc.target/aarch64/sve2/acle/asm/amax_f32.c: New test.
	* gcc.target/aarch64/sve2/acle/asm/amax_f64.c: New test.
	* gcc.target/aarch64/sve2/acle/asm/amin_f16.c: New test.
	* gcc.target/aarch64/sve2/acle/asm/amin_f32.c: New test.
	* gcc.target/aarch64/sve2/acle/asm/amin_f64.c: New test.
--- .../aarch64/aarch64-sve-builtins-base.cc | 4 + .../aarch64/aarch64-sve-builtins-base.def | 5 + .../aarch64/aarch64-sve-builtins-base.h | 2 + gcc/config/aarch64/aarch64.h | 1 + gcc/config/aarch64/iterators.md | 40 +++-- .../aarch64/sve2/acle/asm/amax_f16.c | 142 ++++++++++++++++++ .../aarch64/sve2/acle/asm/amax_f32.c | 142 ++++++++++++++++++ .../aarch64/sve2/acle/asm/amax_f64.c | 142 ++++++++++++++++++ .../aarch64/sve2/acle/asm/amin_f16.c | 142 ++++++++++++++++++ .../aarch64/sve2/acle/asm/amin_f32.c | 142 ++++++++++++++++++ .../aarch64/sve2/acle/asm/amin_f64.c | 142 ++++++++++++++++++ 11 files changed, 893 insertions(+), 11 deletions(-) create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/amax_f16.c create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/amax_f32.c create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/amax_f64.c create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/amin_f16.c create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/amin_f32.c create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/amin_f64.c
diff --git a/gcc/config/aarch64/aarch64-sve-builtins-base.cc b/gcc/config/aarch64/aarch64-sve-builtins-base.cc index afce52a7e8d..dd4efdf6ca5 100644 --- a/gcc/config/aarch64/aarch64-sve-builtins-base.cc +++ b/gcc/config/aarch64/aarch64-sve-builtins-base.cc @@ -3070,6 +3070,10 @@ FUNCTION (svadrb, svadr_bhwd_impl, (0)) FUNCTION (svadrd, svadr_bhwd_impl, (3)) FUNCTION (svadrh, svadr_bhwd_impl, (1)) FUNCTION (svadrw, svadr_bhwd_impl, (2)) +FUNCTION (svamax, cond_or_uncond_unspec_function, + (UNSPEC_COND_FAMAX, UNSPEC_FAMAX)) +FUNCTION (svamin, cond_or_uncond_unspec_function, + (UNSPEC_COND_FAMIN, UNSPEC_FAMIN)) FUNCTION (svand, rtx_code_function, (AND, AND)) FUNCTION (svandv, reduction, (UNSPEC_ANDV)) FUNCTION (svasr, rtx_code_function, (ASHIFTRT, ASHIFTRT)) diff --git a/gcc/config/aarch64/aarch64-sve-builtins-base.def b/gcc/config/aarch64/aarch64-sve-builtins-base.def index 65fcba91586..95e04e4393d 100644 --- a/gcc/config/aarch64/aarch64-sve-builtins-base.def +++ b/gcc/config/aarch64/aarch64-sve-builtins-base.def @@ -379,3 +379,8 @@ DEF_SVE_FUNCTION (svzip2q, binary, all_data, none) DEF_SVE_FUNCTION (svld1ro, load_replicate, all_data, implicit) DEF_SVE_FUNCTION (svmmla, mmla, d_float, none) #undef REQUIRED_EXTENSIONS + +#define REQUIRED_EXTENSIONS AARCH64_FL_SVE | AARCH64_FL_FAMINMAX +DEF_SVE_FUNCTION (svamax, binary_opt_single_n, all_float, mxz) +DEF_SVE_FUNCTION (svamin, binary_opt_single_n, all_float, mxz) +#undef REQUIRED_EXTENSIONS diff --git a/gcc/config/aarch64/aarch64-sve-builtins-base.h b/gcc/config/aarch64/aarch64-sve-builtins-base.h index 5bbf3569c4b..978cf7013f9 100644 --- a/gcc/config/aarch64/aarch64-sve-builtins-base.h +++ b/gcc/config/aarch64/aarch64-sve-builtins-base.h @@ -37,6 +37,8 @@ namespace aarch64_sve extern const function_base *const svadrd; extern const function_base *const svadrh; extern const function_base *const svadrw; + extern const function_base *const svamax; + extern const function_base *const svamin; extern const function_base *const 
svand; extern const function_base *const svandv; extern const function_base *const svasr; diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h index 43819adb48c..a496235db42 100644 --- a/gcc/config/aarch64/aarch64.h +++ b/gcc/config/aarch64/aarch64.h @@ -470,6 +470,7 @@ constexpr auto AARCH64_FL_DEFAULT_ISA_MODE ATTRIBUTE_UNUSED /* Floating Point Absolute Maximum/Minimum extension instructions are enabled through +faminmax. */ #define TARGET_FAMINMAX AARCH64_HAVE_ISA (FAMINMAX) +#define TARGET_SVE_FAMINMAX (TARGET_SVE && TARGET_FAMINMAX) /* Prefer different predicate registers for the output of a predicated operation over re-using an existing input predicate. */ diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md index c2fcd18306e..cf9ee2639a9 100644 --- a/gcc/config/aarch64/iterators.md +++ b/gcc/config/aarch64/iterators.md @@ -841,6 +841,8 @@ UNSPEC_COND_CMPNE_WIDE ; Used in aarch64-sve.md. UNSPEC_COND_FABS ; Used in aarch64-sve.md. UNSPEC_COND_FADD ; Used in aarch64-sve.md. + UNSPEC_COND_FAMAX ; Used in aarch64-sve.md. + UNSPEC_COND_FAMIN ; Used in aarch64-sve.md. UNSPEC_COND_FCADD90 ; Used in aarch64-sve.md. UNSPEC_COND_FCADD270 ; Used in aarch64-sve.md. UNSPEC_COND_FCMEQ ; Used in aarch64-sve.md. 
@@ -3081,15 +3083,18 @@ (define_int_iterator SVE_COND_FCVTI [UNSPEC_COND_FCVTZS UNSPEC_COND_FCVTZU]) (define_int_iterator SVE_COND_ICVTF [UNSPEC_COND_SCVTF UNSPEC_COND_UCVTF]) -(define_int_iterator SVE_COND_FP_BINARY [UNSPEC_COND_FADD - UNSPEC_COND_FDIV - UNSPEC_COND_FMAX - UNSPEC_COND_FMAXNM - UNSPEC_COND_FMIN - UNSPEC_COND_FMINNM - UNSPEC_COND_FMUL - UNSPEC_COND_FMULX - UNSPEC_COND_FSUB]) +(define_int_iterator SVE_COND_FP_BINARY + [UNSPEC_COND_FADD + (UNSPEC_COND_FAMAX "TARGET_SVE_FAMINMAX") + (UNSPEC_COND_FAMIN "TARGET_SVE_FAMINMAX") + UNSPEC_COND_FDIV + UNSPEC_COND_FMAX + UNSPEC_COND_FMAXNM + UNSPEC_COND_FMIN + UNSPEC_COND_FMINNM + UNSPEC_COND_FMUL + UNSPEC_COND_FMULX + UNSPEC_COND_FSUB]) ;; Same as SVE_COND_FP_BINARY, but without codes that have a dedicated ;; <optab><mode>3 expander. @@ -3114,8 +3119,11 @@ UNSPEC_COND_FMINNM UNSPEC_COND_FMUL]) -(define_int_iterator SVE_COND_FP_BINARY_REG [UNSPEC_COND_FDIV - UNSPEC_COND_FMULX]) +(define_int_iterator SVE_COND_FP_BINARY_REG + [(UNSPEC_COND_FAMAX "TARGET_SVE_FAMINMAX") + (UNSPEC_COND_FAMIN "TARGET_SVE_FAMINMAX") + UNSPEC_COND_FDIV + UNSPEC_COND_FMULX]) (define_int_iterator SVE_COND_FCADD [UNSPEC_COND_FCADD90 UNSPEC_COND_FCADD270]) @@ -3694,6 +3702,8 @@ (UNSPEC_ZIP2Q "zip2q") (UNSPEC_COND_FABS "abs") (UNSPEC_COND_FADD "add") + (UNSPEC_COND_FAMAX "famax") + (UNSPEC_COND_FAMIN "famin") (UNSPEC_COND_FCADD90 "cadd90") (UNSPEC_COND_FCADD270 "cadd270") (UNSPEC_COND_FCMLA "fcmla") @@ -4230,6 +4240,8 @@ (UNSPEC_FTSSEL "ftssel") (UNSPEC_COND_FABS "fabs") (UNSPEC_COND_FADD "fadd") + (UNSPEC_COND_FAMAX "famax") + (UNSPEC_COND_FAMIN "famin") (UNSPEC_COND_FCVTLT "fcvtlt") (UNSPEC_COND_FCVTX "fcvtx") (UNSPEC_COND_FDIV "fdiv") @@ -4254,6 +4266,8 @@ (UNSPEC_COND_FSUB "fsub")]) (define_int_attr sve_fp_op_rev [(UNSPEC_COND_FADD "fadd") + (UNSPEC_COND_FAMAX "famax") + (UNSPEC_COND_FAMIN "famin") (UNSPEC_COND_FDIV "fdivr") (UNSPEC_COND_FMAX "fmax") (UNSPEC_COND_FMAXNM "fmaxnm") @@ -4390,6 +4404,8 @@ ;; <optab><mode>3 pattern. 
(define_int_attr sve_pred_fp_rhs1_operand [(UNSPEC_COND_FADD "register_operand") + (UNSPEC_COND_FAMAX "register_operand") + (UNSPEC_COND_FAMIN "register_operand") (UNSPEC_COND_FDIV "register_operand") (UNSPEC_COND_FMAX "register_operand") (UNSPEC_COND_FMAXNM "register_operand") @@ -4403,6 +4419,8 @@ ;; <optab><mode>3 pattern. (define_int_attr sve_pred_fp_rhs2_operand [(UNSPEC_COND_FADD "aarch64_sve_float_arith_with_sub_operand") + (UNSPEC_COND_FAMAX "aarch64_sve_float_maxmin_operand") + (UNSPEC_COND_FAMIN "aarch64_sve_float_maxmin_operand") (UNSPEC_COND_FDIV "register_operand") (UNSPEC_COND_FMAX "aarch64_sve_float_maxmin_operand") (UNSPEC_COND_FMAXNM "aarch64_sve_float_maxmin_operand") diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/amax_f16.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/amax_f16.c new file mode 100644 index 00000000000..e5681a0733e --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/amax_f16.c @@ -0,0 +1,142 @@ +/* { dg-do compile } */ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ +#include "test_sve_acle.h" + +#pragma GCC target "+sve+faminmax" + +/* +** amax_f16_m_tied1: +** famax z0\.h, p0/m, z0\.h, z1\.h +** ret +*/ +TEST_UNIFORM_Z (amax_f16_m_tied1, svfloat16_t, + z0 = svamax_f16_m (p0, z0, z1), + z0 = svamax_m (p0, z0, z1)) +/* +** amax_f16_m_tied2: +** mov z31\.d, z0\.d +** movprfx z0, z1 +** famax z0\.h, p0/m, z0\.h, z31\.h +** ret +*/ +TEST_UNIFORM_Z (amax_f16_m_tied2, svfloat16_t, + z0 = svamax_f16_m (p0, z1, z0), + z0 = svamax_m (p0, z1, z0)) +/* +** amax_f16_m_untied: +** movprfx z0, z1 +** famax z0\.h, p0/m, z0\.h, z2\.h +** ret +*/ +TEST_UNIFORM_Z (amax_f16_m_untied, svfloat16_t, + z0 = svamax_f16_m (p0, z1, z2), + z0 = svamax_m (p0, z1, z2)) +/* +** amax_f16_x_tied1: +** famax z0\.h, p0/m, z0\.h, z1\.h +** ret +*/ +TEST_UNIFORM_Z (amax_f16_x_tied1, svfloat16_t, + z0 = svamax_f16_x (p0, z0, z1), + z0 = svamax_x (p0, z0, z1)) +/* +** amax_f16_x_tied2: +** famax z0\.h, p0/m, z0\.h, 
z1\.h +** ret +*/ +TEST_UNIFORM_Z (amax_f16_x_tied2, svfloat16_t, + z0 = svamax_f16_x (p0, z1, z0), + z0 = svamax_x (p0, z1, z0)) +/* +** amax_f16_x_untied: +** movprfx z0, z1 +** famax z0\.h, p0/m, z0\.h, z2\.h +** ret +*/ +TEST_UNIFORM_Z (amax_f16_x_untied, svfloat16_t, + z0 = svamax_f16_x (p0, z1, z2), + z0 = svamax_x (p0, z1, z2)) +/* +** amax_f16_z_tied1: +** movprfx z0\.h, p0/z, z0\.h +** famax z0\.h, p0/m, z0\.h, z1\.h +** ret +*/ +TEST_UNIFORM_Z (amax_f16_z_tied1, svfloat16_t, + z0 = svamax_f16_z (p0, z0, z1), + z0 = svamax_z (p0, z0, z1)) +/* +** amax_f16_z_tied2: +** movprfx z0\.h, p0/z, z0\.h +** famax z0\.h, p0/m, z0\.h, z1\.h +** ret +*/ +TEST_UNIFORM_Z (amax_f16_z_tied2, svfloat16_t, + z0 = svamax_f16_z (p0, z1, z0), + z0 = svamax_z (p0, z1, z0)) +/* +** amax_f16_z_untied: +** movprfx z0\.h, p0/z, z1\.h +** famax z0\.h, p0/m, z0\.h, z2\.h +** ret +*/ +TEST_UNIFORM_Z (amax_f16_z_untied, svfloat16_t, + z0 = svamax_f16_z (p0, z1, z2), + z0 = svamax_z (p0, z1, z2)) +/* +** amax_n_f16_m_tied1: +** mov z7\.h, h7 +** famax z0\.h, p0/m, z0\.h, z7\.h +** ret +*/ +TEST_DUAL_ZD (amax_n_f16_m_tied1, svfloat16_t, svfloat16_t, float16_t, + z0 = svamax_n_f16_m (p0, z0, d7), + z0 = svamax_m (p0, z0, d7)) +/* +** amax_n_f16_m_untied: +** mov z7\.h, h7 +** movprfx z0, z4 +** famax z0\.h, p0/m, z0\.h, z7\.h +** ret +*/ +TEST_DUAL_ZD (amax_n_f16_m_untied, svfloat16_t, svfloat16_t, float16_t, + z0 = svamax_n_f16_m (p0, z4, d7), + z0 = svamax_m (p0, z4, d7)) +/* +** amax_n_f16_x_tied1: +** mov z7\.h, h7 +** famax z0\.h, p0/m, z0\.h, z7\.h +** ret +*/ +TEST_DUAL_ZD (amax_n_f16_x_tied1, svfloat16_t, svfloat16_t, float16_t, + z0 = svamax_n_f16_x (p0, z0, d7), + z0 = svamax_x (p0, z0, d7)) +/* +** amax_n_f16_x_untied: +** mov z0\.h, h7 +** famax z0\.h, p0/m, z0\.h, z4\.h +** ret +*/ +TEST_DUAL_ZD (amax_n_f16_x_untied, svfloat16_t, svfloat16_t, float16_t, + z0 = svamax_n_f16_x (p0, z4, d7), + z0 = svamax_x (p0, z4, d7)) +/* +** amax_n_f16_z_tied1: +** mov z7\.h, h7 +** movprfx 
z0\.h, p0/z, z0\.h +** famax z0\.h, p0/m, z0\.h, z7\.h +** ret +*/ +TEST_DUAL_ZD (amax_n_f16_z_tied1, svfloat16_t, svfloat16_t, float16_t, + z0 = svamax_n_f16_z (p0, z0, d7), + z0 = svamax_z (p0, z0, d7)) +/* +** amax_n_f16_z_untied: +** mov z7\.h, h7 +** movprfx z0\.h, p0/z, z4\.h +** famax z0\.h, p0/m, z0\.h, z7\.h +** ret +*/ +TEST_DUAL_ZD (amax_n_f16_z_untied, svfloat16_t, svfloat16_t, float16_t, + z0 = svamax_n_f16_z (p0, z4, d7), + z0 = svamax_z (p0, z4, d7)) diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/amax_f32.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/amax_f32.c new file mode 100644 index 00000000000..ac6fd227b52 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/amax_f32.c @@ -0,0 +1,142 @@ +/* { dg-do compile } */ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ +#include "test_sve_acle.h" + +#pragma GCC target "+sve+faminmax" + +/* +** amax_f32_m_tied1: +** famax z0\.s, p0/m, z0\.s, z1\.s +** ret +*/ +TEST_UNIFORM_Z (amax_f32_m_tied1, svfloat32_t, + z0 = svamax_f32_m (p0, z0, z1), + z0 = svamax_m (p0, z0, z1)) +/* +** amax_f32_m_tied2: +** mov z31\.d, z0\.d +** movprfx z0, z1 +** famax z0\.s, p0/m, z0\.s, z31\.s +** ret +*/ +TEST_UNIFORM_Z (amax_f32_m_tied2, svfloat32_t, + z0 = svamax_f32_m (p0, z1, z0), + z0 = svamax_m (p0, z1, z0)) +/* +** amax_f32_m_untied: +** movprfx z0, z1 +** famax z0\.s, p0/m, z0\.s, z2\.s +** ret +*/ +TEST_UNIFORM_Z (amax_f32_m_untied, svfloat32_t, + z0 = svamax_f32_m (p0, z1, z2), + z0 = svamax_m (p0, z1, z2)) +/* +** amax_f32_x_tied1: +** famax z0\.s, p0/m, z0\.s, z1\.s +** ret +*/ +TEST_UNIFORM_Z (amax_f32_x_tied1, svfloat32_t, + z0 = svamax_f32_x (p0, z0, z1), + z0 = svamax_x (p0, z0, z1)) +/* +** amax_f32_x_tied2: +** famax z0\.s, p0/m, z0\.s, z1\.s +** ret +*/ +TEST_UNIFORM_Z (amax_f32_x_tied2, svfloat32_t, + z0 = svamax_f32_x (p0, z1, z0), + z0 = svamax_x (p0, z1, z0)) +/* +** amax_f32_x_untied: +** movprfx z0, z1 +** famax z0\.s, p0/m, z0\.s, z2\.s +** ret +*/ 
+TEST_UNIFORM_Z (amax_f32_x_untied, svfloat32_t, + z0 = svamax_f32_x (p0, z1, z2), + z0 = svamax_x (p0, z1, z2)) +/* +** amax_f32_z_tied1: +** movprfx z0\.s, p0/z, z0\.s +** famax z0\.s, p0/m, z0\.s, z1\.s +** ret +*/ +TEST_UNIFORM_Z (amax_f32_z_tied1, svfloat32_t, + z0 = svamax_f32_z (p0, z0, z1), + z0 = svamax_z (p0, z0, z1)) +/* +** amax_f32_z_tied2: +** movprfx z0\.s, p0/z, z0\.s +** famax z0\.s, p0/m, z0\.s, z1\.s +** ret +*/ +TEST_UNIFORM_Z (amax_f32_z_tied2, svfloat32_t, + z0 = svamax_f32_z (p0, z1, z0), + z0 = svamax_z (p0, z1, z0)) +/* +** amax_f32_z_untied: +** movprfx z0\.s, p0/z, z1\.s +** famax z0\.s, p0/m, z0\.s, z2\.s +** ret +*/ +TEST_UNIFORM_Z (amax_f32_z_untied, svfloat32_t, + z0 = svamax_f32_z (p0, z1, z2), + z0 = svamax_z (p0, z1, z2)) +/* +** amax_n_f32_m_tied1: +** mov z7\.s, s7 +** famax z0\.s, p0/m, z0\.s, z7\.s +** ret +*/ +TEST_DUAL_ZD (amax_n_f32_m_tied1, svfloat32_t, svfloat32_t, float32_t, + z0 = svamax_n_f32_m (p0, z0, d7), + z0 = svamax_m (p0, z0, d7)) +/* +** amax_n_f32_m_untied: +** mov z7\.s, s7 +** movprfx z0, z4 +** famax z0\.s, p0/m, z0\.s, z7\.s +** ret +*/ +TEST_DUAL_ZD (amax_n_f32_m_untied, svfloat32_t, svfloat32_t, float32_t, + z0 = svamax_n_f32_m (p0, z4, d7), + z0 = svamax_m (p0, z4, d7)) +/* +** amax_n_f32_x_tied1: +** mov z7\.s, s7 +** famax z0\.s, p0/m, z0\.s, z7\.s +** ret +*/ +TEST_DUAL_ZD (amax_n_f32_x_tied1, svfloat32_t, svfloat32_t, float32_t, + z0 = svamax_n_f32_x (p0, z0, d7), + z0 = svamax_x (p0, z0, d7)) +/* +** amax_n_f32_x_untied: +** mov z0\.s, s7 +** famax z0\.s, p0/m, z0\.s, z4\.s +** ret +*/ +TEST_DUAL_ZD (amax_n_f32_x_untied, svfloat32_t, svfloat32_t, float32_t, + z0 = svamax_n_f32_x (p0, z4, d7), + z0 = svamax_x (p0, z4, d7)) +/* +** amax_n_f32_z_tied1: +** mov z7\.s, s7 +** movprfx z0\.s, p0/z, z0\.s +** famax z0\.s, p0/m, z0\.s, z7\.s +** ret +*/ +TEST_DUAL_ZD (amax_n_f32_z_tied1, svfloat32_t, svfloat32_t, float32_t, + z0 = svamax_n_f32_z (p0, z0, d7), + z0 = svamax_z (p0, z0, d7)) +/* +** 
amax_n_f32_z_untied: +** mov z7\.s, s7 +** movprfx z0\.s, p0/z, z4\.s +** famax z0\.s, p0/m, z0\.s, z7\.s +** ret +*/ +TEST_DUAL_ZD (amax_n_f32_z_untied, svfloat32_t, svfloat32_t, float32_t, + z0 = svamax_n_f32_z (p0, z4, d7), + z0 = svamax_z (p0, z4, d7)) diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/amax_f64.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/amax_f64.c new file mode 100644 index 00000000000..9e711674ea5 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/amax_f64.c @@ -0,0 +1,142 @@ +/* { dg-do compile } */ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ +#include "test_sve_acle.h" + +#pragma GCC target "+sve+faminmax" + +/* +** amax_f64_m_tied1: +** famax z0\.d, p0/m, z0\.d, z1\.d +** ret +*/ +TEST_UNIFORM_Z (amax_f64_m_tied1, svfloat64_t, + z0 = svamax_f64_m (p0, z0, z1), + z0 = svamax_m (p0, z0, z1)) +/* +** amax_f64_m_tied2: +** mov z31\.d, z0\.d +** movprfx z0, z1 +** famax z0\.d, p0/m, z0\.d, z31\.d +** ret +*/ +TEST_UNIFORM_Z (amax_f64_m_tied2, svfloat64_t, + z0 = svamax_f64_m (p0, z1, z0), + z0 = svamax_m (p0, z1, z0)) +/* +** amax_f64_m_untied: +** movprfx z0, z1 +** famax z0\.d, p0/m, z0\.d, z2\.d +** ret +*/ +TEST_UNIFORM_Z (amax_f64_m_untied, svfloat64_t, + z0 = svamax_f64_m (p0, z1, z2), + z0 = svamax_m (p0, z1, z2)) +/* +** amax_f64_x_tied1: +** famax z0\.d, p0/m, z0\.d, z1\.d +** ret +*/ +TEST_UNIFORM_Z (amax_f64_x_tied1, svfloat64_t, + z0 = svamax_f64_x (p0, z0, z1), + z0 = svamax_x (p0, z0, z1)) +/* +** amax_f64_x_tied2: +** famax z0\.d, p0/m, z0\.d, z1\.d +** ret +*/ +TEST_UNIFORM_Z (amax_f64_x_tied2, svfloat64_t, + z0 = svamax_f64_x (p0, z1, z0), + z0 = svamax_x (p0, z1, z0)) +/* +** amax_f64_x_untied: +** movprfx z0, z1 +** famax z0\.d, p0/m, z0\.d, z2\.d +** ret +*/ +TEST_UNIFORM_Z (amax_f64_x_untied, svfloat64_t, + z0 = svamax_f64_x (p0, z1, z2), + z0 = svamax_x (p0, z1, z2)) +/* +** amax_f64_z_tied1: +** movprfx z0\.d, p0/z, z0\.d +** famax z0\.d, p0/m, z0\.d, z1\.d +** ret 
+*/ +TEST_UNIFORM_Z (amax_f64_z_tied1, svfloat64_t, + z0 = svamax_f64_z (p0, z0, z1), + z0 = svamax_z (p0, z0, z1)) +/* +** amax_f64_z_tied2: +** movprfx z0\.d, p0/z, z0\.d +** famax z0\.d, p0/m, z0\.d, z1\.d +** ret +*/ +TEST_UNIFORM_Z (amax_f64_z_tied2, svfloat64_t, + z0 = svamax_f64_z (p0, z1, z0), + z0 = svamax_z (p0, z1, z0)) +/* +** amax_f64_z_untied: +** movprfx z0\.d, p0/z, z1\.d +** famax z0\.d, p0/m, z0\.d, z2\.d +** ret +*/ +TEST_UNIFORM_Z (amax_f64_z_untied, svfloat64_t, + z0 = svamax_f64_z (p0, z1, z2), + z0 = svamax_z (p0, z1, z2)) +/* +** amax_n_f64_m_tied1: +** mov z7\.d, d7 +** famax z0\.d, p0/m, z0\.d, z7\.d +** ret +*/ +TEST_DUAL_ZD (amax_n_f64_m_tied1, svfloat64_t, svfloat64_t, float64_t, + z0 = svamax_n_f64_m (p0, z0, d7), + z0 = svamax_m (p0, z0, d7)) +/* +** amax_n_f64_m_untied: +** mov z7\.d, d7 +** movprfx z0, z4 +** famax z0\.d, p0/m, z0\.d, z7\.d +** ret +*/ +TEST_DUAL_ZD (amax_n_f64_m_untied, svfloat64_t, svfloat64_t, float64_t, + z0 = svamax_n_f64_m (p0, z4, d7), + z0 = svamax_m (p0, z4, d7)) +/* +** amax_n_f64_x_tied1: +** mov z7\.d, d7 +** famax z0\.d, p0/m, z0\.d, z7\.d +** ret +*/ +TEST_DUAL_ZD (amax_n_f64_x_tied1, svfloat64_t, svfloat64_t, float64_t, + z0 = svamax_n_f64_x (p0, z0, d7), + z0 = svamax_x (p0, z0, d7)) +/* +** amax_n_f64_x_untied: +** mov z0\.d, d7 +** famax z0\.d, p0/m, z0\.d, z4\.d +** ret +*/ +TEST_DUAL_ZD (amax_n_f64_x_untied, svfloat64_t, svfloat64_t, float64_t, + z0 = svamax_n_f64_x (p0, z4, d7), + z0 = svamax_x (p0, z4, d7)) +/* +** amax_n_f64_z_tied1: +** mov z7\.d, d7 +** movprfx z0\.d, p0/z, z0\.d +** famax z0\.d, p0/m, z0\.d, z7\.d +** ret +*/ +TEST_DUAL_ZD (amax_n_f64_z_tied1, svfloat64_t, svfloat64_t, float64_t, + z0 = svamax_n_f64_z (p0, z0, d7), + z0 = svamax_z (p0, z0, d7)) +/* +** amax_n_f64_z_untied: +** mov z7\.d, d7 +** movprfx z0\.d, p0/z, z4\.d +** famax z0\.d, p0/m, z0\.d, z7\.d +** ret +*/ +TEST_DUAL_ZD (amax_n_f64_z_untied, svfloat64_t, svfloat64_t, float64_t, + z0 = svamax_n_f64_z (p0, z4, 
d7), + z0 = svamax_z (p0, z4, d7)) diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/amin_f16.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/amin_f16.c new file mode 100644 index 00000000000..3c949df023c --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/amin_f16.c @@ -0,0 +1,142 @@ +/* { dg-do compile } */ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ +#include "test_sve_acle.h" + +#pragma GCC target "+sve+faminmax" + +/* +** amin_f16_m_tied1: +** famin z0\.h, p0/m, z0\.h, z1\.h +** ret +*/ +TEST_UNIFORM_Z (amin_f16_m_tied1, svfloat16_t, + z0 = svamin_f16_m (p0, z0, z1), + z0 = svamin_m (p0, z0, z1)) +/* +** amin_f16_m_tied2: +** mov z31\.d, z0\.d +** movprfx z0, z1 +** famin z0\.h, p0/m, z0\.h, z31\.h +** ret +*/ +TEST_UNIFORM_Z (amin_f16_m_tied2, svfloat16_t, + z0 = svamin_f16_m (p0, z1, z0), + z0 = svamin_m (p0, z1, z0)) +/* +** amin_f16_m_untied: +** movprfx z0, z1 +** famin z0\.h, p0/m, z0\.h, z2\.h +** ret +*/ +TEST_UNIFORM_Z (amin_f16_m_untied, svfloat16_t, + z0 = svamin_f16_m (p0, z1, z2), + z0 = svamin_m (p0, z1, z2)) +/* +** amin_f16_x_tied1: +** famin z0\.h, p0/m, z0\.h, z1\.h +** ret +*/ +TEST_UNIFORM_Z (amin_f16_x_tied1, svfloat16_t, + z0 = svamin_f16_x (p0, z0, z1), + z0 = svamin_x (p0, z0, z1)) +/* +** amin_f16_x_tied2: +** famin z0\.h, p0/m, z0\.h, z1\.h +** ret +*/ +TEST_UNIFORM_Z (amin_f16_x_tied2, svfloat16_t, + z0 = svamin_f16_x (p0, z1, z0), + z0 = svamin_x (p0, z1, z0)) +/* +** amin_f16_x_untied: +** movprfx z0, z1 +** famin z0\.h, p0/m, z0\.h, z2\.h +** ret +*/ +TEST_UNIFORM_Z (amin_f16_x_untied, svfloat16_t, + z0 = svamin_f16_x (p0, z1, z2), + z0 = svamin_x (p0, z1, z2)) +/* +** amin_f16_z_tied1: +** movprfx z0\.h, p0/z, z0\.h +** famin z0\.h, p0/m, z0\.h, z1\.h +** ret +*/ +TEST_UNIFORM_Z (amin_f16_z_tied1, svfloat16_t, + z0 = svamin_f16_z (p0, z0, z1), + z0 = svamin_z (p0, z0, z1)) +/* +** amin_f16_z_tied2: +** movprfx z0\.h, p0/z, z0\.h +** famin z0\.h, p0/m, z0\.h, z1\.h +** ret +*/ 
+TEST_UNIFORM_Z (amin_f16_z_tied2, svfloat16_t, + z0 = svamin_f16_z (p0, z1, z0), + z0 = svamin_z (p0, z1, z0)) +/* +** amin_f16_z_untied: +** movprfx z0\.h, p0/z, z1\.h +** famin z0\.h, p0/m, z0\.h, z2\.h +** ret +*/ +TEST_UNIFORM_Z (amin_f16_z_untied, svfloat16_t, + z0 = svamin_f16_z (p0, z1, z2), + z0 = svamin_z (p0, z1, z2)) +/* +** amin_n_f16_m_tied1: +** mov z7\.h, h7 +** famin z0\.h, p0/m, z0\.h, z7\.h +** ret +*/ +TEST_DUAL_ZD (amin_n_f16_m_tied1, svfloat16_t, svfloat16_t, float16_t, + z0 = svamin_n_f16_m (p0, z0, d7), + z0 = svamin_m (p0, z0, d7)) +/* +** amin_n_f16_m_untied: +** mov z7\.h, h7 +** movprfx z0, z4 +** famin z0\.h, p0/m, z0\.h, z7\.h +** ret +*/ +TEST_DUAL_ZD (amin_n_f16_m_untied, svfloat16_t, svfloat16_t, float16_t, + z0 = svamin_n_f16_m (p0, z4, d7), + z0 = svamin_m (p0, z4, d7)) +/* +** amin_n_f16_x_tied1: +** mov z7\.h, h7 +** famin z0\.h, p0/m, z0\.h, z7\.h +** ret +*/ +TEST_DUAL_ZD (amin_n_f16_x_tied1, svfloat16_t, svfloat16_t, float16_t, + z0 = svamin_n_f16_x (p0, z0, d7), + z0 = svamin_x (p0, z0, d7)) +/* +** amin_n_f16_x_untied: +** mov z0\.h, h7 +** famin z0\.h, p0/m, z0\.h, z4\.h +** ret +*/ +TEST_DUAL_ZD (amin_n_f16_x_untied, svfloat16_t, svfloat16_t, float16_t, + z0 = svamin_n_f16_x (p0, z4, d7), + z0 = svamin_x (p0, z4, d7)) +/* +** amin_n_f16_z_tied1: +** mov z7\.h, h7 +** movprfx z0\.h, p0/z, z0\.h +** famin z0\.h, p0/m, z0\.h, z7\.h +** ret +*/ +TEST_DUAL_ZD (amin_n_f16_z_tied1, svfloat16_t, svfloat16_t, float16_t, + z0 = svamin_n_f16_z (p0, z0, d7), + z0 = svamin_z (p0, z0, d7)) +/* +** amin_n_f16_z_untied: +** mov z7\.h, h7 +** movprfx z0\.h, p0/z, z4\.h +** famin z0\.h, p0/m, z0\.h, z7\.h +** ret +*/ +TEST_DUAL_ZD (amin_n_f16_z_untied, svfloat16_t, svfloat16_t, float16_t, + z0 = svamin_n_f16_z (p0, z4, d7), + z0 = svamin_z (p0, z4, d7)) diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/amin_f32.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/amin_f32.c new file mode 100644 index 00000000000..b606c448ea6 --- 
/dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/amin_f32.c @@ -0,0 +1,142 @@ +/* { dg-do compile } */ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ +#include "test_sve_acle.h" + +#pragma GCC target "+sve+faminmax" + +/* +** amin_f32_m_tied1: +** famin z0\.s, p0/m, z0\.s, z1\.s +** ret +*/ +TEST_UNIFORM_Z (amin_f32_m_tied1, svfloat32_t, + z0 = svamin_f32_m (p0, z0, z1), + z0 = svamin_m (p0, z0, z1)) +/* +** amin_f32_m_tied2: +** mov z31\.d, z0\.d +** movprfx z0, z1 +** famin z0\.s, p0/m, z0\.s, z31\.s +** ret +*/ +TEST_UNIFORM_Z (amin_f32_m_tied2, svfloat32_t, + z0 = svamin_f32_m (p0, z1, z0), + z0 = svamin_m (p0, z1, z0)) +/* +** amin_f32_m_untied: +** movprfx z0, z1 +** famin z0\.s, p0/m, z0\.s, z2\.s +** ret +*/ +TEST_UNIFORM_Z (amin_f32_m_untied, svfloat32_t, + z0 = svamin_f32_m (p0, z1, z2), + z0 = svamin_m (p0, z1, z2)) +/* +** amin_f32_x_tied1: +** famin z0\.s, p0/m, z0\.s, z1\.s +** ret +*/ +TEST_UNIFORM_Z (amin_f32_x_tied1, svfloat32_t, + z0 = svamin_f32_x (p0, z0, z1), + z0 = svamin_x (p0, z0, z1)) +/* +** amin_f32_x_tied2: +** famin z0\.s, p0/m, z0\.s, z1\.s +** ret +*/ +TEST_UNIFORM_Z (amin_f32_x_tied2, svfloat32_t, + z0 = svamin_f32_x (p0, z1, z0), + z0 = svamin_x (p0, z1, z0)) +/* +** amin_f32_x_untied: +** movprfx z0, z1 +** famin z0\.s, p0/m, z0\.s, z2\.s +** ret +*/ +TEST_UNIFORM_Z (amin_f32_x_untied, svfloat32_t, + z0 = svamin_f32_x (p0, z1, z2), + z0 = svamin_x (p0, z1, z2)) +/* +** amin_f32_z_tied1: +** movprfx z0\.s, p0/z, z0\.s +** famin z0\.s, p0/m, z0\.s, z1\.s +** ret +*/ +TEST_UNIFORM_Z (amin_f32_z_tied1, svfloat32_t, + z0 = svamin_f32_z (p0, z0, z1), + z0 = svamin_z (p0, z0, z1)) +/* +** amin_f32_z_tied2: +** movprfx z0\.s, p0/z, z0\.s +** famin z0\.s, p0/m, z0\.s, z1\.s +** ret +*/ +TEST_UNIFORM_Z (amin_f32_z_tied2, svfloat32_t, + z0 = svamin_f32_z (p0, z1, z0), + z0 = svamin_z (p0, z1, z0)) +/* +** amin_f32_z_untied: +** movprfx z0\.s, p0/z, z1\.s +** famin z0\.s, p0/m, z0\.s, z2\.s +** ret +*/ 
+TEST_UNIFORM_Z (amin_f32_z_untied, svfloat32_t, + z0 = svamin_f32_z (p0, z1, z2), + z0 = svamin_z (p0, z1, z2)) +/* +** amin_n_f32_m_tied1: +** mov z7\.s, s7 +** famin z0\.s, p0/m, z0\.s, z7\.s +** ret +*/ +TEST_DUAL_ZD (amin_n_f32_m_tied1, svfloat32_t, svfloat32_t, float32_t, + z0 = svamin_n_f32_m (p0, z0, d7), + z0 = svamin_m (p0, z0, d7)) +/* +** amin_n_f32_m_untied: +** mov z7\.s, s7 +** movprfx z0, z4 +** famin z0\.s, p0/m, z0\.s, z7\.s +** ret +*/ +TEST_DUAL_ZD (amin_n_f32_m_untied, svfloat32_t, svfloat32_t, float32_t, + z0 = svamin_n_f32_m (p0, z4, d7), + z0 = svamin_m (p0, z4, d7)) +/* +** amin_n_f32_x_tied1: +** mov z7\.s, s7 +** famin z0\.s, p0/m, z0\.s, z7\.s +** ret +*/ +TEST_DUAL_ZD (amin_n_f32_x_tied1, svfloat32_t, svfloat32_t, float32_t, + z0 = svamin_n_f32_x (p0, z0, d7), + z0 = svamin_x (p0, z0, d7)) +/* +** amin_n_f32_x_untied: +** mov z0\.s, s7 +** famin z0\.s, p0/m, z0\.s, z4\.s +** ret +*/ +TEST_DUAL_ZD (amin_n_f32_x_untied, svfloat32_t, svfloat32_t, float32_t, + z0 = svamin_n_f32_x (p0, z4, d7), + z0 = svamin_x (p0, z4, d7)) +/* +** amin_n_f32_z_tied1: +** mov z7\.s, s7 +** movprfx z0\.s, p0/z, z0\.s +** famin z0\.s, p0/m, z0\.s, z7\.s +** ret +*/ +TEST_DUAL_ZD (amin_n_f32_z_tied1, svfloat32_t, svfloat32_t, float32_t, + z0 = svamin_n_f32_z (p0, z0, d7), + z0 = svamin_z (p0, z0, d7)) +/* +** amin_n_f32_z_untied: +** mov z7\.s, s7 +** movprfx z0\.s, p0/z, z4\.s +** famin z0\.s, p0/m, z0\.s, z7\.s +** ret +*/ +TEST_DUAL_ZD (amin_n_f32_z_untied, svfloat32_t, svfloat32_t, float32_t, + z0 = svamin_n_f32_z (p0, z4, d7), + z0 = svamin_z (p0, z4, d7)) diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/amin_f64.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/amin_f64.c new file mode 100644 index 00000000000..d91b7200c18 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/amin_f64.c @@ -0,0 +1,142 @@ +/* { dg-do compile } */ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ +#include "test_sve_acle.h" + 
+#pragma GCC target "+sve+faminmax" + +/* +** amin_f64_m_tied1: +** famin z0\.d, p0/m, z0\.d, z1\.d +** ret +*/ +TEST_UNIFORM_Z (amin_f64_m_tied1, svfloat64_t, + z0 = svamin_f64_m (p0, z0, z1), + z0 = svamin_m (p0, z0, z1)) +/* +** amin_f64_m_tied2: +** mov z31\.d, z0\.d +** movprfx z0, z1 +** famin z0\.d, p0/m, z0\.d, z31\.d +** ret +*/ +TEST_UNIFORM_Z (amin_f64_m_tied2, svfloat64_t, + z0 = svamin_f64_m (p0, z1, z0), + z0 = svamin_m (p0, z1, z0)) +/* +** amin_f64_m_untied: +** movprfx z0, z1 +** famin z0\.d, p0/m, z0\.d, z2\.d +** ret +*/ +TEST_UNIFORM_Z (amin_f64_m_untied, svfloat64_t, + z0 = svamin_f64_m (p0, z1, z2), + z0 = svamin_m (p0, z1, z2)) +/* +** amin_f64_x_tied1: +** famin z0\.d, p0/m, z0\.d, z1\.d +** ret +*/ +TEST_UNIFORM_Z (amin_f64_x_tied1, svfloat64_t, + z0 = svamin_f64_x (p0, z0, z1), + z0 = svamin_x (p0, z0, z1)) +/* +** amin_f64_x_tied2: +** famin z0\.d, p0/m, z0\.d, z1\.d +** ret +*/ +TEST_UNIFORM_Z (amin_f64_x_tied2, svfloat64_t, + z0 = svamin_f64_x (p0, z1, z0), + z0 = svamin_x (p0, z1, z0)) +/* +** amin_f64_x_untied: +** movprfx z0, z1 +** famin z0\.d, p0/m, z0\.d, z2\.d +** ret +*/ +TEST_UNIFORM_Z (amin_f64_x_untied, svfloat64_t, + z0 = svamin_f64_x (p0, z1, z2), + z0 = svamin_x (p0, z1, z2)) +/* +** amin_f64_z_tied1: +** movprfx z0\.d, p0/z, z0\.d +** famin z0\.d, p0/m, z0\.d, z1\.d +** ret +*/ +TEST_UNIFORM_Z (amin_f64_z_tied1, svfloat64_t, + z0 = svamin_f64_z (p0, z0, z1), + z0 = svamin_z (p0, z0, z1)) +/* +** amin_f64_z_tied2: +** movprfx z0\.d, p0/z, z0\.d +** famin z0\.d, p0/m, z0\.d, z1\.d +** ret +*/ +TEST_UNIFORM_Z (amin_f64_z_tied2, svfloat64_t, + z0 = svamin_f64_z (p0, z1, z0), + z0 = svamin_z (p0, z1, z0)) +/* +** amin_f64_z_untied: +** movprfx z0\.d, p0/z, z1\.d +** famin z0\.d, p0/m, z0\.d, z2\.d +** ret +*/ +TEST_UNIFORM_Z (amin_f64_z_untied, svfloat64_t, + z0 = svamin_f64_z (p0, z1, z2), + z0 = svamin_z (p0, z1, z2)) +/* +** amin_n_f64_m_tied1: +** mov z7\.d, d7 +** famin z0\.d, p0/m, z0\.d, z7\.d +** ret +*/ +TEST_DUAL_ZD 
(amin_n_f64_m_tied1, svfloat64_t, svfloat64_t, float64_t, + z0 = svamin_n_f64_m (p0, z0, d7), + z0 = svamin_m (p0, z0, d7)) +/* +** amin_n_f64_m_untied: +** mov z7\.d, d7 +** movprfx z0, z4 +** famin z0\.d, p0/m, z0\.d, z7\.d +** ret +*/ +TEST_DUAL_ZD (amin_n_f64_m_untied, svfloat64_t, svfloat64_t, float64_t, + z0 = svamin_n_f64_m (p0, z4, d7), + z0 = svamin_m (p0, z4, d7)) +/* +** amin_n_f64_x_tied1: +** mov z7\.d, d7 +** famin z0\.d, p0/m, z0\.d, z7\.d +** ret +*/ +TEST_DUAL_ZD (amin_n_f64_x_tied1, svfloat64_t, svfloat64_t, float64_t, + z0 = svamin_n_f64_x (p0, z0, d7), + z0 = svamin_x (p0, z0, d7)) +/* +** amin_n_f64_x_untied: +** mov z0\.d, d7 +** famin z0\.d, p0/m, z0\.d, z4\.d +** ret +*/ +TEST_DUAL_ZD (amin_n_f64_x_untied, svfloat64_t, svfloat64_t, float64_t, + z0 = svamin_n_f64_x (p0, z4, d7), + z0 = svamin_x (p0, z4, d7)) +/* +** amin_n_f64_z_tied1: +** mov z7\.d, d7 +** movprfx z0\.d, p0/z, z0\.d +** famin z0\.d, p0/m, z0\.d, z7\.d +** ret +*/ +TEST_DUAL_ZD (amin_n_f64_z_tied1, svfloat64_t, svfloat64_t, float64_t, + z0 = svamin_n_f64_z (p0, z0, d7), + z0 = svamin_z (p0, z0, d7)) +/* +** amin_n_f64_z_untied: +** mov z7\.d, d7 +** movprfx z0\.d, p0/z, z4\.d +** famin z0\.d, p0/m, z0\.d, z7\.d +** ret +*/ +TEST_DUAL_ZD (amin_n_f64_z_untied, svfloat64_t, svfloat64_t, float64_t, + z0 = svamin_n_f64_z (p0, z4, d7), + z0 = svamin_z (p0, z4, d7))