The AArch64 FEAT_FAMINMAX extension is optional from Armv9.2-a and mandatory from Armv9.5-a. It introduces instructions for computing the element-wise floating-point absolute maximum and minimum of two vectors.
This patch introduces SVE2 faminmax intrinsics. The intrinsics of this extension are implemented as the following builtin functions: * sva[max|min]_[m|x|z] * sva[max|min]_[f16|f32|f64]_[m|x|z] * sva[max|min]_n_[f16|f32|f64]_[m|x|z] gcc/ChangeLog: * config/aarch64/aarch64-sve-builtins-base.cc (svamax): Absolute maximum declaration. (svamin): Absolute minimum declaration. * config/aarch64/aarch64-sve-builtins-base.def (svamax): Absolute maximum declaration. (svamin): Absolute minimum declaration. * config/aarch64/aarch64-sve-builtins-base.h: Declaring function bases for the new intrinsics. * config/aarch64/aarch64.h (TARGET_SVE_FAMINMAX): New flag for SVE2 faminmax. * config/aarch64/iterators.md: New unspecs, iterators, and attrs for the new intrinsics. gcc/testsuite/ChangeLog: * gcc.target/aarch64/aminmax.h: New test. * gcc.target/aarch64/sve2/acle/asm/amax_f16.c: New test. * gcc.target/aarch64/sve2/acle/asm/amax_f32.c: New test. * gcc.target/aarch64/sve2/acle/asm/amax_f64.c: New test. * gcc.target/aarch64/sve2/acle/asm/amin_f16.c: New test. * gcc.target/aarch64/sve2/acle/asm/amin_f32.c: New test. * gcc.target/aarch64/sve2/acle/asm/amin_f64.c: New test. 
--- .../aarch64/aarch64-sve-builtins-base.cc | 4 + .../aarch64/aarch64-sve-builtins-base.def | 5 + .../aarch64/aarch64-sve-builtins-base.h | 2 + gcc/config/aarch64/aarch64.h | 1 + gcc/config/aarch64/iterators.md | 18 +- gcc/testsuite/gcc.target/aarch64/aminmax.h | 13 ++ .../aarch64/sve2/acle/asm/amax_f16.c | 155 ++++++++++++++++++ .../aarch64/sve2/acle/asm/amax_f32.c | 155 ++++++++++++++++++ .../aarch64/sve2/acle/asm/amax_f64.c | 155 ++++++++++++++++++ .../aarch64/sve2/acle/asm/amin_f16.c | 155 ++++++++++++++++++ .../aarch64/sve2/acle/asm/amin_f32.c | 155 ++++++++++++++++++ .../aarch64/sve2/acle/asm/amin_f64.c | 155 ++++++++++++++++++ 12 files changed, 972 insertions(+), 1 deletion(-) create mode 100644 gcc/testsuite/gcc.target/aarch64/aminmax.h create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/amax_f16.c create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/amax_f32.c create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/amax_f64.c create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/amin_f16.c create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/amin_f32.c create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/amin_f64.c
diff --git a/gcc/config/aarch64/aarch64-sve-builtins-base.cc b/gcc/config/aarch64/aarch64-sve-builtins-base.cc index 8f781e26cc8..80c67715fd7 100644 --- a/gcc/config/aarch64/aarch64-sve-builtins-base.cc +++ b/gcc/config/aarch64/aarch64-sve-builtins-base.cc @@ -3044,6 +3044,10 @@ FUNCTION (svadrb, svadr_bhwd_impl, (0)) FUNCTION (svadrd, svadr_bhwd_impl, (3)) FUNCTION (svadrh, svadr_bhwd_impl, (1)) FUNCTION (svadrw, svadr_bhwd_impl, (2)) +FUNCTION (svamax, cond_or_uncond_unspec_function, (UNSPEC_COND_FAMAX, + UNSPEC_FAMAX)) +FUNCTION (svamin, cond_or_uncond_unspec_function, (UNSPEC_COND_FAMIN, + UNSPEC_FAMIN)) FUNCTION (svand, rtx_code_function, (AND, AND)) FUNCTION (svandv, reduction, (UNSPEC_ANDV)) FUNCTION (svasr, rtx_code_function, (ASHIFTRT, ASHIFTRT)) diff --git a/gcc/config/aarch64/aarch64-sve-builtins-base.def b/gcc/config/aarch64/aarch64-sve-builtins-base.def index 65fcba91586..95e04e4393d 100644 --- a/gcc/config/aarch64/aarch64-sve-builtins-base.def +++ b/gcc/config/aarch64/aarch64-sve-builtins-base.def @@ -379,3 +379,8 @@ DEF_SVE_FUNCTION (svzip2q, binary, all_data, none) DEF_SVE_FUNCTION (svld1ro, load_replicate, all_data, implicit) DEF_SVE_FUNCTION (svmmla, mmla, d_float, none) #undef REQUIRED_EXTENSIONS + +#define REQUIRED_EXTENSIONS AARCH64_FL_SVE | AARCH64_FL_FAMINMAX +DEF_SVE_FUNCTION (svamax, binary_opt_single_n, all_float, mxz) +DEF_SVE_FUNCTION (svamin, binary_opt_single_n, all_float, mxz) +#undef REQUIRED_EXTENSIONS diff --git a/gcc/config/aarch64/aarch64-sve-builtins-base.h b/gcc/config/aarch64/aarch64-sve-builtins-base.h index 5bbf3569c4b..978cf7013f9 100644 --- a/gcc/config/aarch64/aarch64-sve-builtins-base.h +++ b/gcc/config/aarch64/aarch64-sve-builtins-base.h @@ -37,6 +37,8 @@ namespace aarch64_sve extern const function_base *const svadrd; extern const function_base *const svadrh; extern const function_base *const svadrw; + extern const function_base *const svamax; + extern const function_base *const svamin; extern const function_base *const 
svand; extern const function_base *const svandv; extern const function_base *const svasr; diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h index de14f57071a..e9730b8c36a 100644 --- a/gcc/config/aarch64/aarch64.h +++ b/gcc/config/aarch64/aarch64.h @@ -460,6 +460,7 @@ constexpr auto AARCH64_FL_DEFAULT_ISA_MODE ATTRIBUTE_UNUSED /* Floating Point Absolute Maximum/Minimum extension instructions are enabled through +faminmax. */ #define TARGET_FAMINMAX AARCH64_HAVE_ISA (FAMINMAX) +#define TARGET_SVE_FAMINMAX (TARGET_SVE && TARGET_FAMINMAX) /* Prefer different predicate registers for the output of a predicated operation over re-using an existing input predicate. */ diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md index c2fcd18306e..b993ac9a7f6 100644 --- a/gcc/config/aarch64/iterators.md +++ b/gcc/config/aarch64/iterators.md @@ -841,6 +841,8 @@ UNSPEC_COND_CMPNE_WIDE ; Used in aarch64-sve.md. UNSPEC_COND_FABS ; Used in aarch64-sve.md. UNSPEC_COND_FADD ; Used in aarch64-sve.md. + UNSPEC_COND_FAMAX ; Used in aarch64-sve.md. + UNSPEC_COND_FAMIN ; Used in aarch64-sve.md. UNSPEC_COND_FCADD90 ; Used in aarch64-sve.md. UNSPEC_COND_FCADD270 ; Used in aarch64-sve.md. UNSPEC_COND_FCMEQ ; Used in aarch64-sve.md. 
@@ -3082,6 +3084,8 @@ (define_int_iterator SVE_COND_ICVTF [UNSPEC_COND_SCVTF UNSPEC_COND_UCVTF]) (define_int_iterator SVE_COND_FP_BINARY [UNSPEC_COND_FADD + (UNSPEC_COND_FAMAX "TARGET_SVE_FAMINMAX") + (UNSPEC_COND_FAMIN "TARGET_SVE_FAMINMAX") UNSPEC_COND_FDIV UNSPEC_COND_FMAX UNSPEC_COND_FMAXNM @@ -3114,7 +3118,9 @@ UNSPEC_COND_FMINNM UNSPEC_COND_FMUL]) -(define_int_iterator SVE_COND_FP_BINARY_REG [UNSPEC_COND_FDIV +(define_int_iterator SVE_COND_FP_BINARY_REG [(UNSPEC_COND_FAMAX "TARGET_SVE_FAMINMAX") + (UNSPEC_COND_FAMIN "TARGET_SVE_FAMINMAX") + UNSPEC_COND_FDIV UNSPEC_COND_FMULX]) (define_int_iterator SVE_COND_FCADD [UNSPEC_COND_FCADD90 @@ -3694,6 +3700,8 @@ (UNSPEC_ZIP2Q "zip2q") (UNSPEC_COND_FABS "abs") (UNSPEC_COND_FADD "add") + (UNSPEC_COND_FAMAX "famax") + (UNSPEC_COND_FAMIN "famin") (UNSPEC_COND_FCADD90 "cadd90") (UNSPEC_COND_FCADD270 "cadd270") (UNSPEC_COND_FCMLA "fcmla") @@ -4230,6 +4238,8 @@ (UNSPEC_FTSSEL "ftssel") (UNSPEC_COND_FABS "fabs") (UNSPEC_COND_FADD "fadd") + (UNSPEC_COND_FAMAX "famax") + (UNSPEC_COND_FAMIN "famin") (UNSPEC_COND_FCVTLT "fcvtlt") (UNSPEC_COND_FCVTX "fcvtx") (UNSPEC_COND_FDIV "fdiv") @@ -4254,6 +4264,8 @@ (UNSPEC_COND_FSUB "fsub")]) (define_int_attr sve_fp_op_rev [(UNSPEC_COND_FADD "fadd") + (UNSPEC_COND_FAMAX "famax") + (UNSPEC_COND_FAMIN "famin") (UNSPEC_COND_FDIV "fdivr") (UNSPEC_COND_FMAX "fmax") (UNSPEC_COND_FMAXNM "fmaxnm") @@ -4390,6 +4402,8 @@ ;; <optab><mode>3 pattern. (define_int_attr sve_pred_fp_rhs1_operand [(UNSPEC_COND_FADD "register_operand") + (UNSPEC_COND_FAMAX "register_operand") + (UNSPEC_COND_FAMIN "register_operand") (UNSPEC_COND_FDIV "register_operand") (UNSPEC_COND_FMAX "register_operand") (UNSPEC_COND_FMAXNM "register_operand") @@ -4403,6 +4417,8 @@ ;; <optab><mode>3 pattern. 
(define_int_attr sve_pred_fp_rhs2_operand [(UNSPEC_COND_FADD "aarch64_sve_float_arith_with_sub_operand") + (UNSPEC_COND_FAMAX "aarch64_sve_float_maxmin_operand") + (UNSPEC_COND_FAMIN "aarch64_sve_float_maxmin_operand") (UNSPEC_COND_FDIV "register_operand") (UNSPEC_COND_FMAX "aarch64_sve_float_maxmin_operand") (UNSPEC_COND_FMAXNM "aarch64_sve_float_maxmin_operand") diff --git a/gcc/testsuite/gcc.target/aarch64/aminmax.h b/gcc/testsuite/gcc.target/aarch64/aminmax.h new file mode 100644 index 00000000000..e901da84165 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/aminmax.h @@ -0,0 +1,13 @@ +#ifdef AMINMAX_IDIOM + +#define TEST1(TYPE) \ +__attribute__((noipa)) \ +void fn_##TYPE (TYPE * restrict a, \ + TYPE * restrict b, \ + TYPE * restrict out) { \ + for (int i = 0; i < N; i++) { \ + TYPE diff = b[i] - a[i]; \ + out[i] = diff > 0 ? diff : -diff; \ +} } + +#endif diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/amax_f16.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/amax_f16.c new file mode 100644 index 00000000000..2646f29e60c --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/amax_f16.c @@ -0,0 +1,155 @@ +/* { dg-additional-options "-O3 -march=armv9-a+sve+faminmax" } */ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sve_acle.h" + +/* +** amax_f16_m_tied1: +** famax z0\.h, p0/m, z0\.h, z1\.h +** ret +*/ +TEST_UNIFORM_Z (amax_f16_m_tied1, svfloat16_t, + z0 = svamax_f16_m (p0, z0, z1), + z0 = svamax_m (p0, z0, z1)) + +/* +** amax_f16_m_tied2: +** mov z31\.d, z0\.d +** movprfx z0, z1 +** famax z0\.h, p0/m, z0\.h, z31\.h +** ret +*/ +TEST_UNIFORM_Z (amax_f16_m_tied2, svfloat16_t, + z0 = svamax_f16_m (p0, z1, z0), + z0 = svamax_m (p0, z1, z0)) + +/* +** amax_f16_m_untied: +** movprfx z0, z1 +** famax z0\.h, p0/m, z0\.h, z2\.h +** ret +*/ +TEST_UNIFORM_Z (amax_f16_m_untied, svfloat16_t, + z0 = svamax_f16_m (p0, z1, z2), + z0 = svamax_m (p0, z1, z2)) + +/* +** amax_f16_x_tied1: +** famax z0\.h, p0/m, 
z0\.h, z1\.h +** ret +*/ +TEST_UNIFORM_Z (amax_f16_x_tied1, svfloat16_t, + z0 = svamax_f16_x (p0, z0, z1), + z0 = svamax_x (p0, z0, z1)) + +/* +** amax_f16_x_tied2: +** famax z0\.h, p0/m, z0\.h, z1\.h +** ret +*/ +TEST_UNIFORM_Z (amax_f16_x_tied2, svfloat16_t, + z0 = svamax_f16_x (p0, z1, z0), + z0 = svamax_x (p0, z1, z0)) + +/* +** amax_f16_x_untied: +** movprfx z0, z1 +** famax z0\.h, p0/m, z0\.h, z2\.h +** ret +*/ +TEST_UNIFORM_Z (amax_f16_x_untied, svfloat16_t, + z0 = svamax_f16_x (p0, z1, z2), + z0 = svamax_x (p0, z1, z2)) + +/* +** amax_f16_z_tied1: +** movprfx z0\.h, p0/z, z0\.h +** famax z0\.h, p0/m, z0\.h, z1\.h +** ret +*/ +TEST_UNIFORM_Z (amax_f16_z_tied1, svfloat16_t, + z0 = svamax_f16_z (p0, z0, z1), + z0 = svamax_z (p0, z0, z1)) + +/* +** amax_f16_z_tied2: +** movprfx z0\.h, p0/z, z0\.h +** famax z0\.h, p0/m, z0\.h, z1\.h +** ret +*/ +TEST_UNIFORM_Z (amax_f16_z_tied2, svfloat16_t, + z0 = svamax_f16_z (p0, z1, z0), + z0 = svamax_z (p0, z1, z0)) + +/* +** amax_f16_z_untied: +** movprfx z0\.h, p0/z, z1\.h +** famax z0\.h, p0/m, z0\.h, z2\.h +** ret +*/ +TEST_UNIFORM_Z (amax_f16_z_untied, svfloat16_t, + z0 = svamax_f16_z (p0, z1, z2), + z0 = svamax_z (p0, z1, z2)) + +/* +** amax_n_f16_m_tied1: +** mov z7\.h, h7 +** famax z0\.h, p0/m, z0\.h, z7\.h +** ret +*/ +TEST_DUAL_ZD (amax_n_f16_m_tied1, svfloat16_t, svfloat16_t, float16_t, + z0 = svamax_n_f16_m (p0, z0, d7), + z0 = svamax_m (p0, z0, d7)) + +/* +** amax_n_f16_m_untied: +** mov z7\.h, h7 +** movprfx z0, z4 +** famax z0\.h, p0/m, z0\.h, z7\.h +** ret +*/ +TEST_DUAL_ZD (amax_n_f16_m_untied, svfloat16_t, svfloat16_t, float16_t, + z0 = svamax_n_f16_m (p0, z4, d7), + z0 = svamax_m (p0, z4, d7)) + +/* +** amax_n_f16_x_tied1: +** mov z7\.h, h7 +** famax z0\.h, p0/m, z0\.h, z7\.h +** ret +*/ +TEST_DUAL_ZD (amax_n_f16_x_tied1, svfloat16_t, svfloat16_t, float16_t, + z0 = svamax_n_f16_x (p0, z0, d7), + z0 = svamax_x (p0, z0, d7)) + +/* +** amax_n_f16_x_untied: +** mov z0\.h, h7 +** famax z0\.h, p0/m, z0\.h, 
z4\.h +** ret +*/ +TEST_DUAL_ZD (amax_n_f16_x_untied, svfloat16_t, svfloat16_t, float16_t, + z0 = svamax_n_f16_x (p0, z4, d7), + z0 = svamax_x (p0, z4, d7)) + +/* +** amax_n_f16_z_tied1: +** mov z7\.h, h7 +** movprfx z0\.h, p0/z, z0\.h +** famax z0\.h, p0/m, z0\.h, z7\.h +** ret +*/ +TEST_DUAL_ZD (amax_n_f16_z_tied1, svfloat16_t, svfloat16_t, float16_t, + z0 = svamax_n_f16_z (p0, z0, d7), + z0 = svamax_z (p0, z0, d7)) + +/* +** amax_n_f16_z_untied: +** mov z7\.h, h7 +** movprfx z0\.h, p0/z, z4\.h +** famax z0\.h, p0/m, z0\.h, z7\.h +** ret +*/ +TEST_DUAL_ZD (amax_n_f16_z_untied, svfloat16_t, svfloat16_t, float16_t, + z0 = svamax_n_f16_z (p0, z4, d7), + z0 = svamax_z (p0, z4, d7)) diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/amax_f32.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/amax_f32.c new file mode 100644 index 00000000000..5b5fd2076f1 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/amax_f32.c @@ -0,0 +1,155 @@ +/* { dg-additional-options "-O3 -march=armv9-a+sve+faminmax" } */ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sve_acle.h" + +/* +** amax_f32_m_tied1: +** famax z0\.s, p0/m, z0\.s, z1\.s +** ret +*/ +TEST_UNIFORM_Z (amax_f32_m_tied1, svfloat32_t, + z0 = svamax_f32_m (p0, z0, z1), + z0 = svamax_m (p0, z0, z1)) + +/* +** amax_f32_m_tied2: +** mov z31\.d, z0\.d +** movprfx z0, z1 +** famax z0\.s, p0/m, z0\.s, z31\.s +** ret +*/ +TEST_UNIFORM_Z (amax_f32_m_tied2, svfloat32_t, + z0 = svamax_f32_m (p0, z1, z0), + z0 = svamax_m (p0, z1, z0)) + +/* +** amax_f32_m_untied: +** movprfx z0, z1 +** famax z0\.s, p0/m, z0\.s, z2\.s +** ret +*/ +TEST_UNIFORM_Z (amax_f32_m_untied, svfloat32_t, + z0 = svamax_f32_m (p0, z1, z2), + z0 = svamax_m (p0, z1, z2)) + +/* +** amax_f32_x_tied1: +** famax z0\.s, p0/m, z0\.s, z1\.s +** ret +*/ +TEST_UNIFORM_Z (amax_f32_x_tied1, svfloat32_t, + z0 = svamax_f32_x (p0, z0, z1), + z0 = svamax_x (p0, z0, z1)) + +/* +** amax_f32_x_tied2: +** famax z0\.s, p0/m, 
z0\.s, z1\.s +** ret +*/ +TEST_UNIFORM_Z (amax_f32_x_tied2, svfloat32_t, + z0 = svamax_f32_x (p0, z1, z0), + z0 = svamax_x (p0, z1, z0)) + +/* +** amax_f32_x_untied: +** movprfx z0, z1 +** famax z0\.s, p0/m, z0\.s, z2\.s +** ret +*/ +TEST_UNIFORM_Z (amax_f32_x_untied, svfloat32_t, + z0 = svamax_f32_x (p0, z1, z2), + z0 = svamax_x (p0, z1, z2)) + +/* +** amax_f32_z_tied1: +** movprfx z0\.s, p0/z, z0\.s +** famax z0\.s, p0/m, z0\.s, z1\.s +** ret +*/ +TEST_UNIFORM_Z (amax_f32_z_tied1, svfloat32_t, + z0 = svamax_f32_z (p0, z0, z1), + z0 = svamax_z (p0, z0, z1)) + +/* +** amax_f32_z_tied2: +** movprfx z0\.s, p0/z, z0\.s +** famax z0\.s, p0/m, z0\.s, z1\.s +** ret +*/ +TEST_UNIFORM_Z (amax_f32_z_tied2, svfloat32_t, + z0 = svamax_f32_z (p0, z1, z0), + z0 = svamax_z (p0, z1, z0)) + +/* +** amax_f32_z_untied: +** movprfx z0\.s, p0/z, z1\.s +** famax z0\.s, p0/m, z0\.s, z2\.s +** ret +*/ +TEST_UNIFORM_Z (amax_f32_z_untied, svfloat32_t, + z0 = svamax_f32_z (p0, z1, z2), + z0 = svamax_z (p0, z1, z2)) + +/* +** amax_n_f32_m_tied1: +** mov z7\.s, s7 +** famax z0\.s, p0/m, z0\.s, z7\.s +** ret +*/ +TEST_DUAL_ZD (amax_n_f32_m_tied1, svfloat32_t, svfloat32_t, float32_t, + z0 = svamax_n_f32_m (p0, z0, d7), + z0 = svamax_m (p0, z0, d7)) + +/* +** amax_n_f32_m_untied: +** mov z7\.s, s7 +** movprfx z0, z4 +** famax z0\.s, p0/m, z0\.s, z7\.s +** ret +*/ +TEST_DUAL_ZD (amax_n_f32_m_untied, svfloat32_t, svfloat32_t, float32_t, + z0 = svamax_n_f32_m (p0, z4, d7), + z0 = svamax_m (p0, z4, d7)) + +/* +** amax_n_f32_x_tied1: +** mov z7\.s, s7 +** famax z0\.s, p0/m, z0\.s, z7\.s +** ret +*/ +TEST_DUAL_ZD (amax_n_f32_x_tied1, svfloat32_t, svfloat32_t, float32_t, + z0 = svamax_n_f32_x (p0, z0, d7), + z0 = svamax_x (p0, z0, d7)) + +/* +** amax_n_f32_x_untied: +** mov z0\.s, s7 +** famax z0\.s, p0/m, z0\.s, z4\.s +** ret +*/ +TEST_DUAL_ZD (amax_n_f32_x_untied, svfloat32_t, svfloat32_t, float32_t, + z0 = svamax_n_f32_x (p0, z4, d7), + z0 = svamax_x (p0, z4, d7)) + +/* +** amax_n_f32_z_tied1: +** 
mov z7\.s, s7 +** movprfx z0\.s, p0/z, z0\.s +** famax z0\.s, p0/m, z0\.s, z7\.s +** ret +*/ +TEST_DUAL_ZD (amax_n_f32_z_tied1, svfloat32_t, svfloat32_t, float32_t, + z0 = svamax_n_f32_z (p0, z0, d7), + z0 = svamax_z (p0, z0, d7)) + +/* +** amax_n_f32_z_untied: +** mov z7\.s, s7 +** movprfx z0\.s, p0/z, z4\.s +** famax z0\.s, p0/m, z0\.s, z7\.s +** ret +*/ +TEST_DUAL_ZD (amax_n_f32_z_untied, svfloat32_t, svfloat32_t, float32_t, + z0 = svamax_n_f32_z (p0, z4, d7), + z0 = svamax_z (p0, z4, d7)) diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/amax_f64.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/amax_f64.c new file mode 100644 index 00000000000..4a13111dd0d --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/amax_f64.c @@ -0,0 +1,155 @@ +/* { dg-additional-options "-O3 -march=armv9-a+sve+faminmax" } */ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sve_acle.h" + +/* +** amax_f64_m_tied1: +** famax z0\.d, p0/m, z0\.d, z1\.d +** ret +*/ +TEST_UNIFORM_Z (amax_f64_m_tied1, svfloat64_t, + z0 = svamax_f64_m (p0, z0, z1), + z0 = svamax_m (p0, z0, z1)) + +/* +** amax_f64_m_tied2: +** mov z31\.d, z0\.d +** movprfx z0, z1 +** famax z0\.d, p0/m, z0\.d, z31\.d +** ret +*/ +TEST_UNIFORM_Z (amax_f64_m_tied2, svfloat64_t, + z0 = svamax_f64_m (p0, z1, z0), + z0 = svamax_m (p0, z1, z0)) + +/* +** amax_f64_m_untied: +** movprfx z0, z1 +** famax z0\.d, p0/m, z0\.d, z2\.d +** ret +*/ +TEST_UNIFORM_Z (amax_f64_m_untied, svfloat64_t, + z0 = svamax_f64_m (p0, z1, z2), + z0 = svamax_m (p0, z1, z2)) + +/* +** amax_f64_x_tied1: +** famax z0\.d, p0/m, z0\.d, z1\.d +** ret +*/ +TEST_UNIFORM_Z (amax_f64_x_tied1, svfloat64_t, + z0 = svamax_f64_x (p0, z0, z1), + z0 = svamax_x (p0, z0, z1)) + +/* +** amax_f64_x_tied2: +** famax z0\.d, p0/m, z0\.d, z1\.d +** ret +*/ +TEST_UNIFORM_Z (amax_f64_x_tied2, svfloat64_t, + z0 = svamax_f64_x (p0, z1, z0), + z0 = svamax_x (p0, z1, z0)) + +/* +** amax_f64_x_untied: +** movprfx z0, z1 +** 
famax z0\.d, p0/m, z0\.d, z2\.d +** ret +*/ +TEST_UNIFORM_Z (amax_f64_x_untied, svfloat64_t, + z0 = svamax_f64_x (p0, z1, z2), + z0 = svamax_x (p0, z1, z2)) + +/* +** amax_f64_z_tied1: +** movprfx z0\.d, p0/z, z0\.d +** famax z0\.d, p0/m, z0\.d, z1\.d +** ret +*/ +TEST_UNIFORM_Z (amax_f64_z_tied1, svfloat64_t, + z0 = svamax_f64_z (p0, z0, z1), + z0 = svamax_z (p0, z0, z1)) + +/* +** amax_f64_z_tied2: +** movprfx z0\.d, p0/z, z0\.d +** famax z0\.d, p0/m, z0\.d, z1\.d +** ret +*/ +TEST_UNIFORM_Z (amax_f64_z_tied2, svfloat64_t, + z0 = svamax_f64_z (p0, z1, z0), + z0 = svamax_z (p0, z1, z0)) + +/* +** amax_f64_z_untied: +** movprfx z0\.d, p0/z, z1\.d +** famax z0\.d, p0/m, z0\.d, z2\.d +** ret +*/ +TEST_UNIFORM_Z (amax_f64_z_untied, svfloat64_t, + z0 = svamax_f64_z (p0, z1, z2), + z0 = svamax_z (p0, z1, z2)) + +/* +** amax_n_f64_m_tied1: +** mov z7\.d, d7 +** famax z0\.d, p0/m, z0\.d, z7\.d +** ret +*/ +TEST_DUAL_ZD (amax_n_f64_m_tied1, svfloat64_t, svfloat64_t, float64_t, + z0 = svamax_n_f64_m (p0, z0, d7), + z0 = svamax_m (p0, z0, d7)) + +/* +** amax_n_f64_m_untied: +** mov z7\.d, d7 +** movprfx z0, z4 +** famax z0\.d, p0/m, z0\.d, z7\.d +** ret +*/ +TEST_DUAL_ZD (amax_n_f64_m_untied, svfloat64_t, svfloat64_t, float64_t, + z0 = svamax_n_f64_m (p0, z4, d7), + z0 = svamax_m (p0, z4, d7)) + +/* +** amax_n_f64_x_tied1: +** mov z7\.d, d7 +** famax z0\.d, p0/m, z0\.d, z7\.d +** ret +*/ +TEST_DUAL_ZD (amax_n_f64_x_tied1, svfloat64_t, svfloat64_t, float64_t, + z0 = svamax_n_f64_x (p0, z0, d7), + z0 = svamax_x (p0, z0, d7)) + +/* +** amax_n_f64_x_untied: +** mov z0\.d, d7 +** famax z0\.d, p0/m, z0\.d, z4\.d +** ret +*/ +TEST_DUAL_ZD (amax_n_f64_x_untied, svfloat64_t, svfloat64_t, float64_t, + z0 = svamax_n_f64_x (p0, z4, d7), + z0 = svamax_x (p0, z4, d7)) + +/* +** amax_n_f64_z_tied1: +** mov z7\.d, d7 +** movprfx z0\.d, p0/z, z0\.d +** famax z0\.d, p0/m, z0\.d, z7\.d +** ret +*/ +TEST_DUAL_ZD (amax_n_f64_z_tied1, svfloat64_t, svfloat64_t, float64_t, + z0 = svamax_n_f64_z 
(p0, z0, d7), + z0 = svamax_z (p0, z0, d7)) + +/* +** amax_n_f64_z_untied: +** mov z7\.d, d7 +** movprfx z0\.d, p0/z, z4\.d +** famax z0\.d, p0/m, z0\.d, z7\.d +** ret +*/ +TEST_DUAL_ZD (amax_n_f64_z_untied, svfloat64_t, svfloat64_t, float64_t, + z0 = svamax_n_f64_z (p0, z4, d7), + z0 = svamax_z (p0, z4, d7)) diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/amin_f16.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/amin_f16.c new file mode 100644 index 00000000000..e53253e0cbe --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/amin_f16.c @@ -0,0 +1,155 @@ +/* { dg-additional-options "-O3 -march=armv9-a+sve+faminmax" } */ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sve_acle.h" + +/* +** amin_f16_m_tied1: +** famin z0\.h, p0/m, z0\.h, z1\.h +** ret +*/ +TEST_UNIFORM_Z (amin_f16_m_tied1, svfloat16_t, + z0 = svamin_f16_m (p0, z0, z1), + z0 = svamin_m (p0, z0, z1)) + +/* +** amin_f16_m_tied2: +** mov z31\.d, z0\.d +** movprfx z0, z1 +** famin z0\.h, p0/m, z0\.h, z31\.h +** ret +*/ +TEST_UNIFORM_Z (amin_f16_m_tied2, svfloat16_t, + z0 = svamin_f16_m (p0, z1, z0), + z0 = svamin_m (p0, z1, z0)) + +/* +** amin_f16_m_untied: +** movprfx z0, z1 +** famin z0\.h, p0/m, z0\.h, z2\.h +** ret +*/ +TEST_UNIFORM_Z (amin_f16_m_untied, svfloat16_t, + z0 = svamin_f16_m (p0, z1, z2), + z0 = svamin_m (p0, z1, z2)) + +/* +** amin_f16_x_tied1: +** famin z0\.h, p0/m, z0\.h, z1\.h +** ret +*/ +TEST_UNIFORM_Z (amin_f16_x_tied1, svfloat16_t, + z0 = svamin_f16_x (p0, z0, z1), + z0 = svamin_x (p0, z0, z1)) + +/* +** amin_f16_x_tied2: +** famin z0\.h, p0/m, z0\.h, z1\.h +** ret +*/ +TEST_UNIFORM_Z (amin_f16_x_tied2, svfloat16_t, + z0 = svamin_f16_x (p0, z1, z0), + z0 = svamin_x (p0, z1, z0)) + +/* +** amin_f16_x_untied: +** movprfx z0, z1 +** famin z0\.h, p0/m, z0\.h, z2\.h +** ret +*/ +TEST_UNIFORM_Z (amin_f16_x_untied, svfloat16_t, + z0 = svamin_f16_x (p0, z1, z2), + z0 = svamin_x (p0, z1, z2)) + +/* +** amin_f16_z_tied1: +** 
movprfx z0\.h, p0/z, z0\.h +** famin z0\.h, p0/m, z0\.h, z1\.h +** ret +*/ +TEST_UNIFORM_Z (amin_f16_z_tied1, svfloat16_t, + z0 = svamin_f16_z (p0, z0, z1), + z0 = svamin_z (p0, z0, z1)) + +/* +** amin_f16_z_tied2: +** movprfx z0\.h, p0/z, z0\.h +** famin z0\.h, p0/m, z0\.h, z1\.h +** ret +*/ +TEST_UNIFORM_Z (amin_f16_z_tied2, svfloat16_t, + z0 = svamin_f16_z (p0, z1, z0), + z0 = svamin_z (p0, z1, z0)) + +/* +** amin_f16_z_untied: +** movprfx z0\.h, p0/z, z1\.h +** famin z0\.h, p0/m, z0\.h, z2\.h +** ret +*/ +TEST_UNIFORM_Z (amin_f16_z_untied, svfloat16_t, + z0 = svamin_f16_z (p0, z1, z2), + z0 = svamin_z (p0, z1, z2)) + +/* +** amin_n_f16_m_tied1: +** mov z7\.h, h7 +** famin z0\.h, p0/m, z0\.h, z7\.h +** ret +*/ +TEST_DUAL_ZD (amin_n_f16_m_tied1, svfloat16_t, svfloat16_t, float16_t, + z0 = svamin_n_f16_m (p0, z0, d7), + z0 = svamin_m (p0, z0, d7)) + +/* +** amin_n_f16_m_untied: +** mov z7\.h, h7 +** movprfx z0, z4 +** famin z0\.h, p0/m, z0\.h, z7\.h +** ret +*/ +TEST_DUAL_ZD (amin_n_f16_m_untied, svfloat16_t, svfloat16_t, float16_t, + z0 = svamin_n_f16_m (p0, z4, d7), + z0 = svamin_m (p0, z4, d7)) + +/* +** amin_n_f16_x_tied1: +** mov z7\.h, h7 +** famin z0\.h, p0/m, z0\.h, z7\.h +** ret +*/ +TEST_DUAL_ZD (amin_n_f16_x_tied1, svfloat16_t, svfloat16_t, float16_t, + z0 = svamin_n_f16_x (p0, z0, d7), + z0 = svamin_x (p0, z0, d7)) + +/* +** amin_n_f16_x_untied: +** mov z0\.h, h7 +** famin z0\.h, p0/m, z0\.h, z4\.h +** ret +*/ +TEST_DUAL_ZD (amin_n_f16_x_untied, svfloat16_t, svfloat16_t, float16_t, + z0 = svamin_n_f16_x (p0, z4, d7), + z0 = svamin_x (p0, z4, d7)) + +/* +** amin_n_f16_z_tied1: +** mov z7\.h, h7 +** movprfx z0\.h, p0/z, z0\.h +** famin z0\.h, p0/m, z0\.h, z7\.h +** ret +*/ +TEST_DUAL_ZD (amin_n_f16_z_tied1, svfloat16_t, svfloat16_t, float16_t, + z0 = svamin_n_f16_z (p0, z0, d7), + z0 = svamin_z (p0, z0, d7)) + +/* +** amin_n_f16_z_untied: +** mov z7\.h, h7 +** movprfx z0\.h, p0/z, z4\.h +** famin z0\.h, p0/m, z0\.h, z7\.h +** ret +*/ +TEST_DUAL_ZD 
(amin_n_f16_z_untied, svfloat16_t, svfloat16_t, float16_t, + z0 = svamin_n_f16_z (p0, z4, d7), + z0 = svamin_z (p0, z4, d7)) diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/amin_f32.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/amin_f32.c new file mode 100644 index 00000000000..9ea9efbe8de --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/amin_f32.c @@ -0,0 +1,155 @@ +/* { dg-additional-options "-O3 -march=armv9-a+sve+faminmax" } */ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sve_acle.h" + +/* +** amin_f32_m_tied1: +** famin z0\.s, p0/m, z0\.s, z1\.s +** ret +*/ +TEST_UNIFORM_Z (amin_f32_m_tied1, svfloat32_t, + z0 = svamin_f32_m (p0, z0, z1), + z0 = svamin_m (p0, z0, z1)) + +/* +** amin_f32_m_tied2: +** mov z31\.d, z0\.d +** movprfx z0, z1 +** famin z0\.s, p0/m, z0\.s, z31\.s +** ret +*/ +TEST_UNIFORM_Z (amin_f32_m_tied2, svfloat32_t, + z0 = svamin_f32_m (p0, z1, z0), + z0 = svamin_m (p0, z1, z0)) + +/* +** amin_f32_m_untied: +** movprfx z0, z1 +** famin z0\.s, p0/m, z0\.s, z2\.s +** ret +*/ +TEST_UNIFORM_Z (amin_f32_m_untied, svfloat32_t, + z0 = svamin_f32_m (p0, z1, z2), + z0 = svamin_m (p0, z1, z2)) + +/* +** amin_f32_x_tied1: +** famin z0\.s, p0/m, z0\.s, z1\.s +** ret +*/ +TEST_UNIFORM_Z (amin_f32_x_tied1, svfloat32_t, + z0 = svamin_f32_x (p0, z0, z1), + z0 = svamin_x (p0, z0, z1)) + +/* +** amin_f32_x_tied2: +** famin z0\.s, p0/m, z0\.s, z1\.s +** ret +*/ +TEST_UNIFORM_Z (amin_f32_x_tied2, svfloat32_t, + z0 = svamin_f32_x (p0, z1, z0), + z0 = svamin_x (p0, z1, z0)) + +/* +** amin_f32_x_untied: +** movprfx z0, z1 +** famin z0\.s, p0/m, z0\.s, z2\.s +** ret +*/ +TEST_UNIFORM_Z (amin_f32_x_untied, svfloat32_t, + z0 = svamin_f32_x (p0, z1, z2), + z0 = svamin_x (p0, z1, z2)) + +/* +** amin_f32_z_tied1: +** movprfx z0\.s, p0/z, z0\.s +** famin z0\.s, p0/m, z0\.s, z1\.s +** ret +*/ +TEST_UNIFORM_Z (amin_f32_z_tied1, svfloat32_t, + z0 = svamin_f32_z (p0, z0, z1), + z0 = svamin_z (p0, z0, z1)) 
+ +/* +** amin_f32_z_tied2: +** movprfx z0\.s, p0/z, z0\.s +** famin z0\.s, p0/m, z0\.s, z1\.s +** ret +*/ +TEST_UNIFORM_Z (amin_f32_z_tied2, svfloat32_t, + z0 = svamin_f32_z (p0, z1, z0), + z0 = svamin_z (p0, z1, z0)) + +/* +** amin_f32_z_untied: +** movprfx z0\.s, p0/z, z1\.s +** famin z0\.s, p0/m, z0\.s, z2\.s +** ret +*/ +TEST_UNIFORM_Z (amin_f32_z_untied, svfloat32_t, + z0 = svamin_f32_z (p0, z1, z2), + z0 = svamin_z (p0, z1, z2)) + +/* +** amin_n_f32_m_tied1: +** mov z7\.s, s7 +** famin z0\.s, p0/m, z0\.s, z7\.s +** ret +*/ +TEST_DUAL_ZD (amin_n_f32_m_tied1, svfloat32_t, svfloat32_t, float32_t, + z0 = svamin_n_f32_m (p0, z0, d7), + z0 = svamin_m (p0, z0, d7)) + +/* +** amin_n_f32_m_untied: +** mov z7\.s, s7 +** movprfx z0, z4 +** famin z0\.s, p0/m, z0\.s, z7\.s +** ret +*/ +TEST_DUAL_ZD (amin_n_f32_m_untied, svfloat32_t, svfloat32_t, float32_t, + z0 = svamin_n_f32_m (p0, z4, d7), + z0 = svamin_m (p0, z4, d7)) + +/* +** amin_n_f32_x_tied1: +** mov z7\.s, s7 +** famin z0\.s, p0/m, z0\.s, z7\.s +** ret +*/ +TEST_DUAL_ZD (amin_n_f32_x_tied1, svfloat32_t, svfloat32_t, float32_t, + z0 = svamin_n_f32_x (p0, z0, d7), + z0 = svamin_x (p0, z0, d7)) + +/* +** amin_n_f32_x_untied: +** mov z0\.s, s7 +** famin z0\.s, p0/m, z0\.s, z4\.s +** ret +*/ +TEST_DUAL_ZD (amin_n_f32_x_untied, svfloat32_t, svfloat32_t, float32_t, + z0 = svamin_n_f32_x (p0, z4, d7), + z0 = svamin_x (p0, z4, d7)) + +/* +** amin_n_f32_z_tied1: +** mov z7\.s, s7 +** movprfx z0\.s, p0/z, z0\.s +** famin z0\.s, p0/m, z0\.s, z7\.s +** ret +*/ +TEST_DUAL_ZD (amin_n_f32_z_tied1, svfloat32_t, svfloat32_t, float32_t, + z0 = svamin_n_f32_z (p0, z0, d7), + z0 = svamin_z (p0, z0, d7)) + +/* +** amin_n_f32_z_untied: +** mov z7\.s, s7 +** movprfx z0\.s, p0/z, z4\.s +** famin z0\.s, p0/m, z0\.s, z7\.s +** ret +*/ +TEST_DUAL_ZD (amin_n_f32_z_untied, svfloat32_t, svfloat32_t, float32_t, + z0 = svamin_n_f32_z (p0, z4, d7), + z0 = svamin_z (p0, z4, d7)) diff --git 
a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/amin_f64.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/amin_f64.c new file mode 100644 index 00000000000..2570c3d0275 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/amin_f64.c @@ -0,0 +1,155 @@ +/* { dg-additional-options "-O3 -march=armv9-a+sve+faminmax" } */ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sve_acle.h" + +/* +** amin_f64_m_tied1: +** famin z0\.d, p0/m, z0\.d, z1\.d +** ret +*/ +TEST_UNIFORM_Z (amin_f64_m_tied1, svfloat64_t, + z0 = svamin_f64_m (p0, z0, z1), + z0 = svamin_m (p0, z0, z1)) + +/* +** amin_f64_m_tied2: +** mov z31\.d, z0\.d +** movprfx z0, z1 +** famin z0\.d, p0/m, z0\.d, z31\.d +** ret +*/ +TEST_UNIFORM_Z (amin_f64_m_tied2, svfloat64_t, + z0 = svamin_f64_m (p0, z1, z0), + z0 = svamin_m (p0, z1, z0)) + +/* +** amin_f64_m_untied: +** movprfx z0, z1 +** famin z0\.d, p0/m, z0\.d, z2\.d +** ret +*/ +TEST_UNIFORM_Z (amin_f64_m_untied, svfloat64_t, + z0 = svamin_f64_m (p0, z1, z2), + z0 = svamin_m (p0, z1, z2)) + +/* +** amin_f64_x_tied1: +** famin z0\.d, p0/m, z0\.d, z1\.d +** ret +*/ +TEST_UNIFORM_Z (amin_f64_x_tied1, svfloat64_t, + z0 = svamin_f64_x (p0, z0, z1), + z0 = svamin_x (p0, z0, z1)) + +/* +** amin_f64_x_tied2: +** famin z0\.d, p0/m, z0\.d, z1\.d +** ret +*/ +TEST_UNIFORM_Z (amin_f64_x_tied2, svfloat64_t, + z0 = svamin_f64_x (p0, z1, z0), + z0 = svamin_x (p0, z1, z0)) + +/* +** amin_f64_x_untied: +** movprfx z0, z1 +** famin z0\.d, p0/m, z0\.d, z2\.d +** ret +*/ +TEST_UNIFORM_Z (amin_f64_x_untied, svfloat64_t, + z0 = svamin_f64_x (p0, z1, z2), + z0 = svamin_x (p0, z1, z2)) + +/* +** amin_f64_z_tied1: +** movprfx z0\.d, p0/z, z0\.d +** famin z0\.d, p0/m, z0\.d, z1\.d +** ret +*/ +TEST_UNIFORM_Z (amin_f64_z_tied1, svfloat64_t, + z0 = svamin_f64_z (p0, z0, z1), + z0 = svamin_z (p0, z0, z1)) + +/* +** amin_f64_z_tied2: +** movprfx z0\.d, p0/z, z0\.d +** famin z0\.d, p0/m, z0\.d, z1\.d +** ret +*/ +TEST_UNIFORM_Z 
(amin_f64_z_tied2, svfloat64_t, + z0 = svamin_f64_z (p0, z1, z0), + z0 = svamin_z (p0, z1, z0)) + +/* +** amin_f64_z_untied: +** movprfx z0\.d, p0/z, z1\.d +** famin z0\.d, p0/m, z0\.d, z2\.d +** ret +*/ +TEST_UNIFORM_Z (amin_f64_z_untied, svfloat64_t, + z0 = svamin_f64_z (p0, z1, z2), + z0 = svamin_z (p0, z1, z2)) + +/* +** amin_n_f64_m_tied1: +** mov z7\.d, d7 +** famin z0\.d, p0/m, z0\.d, z7\.d +** ret +*/ +TEST_DUAL_ZD (amin_n_f64_m_tied1, svfloat64_t, svfloat64_t, float64_t, + z0 = svamin_n_f64_m (p0, z0, d7), + z0 = svamin_m (p0, z0, d7)) + +/* +** amin_n_f64_m_untied: +** mov z7\.d, d7 +** movprfx z0, z4 +** famin z0\.d, p0/m, z0\.d, z7\.d +** ret +*/ +TEST_DUAL_ZD (amin_n_f64_m_untied, svfloat64_t, svfloat64_t, float64_t, + z0 = svamin_n_f64_m (p0, z4, d7), + z0 = svamin_m (p0, z4, d7)) + +/* +** amin_n_f64_x_tied1: +** mov z7\.d, d7 +** famin z0\.d, p0/m, z0\.d, z7\.d +** ret +*/ +TEST_DUAL_ZD (amin_n_f64_x_tied1, svfloat64_t, svfloat64_t, float64_t, + z0 = svamin_n_f64_x (p0, z0, d7), + z0 = svamin_x (p0, z0, d7)) + +/* +** amin_n_f64_x_untied: +** mov z0\.d, d7 +** famin z0\.d, p0/m, z0\.d, z4\.d +** ret +*/ +TEST_DUAL_ZD (amin_n_f64_x_untied, svfloat64_t, svfloat64_t, float64_t, + z0 = svamin_n_f64_x (p0, z4, d7), + z0 = svamin_x (p0, z4, d7)) + +/* +** amin_n_f64_z_tied1: +** mov z7\.d, d7 +** movprfx z0\.d, p0/z, z0\.d +** famin z0\.d, p0/m, z0\.d, z7\.d +** ret +*/ +TEST_DUAL_ZD (amin_n_f64_z_tied1, svfloat64_t, svfloat64_t, float64_t, + z0 = svamin_n_f64_z (p0, z0, d7), + z0 = svamin_z (p0, z0, d7)) + +/* +** amin_n_f64_z_untied: +** mov z7\.d, d7 +** movprfx z0\.d, p0/z, z4\.d +** famin z0\.d, p0/m, z0\.d, z7\.d +** ret +*/ +TEST_DUAL_ZD (amin_n_f64_z_untied, svfloat64_t, svfloat64_t, float64_t, + z0 = svamin_n_f64_z (p0, z4, d7), + z0 = svamin_z (p0, z4, d7))