The AArch64 FEAT_FAMINMAX extension introduces instructions for computing the floating point absolute maximum and minimum of the two vectors element-wise.
This patch adds code generation for famax and famin in terms of existing unspecs. With this patch: 1. famax can be expressed as taking UNSPEC_COND_SMAX of the two operands and then taking absolute value of their result. 2. famin can be expressed as taking UNSPEC_COND_SMIN of the two operands and then taking absolute value of their result. This fusion of operators is only possible when -march=armv9-a+faminmax+sve flags are passed. We also need to pass -ffast-math flag; this is what enables compiler to use UNSPEC_COND_SMAX and UNSPEC_COND_SMIN. This code generation is only available on -O2 or -O3 as that is when auto-vectorization is enabled. gcc/ChangeLog: * config/aarch64/aarch64-sve2.md (*aarch64_pred_faminmax_fused): Instruction pattern for faminmax codegen. * config/aarch64/iterators.md: Iterator and attribute for faminmax codegen. gcc/testsuite/ChangeLog: * gcc.target/aarch64/sve/faminmax_1.c: New test. * gcc.target/aarch64/sve/faminmax_2.c: New test. --- gcc/config/aarch64/aarch64-sve2.md | 31 ++++ gcc/config/aarch64/iterators.md | 6 + .../gcc.target/aarch64/sve/faminmax_1.c | 85 ++++++++++ .../gcc.target/aarch64/sve/faminmax_2.c | 154 ++++++++++++++++++ 4 files changed, 276 insertions(+) create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/faminmax_1.c create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/faminmax_2.c
diff --git a/gcc/config/aarch64/aarch64-sve2.md b/gcc/config/aarch64/aarch64-sve2.md index 972b03a4fef..6a8e940e16d 100644 --- a/gcc/config/aarch64/aarch64-sve2.md +++ b/gcc/config/aarch64/aarch64-sve2.md @@ -2467,6 +2467,37 @@ [(set_attr "movprfx" "yes")] ) +;; ------------------------------------------------------------------------- +;; -- [FP] Absolute maximum and minimum +;; ------------------------------------------------------------------------- +;; Includes: +;; - FAMAX +;; - FAMIN +;; ------------------------------------------------------------------------- +;; Predicated floating-point absolute maximum and minimum. +(define_insn "*aarch64_pred_faminmax_fused" + [(set (match_operand:SVE_FULL_F 0 "register_operand") + (unspec:SVE_FULL_F + [(match_operand:<VPRED> 1 "register_operand") + (match_operand:SI 4 "aarch64_sve_gp_strictness") + (unspec:SVE_FULL_F + [(match_operand 5) + (const_int SVE_RELAXED_GP) + (match_operand:SVE_FULL_F 2 "register_operand")] + UNSPEC_COND_FABS) + (unspec:SVE_FULL_F + [(match_operand 6) + (const_int SVE_RELAXED_GP) + (match_operand:SVE_FULL_F 3 "register_operand")] + UNSPEC_COND_FABS)] + SVE_COND_FP_SMAXMIN))] + "TARGET_SVE_FAMINMAX" + {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ] + [ w , Upl , %0 , w ; * ] <faminmax_cond_uns_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype> + [ ?&w , Upl , w , w ; yes ] movprfx\t%0, %2\;<faminmax_cond_uns_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype> + } +) + ;; ========================================================================= ;; == Complex arithmetic ;; ========================================================================= diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md index d3a457fc6d9..e9adb4209da 100644 --- a/gcc/config/aarch64/iterators.md +++ b/gcc/config/aarch64/iterators.md @@ -3143,6 +3143,9 @@ UNSPEC_COND_FMIN UNSPEC_COND_FMINNM]) +(define_int_iterator SVE_COND_FP_SMAXMIN [UNSPEC_COND_SMAX + UNSPEC_COND_SMIN]) + (define_int_iterator SVE_COND_FP_TERNARY [UNSPEC_COND_FMLA UNSPEC_COND_FMLS UNSPEC_COND_FNMLA @@ -4503,6 +4506,9 @@ (define_int_iterator FAMINMAX_UNS [UNSPEC_FAMAX UNSPEC_FAMIN]) +(define_int_attr faminmax_cond_uns_op + [(UNSPEC_COND_SMAX "famax") (UNSPEC_COND_SMIN "famin")]) + (define_int_attr faminmax_uns_op [(UNSPEC_FAMAX "famax") (UNSPEC_FAMIN "famin")]) diff --git a/gcc/testsuite/gcc.target/aarch64/sve/faminmax_1.c b/gcc/testsuite/gcc.target/aarch64/sve/faminmax_1.c new file mode 100644 index 00000000000..bdf077ab2f7 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/faminmax_1.c @@ -0,0 +1,85 @@ +/* { dg-do compile } */ +/* { dg-additional-options "-O3 -ffast-math" } */ +/* { dg-final { check-function-bodies "**" "" } } */ + +#include "arm_sve.h" + +#pragma GCC target "+sve+faminmax" + +#define TEST_FAMAX(TYPE) \ + void fn_famax_##TYPE (TYPE * restrict a, \ + TYPE * restrict b, \ + TYPE * restrict c, \ + int n) { \ + for (int i = 0; i < n; i++) { \ + TYPE temp1 = __builtin_fabs (a[i]); \ + TYPE temp2 = __builtin_fabs (b[i]); \ + c[i] = __builtin_fmax (temp1, temp2); \ + } \ + } \ + +#define TEST_FAMIN(TYPE) \ + void fn_famin_##TYPE (TYPE * restrict a, \ + TYPE * restrict b, \ + TYPE * restrict c, \ + int n) { \ + for (int i = 0; i < n; i++) { \ + TYPE temp1 = __builtin_fabs (a[i]); \ + TYPE temp2 = __builtin_fabs (b[i]); \ + c[i] = __builtin_fmin (temp1, temp2); \ + } \ + } \ + +/* +** fn_famax_float16_t: +** ... +** famax z30.h, p6/m, z30.h, z31.h +** ... +** ret +*/ +TEST_FAMAX (float16_t) + +/* +** fn_famax_float32_t: +** ... +** famax z30.s, p6/m, z30.s, z31.s +** ... +** ret +*/ +TEST_FAMAX (float32_t) + +/* +** fn_famax_float64_t: +** ... +** famax z30.d, p6/m, z30.d, z31.d +** ... +** ret +*/ +TEST_FAMAX (float64_t) + +/* +** fn_famin_float16_t: +** ... +** famin z30.h, p6/m, z30.h, z31.h +** ... +** ret +*/ +TEST_FAMIN (float16_t) + +/* +** fn_famin_float32_t: +** ... +** famin z30.s, p6/m, z30.s, z31.s +** ... +** ret +*/ +TEST_FAMIN (float32_t) + +/* +** fn_famin_float64_t: +** ... +** famin z30.d, p6/m, z30.d, z31.d +** ... +** ret +*/ +TEST_FAMIN (float64_t) diff --git a/gcc/testsuite/gcc.target/aarch64/sve/faminmax_2.c b/gcc/testsuite/gcc.target/aarch64/sve/faminmax_2.c new file mode 100644 index 00000000000..26396979389 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/faminmax_2.c @@ -0,0 +1,154 @@ +/* { dg-do compile } */ +/* { dg-additional-options "-O3 -ffast-math" } */ +/* { dg-final { check-function-bodies "**" "" } } */ + +#include "arm_sve.h" + +#pragma GCC target "+sve+faminmax" + +#define TEST_WITH_SVMAX(TYPE) \ + TYPE fn_fmax_##TYPE (TYPE x, TYPE y) { \ + svbool_t pg = svptrue_b8(); \ + return svmax_x(pg, svabs_x(pg, x), svabs_x(pg, y)); \ + } \ + +#define TEST_WITH_SVMAXNM(TYPE) \ + TYPE fn_fmaxnm_##TYPE (TYPE x, TYPE y) { \ + svbool_t pg = svptrue_b8(); \ + return svmaxnm_x(pg, svabs_x(pg, x), svabs_x(pg, y)); \ + } \ + +#define TEST_WITH_SVMIN(TYPE) \ + TYPE fn_fmin_##TYPE (TYPE x, TYPE y) { \ + svbool_t pg = svptrue_b8(); \ + return svmin_x(pg, svabs_x(pg, x), svabs_x(pg, y)); \ + } \ + +#define TEST_WITH_SVMINNM(TYPE) \ + TYPE fn_fminnm_##TYPE (TYPE x, TYPE y) { \ + svbool_t pg = svptrue_b8(); \ + return svminnm_x(pg, svabs_x(pg, x), svabs_x(pg, y)); \ + } \ + +/* +** fn_fmax_svfloat16_t: +** ptrue p3.b, all +** fabs z0.h, p3/m, z0.h +** fabs z1.h, p3/m, z1.h +** fmax z0.h, p3/m, z0.h, z1.h +** ret +*/ +TEST_WITH_SVMAX (svfloat16_t) + +/* +** fn_fmax_svfloat32_t: +** ptrue p3.b, all +** fabs z0.s, p3/m, z0.s +** fabs z1.s, p3/m, z1.s +** fmax z0.s, p3/m, z0.s, z1.s +** ret +*/ +TEST_WITH_SVMAX (svfloat32_t) + +/* +** fn_fmax_svfloat64_t: +** ptrue p3.b, all +** fabs z0.d, p3/m, z0.d +** fabs z1.d, p3/m, z1.d +** fmax z0.d, p3/m, z0.d, z1.d +** ret +*/ +TEST_WITH_SVMAX (svfloat64_t) + +/* +** fn_fmaxnm_svfloat16_t: +** ptrue p3.b, all +** fabs z0.h, p3/m, z0.h +** fabs z1.h, p3/m, z1.h +** fmaxnm z0.h, p3/m, z0.h, z1.h +** ret +*/ +TEST_WITH_SVMAXNM (svfloat16_t) + +/* +** fn_fmaxnm_svfloat32_t: +** ptrue p3.b, all +** fabs z0.s, p3/m, z0.s +** fabs z1.s, p3/m, z1.s +** fmaxnm z0.s, p3/m, z0.s, z1.s +** ret +*/ +TEST_WITH_SVMAXNM (svfloat32_t) + +/* +** fn_fmaxnm_svfloat64_t: +** ptrue p3.b, all +** fabs z0.d, p3/m, z0.d +** fabs z1.d, p3/m, z1.d +** fmaxnm z0.d, p3/m, z0.d, z1.d +** ret +*/ +TEST_WITH_SVMAXNM (svfloat64_t) + +/* +** fn_fmin_svfloat16_t: +** ptrue p3.b, all +** fabs z0.h, p3/m, z0.h +** fabs z1.h, p3/m, z1.h +** fmin z0.h, p3/m, z0.h, z1.h +** ret +*/ +TEST_WITH_SVMIN (svfloat16_t) + +/* +** fn_fmin_svfloat32_t: +** ptrue p3.b, all +** fabs z0.s, p3/m, z0.s +** fabs z1.s, p3/m, z1.s +** fmin z0.s, p3/m, z0.s, z1.s +** ret +*/ +TEST_WITH_SVMIN (svfloat32_t) + +/* +** fn_fmin_svfloat64_t: +** ptrue p3.b, all +** fabs z0.d, p3/m, z0.d +** fabs z1.d, p3/m, z1.d +** fmin z0.d, p3/m, z0.d, z1.d +** ret +*/ +TEST_WITH_SVMIN (svfloat64_t) + +/* +** fn_fminnm_svfloat16_t: +** ptrue p3.b, all +** fabs z0.h, p3/m, z0.h +** fabs z1.h, p3/m, z1.h +** fminnm z0.h, p3/m, z0.h, z1.h +** ret +*/ +TEST_WITH_SVMINNM (svfloat16_t) + +/* +** fn_fminnm_svfloat32_t: +** ptrue p3.b, all +** fabs z0.s, p3/m, z0.s +** fabs z1.s, p3/m, z1.s +** fminnm z0.s, p3/m, z0.s, z1.s +** ret +*/ +TEST_WITH_SVMINNM (svfloat32_t) + +/* +** fn_fminnm_svfloat64_t: +** ptrue p3.b, all +** fabs z0.d, p3/m, z0.d +** fabs z1.d, p3/m, z1.d +** fminnm z0.d, p3/m, z0.d, z1.d +** ret +*/ +TEST_WITH_SVMINNM (svfloat64_t) + +/* { dg-final { scan-assembler-not {\tfamax\t} } } */ +/* { dg-final { scan-assembler-not {\tfamin\t} } } */