<saurabh....@arm.com> writes: > The AArch64 FEAT_FAMINMAX extension introduces instructions for > computing the floating-point absolute maximum and minimum of > two vectors element-wise. > > This patch adds code generation for famax and famin in terms of existing > unspecs. With this patch: > 1. famax can be expressed as taking UNSPEC_COND_SMAX of the absolute > values (UNSPEC_COND_FABS) of the two operands. > 2. famin can be expressed as taking UNSPEC_COND_SMIN of the absolute > values (UNSPEC_COND_FABS) of the two operands. > > This fusion of operators is only possible when the > -march=armv9-a+faminmax+sve flags are passed. We also need to pass the > -ffast-math flag; this is what enables the compiler to use UNSPEC_COND_SMAX > and UNSPEC_COND_SMIN. > > This code generation is only available at -O2 or -O3 as that is when > auto-vectorization is enabled. > > gcc/ChangeLog: > > * config/aarch64/aarch64-sve2.md > (*aarch64_pred_faminmax_fused): Instruction pattern for faminmax > codegen. > * config/aarch64/iterators.md: Iterator and attribute for > faminmax codegen. > > gcc/testsuite/ChangeLog: > > * gcc.target/aarch64/sve/faminmax_1.c: New test. > * gcc.target/aarch64/sve/faminmax_2.c: New test.
OK, thanks. Richard > --- > gcc/config/aarch64/aarch64-sve2.md | 37 +++++++++++ > gcc/config/aarch64/iterators.md | 6 ++ > .../gcc.target/aarch64/sve/faminmax_1.c | 45 ++++++++++++++ > .../gcc.target/aarch64/sve/faminmax_2.c | 61 +++++++++++++++++++ > 4 files changed, 149 insertions(+) > create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/faminmax_1.c > create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/faminmax_2.c > > diff --git a/gcc/config/aarch64/aarch64-sve2.md > b/gcc/config/aarch64/aarch64-sve2.md > index 725092cc95f..5f2697c3179 100644 > --- a/gcc/config/aarch64/aarch64-sve2.md > +++ b/gcc/config/aarch64/aarch64-sve2.md > @@ -2467,6 +2467,43 @@ > [(set_attr "movprfx" "yes")] > ) > > +;; ------------------------------------------------------------------------- > +;; -- [FP] Absolute maximum and minimum > +;; ------------------------------------------------------------------------- > +;; Includes: > +;; - FAMAX > +;; - FAMIN > +;; ------------------------------------------------------------------------- > +;; Predicated floating-point absolute maximum and minimum. 
> +(define_insn_and_rewrite "*aarch64_pred_faminmax_fused" > + [(set (match_operand:SVE_FULL_F 0 "register_operand") > + (unspec:SVE_FULL_F > + [(match_operand:<VPRED> 1 "register_operand") > + (match_operand:SI 4 "aarch64_sve_gp_strictness") > + (unspec:SVE_FULL_F > + [(match_operand 5) > + (const_int SVE_RELAXED_GP) > + (match_operand:SVE_FULL_F 2 "register_operand")] > + UNSPEC_COND_FABS) > + (unspec:SVE_FULL_F > + [(match_operand 6) > + (const_int SVE_RELAXED_GP) > + (match_operand:SVE_FULL_F 3 "register_operand")] > + UNSPEC_COND_FABS)] > + SVE_COND_SMAXMIN))] > + "TARGET_SVE_FAMINMAX" > + {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ] > + [ w , Upl , %0 , w ; * ] > <faminmax_cond_uns_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype> > + [ ?&w , Upl , w , w ; yes ] movprfx\t%0, > %2\;<faminmax_cond_uns_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype> > + } > + "&& (!rtx_equal_p (operands[1], operands[5]) > + || !rtx_equal_p (operands[1], operands[6]))" > + { > + operands[5] = copy_rtx (operands[1]); > + operands[6] = copy_rtx (operands[1]); > + } > +) > + > ;; ========================================================================= > ;; == Complex arithmetic > ;; ========================================================================= > diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md > index c06f8c2c90f..8b18682c341 100644 > --- a/gcc/config/aarch64/iterators.md > +++ b/gcc/config/aarch64/iterators.md > @@ -3143,6 +3143,9 @@ > UNSPEC_COND_FMIN > UNSPEC_COND_FMINNM]) > > +(define_int_iterator SVE_COND_SMAXMIN [UNSPEC_COND_SMAX > + UNSPEC_COND_SMIN]) > + > (define_int_iterator SVE_COND_FP_TERNARY [UNSPEC_COND_FMLA > UNSPEC_COND_FMLS > UNSPEC_COND_FNMLA > @@ -4503,6 +4506,9 @@ > > (define_int_iterator FAMINMAX_UNS [UNSPEC_FAMAX UNSPEC_FAMIN]) > > +(define_int_attr faminmax_cond_uns_op > + [(UNSPEC_COND_SMAX "famax") (UNSPEC_COND_SMIN "famin")]) > + > (define_int_attr faminmax_uns_op > [(UNSPEC_FAMAX "famax") (UNSPEC_FAMIN "famin")]) 
> > diff --git a/gcc/testsuite/gcc.target/aarch64/sve/faminmax_1.c > b/gcc/testsuite/gcc.target/aarch64/sve/faminmax_1.c > new file mode 100644 > index 00000000000..d54f5d99b5e > --- /dev/null > +++ b/gcc/testsuite/gcc.target/aarch64/sve/faminmax_1.c > @@ -0,0 +1,45 @@ > +/* { dg-do compile } */ > +/* { dg-additional-options "-O3 -ffast-math" } */ > +/* { dg-final { check-function-bodies "**" "" } } */ > + > +#include "arm_sve.h" > + > +#pragma GCC target "+sve+faminmax" > + > +#define TEST_FAMAX(TYPE) \ > + void fn_famax_##TYPE (TYPE * restrict a, \ > + TYPE * restrict b, \ > + TYPE * restrict c, \ > + int n) { \ > + for (int i = 0; i < n; i++) { \ > + TYPE temp1 = __builtin_fabs (a[i]); \ > + TYPE temp2 = __builtin_fabs (b[i]); \ > + c[i] = __builtin_fmax (temp1, temp2); \ > + } > \ > + } \ > + > +#define TEST_FAMIN(TYPE) \ > + void fn_famin_##TYPE (TYPE * restrict a, \ > + TYPE * restrict b, \ > + TYPE * restrict c, \ > + int n) { \ > + for (int i = 0; i < n; i++) { \ > + TYPE temp1 = __builtin_fabs (a[i]); \ > + TYPE temp2 = __builtin_fabs (b[i]); \ > + c[i] = __builtin_fmin (temp1, temp2); \ > + } > \ > + } \ > + > +TEST_FAMAX (float16_t) > +TEST_FAMAX (float32_t) > +TEST_FAMAX (float64_t) > +TEST_FAMIN (float16_t) > +TEST_FAMIN (float32_t) > +TEST_FAMIN (float64_t) > + > +/* { dg-final { scan-assembler-times {\tfamax\tz[0-9]+\.h, p[0-7]/m, > z[0-9]+\.h, z[0-9]+\.h\n} 1 } } */ > +/* { dg-final { scan-assembler-times {\tfamax\tz[0-9]+\.s, p[0-7]/m, > z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */ > +/* { dg-final { scan-assembler-times {\tfamax\tz[0-9]+\.d, p[0-7]/m, > z[0-9]+\.d, z[0-9]+\.d\n} 1 } } */ > +/* { dg-final { scan-assembler-times {\tfamin\tz[0-9]+\.h, p[0-7]/m, > z[0-9]+\.h, z[0-9]+\.h\n} 1 } } */ > +/* { dg-final { scan-assembler-times {\tfamin\tz[0-9]+\.s, p[0-7]/m, > z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */ > +/* { dg-final { scan-assembler-times {\tfamin\tz[0-9]+\.d, p[0-7]/m, > z[0-9]+\.d, z[0-9]+\.d\n} 1 } } */ > diff --git 
a/gcc/testsuite/gcc.target/aarch64/sve/faminmax_2.c > b/gcc/testsuite/gcc.target/aarch64/sve/faminmax_2.c > new file mode 100644 > index 00000000000..29e12450831 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/aarch64/sve/faminmax_2.c > @@ -0,0 +1,61 @@ > +/* { dg-do compile } */ > +/* { dg-additional-options "-O3 -ffast-math" } */ > +/* { dg-final { check-function-bodies "**" "" } } */ > + > +#include "arm_sve.h" > + > +#pragma GCC target "+sve+faminmax" > + > +#define TEST_WITH_SVMAX(TYPE) > \ > + TYPE fn_fmax_##TYPE (TYPE x, TYPE y) { \ > + svbool_t pg = svptrue_b8(); > \ > + return svmax_x(pg, svabs_x(pg, x), svabs_x(pg, y)); > \ > + } \ > + > +#define TEST_WITH_SVMAXNM(TYPE) > \ > + TYPE fn_fmaxnm_##TYPE (TYPE x, TYPE y) { \ > + svbool_t pg = svptrue_b8(); > \ > + return svmaxnm_x(pg, svabs_x(pg, x), svabs_x(pg, y)); \ > + } \ > + > +#define TEST_WITH_SVMIN(TYPE) > \ > + TYPE fn_fmin_##TYPE (TYPE x, TYPE y) { \ > + svbool_t pg = svptrue_b8(); > \ > + return svmin_x(pg, svabs_x(pg, x), svabs_x(pg, y)); > \ > + } \ > + > +#define TEST_WITH_SVMINNM(TYPE) > \ > + TYPE fn_fminnm_##TYPE (TYPE x, TYPE y) { \ > + svbool_t pg = svptrue_b8(); > \ > + return svminnm_x(pg, svabs_x(pg, x), svabs_x(pg, y)); \ > + } \ > + > +TEST_WITH_SVMAX (svfloat16_t) > +TEST_WITH_SVMAX (svfloat32_t) > +TEST_WITH_SVMAX (svfloat64_t) > + > +TEST_WITH_SVMAXNM (svfloat16_t) > +TEST_WITH_SVMAXNM (svfloat32_t) > +TEST_WITH_SVMAXNM (svfloat64_t) > + > +TEST_WITH_SVMIN (svfloat16_t) > +TEST_WITH_SVMIN (svfloat32_t) > +TEST_WITH_SVMIN (svfloat64_t) > + > +TEST_WITH_SVMINNM (svfloat16_t) > +TEST_WITH_SVMINNM (svfloat32_t) > +TEST_WITH_SVMINNM (svfloat64_t) > + > +/* { dg-final { scan-assembler-not {\tfamax\t} } } */ > +/* { dg-final { scan-assembler-not {\tfamin\t} } } */ > + > +/* { dg-final { scan-assembler-times {\tfabs\tz[0-9]+\.h, p[0-7]/m, > z[0-9]+\.h\n} 8 } } */ > +/* { dg-final { scan-assembler-times {\tfabs\tz[0-9]+\.s, p[0-7]/m, > z[0-9]+\.s\n} 8 } } */ > +/* { dg-final { 
scan-assembler-times {\tfabs\tz[0-9]+\.d, p[0-7]/m, > z[0-9]+\.d\n} 8 } } */ > + > +/* { dg-final { scan-assembler-times {\tfmax\tz[0-9]+\.h, p[0-7]/m, > z[0-9]+\.h, z[0-9]+\.h\n} 1 } } */ > +/* { dg-final { scan-assembler-times {\tfmax\tz[0-9]+\.s, p[0-7]/m, > z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */ > +/* { dg-final { scan-assembler-times {\tfmax\tz[0-9]+\.d, p[0-7]/m, > z[0-9]+\.d, z[0-9]+\.d\n} 1 } } */ > +/* { dg-final { scan-assembler-times {\tfmin\tz[0-9]+\.h, p[0-7]/m, > z[0-9]+\.h, z[0-9]+\.h\n} 1 } } */ > +/* { dg-final { scan-assembler-times {\tfmin\tz[0-9]+\.s, p[0-7]/m, > z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */ > +/* { dg-final { scan-assembler-times {\tfmin\tz[0-9]+\.d, p[0-7]/m, > z[0-9]+\.d, z[0-9]+\.d\n} 1 } } */