On Wed, Nov 10, 2021 at 1:49 PM Richard Sandiford via Gcc-patches <gcc-patches@gcc.gnu.org> wrote: > > This patch adds support for reductions involving calls to fmax*() > and fmin*(), without the -ffast-math flags that allow them to be > converted to MAX_EXPR and MIN_EXPR. > > Tested on aarch64-linux-gnu and x86_64-linux-gnu. OK to install?
OK. Thanks, Richard. > Richard > > > gcc/ > * doc/md.texi (reduc_fmin_scal_@var{m}): Document. > (reduc_fmax_scal_@var{m}): Likewise. > * optabs.def (reduc_fmax_scal_optab): New optab. > (reduc_fmin_scal_optab): Likewise. > * internal-fn.def (REDUC_FMAX, REDUC_FMIN): New functions. > * tree-vect-loop.c (reduction_fn_for_scalar_code): Handle > CASE_CFN_FMAX and CASE_CFN_FMIN. > (neutral_op_for_reduction): Likewise. > (needs_fold_left_reduction_p): Likewise. > * config/aarch64/iterators.md (FMAXMINNMV): New iterator. > (fmaxmin): Handle UNSPEC_FMAXNMV and UNSPEC_FMINNMV. > * config/aarch64/aarch64-simd.md (reduc_<optab>_scal_<mode>): Fix > unspec mode. > (reduc_<fmaxmin>_scal_<mode>): New pattern. > * config/aarch64/aarch64-sve.md (reduc_<fmaxmin>_scal_<mode>): > Likewise. > > gcc/testsuite/ > * gcc.dg/vect/vect-fmax-1.c: New test. > * gcc.dg/vect/vect-fmax-2.c: Likewise. > * gcc.dg/vect/vect-fmax-3.c: Likewise. > * gcc.dg/vect/vect-fmin-1.c: New test. > * gcc.dg/vect/vect-fmin-2.c: Likewise. > * gcc.dg/vect/vect-fmin-3.c: Likewise. > * gcc.target/aarch64/fmaxnm_1.c: Likewise. > * gcc.target/aarch64/fmaxnm_2.c: Likewise. > * gcc.target/aarch64/fminnm_1.c: Likewise. > * gcc.target/aarch64/fminnm_2.c: Likewise. > * gcc.target/aarch64/sve/fmaxnm_2.c: Likewise. > * gcc.target/aarch64/sve/fmaxnm_3.c: Likewise. > * gcc.target/aarch64/sve/fminnm_2.c: Likewise. > * gcc.target/aarch64/sve/fminnm_3.c: Likewise. 
> --- > gcc/config/aarch64/aarch64-simd.md | 15 +++- > gcc/config/aarch64/aarch64-sve.md | 11 +++ > gcc/config/aarch64/iterators.md | 4 + > gcc/doc/md.texi | 8 ++ > gcc/internal-fn.def | 4 + > gcc/optabs.def | 2 + > gcc/testsuite/gcc.dg/vect/vect-fmax-1.c | 83 ++++++++++++++++++ > gcc/testsuite/gcc.dg/vect/vect-fmax-2.c | 7 ++ > gcc/testsuite/gcc.dg/vect/vect-fmax-3.c | 83 ++++++++++++++++++ > gcc/testsuite/gcc.dg/vect/vect-fmin-1.c | 86 +++++++++++++++++++ > gcc/testsuite/gcc.dg/vect/vect-fmin-2.c | 9 ++ > gcc/testsuite/gcc.dg/vect/vect-fmin-3.c | 83 ++++++++++++++++++ > gcc/testsuite/gcc.target/aarch64/fmaxnm_1.c | 24 ++++++ > gcc/testsuite/gcc.target/aarch64/fmaxnm_2.c | 20 +++++ > gcc/testsuite/gcc.target/aarch64/fminnm_1.c | 24 ++++++ > gcc/testsuite/gcc.target/aarch64/fminnm_2.c | 20 +++++ > .../gcc.target/aarch64/sve/fmaxnm_2.c | 22 +++++ > .../gcc.target/aarch64/sve/fmaxnm_3.c | 18 ++++ > .../gcc.target/aarch64/sve/fminnm_2.c | 22 +++++ > .../gcc.target/aarch64/sve/fminnm_3.c | 18 ++++ > gcc/tree-vect-loop.c | 45 ++++++++-- > 21 files changed, 599 insertions(+), 9 deletions(-) > create mode 100644 gcc/testsuite/gcc.dg/vect/vect-fmax-1.c > create mode 100644 gcc/testsuite/gcc.dg/vect/vect-fmax-2.c > create mode 100644 gcc/testsuite/gcc.dg/vect/vect-fmax-3.c > create mode 100644 gcc/testsuite/gcc.dg/vect/vect-fmin-1.c > create mode 100644 gcc/testsuite/gcc.dg/vect/vect-fmin-2.c > create mode 100644 gcc/testsuite/gcc.dg/vect/vect-fmin-3.c > create mode 100644 gcc/testsuite/gcc.target/aarch64/fmaxnm_1.c > create mode 100644 gcc/testsuite/gcc.target/aarch64/fmaxnm_2.c > create mode 100644 gcc/testsuite/gcc.target/aarch64/fminnm_1.c > create mode 100644 gcc/testsuite/gcc.target/aarch64/fminnm_2.c > create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/fmaxnm_2.c > create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/fmaxnm_3.c > create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/fminnm_2.c > create mode 100644 
gcc/testsuite/gcc.target/aarch64/sve/fminnm_3.c > > diff --git a/gcc/config/aarch64/aarch64-simd.md > b/gcc/config/aarch64/aarch64-simd.md > index 35d55a3e51e..8e7d783f7f3 100644 > --- a/gcc/config/aarch64/aarch64-simd.md > +++ b/gcc/config/aarch64/aarch64-simd.md > @@ -3624,8 +3624,8 @@ (define_insn "popcount<mode>2" > ;; gimple_fold'd to the IFN_REDUC_(MAX|MIN) function. (This is FP > smax/smin). > (define_expand "reduc_<optab>_scal_<mode>" > [(match_operand:<VEL> 0 "register_operand") > - (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand")] > - FMAXMINV)] > + (unspec:<VEL> [(match_operand:VHSDF 1 "register_operand")] > + FMAXMINV)] > "TARGET_SIMD" > { > rtx elt = aarch64_endian_lane_rtx (<MODE>mode, 0); > @@ -3637,6 +3637,17 @@ (define_expand "reduc_<optab>_scal_<mode>" > } > ) > > +(define_expand "reduc_<fmaxmin>_scal_<mode>" > + [(match_operand:<VEL> 0 "register_operand") > + (unspec:<VEL> [(match_operand:VHSDF 1 "register_operand")] > + FMAXMINNMV)] > + "TARGET_SIMD" > + { > + emit_insn (gen_reduc_<optab>_scal_<mode> (operands[0], operands[1])); > + DONE; > + } > +) > + > ;; Likewise for integer cases, signed and unsigned. > (define_expand "reduc_<optab>_scal_<mode>" > [(match_operand:<VEL> 0 "register_operand") > diff --git a/gcc/config/aarch64/aarch64-sve.md > b/gcc/config/aarch64/aarch64-sve.md > index 0f5bf5ea8cb..9ef968840c2 100644 > --- a/gcc/config/aarch64/aarch64-sve.md > +++ b/gcc/config/aarch64/aarch64-sve.md > @@ -8566,6 +8566,17 @@ (define_expand "reduc_<optab>_scal_<mode>" > } > ) > > +(define_expand "reduc_<fmaxmin>_scal_<mode>" > + [(match_operand:<VEL> 0 "register_operand") > + (unspec:<VEL> [(match_operand:SVE_FULL_F 1 "register_operand")] > + FMAXMINNMV)] > + "TARGET_SVE" > + { > + emit_insn (gen_reduc_<optab>_scal_<mode> (operands[0], operands[1])); > + DONE; > + } > +) > + > ;; Predicated floating-point tree reductions. 
> (define_insn "@aarch64_pred_reduc_<optab>_<mode>" > [(set (match_operand:<VEL> 0 "register_operand" "=w") > diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md > index e8eebd863a6..fb568ddc4a0 100644 > --- a/gcc/config/aarch64/iterators.md > +++ b/gcc/config/aarch64/iterators.md > @@ -2510,6 +2510,8 @@ (define_int_iterator MAXMINV [UNSPEC_UMAXV UNSPEC_UMINV > (define_int_iterator FMAXMINV [UNSPEC_FMAXV UNSPEC_FMINV > UNSPEC_FMAXNMV UNSPEC_FMINNMV]) > > +(define_int_iterator FMAXMINNMV [UNSPEC_FMAXNMV UNSPEC_FMINNMV]) > + > (define_int_iterator SVE_INT_ADDV [UNSPEC_SADDV UNSPEC_UADDV]) > > (define_int_iterator USADDLP [UNSPEC_SADDLP UNSPEC_UADDLP]) > @@ -3216,8 +3218,10 @@ (define_int_attr optab [(UNSPEC_ANDF "and") > > (define_int_attr fmaxmin [(UNSPEC_FMAX "fmax_nan") > (UNSPEC_FMAXNM "fmax") > + (UNSPEC_FMAXNMV "fmax") > (UNSPEC_FMIN "fmin_nan") > (UNSPEC_FMINNM "fmin") > + (UNSPEC_FMINNMV "fmin") > (UNSPEC_COND_FMAXNM "fmax") > (UNSPEC_COND_FMINNM "fmin")]) > > diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi > index 589f841ea74..8fd0f8d2fe1 100644 > --- a/gcc/doc/md.texi > +++ b/gcc/doc/md.texi > @@ -5400,6 +5400,14 @@ Find the unsigned minimum/maximum of the elements of a > vector. The vector is > operand 1, and operand 0 is the scalar result, with mode equal to the mode of > the elements of the input vector. > > +@cindex @code{reduc_fmin_scal_@var{m}} instruction pattern > +@cindex @code{reduc_fmax_scal_@var{m}} instruction pattern > +@item @samp{reduc_fmin_scal_@var{m}}, @samp{reduc_fmax_scal_@var{m}} > +Find the floating-point minimum/maximum of the elements of a vector, > +using the same rules as @code{fmin@var{m}3} and @code{fmax@var{m}3}. > +Operand 1 is a vector of mode @var{m} and operand 0 is the scalar > +result, which has mode @code{GET_MODE_INNER (@var{m})}. > + > @cindex @code{reduc_plus_scal_@var{m}} instruction pattern > @item @samp{reduc_plus_scal_@var{m}} > Compute the sum of the elements of a vector. 
The vector is operand 1, and > diff --git a/gcc/internal-fn.def b/gcc/internal-fn.def > index bb4d8ab8096..acb0dbda556 100644 > --- a/gcc/internal-fn.def > +++ b/gcc/internal-fn.def > @@ -216,6 +216,10 @@ DEF_INTERNAL_SIGNED_OPTAB_FN (REDUC_MAX, ECF_CONST | > ECF_NOTHROW, first, > reduc_smax_scal, reduc_umax_scal, unary) > DEF_INTERNAL_SIGNED_OPTAB_FN (REDUC_MIN, ECF_CONST | ECF_NOTHROW, first, > reduc_smin_scal, reduc_umin_scal, unary) > +DEF_INTERNAL_OPTAB_FN (REDUC_FMAX, ECF_CONST | ECF_NOTHROW, > + reduc_fmax_scal, unary) > +DEF_INTERNAL_OPTAB_FN (REDUC_FMIN, ECF_CONST | ECF_NOTHROW, > + reduc_fmin_scal, unary) > DEF_INTERNAL_OPTAB_FN (REDUC_AND, ECF_CONST | ECF_NOTHROW, > reduc_and_scal, unary) > DEF_INTERNAL_OPTAB_FN (REDUC_IOR, ECF_CONST | ECF_NOTHROW, > diff --git a/gcc/optabs.def b/gcc/optabs.def > index e25f4c9a346..cef6054b378 100644 > --- a/gcc/optabs.def > +++ b/gcc/optabs.def > @@ -335,6 +335,8 @@ OPTAB_D (fmax_optab, "fmax$a3") > OPTAB_D (fmin_optab, "fmin$a3") > > /* Vector reduction to a scalar. 
*/ > +OPTAB_D (reduc_fmax_scal_optab, "reduc_fmax_scal_$a") > +OPTAB_D (reduc_fmin_scal_optab, "reduc_fmin_scal_$a") > OPTAB_D (reduc_smax_scal_optab, "reduc_smax_scal_$a") > OPTAB_D (reduc_smin_scal_optab, "reduc_smin_scal_$a") > OPTAB_D (reduc_plus_scal_optab, "reduc_plus_scal_$a") > diff --git a/gcc/testsuite/gcc.dg/vect/vect-fmax-1.c > b/gcc/testsuite/gcc.dg/vect/vect-fmax-1.c > new file mode 100644 > index 00000000000..841ffab5666 > --- /dev/null > +++ b/gcc/testsuite/gcc.dg/vect/vect-fmax-1.c > @@ -0,0 +1,83 @@ > +#include "tree-vect.h" > + > +#ifndef TYPE > +#define TYPE float > +#define FN __builtin_fmaxf > +#endif > + > +TYPE __attribute__((noipa)) > +test (TYPE x, TYPE *ptr, int n) > +{ > + for (int i = 0; i < n; ++i) > + x = FN (x, ptr[i]); > + return x; > +} > + > +#define N 128 > +#define HALF (N / 2) > + > +int > +main (void) > +{ > + check_vect (); > + > + TYPE a[N]; > + > + for (int i = 0; i < N; ++i) > + a[i] = i; > + > + if (test (-1, a, 1) != 0) > + __builtin_abort (); > + if (test (-1, a, 64) != 63) > + __builtin_abort (); > + if (test (-1, a, 65) != 64) > + __builtin_abort (); > + if (test (-1, a, 66) != 65) > + __builtin_abort (); > + if (test (-1, a, 67) != 66) > + __builtin_abort (); > + if (test (-1, a, 128) != 127) > + __builtin_abort (); > + if (test (127, a, 128) != 127) > + __builtin_abort (); > + if (test (128, a, 128) != 128) > + __builtin_abort (); > + > + for (int i = 0; i < N; ++i) > + a[i] = -i; > + > + if (test (-60, a, 4) != 0) > + __builtin_abort (); > + if (test (0, a, 4) != 0) > + __builtin_abort (); > + if (test (1, a, 4) != 1) > + __builtin_abort (); > + > + for (int i = 0; i < HALF; ++i) > + { > + a[i] = i; > + a[HALF + i] = HALF - i; > + } > + > + if (test (0, a, HALF - 16) != HALF - 17) > + __builtin_abort (); > + if (test (0, a, HALF - 2) != HALF - 3) > + __builtin_abort (); > + if (test (0, a, HALF - 1) != HALF - 2) > + __builtin_abort (); > + if (test (0, a, HALF) != HALF - 1) > + __builtin_abort (); > + if (test (0, 
a, HALF + 1) != HALF) > + __builtin_abort (); > + if (test (0, a, HALF + 2) != HALF) > + __builtin_abort (); > + if (test (0, a, HALF + 3) != HALF) > + __builtin_abort (); > + if (test (0, a, HALF + 16) != HALF) > + __builtin_abort (); > + > + return 0; > +} > + > +/* { dg-final { scan-tree-dump "Detected reduction" "vect" } } */ > +/* { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" { target > vect_max_reduc } } } */ > diff --git a/gcc/testsuite/gcc.dg/vect/vect-fmax-2.c > b/gcc/testsuite/gcc.dg/vect/vect-fmax-2.c > new file mode 100644 > index 00000000000..3d1f64416d5 > --- /dev/null > +++ b/gcc/testsuite/gcc.dg/vect/vect-fmax-2.c > @@ -0,0 +1,7 @@ > +#define TYPE double > +#define FN __builtin_fmax > + > +#include "vect-fmax-1.c" > + > +/* { dg-final { scan-tree-dump "Detected reduction" "vect" } } */ > +/* { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" { target > vect_max_reduc } } } */ > diff --git a/gcc/testsuite/gcc.dg/vect/vect-fmax-3.c > b/gcc/testsuite/gcc.dg/vect/vect-fmax-3.c > new file mode 100644 > index 00000000000..f711ed0563e > --- /dev/null > +++ b/gcc/testsuite/gcc.dg/vect/vect-fmax-3.c > @@ -0,0 +1,83 @@ > +#include "tree-vect.h" > + > +void __attribute__((noipa)) > +test (double x0, double x1, double *restrict res, double *restrict ptr, int > n) > +{ > + for (int i = 0; i < n; i += 2) > + { > + x0 = __builtin_fmax (x0, ptr[i + 0]); > + x1 = __builtin_fmax (x1, ptr[i + 1]); > + } > + res[0] = x0; > + res[1] = x1; > +} > + > +#define N 128 > +#define HALF (N / 2) > + > +int > +main (void) > +{ > + check_vect (); > + > + double res[2], a[N]; > + > + for (int i = 0; i < N; i += 2) > + { > + a[i] = i < HALF ? 
i : HALF; > + a[i + 1] = i / 8; > + } > + > + test (-1, -1, res, a, 2); > + if (res[0] != 0 || res[1] != 0) > + __builtin_abort (); > + > + test (-1, -1, res, a, 6); > + if (res[0] != 4 || res[1] != 0) > + __builtin_abort (); > + > + test (-1, -1, res, a, 8); > + if (res[0] != 6 || res[1] != 0) > + __builtin_abort (); > + > + test (-1, -1, res, a, 10); > + if (res[0] != 8 || res[1] != 1) > + __builtin_abort (); > + > + test (-1, -1, res, a, HALF - 2); > + if (res[0] != HALF - 4 || res[1] != HALF / 8 - 1) > + __builtin_abort (); > + > + test (-1, -1, res, a, HALF); > + if (res[0] != HALF - 2 || res[1] != HALF / 8 - 1) > + __builtin_abort (); > + > + test (-1, -1, res, a, HALF + 2); > + if (res[0] != HALF || res[1] != HALF / 8) > + __builtin_abort (); > + > + test (-1, -1, res, a, HALF + 8); > + if (res[0] != HALF || res[1] != HALF / 8) > + __builtin_abort (); > + > + test (-1, -1, res, a, HALF + 10); > + if (res[0] != HALF || res[1] != HALF / 8 + 1) > + __builtin_abort (); > + > + test (-1, -1, res, a, N); > + if (res[0] != HALF || res[1] != N / 8 - 1) > + __builtin_abort (); > + > + test (HALF + 1, -1, res, a, N); > + if (res[0] != HALF + 1 || res[1] != N / 8 - 1) > + __builtin_abort (); > + > + test (HALF + 1, N, res, a, N); > + if (res[0] != HALF + 1 || res[1] != N) > + __builtin_abort (); > + > + return 0; > +} > + > +/* { dg-final { scan-tree-dump "Detected reduction" "vect" } } */ > +/* { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" { target > vect_max_reduc } } } */ > diff --git a/gcc/testsuite/gcc.dg/vect/vect-fmin-1.c > b/gcc/testsuite/gcc.dg/vect/vect-fmin-1.c > new file mode 100644 > index 00000000000..3d5f843a9db > --- /dev/null > +++ b/gcc/testsuite/gcc.dg/vect/vect-fmin-1.c > @@ -0,0 +1,86 @@ > +#include "tree-vect.h" > + > +#ifndef TYPE > +#define TYPE float > +#define FN __builtin_fminf > +#endif > + > +TYPE __attribute__((noipa)) > +test (TYPE x, TYPE *ptr, int n) > +{ > + for (int i = 0; i < n; ++i) > + x = FN (x, ptr[i]); > + return x; > +} 
> + > +#define N 128 > +#define HALF (N / 2) > + > +int > +main (void) > +{ > + check_vect (); > + > + TYPE a[N]; > + > + for (int i = 0; i < N; ++i) > + a[i] = -i; > + > + if (test (1, a, 1) != 0) > + __builtin_abort (); > + if (test (1, a, 64) != -63) > + __builtin_abort (); > + if (test (1, a, 65) != -64) > + __builtin_abort (); > + if (test (1, a, 66) != -65) > + __builtin_abort (); > + if (test (1, a, 67) != -66) > + __builtin_abort (); > + if (test (1, a, 128) != -127) > + __builtin_abort (); > + if (test (-127, a, 128) != -127) > + __builtin_abort (); > + if (test (-128, a, 128) != -128) > + __builtin_abort (); > + > + for (int i = 0; i < N; ++i) > + a[i] = i; > + > + if (test (1, a, 4) != 0) > + __builtin_abort (); > + if (test (0, a, 4) != 0) > + __builtin_abort (); > + if (test (-1, a, 4) != -1) > + __builtin_abort (); > + > + for (int i = 0; i < HALF; ++i) > + { > + a[i] = HALF - i; > + a[HALF + i] = i; > + } > + > + if (test (N, a, HALF - 16) != 17) > + __builtin_abort (); > + if (test (N, a, HALF - 2) != 3) > + __builtin_abort (); > + if (test (N, a, HALF - 1) != 2) > + __builtin_abort (); > + if (test (N, a, HALF) != 1) > + __builtin_abort (); > + if (test (N, a, HALF + 1) != 0) > + __builtin_abort (); > + if (test (N, a, HALF + 2) != 0) > + __builtin_abort (); > + if (test (N, a, HALF + 3) != 0) > + __builtin_abort (); > + if (test (N, a, HALF + 16) != 0) > + __builtin_abort (); > + > + return 0; > +} > + > +/* { dg-final { scan-tree-dump "Detected reduction" "vect" } } */ > +/* { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" { target > vect_max_reduc } } } */ > + > +/* { dg-final { scan-tree-dump "Detected reduction" "vect" } } */ > +/* { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" { target > vect_max_reduc } } } */ > diff --git a/gcc/testsuite/gcc.dg/vect/vect-fmin-2.c > b/gcc/testsuite/gcc.dg/vect/vect-fmin-2.c > new file mode 100644 > index 00000000000..21e45cca55a > --- /dev/null > +++ b/gcc/testsuite/gcc.dg/vect/vect-fmin-2.c > @@ 
-0,0 +1,9 @@ > +#ifndef TYPE > +#define TYPE double > +#define FN __builtin_fmin > +#endif > + > +#include "vect-fmin-1.c" > + > +/* { dg-final { scan-tree-dump "Detected reduction" "vect" } } */ > +/* { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" { target > vect_max_reduc } } } */ > diff --git a/gcc/testsuite/gcc.dg/vect/vect-fmin-3.c > b/gcc/testsuite/gcc.dg/vect/vect-fmin-3.c > new file mode 100644 > index 00000000000..cc38bf43909 > --- /dev/null > +++ b/gcc/testsuite/gcc.dg/vect/vect-fmin-3.c > @@ -0,0 +1,83 @@ > +#include "tree-vect.h" > + > +void __attribute__((noipa)) > +test (double x0, double x1, double *restrict res, double *restrict ptr, int > n) > +{ > + for (int i = 0; i < n; i += 2) > + { > + x0 = __builtin_fmin (x0, ptr[i + 0]); > + x1 = __builtin_fmin (x1, ptr[i + 1]); > + } > + res[0] = x0; > + res[1] = x1; > +} > + > +#define N 128 > +#define HALF (N / 2) > + > +int > +main (void) > +{ > + check_vect (); > + > + double res[2], a[N]; > + > + for (int i = 0; i < N; i += 2) > + { > + a[i] = i < HALF ? 
HALF - i : 0; > + a[i + 1] = -i / 8; > + } > + > + test (N, N, res, a, 2); > + if (res[0] != HALF || res[1] != 0) > + __builtin_abort (); > + > + test (N, N, res, a, 6); > + if (res[0] != HALF - 4 || res[1] != 0) > + __builtin_abort (); > + > + test (N, N, res, a, 8); > + if (res[0] != HALF - 6 || res[1] != 0) > + __builtin_abort (); > + > + test (N, N, res, a, 10); > + if (res[0] != HALF - 8 || res[1] != -1) > + __builtin_abort (); > + > + test (N, N, res, a, HALF - 2); > + if (res[0] != 4 || res[1] != -HALF / 8 + 1) > + __builtin_abort (); > + > + test (N, N, res, a, HALF); > + if (res[0] != 2 || res[1] != -HALF / 8 + 1) > + __builtin_abort (); > + > + test (N, N, res, a, HALF + 2); > + if (res[0] != 0 || res[1] != -HALF / 8) > + __builtin_abort (); > + > + test (N, N, res, a, HALF + 8); > + if (res[0] != 0 || res[1] != -HALF / 8) > + __builtin_abort (); > + > + test (N, N, res, a, HALF + 10); > + if (res[0] != 0 || res[1] != -HALF / 8 - 1) > + __builtin_abort (); > + > + test (N, N, res, a, N); > + if (res[0] != 0 || res[1] != -N / 8 + 1) > + __builtin_abort (); > + > + test (-1, N, res, a, N); > + if (res[0] != -1 || res[1] != -N / 8 + 1) > + __builtin_abort (); > + > + test (-1, -N / 8, res, a, N); > + if (res[0] != -1 || res[1] != -N / 8) > + __builtin_abort (); > + > + return 0; > +} > + > +/* { dg-final { scan-tree-dump "Detected reduction" "vect" } } */ > +/* { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" { target > vect_max_reduc } } } */ > diff --git a/gcc/testsuite/gcc.target/aarch64/fmaxnm_1.c > b/gcc/testsuite/gcc.target/aarch64/fmaxnm_1.c > new file mode 100644 > index 00000000000..40c36c7a3dc > --- /dev/null > +++ b/gcc/testsuite/gcc.target/aarch64/fmaxnm_1.c > @@ -0,0 +1,24 @@ > +/* { dg-options "-O2 -ftree-vectorize -fno-vect-cost-model" } */ > + > +#pragma GCC target "+nosve" > + > +float > +f1 (float x, float *ptr) > +{ > + for (int i = 0; i < 128; ++i) > + x = __builtin_fmaxf (x, ptr[i]); > + return x; > +} > + > +double > +f2 (double x, 
double *ptr) > +{ > + for (int i = 0; i < 128; ++i) > + x = __builtin_fmax (x, ptr[i]); > + return x; > +} > + > +/* { dg-final { scan-assembler-times {\tfmaxnm\tv[0-9]+\.4s, v[0-9]+\.4s, > v[0-9]+\.4s\n} 1 } } */ > +/* { dg-final { scan-assembler-times {\tfmaxnmv\ts[0-9]+, v[0-9]+\.4s\n} 1 } > } */ > +/* { dg-final { scan-assembler-times {\tfmaxnm\tv[0-9]+\.2d, v[0-9]+\.2d, > v[0-9]+\.2d\n} 1 } } */ > +/* { dg-final { scan-assembler-times {\tfmaxnmp\td[0-9]+, v[0-9]+\.2d\n} 1 } > } */ > diff --git a/gcc/testsuite/gcc.target/aarch64/fmaxnm_2.c > b/gcc/testsuite/gcc.target/aarch64/fmaxnm_2.c > new file mode 100644 > index 00000000000..6e48ac8eeee > --- /dev/null > +++ b/gcc/testsuite/gcc.target/aarch64/fmaxnm_2.c > @@ -0,0 +1,20 @@ > +/* { dg-options "-O2 -ftree-vectorize -fno-vect-cost-model" } */ > + > +#pragma GCC target "+nosve" > + > +void > +f (double *restrict res, double *restrict ptr) > +{ > + double x0 = res[0]; > + double x1 = res[1]; > + for (int i = 0; i < 128; i += 2) > + { > + x0 = __builtin_fmax (x0, ptr[i + 0]); > + x1 = __builtin_fmax (x1, ptr[i + 1]); > + } > + res[0] = x0; > + res[1] = x1; > +} > + > +/* { dg-final { scan-assembler-times {\tfmaxnm\tv[0-9]+\.2d, v[0-9]+\.2d, > v[0-9]+\.2d\n} 1 } } */ > +/* { dg-final { scan-assembler {\tstr\tq[0-9]+, \[x0\]\n} } } */ > diff --git a/gcc/testsuite/gcc.target/aarch64/fminnm_1.c > b/gcc/testsuite/gcc.target/aarch64/fminnm_1.c > new file mode 100644 > index 00000000000..1cf372b2a6b > --- /dev/null > +++ b/gcc/testsuite/gcc.target/aarch64/fminnm_1.c > @@ -0,0 +1,24 @@ > +/* { dg-options "-O2 -ftree-vectorize -fno-vect-cost-model" } */ > + > +#pragma GCC target "+nosve" > + > +float > +f1 (float x, float *ptr) > +{ > + for (int i = 0; i < 128; ++i) > + x = __builtin_fminf (x, ptr[i]); > + return x; > +} > + > +double > +f2 (double x, double *ptr) > +{ > + for (int i = 0; i < 128; ++i) > + x = __builtin_fmin (x, ptr[i]); > + return x; > +} > + > +/* { dg-final { scan-assembler-times 
{\tfminnm\tv[0-9]+\.4s, v[0-9]+\.4s, > v[0-9]+\.4s\n} 1 } } */ > +/* { dg-final { scan-assembler-times {\tfminnmv\ts[0-9]+, v[0-9]+\.4s\n} 1 } > } */ > +/* { dg-final { scan-assembler-times {\tfminnm\tv[0-9]+\.2d, v[0-9]+\.2d, > v[0-9]+\.2d\n} 1 } } */ > +/* { dg-final { scan-assembler-times {\tfminnmp\td[0-9]+, v[0-9]+\.2d\n} 1 } > } */ > diff --git a/gcc/testsuite/gcc.target/aarch64/fminnm_2.c > b/gcc/testsuite/gcc.target/aarch64/fminnm_2.c > new file mode 100644 > index 00000000000..543e1884051 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/aarch64/fminnm_2.c > @@ -0,0 +1,20 @@ > +/* { dg-options "-O2 -ftree-vectorize -fno-vect-cost-model" } */ > + > +#pragma GCC target "+nosve" > + > +void > +f (double *restrict res, double *restrict ptr) > +{ > + double x0 = res[0]; > + double x1 = res[1]; > + for (int i = 0; i < 128; i += 2) > + { > + x0 = __builtin_fmin (x0, ptr[i + 0]); > + x1 = __builtin_fmin (x1, ptr[i + 1]); > + } > + res[0] = x0; > + res[1] = x1; > +} > + > +/* { dg-final { scan-assembler-times {\tfminnm\tv[0-9]+\.2d, v[0-9]+\.2d, > v[0-9]+\.2d\n} 1 } } */ > +/* { dg-final { scan-assembler {\tstr\tq[0-9]+, \[x0\]\n} } } */ > diff --git a/gcc/testsuite/gcc.target/aarch64/sve/fmaxnm_2.c > b/gcc/testsuite/gcc.target/aarch64/sve/fmaxnm_2.c > new file mode 100644 > index 00000000000..ee3cdc20f96 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/aarch64/sve/fmaxnm_2.c > @@ -0,0 +1,22 @@ > +/* { dg-options "-O2 -ftree-vectorize -fno-vect-cost-model" } */ > + > +float > +f1 (float x, float *ptr) > +{ > + for (int i = 0; i < 128; ++i) > + x = __builtin_fmaxf (x, ptr[i]); > + return x; > +} > + > +double > +f2 (double x, double *ptr) > +{ > + for (int i = 0; i < 128; ++i) > + x = __builtin_fmax (x, ptr[i]); > + return x; > +} > + > +/* { dg-final { scan-assembler > {\twhilelo\t(p[0-7])\.s,.*\tfmaxnm\tz[0-9]+\.s, \1/m, z[0-9]+\.s, > z[0-9]+\.s\n} } } */ > +/* { dg-final { scan-assembler-times {\tfmaxnmv\ts[0-9]+, p[0-7], > z[0-9]+\.s\n} 1 } } */ > +/* { dg-final 
{ scan-assembler > {\twhilelo\t(p[0-7])\.d,.*\tfmaxnm\tz[0-9]+\.d, \1/m, z[0-9]+\.d, > z[0-9]+\.d\n} } } */ > +/* { dg-final { scan-assembler-times {\tfmaxnmv\td[0-9]+, p[0-7], > z[0-9]+\.d\n} 1 } } */ > diff --git a/gcc/testsuite/gcc.target/aarch64/sve/fmaxnm_3.c > b/gcc/testsuite/gcc.target/aarch64/sve/fmaxnm_3.c > new file mode 100644 > index 00000000000..a8eee0f4b26 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/aarch64/sve/fmaxnm_3.c > @@ -0,0 +1,18 @@ > +/* { dg-options "-O2 -ftree-vectorize -fno-vect-cost-model" } */ > + > +void > +f (double *restrict res, double *restrict ptr) > +{ > + double x0 = res[0]; > + double x1 = res[1]; > + for (int i = 0; i < 128; i += 2) > + { > + x0 = __builtin_fmax (x0, ptr[i + 0]); > + x1 = __builtin_fmax (x1, ptr[i + 1]); > + } > + res[0] = x0; > + res[1] = x1; > +} > + > +/* { dg-final { scan-assembler > {\twhilelo\t(p[0-7])\.d,.*\tfmaxnm\tz[0-9]+\.d, \1/m, z[0-9]+\.d, > z[0-9]+\.d\n} } } */ > +/* { dg-final { scan-assembler-times {\tfmaxnmv\td[0-9]+, p[0-7], > z[0-9]+\.d\n} 2 } } */ > diff --git a/gcc/testsuite/gcc.target/aarch64/sve/fminnm_2.c > b/gcc/testsuite/gcc.target/aarch64/sve/fminnm_2.c > new file mode 100644 > index 00000000000..10aced05f1a > --- /dev/null > +++ b/gcc/testsuite/gcc.target/aarch64/sve/fminnm_2.c > @@ -0,0 +1,22 @@ > +/* { dg-options "-O2 -ftree-vectorize -fno-vect-cost-model" } */ > + > +float > +f1 (float x, float *ptr) > +{ > + for (int i = 0; i < 128; ++i) > + x = __builtin_fminf (x, ptr[i]); > + return x; > +} > + > +double > +f2 (double x, double *ptr) > +{ > + for (int i = 0; i < 128; ++i) > + x = __builtin_fmin (x, ptr[i]); > + return x; > +} > + > +/* { dg-final { scan-assembler > {\twhilelo\t(p[0-7])\.s,.*\tfminnm\tz[0-9]+\.s, \1/m, z[0-9]+\.s, > z[0-9]+\.s\n} } } */ > +/* { dg-final { scan-assembler-times {\tfminnmv\ts[0-9]+, p[0-7], > z[0-9]+\.s\n} 1 } } */ > +/* { dg-final { scan-assembler > {\twhilelo\t(p[0-7])\.d,.*\tfminnm\tz[0-9]+\.d, \1/m, z[0-9]+\.d, > z[0-9]+\.d\n} } } */ > +/* 
{ dg-final { scan-assembler-times {\tfminnmv\td[0-9]+, p[0-7], > z[0-9]+\.d\n} 1 } } */ > diff --git a/gcc/testsuite/gcc.target/aarch64/sve/fminnm_3.c > b/gcc/testsuite/gcc.target/aarch64/sve/fminnm_3.c > new file mode 100644 > index 00000000000..80ad0160249 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/aarch64/sve/fminnm_3.c > @@ -0,0 +1,18 @@ > +/* { dg-options "-O2 -ftree-vectorize -fno-vect-cost-model" } */ > + > +void > +f (double *restrict res, double *restrict ptr) > +{ > + double x0 = res[0]; > + double x1 = res[1]; > + for (int i = 0; i < 128; i += 2) > + { > + x0 = __builtin_fmin (x0, ptr[i + 0]); > + x1 = __builtin_fmin (x1, ptr[i + 1]); > + } > + res[0] = x0; > + res[1] = x1; > +} > + > +/* { dg-final { scan-assembler > {\twhilelo\t(p[0-7])\.d,.*\tfminnm\tz[0-9]+\.d, \1/m, z[0-9]+\.d, > z[0-9]+\.d\n} } } */ > +/* { dg-final { scan-assembler-times {\tfminnmv\td[0-9]+, p[0-7], > z[0-9]+\.d\n} 2 } } */ > diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c > index cae895a88f2..726cda05e7a 100644 > --- a/gcc/tree-vect-loop.c > +++ b/gcc/tree-vect-loop.c > @@ -3185,9 +3185,22 @@ reduction_fn_for_scalar_code (code_helper code, > internal_fn *reduc_fn) > return true; > > default: > - break; > - } > - return false; > + return false; > + } > + else > + switch (combined_fn (code)) > + { > + CASE_CFN_FMAX: > + *reduc_fn = IFN_REDUC_FMAX; > + return true; > + > + CASE_CFN_FMIN: > + *reduc_fn = IFN_REDUC_FMIN; > + return true; > + > + default: > + return false; > + } > } > > /* If there is a neutral value X such that a reduction would not be affected > @@ -3223,9 +3236,18 @@ neutral_op_for_reduction (tree scalar_type, > code_helper code, > return initial_value; > > default: > - break; > + return NULL_TREE; > + } > + else > + switch (combined_fn (code)) > + { > + CASE_CFN_FMIN: > + CASE_CFN_FMAX: > + return initial_value; > + > + default: > + return NULL_TREE; > } > - return NULL_TREE; > } > > /* Error reporting helper for vect_is_simple_reduction below. 
GIMPLE > statement > @@ -3255,9 +3277,18 @@ needs_fold_left_reduction_p (tree type, code_helper > code) > return false; > > default: > - break; > + return !flag_associative_math; > + } > + else > + switch (combined_fn (code)) > + { > + CASE_CFN_FMIN: > + CASE_CFN_FMAX: > + return false; > + > + default: > + return !flag_associative_math; > } > - return !flag_associative_math; > } > > if (INTEGRAL_TYPE_P (type)) > -- > 2.25.1 >