Spencer Abson <spencer.ab...@arm.com> writes: > Extend the binary op/UNSPEC_SEL combiner patterns from SVE_FULL_F/ > SVE_FULL_F_B16B16 to SVE_F/SVE_F_B16B16, where the strictness value > is SVE_RELAXED_GP. > > gcc/ChangeLog: > > * config/aarch64/aarch64-sve.md (*cond_<optab><mode>_2_relaxed): > Extend from SVE_FULL_F_B16B16 to SVE_F_B16B16. > (*cond_<optab><mode>_3_relaxed): Likewise. > (*cond_<optab><mode>_any_relaxed): Likewise. > (*cond_<optab><mode>_any_const_relaxed): Extend from SVE_FULL_F > to SVE_F. > (*cond_add<mode>_2_const_relaxed): Likewise. > (*cond_add<mode>_any_const_relaxed): Likewise. > (*cond_sub<mode>_3_const_relaxed): Likewise. > (*cond_sub<mode>_const_relaxed): Likewise. > > gcc/testsuite/ChangeLog: > > * g++.target/aarch64/sve/unpacked_cond_binary_bf16_1.C: New test. > * gcc.target/aarch64/sve/unpacked_cond_builtin_fmax_1.c: Likewise. > * gcc.target/aarch64/sve/unpacked_cond_builtin_fmin_1.c: Likewise. > * gcc.target/aarch64/sve/unpacked_cond_fadd_1.c: Likewise. > * gcc.target/aarch64/sve/unpacked_cond_fdiv_1.c: Likewise. > * gcc.target/aarch64/sve/unpacked_cond_fmaxnm_1.c: Likewise. > * gcc.target/aarch64/sve/unpacked_cond_fminnm_1.c: Likewise. > * gcc.target/aarch64/sve/unpacked_cond_fmul_1.c: Likewise. > * gcc.target/aarch64/sve/unpacked_cond_fsubr_1.c: Likewise.
Here too it might be good to have a token test for the default trapping-math case, to make sure that the SELs are still present. OK with that change, thanks. Richard > --- > gcc/config/aarch64/aarch64-sve.md | 98 +++++++++---------- > .../aarch64/sve/unpacked_cond_binary_bf16_1.C | 46 +++++++++ > .../sve/unpacked_cond_builtin_fmax_1.c | 47 +++++++++ > .../sve/unpacked_cond_builtin_fmin_1.c | 47 +++++++++ > .../aarch64/sve/unpacked_cond_fadd_1.c | 58 +++++++++++ > .../aarch64/sve/unpacked_cond_fdiv_1.c | 43 ++++++++ > .../aarch64/sve/unpacked_cond_fmaxnm_1.c | 49 ++++++++++ > .../aarch64/sve/unpacked_cond_fminnm_1.c | 49 ++++++++++ > .../aarch64/sve/unpacked_cond_fmul_1.c | 46 +++++++++ > .../aarch64/sve/unpacked_cond_fsubr_1.c | 53 ++++++++++ > 10 files changed, 487 insertions(+), 49 deletions(-) > create mode 100644 > gcc/testsuite/g++.target/aarch64/sve/unpacked_cond_binary_bf16_1.C > create mode 100644 > gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_builtin_fmax_1.c > create mode 100644 > gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_builtin_fmin_1.c > create mode 100644 > gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fadd_1.c > create mode 100644 > gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fdiv_1.c > create mode 100644 > gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fmaxnm_1.c > create mode 100644 > gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fminnm_1.c > create mode 100644 > gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fmul_1.c > create mode 100644 > gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fsubr_1.c > > diff --git a/gcc/config/aarch64/aarch64-sve.md > b/gcc/config/aarch64/aarch64-sve.md > index 79a087837de..d111e0b9261 100644 > --- a/gcc/config/aarch64/aarch64-sve.md > +++ b/gcc/config/aarch64/aarch64-sve.md > @@ -5585,14 +5585,14 @@ > > ;; Predicated floating-point operations, merging with the first input. 
> (define_insn_and_rewrite "*cond_<optab><mode>_2_relaxed" > - [(set (match_operand:SVE_FULL_F_B16B16 0 "register_operand") > - (unspec:SVE_FULL_F_B16B16 > + [(set (match_operand:SVE_F_B16B16 0 "register_operand") > + (unspec:SVE_F_B16B16 > [(match_operand:<VPRED> 1 "register_operand") > - (unspec:SVE_FULL_F_B16B16 > + (unspec:SVE_F_B16B16 > [(match_operand 4) > (const_int SVE_RELAXED_GP) > - (match_operand:SVE_FULL_F_B16B16 2 "register_operand") > - (match_operand:SVE_FULL_F_B16B16 3 "register_operand")] > + (match_operand:SVE_F_B16B16 2 "register_operand") > + (match_operand:SVE_F_B16B16 3 "register_operand")] > SVE_COND_FP_BINARY) > (match_dup 2)] > UNSPEC_SEL))] > @@ -5628,14 +5628,14 @@ > > ;; Same for operations that take a 1-bit constant. > (define_insn_and_rewrite "*cond_<optab><mode>_2_const_relaxed" > - [(set (match_operand:SVE_FULL_F 0 "register_operand") > - (unspec:SVE_FULL_F > + [(set (match_operand:SVE_F 0 "register_operand") > + (unspec:SVE_F > [(match_operand:<VPRED> 1 "register_operand") > - (unspec:SVE_FULL_F > + (unspec:SVE_F > [(match_operand 4) > (const_int SVE_RELAXED_GP) > - (match_operand:SVE_FULL_F 2 "register_operand") > - (match_operand:SVE_FULL_F 3 "<sve_pred_fp_rhs2_immediate>")] > + (match_operand:SVE_F 2 "register_operand") > + (match_operand:SVE_F 3 "<sve_pred_fp_rhs2_immediate>")] > SVE_COND_FP_BINARY_I1) > (match_dup 2)] > UNSPEC_SEL))] > @@ -5671,14 +5671,14 @@ > > ;; Predicated floating-point operations, merging with the second input. 
> (define_insn_and_rewrite "*cond_<optab><mode>_3_relaxed" > - [(set (match_operand:SVE_FULL_F_B16B16 0 "register_operand") > - (unspec:SVE_FULL_F_B16B16 > + [(set (match_operand:SVE_F_B16B16 0 "register_operand") > + (unspec:SVE_F_B16B16 > [(match_operand:<VPRED> 1 "register_operand") > - (unspec:SVE_FULL_F_B16B16 > + (unspec:SVE_F_B16B16 > [(match_operand 4) > (const_int SVE_RELAXED_GP) > - (match_operand:SVE_FULL_F_B16B16 2 "register_operand") > - (match_operand:SVE_FULL_F_B16B16 3 "register_operand")] > + (match_operand:SVE_F_B16B16 2 "register_operand") > + (match_operand:SVE_F_B16B16 3 "register_operand")] > SVE_COND_FP_BINARY) > (match_dup 3)] > UNSPEC_SEL))] > @@ -5714,16 +5714,16 @@ > > ;; Predicated floating-point operations, merging with an independent value. > (define_insn_and_rewrite "*cond_<optab><mode>_any_relaxed" > - [(set (match_operand:SVE_FULL_F_B16B16 0 "register_operand") > - (unspec:SVE_FULL_F_B16B16 > + [(set (match_operand:SVE_F_B16B16 0 "register_operand") > + (unspec:SVE_F_B16B16 > [(match_operand:<VPRED> 1 "register_operand") > - (unspec:SVE_FULL_F_B16B16 > + (unspec:SVE_F_B16B16 > [(match_operand 5) > (const_int SVE_RELAXED_GP) > - (match_operand:SVE_FULL_F_B16B16 2 "register_operand") > - (match_operand:SVE_FULL_F_B16B16 3 "register_operand")] > + (match_operand:SVE_F_B16B16 2 "register_operand") > + (match_operand:SVE_F_B16B16 3 "register_operand")] > SVE_COND_FP_BINARY) > - (match_operand:SVE_FULL_F_B16B16 4 "aarch64_simd_reg_or_zero")] > + (match_operand:SVE_F_B16B16 4 "aarch64_simd_reg_or_zero")] > UNSPEC_SEL))] > "TARGET_SVE > && (<supports_bf16> || !<is_bf16>) > @@ -5796,16 +5796,16 @@ > > ;; Same for operations that take a 1-bit constant. 
> (define_insn_and_rewrite "*cond_<optab><mode>_any_const_relaxed" > - [(set (match_operand:SVE_FULL_F 0 "register_operand") > - (unspec:SVE_FULL_F > + [(set (match_operand:SVE_F 0 "register_operand") > + (unspec:SVE_F > [(match_operand:<VPRED> 1 "register_operand") > - (unspec:SVE_FULL_F > + (unspec:SVE_F > [(match_operand 5) > (const_int SVE_RELAXED_GP) > - (match_operand:SVE_FULL_F 2 "register_operand") > - (match_operand:SVE_FULL_F 3 "<sve_pred_fp_rhs2_immediate>")] > + (match_operand:SVE_F 2 "register_operand") > + (match_operand:SVE_F 3 "<sve_pred_fp_rhs2_immediate>")] > SVE_COND_FP_BINARY_I1) > - (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero")] > + (match_operand:SVE_F 4 "aarch64_simd_reg_or_zero")] > UNSPEC_SEL))] > "TARGET_SVE && !rtx_equal_p (operands[2], operands[4])" > {@ [ cons: =0 , 1 , 2 , 4 ] > @@ -5892,14 +5892,14 @@ > ;; Predicated floating-point addition of a constant, merging with the > ;; first input. > (define_insn_and_rewrite "*cond_add<mode>_2_const_relaxed" > - [(set (match_operand:SVE_FULL_F 0 "register_operand") > - (unspec:SVE_FULL_F > + [(set (match_operand:SVE_F 0 "register_operand") > + (unspec:SVE_F > [(match_operand:<VPRED> 1 "register_operand") > - (unspec:SVE_FULL_F > + (unspec:SVE_F > [(match_operand 4) > (const_int SVE_RELAXED_GP) > - (match_operand:SVE_FULL_F 2 "register_operand") > - (match_operand:SVE_FULL_F 3 > "aarch64_sve_float_arith_with_sub_immediate")] > + (match_operand:SVE_F 2 "register_operand") > + (match_operand:SVE_F 3 > "aarch64_sve_float_arith_with_sub_immediate")] > UNSPEC_COND_FADD) > (match_dup 2)] > UNSPEC_SEL))] > @@ -5940,16 +5940,16 @@ > ;; Predicated floating-point addition of a constant, merging with an > ;; independent value. 
> (define_insn_and_rewrite "*cond_add<mode>_any_const_relaxed" > - [(set (match_operand:SVE_FULL_F 0 "register_operand") > - (unspec:SVE_FULL_F > + [(set (match_operand:SVE_F 0 "register_operand") > + (unspec:SVE_F > [(match_operand:<VPRED> 1 "register_operand") > - (unspec:SVE_FULL_F > + (unspec:SVE_F > [(match_operand 5) > (const_int SVE_RELAXED_GP) > - (match_operand:SVE_FULL_F 2 "register_operand") > - (match_operand:SVE_FULL_F 3 > "aarch64_sve_float_arith_with_sub_immediate")] > + (match_operand:SVE_F 2 "register_operand") > + (match_operand:SVE_F 3 > "aarch64_sve_float_arith_with_sub_immediate")] > UNSPEC_COND_FADD) > - (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero")] > + (match_operand:SVE_F 4 "aarch64_simd_reg_or_zero")] > UNSPEC_SEL))] > "TARGET_SVE && !rtx_equal_p (operands[2], operands[4])" > {@ [ cons: =0 , 1 , 2 , 3 , 4 ] > @@ -6207,14 +6207,14 @@ > ;; Predicated floating-point subtraction from a constant, merging with the > ;; second input. > (define_insn_and_rewrite "*cond_sub<mode>_3_const_relaxed" > - [(set (match_operand:SVE_FULL_F 0 "register_operand") > - (unspec:SVE_FULL_F > + [(set (match_operand:SVE_F 0 "register_operand") > + (unspec:SVE_F > [(match_operand:<VPRED> 1 "register_operand") > - (unspec:SVE_FULL_F > + (unspec:SVE_F > [(match_operand 4) > (const_int SVE_RELAXED_GP) > - (match_operand:SVE_FULL_F 2 "aarch64_sve_float_arith_immediate") > - (match_operand:SVE_FULL_F 3 "register_operand")] > + (match_operand:SVE_F 2 "aarch64_sve_float_arith_immediate") > + (match_operand:SVE_F 3 "register_operand")] > UNSPEC_COND_FSUB) > (match_dup 3)] > UNSPEC_SEL))] > @@ -6251,16 +6251,16 @@ > ;; Predicated floating-point subtraction from a constant, merging with an > ;; independent value. 
> (define_insn_and_rewrite "*cond_sub<mode>_const_relaxed" > - [(set (match_operand:SVE_FULL_F 0 "register_operand") > - (unspec:SVE_FULL_F > + [(set (match_operand:SVE_F 0 "register_operand") > + (unspec:SVE_F > [(match_operand:<VPRED> 1 "register_operand") > - (unspec:SVE_FULL_F > + (unspec:SVE_F > [(match_operand 5) > (const_int SVE_RELAXED_GP) > - (match_operand:SVE_FULL_F 2 "aarch64_sve_float_arith_immediate") > - (match_operand:SVE_FULL_F 3 "register_operand")] > + (match_operand:SVE_F 2 "aarch64_sve_float_arith_immediate") > + (match_operand:SVE_F 3 "register_operand")] > UNSPEC_COND_FSUB) > - (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero")] > + (match_operand:SVE_F 4 "aarch64_simd_reg_or_zero")] > UNSPEC_SEL))] > "TARGET_SVE && !rtx_equal_p (operands[3], operands[4])" > {@ [ cons: =0 , 1 , 3 , 4 ] > diff --git > a/gcc/testsuite/g++.target/aarch64/sve/unpacked_cond_binary_bf16_1.C > b/gcc/testsuite/g++.target/aarch64/sve/unpacked_cond_binary_bf16_1.C > new file mode 100644 > index 00000000000..560d874cff7 > --- /dev/null > +++ b/gcc/testsuite/g++.target/aarch64/sve/unpacked_cond_binary_bf16_1.C > @@ -0,0 +1,46 @@ > +/* { dg-do compile }*/ > +/* { dg-options "-O -ffinite-math-only -fno-signed-zeros -fno-trapping-math > -msve-vector-bits=2048 " } */ > + > +#include <stdint.h> > +#pragma GCC target "arch=armv9-a+sve-b16b16" > + > +#define ADD(a, b) a + b > +#define SUB(a, b) a - b > +#define MUL(a, b) a * b > +#define MAX(a, b) (a > b) ? a : b > +#define MIN(a, b) (a > b) ? b : a > + > +#define COND_OP(OP, TYPE, PRED_TYPE, ARG2, MERGE) > \ > + TYPE test_##OP##_##TYPE##_##ARG2##_##MERGE (TYPE a, TYPE b, TYPE c, > PRED_TYPE p) \ > + {return p ? 
OP (a, ARG2) : MERGE; } > + > +#define TEST_OP(OP, TYPE, PRED_TYPE, T) \ > + T (OP, TYPE, PRED_TYPE, b, a) \ > + T (OP, TYPE, PRED_TYPE, b, b) \ > + T (OP, TYPE, PRED_TYPE, b, c) > + > +#define TEST_ALL(TYPE, PRED_TYPE, T) \ > + TEST_OP (ADD, TYPE, PRED_TYPE, T) \ > + TEST_OP (SUB, TYPE, PRED_TYPE, T) \ > + TEST_OP (MUL, TYPE, PRED_TYPE, T) \ > + TEST_OP (MAX, TYPE, PRED_TYPE, T) \ > + TEST_OP (MIN, TYPE, PRED_TYPE, T) > + > +#define TEST(TYPE, PTYPE, SIZE) \ > + typedef TYPE TYPE##SIZE __attribute__ ((vector_size (SIZE))); \ > + typedef PTYPE PTYPE##SIZE __attribute__ ((vector_size (SIZE))); \ > + TEST_ALL (TYPE##SIZE, PTYPE##SIZE, COND_OP) > + > +TEST (__bf16, uint16_t, 128) > + > +TEST (__bf16, uint16_t, 64) > + > +/* { dg-final { scan-assembler-times {\tbfadd\tz[0-9]+\.h, p[0-7]/m, > z[0-9]+\.h, z[0-9]+\.h\n} 6 } } */ > +/* { dg-final { scan-assembler-times {\tbfsub\tz[0-9]+\.h, p[0-7]/m, > z[0-9]+\.h, z[0-9]+\.h\n} 6 } } */ > +/* { dg-final { scan-assembler-times {\tbfmul\tz[0-9]+\.h, p[0-7]/m, > z[0-9]+\.h, z[0-9]+\.h\n} 6 } } */ > + > +/* { dg-final { scan-assembler-times {\tbfminnm\tz[0-9]+\.h, p[0-7]/m, > z[0-9]+\.h, z[0-9]+\.h\n} 6 } } */ > +/* { dg-final { scan-assembler-times {\tbfmaxnm\tz[0-9]+\.h, p[0-7]/m, > z[0-9]+\.h, z[0-9]+\.h\n} 6 } } */ > + > +// There's no BFSUBR. 
> +/* { dg-final { scan-assembler-times {\tsel\t} 2 } } */ > diff --git > a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_builtin_fmax_1.c > b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_builtin_fmax_1.c > new file mode 100644 > index 00000000000..50ad22b81bd > --- /dev/null > +++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_builtin_fmax_1.c > @@ -0,0 +1,47 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O2 -msve-vector-bits=2048 -ftree-vectorize > -fno-trapping-math" } */ > + > +#include <stdint.h> > + > +#define a_i a[i] > +#define b_i b[i] > +#define c_i c[i] > + > +#define TEST_FN(FN, TYPE0, TYPE1, COUNT, RHS, MERGE) \ > + void \ > + f_##TYPE0##_##TYPE1##_##RHS##_##MERGE (TYPE0 *__restrict out, \ > + TYPE0 *__restrict a, \ > + TYPE0 *__restrict b, \ > + TYPE0 *__restrict c, \ > + TYPE1 *__restrict p) \ > + { \ > + for (unsigned int i = 0; i < COUNT; i++) \ > + out[i] = p[i] ? FN (a[i], (TYPE0)RHS) : MERGE; \ > + } > + > +#define TEST_ALL(FN, TYPE0, TYPE1, COUNT) \ > + TEST_FN (FN, TYPE0, TYPE1, COUNT, b_i, a_i) \ > + TEST_FN (FN, TYPE0, TYPE1, COUNT, b_i, b_i) \ > + TEST_FN (FN, TYPE0, TYPE1, COUNT, b_i, c_i) \ > + TEST_FN (FN, TYPE0, TYPE1, COUNT, 0, a_i) \ > + TEST_FN (FN, TYPE0, TYPE1, COUNT, 0, b_i) \ > + TEST_FN (FN, TYPE0, TYPE1, COUNT, 1, a_i) \ > + TEST_FN (FN, TYPE0, TYPE1, COUNT, 1, b_i) > + > +TEST_ALL (__builtin_fmaxf16, _Float16, uint64_t, 32) > + > +TEST_ALL (__builtin_fmaxf16, _Float16, uint32_t, 64) > + > +TEST_ALL (__builtin_fmaxf32, float, uint64_t, 32) > + > +/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.s, p[0-7]/m, > z[0-9]+\.s\n} 3 } } */ > +/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.s, p[0-7]/m, > z[0-9]+\.s, z[0-9]+\.s\n} 3 } } */ > +/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.s, p[0-7]/m, > z[0-9]+\.s, #0.0\n} 2 } } */ > +/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.s, p[0-7]/m, > z[0-9]+\.s, #1.0\n} 2 } } */ > + > +/* { dg-final { scan-assembler-times 
{\tmovprfx\tz[0-9]+\.h, p[0-7]/m, > z[0-9]+\.h\n} 6 } } */ > +/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.h, p[0-7]/m, > z[0-9]+\.h, z[0-9]+\.h\n} 6 } } */ > +/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.h, p[0-7]/m, > z[0-9]+\.h, #0.0\n} 4 } } */ > +/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.h, p[0-7]/m, > z[0-9]+\.h, #1.0\n} 4 } } */ > + > +/* { dg-final { scan-assembler-not {\tsel\t} } } */ > diff --git > a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_builtin_fmin_1.c > b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_builtin_fmin_1.c > new file mode 100644 > index 00000000000..df860333291 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_builtin_fmin_1.c > @@ -0,0 +1,47 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O2 -msve-vector-bits=2048 -ftree-vectorize > -fno-trapping-math" } */ > + > +#include <stdint.h> > + > +#define a_i a[i] > +#define b_i b[i] > +#define c_i c[i] > + > +#define TEST_FN(FN, TYPE0, TYPE1, COUNT, RHS, MERGE) \ > + void \ > + f_##TYPE0##_##TYPE1##_##RHS##_##MERGE (TYPE0 *__restrict out, \ > + TYPE0 *__restrict a, \ > + TYPE0 *__restrict b, \ > + TYPE0 *__restrict c, \ > + TYPE1 *__restrict p) \ > + { \ > + for (unsigned int i = 0; i < COUNT; i++) \ > + out[i] = p[i] ? 
FN (a[i], (TYPE0)RHS) : MERGE; \ > + } > + > +#define TEST_ALL(FN, TYPE0, TYPE1, COUNT) \ > + TEST_FN (FN, TYPE0, TYPE1, COUNT, b_i, a_i) \ > + TEST_FN (FN, TYPE0, TYPE1, COUNT, b_i, b_i) \ > + TEST_FN (FN, TYPE0, TYPE1, COUNT, b_i, c_i) \ > + TEST_FN (FN, TYPE0, TYPE1, COUNT, 0, a_i) \ > + TEST_FN (FN, TYPE0, TYPE1, COUNT, 0, b_i) \ > + TEST_FN (FN, TYPE0, TYPE1, COUNT, 1, a_i) \ > + TEST_FN (FN, TYPE0, TYPE1, COUNT, 1, b_i) > + > +TEST_ALL (__builtin_fminf16, _Float16, uint64_t, 32) > + > +TEST_ALL (__builtin_fminf16, _Float16, uint32_t, 64) > + > +TEST_ALL (__builtin_fminf32, float, uint64_t, 32) > + > +/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.s, p[0-7]/m, > z[0-9]+\.s\n} 3 } } */ > +/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.s, p[0-7]/m, > z[0-9]+\.s, z[0-9]+\.s\n} 3 } } */ > +/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.s, p[0-7]/m, > z[0-9]+\.s, #0.0\n} 2 } } */ > +/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.s, p[0-7]/m, > z[0-9]+\.s, #1.0\n} 2 } } */ > + > +/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.h, p[0-7]/m, > z[0-9]+\.h\n} 6 } } */ > +/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.h, p[0-7]/m, > z[0-9]+\.h, z[0-9]+\.h\n} 6 } } */ > +/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.h, p[0-7]/m, > z[0-9]+\.h, #0.0\n} 4 } } */ > +/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.h, p[0-7]/m, > z[0-9]+\.h, #1.0\n} 4 } } */ > + > +/* { dg-final { scan-assembler-not {\tsel\t} } } */ > diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fadd_1.c > b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fadd_1.c > new file mode 100644 > index 00000000000..0fc820fc39a > --- /dev/null > +++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fadd_1.c > @@ -0,0 +1,58 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O2 -ftree-vectorize -msve-vector-bits=2048 > -fno-trapping-math" } */ > + > +#include <stdint.h> > + > +#define a_i a[i] > +#define b_i b[i] > 
+#define c_i c[i] > +#define imm_p5 0.5 > + > +#define ADD(A, B) A + B > + > +#define TEST_FN(FN, TYPE0, TYPE1, COUNT, NAME, RHS, MERGE) \ > + void \ > + f_##TYPE0##_##TYPE1##_##NAME##_##MERGE (TYPE0 *__restrict out, \ > + TYPE0 *__restrict a, \ > + TYPE0 *__restrict b, \ > + TYPE0 *__restrict c, \ > + TYPE1 *__restrict p) \ > + { \ > + for (unsigned int i = 0; i < COUNT; i++) \ > + out[i] = p[i] ? FN (a[i], (TYPE0)RHS) : MERGE; \ > + } > + > +#define TEST_ALL(FN, TYPE0, TYPE1, COUNT) \ > + TEST_FN (FN, TYPE0, TYPE1, COUNT, b_i, b[i], a_i) \ > + TEST_FN (FN, TYPE0, TYPE1, COUNT, b_i, b[i], b_i) \ > + TEST_FN (FN, TYPE0, TYPE1, COUNT, b_i, b[i], c_i) \ > + TEST_FN (FN, TYPE0, TYPE1, COUNT, one, 1, a_i) \ > + TEST_FN (FN, TYPE0, TYPE1, COUNT, one, 1, b_i) \ > + TEST_FN (FN, TYPE0, TYPE1, COUNT, none, -1, a_i) \ > + TEST_FN (FN, TYPE0, TYPE1, COUNT, none, -1, b_i) \ > + TEST_FN (FN, TYPE0, TYPE1, COUNT, p5, 0.5, a_i) \ > + TEST_FN (FN, TYPE0, TYPE1, COUNT, p5, 0.5, b_i) \ > + TEST_FN (FN, TYPE0, TYPE1, COUNT, np5, -0.5, a_i) \ > + TEST_FN (FN, TYPE0, TYPE1, COUNT, np5, -0.5, b_i) > + > +TEST_ALL (ADD, _Float16, uint64_t, 32) > + > +TEST_ALL (ADD, _Float16, uint32_t, 64) > + > +TEST_ALL (ADD, float, uint64_t, 32) > + > +/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.s, p[0-7]/m, > z[0-9]+\.s\n} 5 } } */ > +/* { dg-final { scan-assembler-times {\tfadd\tz[0-9]+\.s, p[0-7]/m, > z[0-9]+\.s, z[0-9]+\.s\n} 3 } } */ > +/* { dg-final { scan-assembler-times {\tfadd\tz[0-9]+\.s, p[0-7]/m, > z[0-9]+\.s, #0.5\n} 2 } } */ > +/* { dg-final { scan-assembler-times {\tfadd\tz[0-9]+\.s, p[0-7]/m, > z[0-9]+\.s, #1.0\n} 2 } } */ > +/* { dg-final { scan-assembler-times {\tfsub\tz[0-9]+\.s, p[0-7]/m, > z[0-9]+\.s, #0.5\n} 2 } } */ > +/* { dg-final { scan-assembler-times {\tfsub\tz[0-9]+\.s, p[0-7]/m, > z[0-9]+\.s, #1.0\n} 2 } } */ > + > +/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.h, p[0-7]/m, > z[0-9]+\.h\n} 10 } } */ > +/* { dg-final { scan-assembler-times 
{\tfadd\tz[0-9]+\.h, p[0-7]/m, > z[0-9]+\.h, z[0-9]+\.h\n} 6 } } */ > +/* { dg-final { scan-assembler-times {\tfadd\tz[0-9]+\.h, p[0-7]/m, > z[0-9]+\.h, #0.5\n} 4 } } */ > +/* { dg-final { scan-assembler-times {\tfadd\tz[0-9]+\.h, p[0-7]/m, > z[0-9]+\.h, #1.0\n} 4 } } */ > +/* { dg-final { scan-assembler-times {\tfsub\tz[0-9]+\.h, p[0-7]/m, > z[0-9]+\.h, #0.5\n} 4 } } */ > +/* { dg-final { scan-assembler-times {\tfsub\tz[0-9]+\.h, p[0-7]/m, > z[0-9]+\.h, #1.0\n} 4 } } */ > + > +/* { dg-final { scan-assembler-not {\tsel\t} } } */ > diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fdiv_1.c > b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fdiv_1.c > new file mode 100644 > index 00000000000..0339562d0c3 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fdiv_1.c > @@ -0,0 +1,43 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O2 -ftree-vectorize -msve-vector-bits=2048 > -fno-trapping-math" } */ > + > +#include <stdint.h> > + > +#define a_i a[i] > +#define b_i b[i] > +#define c_i c[i] > + > +#define DIV(A, B) A / B > + > +#define TEST_FN(FN, TYPE0, TYPE1, COUNT, RHS, MERGE) \ > + void \ > + f_##TYPE0##_##TYPE1##_##RHS##_##MERGE (TYPE0 *__restrict out, \ > + TYPE0 *__restrict a, \ > + TYPE0 *__restrict b, \ > + TYPE0 *__restrict c, \ > + TYPE1 *__restrict p) \ > + { \ > + for (unsigned int i = 0; i < COUNT; i++) \ > + out[i] = p[i] ? 
FN (a[i], (TYPE0)RHS) : MERGE; \ > + } > + > +#define TEST_ALL(FN, TYPE0, TYPE1, COUNT) \ > + TEST_FN (FN, TYPE0, TYPE1, COUNT, b_i, a_i) \ > + TEST_FN (FN, TYPE0, TYPE1, COUNT, b_i, b_i) \ > + TEST_FN (FN, TYPE0, TYPE1, COUNT, b_i, c_i) > + > +TEST_ALL (DIV, _Float16, uint64_t, 32) > + > +TEST_ALL (DIV, _Float16, uint32_t, 64) > + > +TEST_ALL (DIV, float, uint64_t, 32) > + > +/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.s, p[0-7]/m, > z[0-9]+\.s\n} 1 } } */ > +/* { dg-final { scan-assembler-times {\tfdivr\tz[0-9]+\.s, p[0-7]/m, > z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */ > +/* { dg-final { scan-assembler-times {\tfdiv\tz[0-9]+\.s, p[0-7]/m, > z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */ > + > +/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.h, p[0-7]/m, > z[0-9]+\.h\n} 2 } } */ > +/* { dg-final { scan-assembler-times {\tfdivr\tz[0-9]+\.h, p[0-7]/m, > z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */ > +/* { dg-final { scan-assembler-times {\tfdiv\tz[0-9]+\.h, p[0-7]/m, > z[0-9]+\.h, z[0-9]+\.h\n} 4 } } */ > + > +/* { dg-final { scan-assembler-not {\tsel\t} } } */ > diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fmaxnm_1.c > b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fmaxnm_1.c > new file mode 100644 > index 00000000000..e7328b4fa9d > --- /dev/null > +++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fmaxnm_1.c > @@ -0,0 +1,49 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O2 -ftree-vectorize -msve-vector-bits=2048 > -fno-signed-zeros -ffinite-math-only -fno-trapping-math" } */ > + > +#include <stdint.h> > + > +#define a_i a[i] > +#define b_i b[i] > +#define c_i c[i] > + > +#define MAX(A, B) (A > B) ? A : B > + > +#define TEST_FN(FN, TYPE0, TYPE1, COUNT, RHS, MERGE) \ > + void \ > + f_##TYPE0##_##TYPE1##_##RHS##_##MERGE (TYPE0 *__restrict out, \ > + TYPE0 *__restrict a, \ > + TYPE0 *__restrict b, \ > + TYPE0 *__restrict c, \ > + TYPE1 *__restrict p) \ > + { \ > + for (unsigned int i = 0; i < COUNT; i++) \ > + out[i] = p[i] ? 
FN (a[i], (TYPE0)RHS) : MERGE; \ > + } > + > +#define TEST_ALL(FN, TYPE0, TYPE1, COUNT) \ > + TEST_FN (FN, TYPE0, TYPE1, COUNT, b_i, a_i) \ > + TEST_FN (FN, TYPE0, TYPE1, COUNT, b_i, b_i) \ > + TEST_FN (FN, TYPE0, TYPE1, COUNT, b_i, c_i) \ > + TEST_FN (FN, TYPE0, TYPE1, COUNT, 0, a_i) \ > + TEST_FN (FN, TYPE0, TYPE1, COUNT, 0, b_i) \ > + TEST_FN (FN, TYPE0, TYPE1, COUNT, 1, a_i) \ > + TEST_FN (FN, TYPE0, TYPE1, COUNT, 1, b_i) > + > +TEST_ALL (MAX, _Float16, uint64_t, 32) > + > +TEST_ALL (MAX, _Float16, uint32_t, 64) > + > +TEST_ALL (MAX, float, uint64_t, 32) > + > +/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.s, p[0-7]/m, > z[0-9]+\.s\n} 3 } } */ > +/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.s, p[0-7]/m, > z[0-9]+\.s, z[0-9]+\.s\n} 3 } } */ > +/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.s, p[0-7]/m, > z[0-9]+\.s, #0.0\n} 2 } } */ > +/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.s, p[0-7]/m, > z[0-9]+\.s, #1.0\n} 2 } } */ > + > +/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.h, p[0-7]/m, > z[0-9]+\.h\n} 6 } } */ > +/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.h, p[0-7]/m, > z[0-9]+\.h, z[0-9]+\.h\n} 6 } } */ > +/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.h, p[0-7]/m, > z[0-9]+\.h, #0.0\n} 4 } } */ > +/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.h, p[0-7]/m, > z[0-9]+\.h, #1.0\n} 4 } } */ > + > +/* { dg-final { scan-assembler-not {\tsel\t} } } */ > diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fminnm_1.c > b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fminnm_1.c > new file mode 100644 > index 00000000000..43bf97439ab > --- /dev/null > +++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fminnm_1.c > @@ -0,0 +1,49 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O2 -ftree-vectorize -msve-vector-bits=2048 > -fno-signed-zeros -ffinite-math-only -fno-trapping-math" } */ > + > +#include <stdint.h> > + > +#define a_i a[i] > +#define b_i b[i] > 
+#define c_i c[i] > + > +#define MIN(A, B) (A < B) ? A : B > + > +#define TEST_FN(FN, TYPE0, TYPE1, COUNT, RHS, MERGE) \ > + void \ > + f_##TYPE0##_##TYPE1##_##RHS##_##MERGE (TYPE0 *__restrict out, \ > + TYPE0 *__restrict a, \ > + TYPE0 *__restrict b, \ > + TYPE0 *__restrict c, \ > + TYPE1 *__restrict p) \ > + { \ > + for (unsigned int i = 0; i < COUNT; i++) \ > + out[i] = p[i] ? FN (a[i], (TYPE0)RHS) : MERGE; \ > + } > + > +#define TEST_ALL(FN, TYPE0, TYPE1, COUNT) \ > + TEST_FN (FN, TYPE0, TYPE1, COUNT, b_i, a_i) \ > + TEST_FN (FN, TYPE0, TYPE1, COUNT, b_i, b_i) \ > + TEST_FN (FN, TYPE0, TYPE1, COUNT, b_i, c_i) \ > + TEST_FN (FN, TYPE0, TYPE1, COUNT, 0, a_i) \ > + TEST_FN (FN, TYPE0, TYPE1, COUNT, 0, b_i) \ > + TEST_FN (FN, TYPE0, TYPE1, COUNT, 1, a_i) \ > + TEST_FN (FN, TYPE0, TYPE1, COUNT, 1, b_i) > + > +TEST_ALL (MIN, _Float16, uint64_t, 32) > + > +TEST_ALL (MIN, _Float16, uint32_t, 64) > + > +TEST_ALL (MIN, float, uint64_t, 32) > + > +/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.s, p[0-7]/m, > z[0-9]+\.s\n} 3 } } */ > +/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.s, p[0-7]/m, > z[0-9]+\.s, z[0-9]+\.s\n} 3 } } */ > +/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.s, p[0-7]/m, > z[0-9]+\.s, #0.0\n} 2 } } */ > +/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.s, p[0-7]/m, > z[0-9]+\.s, #1.0\n} 2 } } */ > + > +/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.h, p[0-7]/m, > z[0-9]+\.h\n} 6 } } */ > +/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.h, p[0-7]/m, > z[0-9]+\.h, z[0-9]+\.h\n} 6 } } */ > +/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.h, p[0-7]/m, > z[0-9]+\.h, #0.0\n} 4 } } */ > +/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.h, p[0-7]/m, > z[0-9]+\.h, #1.0\n} 4 } } */ > + > +/* { dg-final { scan-assembler-not {\tsel\t} } } */ > diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fmul_1.c > b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fmul_1.c > new file 
mode 100644 > index 00000000000..181f1709eeb > --- /dev/null > +++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fmul_1.c > @@ -0,0 +1,46 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O2 -ftree-vectorize -msve-vector-bits=2048 > -fno-trapping-math" } */ > + > +#include <stdint.h> > + > +#define a_i a[i] > +#define b_i b[i] > +#define c_i c[i] > +#define imm_p5 0.5 > + > +#define MUL(A, B) A * B > + > +#define TEST_FN(FN, TYPE0, TYPE1, COUNT, RHS, MERGE) \ > + void \ > + f_##TYPE0##_##TYPE1##_##RHS##_##MERGE (TYPE0 *__restrict out, \ > + TYPE0 *__restrict a, \ > + TYPE0 *__restrict b, \ > + TYPE0 *__restrict c, \ > + TYPE1 *__restrict p) \ > + { \ > + for (unsigned int i = 0; i < COUNT; i++) \ > + out[i] = p[i] ? FN (a[i], (TYPE0)RHS) : MERGE; \ > + } > + > +#define TEST_ALL(FN, TYPE0, TYPE1, COUNT) \ > + TEST_FN (FN, TYPE0, TYPE1, COUNT, b_i, a_i) \ > + TEST_FN (FN, TYPE0, TYPE1, COUNT, b_i, b_i) \ > + TEST_FN (FN, TYPE0, TYPE1, COUNT, b_i, c_i) \ > + TEST_FN (FN, TYPE0, TYPE1, COUNT, imm_p5, a_i) \ > + TEST_FN (FN, TYPE0, TYPE1, COUNT, imm_p5, b_i) > + > +TEST_ALL (MUL, _Float16, uint64_t, 32) > + > +TEST_ALL (MUL, _Float16, uint32_t, 64) > + > +TEST_ALL (MUL, float, uint64_t, 32) > + > +/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.s, p[0-7]/m, > z[0-9]+\.s\n} 2 } } */ > +/* { dg-final { scan-assembler-times {\tfmul\tz[0-9]+\.s, p[0-7]/m, > z[0-9]+\.s, z[0-9]+\.s\n} 3 } } */ > +/* { dg-final { scan-assembler-times {\tfmul\tz[0-9]+\.s, p[0-7]/m, > z[0-9]+\.s, #0.5\n} 2 } } */ > + > +/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.h, p[0-7]/m, > z[0-9]+\.h\n} 4 } } */ > +/* { dg-final { scan-assembler-times {\tfmul\tz[0-9]+\.h, p[0-7]/m, > z[0-9]+\.h, z[0-9]+\.h\n} 6 } } */ > +/* { dg-final { scan-assembler-times {\tfmul\tz[0-9]+\.h, p[0-7]/m, > z[0-9]+\.h, #0.5\n} 4 } } */ > + > +/* { dg-final { scan-assembler-not {\tsel\t} } } */ > diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fsubr_1.c > 
b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fsubr_1.c > new file mode 100644 > index 00000000000..9dbea43c015 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fsubr_1.c > @@ -0,0 +1,53 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O2 -ftree-vectorize -msve-vector-bits=2048 > -fno-trapping-math" } */ > + > +#include <stdint.h> > + > +#define a_i a[i] > +#define b_i b[i] > +#define c_i c[i] > +#define imm_p5 0.5 > + > +#define SUBR(A, B) B - A > + > +#define TEST_FN(FN, TYPE0, TYPE1, COUNT, RHS, MERGE) \ > + void \ > + f_##TYPE0##_##TYPE1##_##RHS##_##MERGE (TYPE0 *__restrict out, \ > + TYPE0 *__restrict a, \ > + TYPE0 *__restrict b, \ > + TYPE0 *__restrict c, \ > + TYPE1 *__restrict p) \ > + { \ > + for (unsigned int i = 0; i < COUNT; i++) \ > + out[i] = p[i] ? FN (a[i], (TYPE0)RHS) : MERGE; \ > + } > + > +#define TEST_ALL(FN, TYPE0, TYPE1, COUNT) \ > + TEST_FN (FN, TYPE0, TYPE1, COUNT, b_i, a_i) \ > + TEST_FN (FN, TYPE0, TYPE1, COUNT, b_i, b_i) \ > + TEST_FN (FN, TYPE0, TYPE1, COUNT, b_i, c_i) \ > + TEST_FN (FN, TYPE0, TYPE1, COUNT, 1, a_i) \ > + TEST_FN (FN, TYPE0, TYPE1, COUNT, 1, b_i) \ > + TEST_FN (FN, TYPE0, TYPE1, COUNT, imm_p5, a_i) \ > + TEST_FN (FN, TYPE0, TYPE1, COUNT, imm_p5, b_i) > + > +TEST_ALL (SUBR, _Float16, uint64_t, 32) > + > +TEST_ALL (SUBR, _Float16, uint32_t, 64) > + > +TEST_ALL (SUBR, float, uint64_t, 32) > + > +/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.s, p[0-7]/m, > z[0-9]+\.s\n} 3 } } */ > + > +/* { dg-final { scan-assembler-times {\tfsub\tz[0-9]+\.s, p[0-7]/m, > z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */ > +/* { dg-final { scan-assembler-times {\tfsubr\tz[0-9]+\.s, p[0-7]/m, > z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */ > +/* { dg-final { scan-assembler-times {\tfsubr\tz[0-9]+\.s, p[0-7]/m, > z[0-9]+\.s, #0.5\n} 2 } } */ > +/* { dg-final { scan-assembler-times {\tfsubr\tz[0-9]+\.s, p[0-7]/m, > z[0-9]+\.s, #1.0\n} 2 } } */ > + > +/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.h, p[0-7]/m, > 
z[0-9]+\.h\n} 6 } } */ > +/* { dg-final { scan-assembler-times {\tfsub\tz[0-9]+\.h, p[0-7]/m, > z[0-9]+\.h, z[0-9]+\.h\n} 4 } } */ > +/* { dg-final { scan-assembler-times {\tfsubr\tz[0-9]+\.h, p[0-7]/m, > z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */ > +/* { dg-final { scan-assembler-times {\tfsubr\tz[0-9]+\.h, p[0-7]/m, > z[0-9]+\.h, #0.5\n} 4 } } */ > +/* { dg-final { scan-assembler-times {\tfsubr\tz[0-9]+\.h, p[0-7]/m, > z[0-9]+\.h, #1.0\n} 4 } } */ > + > +/* { dg-final { scan-assembler-not {\tsel\t} } } */