Spencer Abson <spencer.ab...@arm.com> writes: > This patch extends the expander for fma, fnma, fms, and fnms to support > partial SVE FP modes. > > We add the missing BF16 tests, which we can now trigger, having > implemented the conditional expander. > > We also add tests for the 'merging with multiplicand' case, which this > expander canonicalizes (albeit under SVE_STRICT_GP). > > gcc/ChangeLog: > > * config/aarch64/aarch64-sve.md (@cond_<optab><mode>): Extend > to support partial FP modes. > (*cond_<optab><mode>_2_strict): Extend from SVE_FULL_F to SVE_F, > use aarch64_predicate_operand. > (*cond_<optab><mode>_4_strict): Extend from SVE_FULL_F_B16B16 to > SVE_F_B16B16, use aarch64_predicate_operand. > (*cond_<optab><mode>_any_strict): Likewise. > > gcc/testsuite/ChangeLog: > > * gcc.target/aarch64/sve/unpacked_cond_fmla_1.c: Add test cases > for merging with multiplicand. > * gcc.target/aarch64/sve/unpacked_cond_fmls_1.c: Likewise. > * gcc.target/aarch64/sve/unpacked_cond_fnmla_1.c: Likewise. > * gcc.target/aarch64/sve/unpacked_cond_fnmls_1.c: Likewise. > * gcc.target/aarch64/sve/unpacked_cond_fmla_2.c: New test. > * gcc.target/aarch64/sve/unpacked_cond_fmls_2.c: Likewise. > * gcc.target/aarch64/sve/unpacked_cond_fnmla_2.c: Likewise. > * gcc.target/aarch64/sve/unpacked_cond_fnmls_2.c: Likewise. > * g++.target/aarch64/sve/unpacked_cond_ternary_bf16_1.C: Likewise. > * g++.target/aarch64/sve/unpacked_cond_ternary_bf16_2.C: Likewise.
OK, thanks (with whatever the final call to aarch64_sve_emit_masked_fp_pred looks like). Richard > --- > gcc/config/aarch64/aarch64-sve.md | 61 ++++++++++--------- > .../sve/unpacked_cond_ternary_bf16_1.C | 35 +++++++++++ > .../sve/unpacked_cond_ternary_bf16_2.C | 14 +++++ > .../aarch64/sve/unpacked_cond_fmla_1.c | 4 ++ > .../aarch64/sve/unpacked_cond_fmla_2.c | 18 ++++++ > .../aarch64/sve/unpacked_cond_fmls_1.c | 4 ++ > .../aarch64/sve/unpacked_cond_fmls_2.c | 18 ++++++ > .../aarch64/sve/unpacked_cond_fnmla_1.c | 4 ++ > .../aarch64/sve/unpacked_cond_fnmla_2.c | 18 ++++++ > .../aarch64/sve/unpacked_cond_fnmls_1.c | 4 ++ > .../aarch64/sve/unpacked_cond_fnmls_2.c | 18 ++++++ > 11 files changed, 169 insertions(+), 29 deletions(-) > create mode 100644 > gcc/testsuite/g++.target/aarch64/sve/unpacked_cond_ternary_bf16_1.C > create mode 100644 > gcc/testsuite/g++.target/aarch64/sve/unpacked_cond_ternary_bf16_2.C > create mode 100644 > gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fmla_2.c > create mode 100644 > gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fmls_2.c > create mode 100644 > gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fnmla_2.c > create mode 100644 > gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fnmls_2.c > > diff --git a/gcc/config/aarch64/aarch64-sve.md > b/gcc/config/aarch64/aarch64-sve.md > index e5443980e8b..278f78960a6 100644 > --- a/gcc/config/aarch64/aarch64-sve.md > +++ b/gcc/config/aarch64/aarch64-sve.md > @@ -7599,17 +7599,17 @@ > > ;; Predicated floating-point ternary operations with merging. 
> (define_expand "@cond_<optab><mode>" > - [(set (match_operand:SVE_FULL_F_B16B16 0 "register_operand") > - (unspec:SVE_FULL_F_B16B16 > + [(set (match_operand:SVE_F_B16B16 0 "register_operand") > + (unspec:SVE_F_B16B16 > [(match_operand:<VPRED> 1 "register_operand") > - (unspec:SVE_FULL_F_B16B16 > + (unspec:SVE_F_B16B16 > [(match_dup 1) > (const_int SVE_STRICT_GP) > - (match_operand:SVE_FULL_F_B16B16 2 "register_operand") > - (match_operand:SVE_FULL_F_B16B16 3 "register_operand") > - (match_operand:SVE_FULL_F_B16B16 4 "register_operand")] > + (match_operand:SVE_F_B16B16 2 "register_operand") > + (match_operand:SVE_F_B16B16 3 "register_operand") > + (match_operand:SVE_F_B16B16 4 "register_operand")] > SVE_COND_FP_TERNARY) > - (match_operand:SVE_FULL_F_B16B16 5 "aarch64_simd_reg_or_zero")] > + (match_operand:SVE_F_B16B16 5 "aarch64_simd_reg_or_zero")] > UNSPEC_SEL))] > "TARGET_SVE && (<supports_bf16> || !<is_bf16>)" > { > @@ -7617,6 +7617,9 @@ > second of the two. */ > if (rtx_equal_p (operands[3], operands[5])) > std::swap (operands[2], operands[3]); > + > + if (rtx pred = aarch64_sve_emit_masked_fp_pred (<MODE>mode, operands[1])) > + operands[1] = pred; > }) > > ;; Predicated floating-point ternary operations, merging with the > @@ -7646,15 +7649,15 @@ > ) > > (define_insn "*cond_<optab><mode>_2_strict" > - [(set (match_operand:SVE_FULL_F 0 "register_operand") > - (unspec:SVE_FULL_F > - [(match_operand:<VPRED> 1 "register_operand") > - (unspec:SVE_FULL_F > + [(set (match_operand:SVE_F 0 "register_operand") > + (unspec:SVE_F > + [(match_operand:<VPRED> 1 "aarch64_predicate_operand") > + (unspec:SVE_F > [(match_dup 1) > (const_int SVE_STRICT_GP) > - (match_operand:SVE_FULL_F 2 "register_operand") > - (match_operand:SVE_FULL_F 3 "register_operand") > - (match_operand:SVE_FULL_F 4 "register_operand")] > + (match_operand:SVE_F 2 "register_operand") > + (match_operand:SVE_F 3 "register_operand") > + (match_operand:SVE_F 4 "register_operand")] > SVE_COND_FP_TERNARY) > 
(match_dup 2)] > UNSPEC_SEL))] > @@ -7692,15 +7695,15 @@ > ) > > (define_insn "*cond_<optab><mode>_4_strict" > - [(set (match_operand:SVE_FULL_F_B16B16 0 "register_operand") > - (unspec:SVE_FULL_F_B16B16 > - [(match_operand:<VPRED> 1 "register_operand") > - (unspec:SVE_FULL_F_B16B16 > + [(set (match_operand:SVE_F_B16B16 0 "register_operand") > + (unspec:SVE_F_B16B16 > + [(match_operand:<VPRED> 1 "aarch64_predicate_operand") > + (unspec:SVE_F_B16B16 > [(match_dup 1) > (const_int SVE_STRICT_GP) > - (match_operand:SVE_FULL_F_B16B16 2 "register_operand") > - (match_operand:SVE_FULL_F_B16B16 3 "register_operand") > - (match_operand:SVE_FULL_F_B16B16 4 "register_operand")] > + (match_operand:SVE_F_B16B16 2 "register_operand") > + (match_operand:SVE_F_B16B16 3 "register_operand") > + (match_operand:SVE_F_B16B16 4 "register_operand")] > SVE_COND_FP_TERNARY) > (match_dup 4)] > UNSPEC_SEL))] > @@ -7760,17 +7763,17 @@ > ) > > (define_insn_and_rewrite "*cond_<optab><mode>_any_strict" > - [(set (match_operand:SVE_FULL_F_B16B16 0 "register_operand") > - (unspec:SVE_FULL_F_B16B16 > - [(match_operand:<VPRED> 1 "register_operand") > - (unspec:SVE_FULL_F_B16B16 > + [(set (match_operand:SVE_F_B16B16 0 "register_operand") > + (unspec:SVE_F_B16B16 > + [(match_operand:<VPRED> 1 "aarch64_predicate_operand") > + (unspec:SVE_F_B16B16 > [(match_dup 1) > (const_int SVE_STRICT_GP) > - (match_operand:SVE_FULL_F_B16B16 2 "register_operand") > - (match_operand:SVE_FULL_F_B16B16 3 "register_operand") > - (match_operand:SVE_FULL_F_B16B16 4 "register_operand")] > + (match_operand:SVE_F_B16B16 2 "register_operand") > + (match_operand:SVE_F_B16B16 3 "register_operand") > + (match_operand:SVE_F_B16B16 4 "register_operand")] > SVE_COND_FP_TERNARY) > - (match_operand:SVE_FULL_F_B16B16 5 "aarch64_simd_reg_or_zero")] > + (match_operand:SVE_F_B16B16 5 "aarch64_simd_reg_or_zero")] > UNSPEC_SEL))] > "TARGET_SVE > && (<supports_bf16> || !<is_bf16>) > diff --git > 
a/gcc/testsuite/g++.target/aarch64/sve/unpacked_cond_ternary_bf16_1.C > b/gcc/testsuite/g++.target/aarch64/sve/unpacked_cond_ternary_bf16_1.C > new file mode 100644 > index 00000000000..3a203184dab > --- /dev/null > +++ b/gcc/testsuite/g++.target/aarch64/sve/unpacked_cond_ternary_bf16_1.C > @@ -0,0 +1,35 @@ > +/* { dg-do compile }*/ > +/* { dg-options "-O2 -fno-trapping-math -msve-vector-bits=2048 " } */ > + > +#include <stdint.h> > +#pragma GCC target "arch=armv9-a+sve-b16b16" > + > +#define COND_BFMLA(TYPE, PRED_TYPE, MERGE) \ > + TYPE test_bfmla_##TYPE##_##MERGE (TYPE a, TYPE b, TYPE c, PRED_TYPE p) \ > + {return p ? a * b + c : MERGE; } > + > +#define COND_BFMLS(TYPE, PRED_TYPE, MERGE) \ > + TYPE test_bfmls_##TYPE##_##MERGE (TYPE a, TYPE b, TYPE c, PRED_TYPE p) \ > + {return p ? a * -b + c : MERGE; } > + > +#define TEST_OP(TYPE, PRED_TYPE, T) \ > + T (TYPE, PRED_TYPE, c) \ > + T (TYPE, PRED_TYPE, 5) > + > +#define TEST(TYPE, PTYPE, SIZE) \ > + typedef TYPE TYPE##SIZE __attribute__ ((vector_size (SIZE))); \ > + typedef PTYPE PTYPE##SIZE __attribute__ ((vector_size (SIZE))); \ > + TEST_OP (TYPE##SIZE, PTYPE##SIZE, COND_BFMLA) \ > + TEST_OP (TYPE##SIZE, PTYPE##SIZE, COND_BFMLS) > + > +TEST (__bf16, uint16_t, 128) > + > +TEST (__bf16, uint16_t, 64) > + > +/* { dg-final { scan-assembler-times {\tptrue} 8 } } */ > + > +/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.h, p[0-7]/m, > z[0-9]+\.h\n} 4 } } */ > +/* { dg-final { scan-assembler-times {\tbfmla\tz[0-9]+\.h, p[0-7]/m, > z[0-9]+\.h, z[0-9]+\.h\n} 4 } } */ > +/* { dg-final { scan-assembler-times {\tbfmls\tz[0-9]+\.h, p[0-7]/m, > z[0-9]+\.h, z[0-9]+\.h\n} 4 } } */ > + > +/* { dg-final { scan-assembler-not {\tsel\t} } } */ > diff --git > a/gcc/testsuite/g++.target/aarch64/sve/unpacked_cond_ternary_bf16_2.C > b/gcc/testsuite/g++.target/aarch64/sve/unpacked_cond_ternary_bf16_2.C > new file mode 100644 > index 00000000000..bc47ece4c59 > --- /dev/null > +++ 
b/gcc/testsuite/g++.target/aarch64/sve/unpacked_cond_ternary_bf16_2.C > @@ -0,0 +1,14 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O2 -msve-vector-bits=2048" } */ > + > +#include "unpacked_cond_ternary_bf16_1.C" > + > +/* { dg-final { scan-assembler-times {\tptrue\tp[0-9]+\.s} 4 } } */ > +/* { dg-final { scan-assembler-times {\tptrue\tp[0-9]+\.d} 4 } } */ > +/* { dg-final { scan-assembler-times {\tand} 8 } } */ > + > +/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.h, p[0-7]/m, > z[0-9]+\.h\n} 4 } } */ > +/* { dg-final { scan-assembler-times {\tbfmla\tz[0-9]+\.h, p[0-7]/m, > z[0-9]+\.h, z[0-9]+\.h\n} 4 } } */ > +/* { dg-final { scan-assembler-times {\tbfmls\tz[0-9]+\.h, p[0-7]/m, > z[0-9]+\.h, z[0-9]+\.h\n} 4 } } */ > + > +/* { dg-final { scan-assembler-not {\tsel\t} } } */ > diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fmla_1.c > b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fmla_1.c > index e2a5bdab1dd..411ed821811 100644 > --- a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fmla_1.c > +++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fmla_1.c > @@ -25,6 +25,8 @@ > } > > #define TEST_ALL(FN, TYPE0, TYPE1, COUNT) \ > + TEST_FN (FN, TYPE0, TYPE1, COUNT, a_i) \ > + TEST_FN (FN, TYPE0, TYPE1, COUNT, b_i) \ > TEST_FN (FN, TYPE0, TYPE1, COUNT, c_i) \ > TEST_FN (FN, TYPE0, TYPE1, COUNT, 5) > > @@ -35,9 +37,11 @@ TEST_ALL (FMLA (f16), _Float16, uint32_t, 64) > TEST_ALL (FMLA (f32), float, uint64_t, 32) > > /* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.s, p[0-7]/m, > z[0-9]+\.s\n} 1 } } */ > +/* { dg-final { scan-assembler-times {\tfmad\tz[0-9]+\.s, p[0-7]/m, > z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */ > /* { dg-final { scan-assembler-times {\tfmla\tz[0-9]+\.s, p[0-7]/m, > z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */ > > /* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.h, p[0-7]/m, > z[0-9]+\.h\n} 2 } } */ > +/* { dg-final { scan-assembler-times {\tfmad\tz[0-9]+\.h, p[0-7]/m, > z[0-9]+\.h, z[0-9]+\.h\n} 4 } } */ > 
/* { dg-final { scan-assembler-times {\tfmla\tz[0-9]+\.h, p[0-7]/m, > z[0-9]+\.h, z[0-9]+\.h\n} 4 } } */ > > /* { dg-final { scan-assembler-not {\tsel\t} } } */ > diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fmla_2.c > b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fmla_2.c > new file mode 100644 > index 00000000000..afa01902c35 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fmla_2.c > @@ -0,0 +1,18 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O2 -ftree-vectorize -msve-vector-bits=2048" } */ > + > +#include "unpacked_cond_fmla_1.c" > + > +/* { dg-final { scan-assembler-times {\tptrue\tp[0-9]+\.s} 4 } } */ > +/* { dg-final { scan-assembler-times {\tptrue\tp[0-9]+\.d} 8 } } */ > +/* { dg-final { scan-assembler-times {\tand} 12 } } */ > + > +/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.s, p[0-7]/m, > z[0-9]+\.s\n} 1 } } */ > +/* { dg-final { scan-assembler-times {\tfmad\tz[0-9]+\.s, p[0-7]/m, > z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */ > +/* { dg-final { scan-assembler-times {\tfmla\tz[0-9]+\.s, p[0-7]/m, > z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */ > + > +/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.h, p[0-7]/m, > z[0-9]+\.h\n} 2 } } */ > +/* { dg-final { scan-assembler-times {\tfmad\tz[0-9]+\.h, p[0-7]/m, > z[0-9]+\.h, z[0-9]+\.h\n} 4 } } */ > +/* { dg-final { scan-assembler-times {\tfmla\tz[0-9]+\.h, p[0-7]/m, > z[0-9]+\.h, z[0-9]+\.h\n} 4 } } */ > + > +/* { dg-final { scan-assembler-not {\tsel\t} } } */ > diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fmls_1.c > b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fmls_1.c > index 5a83fab8ec7..5a9122d207f 100644 > --- a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fmls_1.c > +++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fmls_1.c > @@ -25,6 +25,8 @@ > } > > #define TEST_ALL(FN, TYPE0, TYPE1, COUNT) \ > + TEST_FN (FN, TYPE0, TYPE1, COUNT, a_i) \ > + TEST_FN (FN, TYPE0, TYPE1, COUNT, b_i) \ > TEST_FN (FN, TYPE0, 
TYPE1, COUNT, c_i) \ > TEST_FN (FN, TYPE0, TYPE1, COUNT, 5) > > @@ -35,9 +37,11 @@ TEST_ALL (FMLS (f16), _Float16, uint32_t, 64) > TEST_ALL (FMLS (f32), float, uint64_t, 32) > > /* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.s, p[0-7]/m, > z[0-9]+\.s\n} 1 } } */ > +/* { dg-final { scan-assembler-times {\tfmsb\tz[0-9]+\.s, p[0-7]/m, > z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */ > /* { dg-final { scan-assembler-times {\tfmls\tz[0-9]+\.s, p[0-7]/m, > z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */ > > /* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.h, p[0-7]/m, > z[0-9]+\.h\n} 2 } } */ > +/* { dg-final { scan-assembler-times {\tfmsb\tz[0-9]+\.h, p[0-7]/m, > z[0-9]+\.h, z[0-9]+\.h\n} 4 } } */ > /* { dg-final { scan-assembler-times {\tfmls\tz[0-9]+\.h, p[0-7]/m, > z[0-9]+\.h, z[0-9]+\.h\n} 4 } } */ > > /* { dg-final { scan-assembler-not {\tsel\t} } } */ > diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fmls_2.c > b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fmls_2.c > new file mode 100644 > index 00000000000..2c0407af0f0 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fmls_2.c > @@ -0,0 +1,18 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O2 -ftree-vectorize -msve-vector-bits=2048" } */ > + > +#include "unpacked_cond_fmls_1.c" > + > +/* { dg-final { scan-assembler-times {\tptrue\tp[0-9]+\.s} 4 } } */ > +/* { dg-final { scan-assembler-times {\tptrue\tp[0-9]+\.d} 8 } } */ > +/* { dg-final { scan-assembler-times {\tand} 12 } } */ > + > +/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.s, p[0-7]/m, > z[0-9]+\.s\n} 1 } } */ > +/* { dg-final { scan-assembler-times {\tfmsb\tz[0-9]+\.s, p[0-7]/m, > z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */ > +/* { dg-final { scan-assembler-times {\tfmls\tz[0-9]+\.s, p[0-7]/m, > z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */ > + > +/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.h, p[0-7]/m, > z[0-9]+\.h\n} 2 } } */ > +/* { dg-final { scan-assembler-times {\tfmsb\tz[0-9]+\.h, p[0-7]/m, > 
z[0-9]+\.h, z[0-9]+\.h\n} 4 } } */ > +/* { dg-final { scan-assembler-times {\tfmls\tz[0-9]+\.h, p[0-7]/m, > z[0-9]+\.h, z[0-9]+\.h\n} 4 } } */ > + > +/* { dg-final { scan-assembler-not {\tsel\t} } } */ > diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fnmla_1.c > b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fnmla_1.c > index 5d4eff806dd..8219c007dec 100644 > --- a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fnmla_1.c > +++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fnmla_1.c > @@ -25,6 +25,8 @@ > } > > #define TEST_ALL(FN, TYPE0, TYPE1, COUNT) \ > + TEST_FN (FN, TYPE0, TYPE1, COUNT, a_i) \ > + TEST_FN (FN, TYPE0, TYPE1, COUNT, b_i) \ > TEST_FN (FN, TYPE0, TYPE1, COUNT, c_i) \ > TEST_FN (FN, TYPE0, TYPE1, COUNT, 5) > > @@ -35,9 +37,11 @@ TEST_ALL (FNMLA (f16), _Float16, uint32_t, 64) > TEST_ALL (FNMLA (f32), float, uint64_t, 32) > > /* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.s, p[0-7]/m, > z[0-9]+\.s\n} 1 } } */ > +/* { dg-final { scan-assembler-times {\tfnmad\tz[0-9]+\.s, p[0-7]/m, > z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */ > /* { dg-final { scan-assembler-times {\tfnmla\tz[0-9]+\.s, p[0-7]/m, > z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */ > > /* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.h, p[0-7]/m, > z[0-9]+\.h\n} 2 } } */ > +/* { dg-final { scan-assembler-times {\tfnmad\tz[0-9]+\.h, p[0-7]/m, > z[0-9]+\.h, z[0-9]+\.h\n} 4 } } */ > /* { dg-final { scan-assembler-times {\tfnmla\tz[0-9]+\.h, p[0-7]/m, > z[0-9]+\.h, z[0-9]+\.h\n} 4 } } */ > > /* { dg-final { scan-assembler-not {\tsel\t} } } */ > diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fnmla_2.c > b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fnmla_2.c > new file mode 100644 > index 00000000000..847a5be28c3 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fnmla_2.c > @@ -0,0 +1,18 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O2 -ftree-vectorize -msve-vector-bits=2048" } */ > + > +#include 
"unpacked_cond_fnmla_1.c" > + > +/* { dg-final { scan-assembler-times {\tptrue\tp[0-9]+\.s} 4 } } */ > +/* { dg-final { scan-assembler-times {\tptrue\tp[0-9]+\.d} 8 } } */ > +/* { dg-final { scan-assembler-times {\tand} 12 } } */ > + > +/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.s, p[0-7]/m, > z[0-9]+\.s\n} 1 } } */ > +/* { dg-final { scan-assembler-times {\tfnmad\tz[0-9]+\.s, p[0-7]/m, > z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */ > +/* { dg-final { scan-assembler-times {\tfnmla\tz[0-9]+\.s, p[0-7]/m, > z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */ > + > +/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.h, p[0-7]/m, > z[0-9]+\.h\n} 2 } } */ > +/* { dg-final { scan-assembler-times {\tfnmad\tz[0-9]+\.h, p[0-7]/m, > z[0-9]+\.h, z[0-9]+\.h\n} 4 } } */ > +/* { dg-final { scan-assembler-times {\tfnmla\tz[0-9]+\.h, p[0-7]/m, > z[0-9]+\.h, z[0-9]+\.h\n} 4 } } */ > + > +/* { dg-final { scan-assembler-not {\tsel\t} } } */ > diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fnmls_1.c > b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fnmls_1.c > index 5569bbabd7d..811d6145fdd 100644 > --- a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fnmls_1.c > +++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fnmls_1.c > @@ -25,6 +25,8 @@ > } > > #define TEST_ALL(FN, TYPE0, TYPE1, COUNT) \ > + TEST_FN (FN, TYPE0, TYPE1, COUNT, a_i) \ > + TEST_FN (FN, TYPE0, TYPE1, COUNT, b_i) \ > TEST_FN (FN, TYPE0, TYPE1, COUNT, c_i) \ > TEST_FN (FN, TYPE0, TYPE1, COUNT, 5) > > @@ -35,9 +37,11 @@ TEST_ALL (FNMLS (f16), _Float16, uint32_t, 64) > TEST_ALL (FNMLS (f32), float, uint64_t, 32) > > /* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.s, p[0-7]/m, > z[0-9]+\.s\n} 1 } } */ > +/* { dg-final { scan-assembler-times {\tfnmsb\tz[0-9]+\.s, p[0-7]/m, > z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */ > /* { dg-final { scan-assembler-times {\tfnmls\tz[0-9]+\.s, p[0-7]/m, > z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */ > > /* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.h, 
p[0-7]/m, > z[0-9]+\.h\n} 2 } } */ > +/* { dg-final { scan-assembler-times {\tfnmsb\tz[0-9]+\.h, p[0-7]/m, > z[0-9]+\.h, z[0-9]+\.h\n} 4 } } */ > /* { dg-final { scan-assembler-times {\tfnmls\tz[0-9]+\.h, p[0-7]/m, > z[0-9]+\.h, z[0-9]+\.h\n} 4 } } */ > > /* { dg-final { scan-assembler-not {\tsel\t} } } */ > diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fnmls_2.c > b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fnmls_2.c > new file mode 100644 > index 00000000000..423e0732573 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fnmls_2.c > @@ -0,0 +1,18 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O2 -ftree-vectorize -msve-vector-bits=2048" } */ > + > +#include "unpacked_cond_fnmls_1.c" > + > +/* { dg-final { scan-assembler-times {\tptrue\tp[0-9]+\.s} 4 } } */ > +/* { dg-final { scan-assembler-times {\tptrue\tp[0-9]+\.d} 8 } } */ > +/* { dg-final { scan-assembler-times {\tand} 12 } } */ > + > +/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.s, p[0-7]/m, > z[0-9]+\.s\n} 1 } } */ > +/* { dg-final { scan-assembler-times {\tfnmsb\tz[0-9]+\.s, p[0-7]/m, > z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */ > +/* { dg-final { scan-assembler-times {\tfnmls\tz[0-9]+\.s, p[0-7]/m, > z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */ > + > +/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.h, p[0-7]/m, > z[0-9]+\.h\n} 2 } } */ > +/* { dg-final { scan-assembler-times {\tfnmsb\tz[0-9]+\.h, p[0-7]/m, > z[0-9]+\.h, z[0-9]+\.h\n} 4 } } */ > +/* { dg-final { scan-assembler-times {\tfnmls\tz[0-9]+\.h, p[0-7]/m, > z[0-9]+\.h, z[0-9]+\.h\n} 4 } } */ > + > +/* { dg-final { scan-assembler-not {\tsel\t} } } */