This patch extends the expander for fma, fnma, fms, and fnms to support partial SVE FP modes.
We add the missing BF16 tests, which we can now trigger because the conditional expander has been implemented. We also add tests for the 'merging with multiplicand' case, which this expander canonicalizes (albeit under SVE_STRICT_GP). gcc/ChangeLog: * config/aarch64/aarch64-sve.md (@cond_<optab><mode>): Extend to support partial FP modes. (*cond_<optab><mode>_2_strict): Extend from SVE_FULL_F to SVE_F, use aarch64_predicate_operand. (*cond_<optab><mode>_4_strict): Extend from SVE_FULL_F_B16B16 to SVE_F_B16B16, use aarch64_predicate_operand. (*cond_<optab><mode>_any_strict): Likewise. gcc/testsuite/ChangeLog: * gcc.target/aarch64/sve/unpacked_cond_fmla_1.c: Add test cases for merging with multiplicand. * gcc.target/aarch64/sve/unpacked_cond_fmls_1.c: Likewise. * gcc.target/aarch64/sve/unpacked_cond_fnmla_1.c: Likewise. * gcc.target/aarch64/sve/unpacked_cond_fnmls_1.c: Likewise. * gcc.target/aarch64/sve/unpacked_cond_fmla_2.c: New test. * gcc.target/aarch64/sve/unpacked_cond_fmls_2.c: Likewise. * gcc.target/aarch64/sve/unpacked_cond_fnmla_2.c: Likewise. * gcc.target/aarch64/sve/unpacked_cond_fnmls_2.c: Likewise. * g++.target/aarch64/sve/unpacked_cond_ternary_bf16_1.C: Likewise. * g++.target/aarch64/sve/unpacked_cond_ternary_bf16_2.C: Likewise. 
--- gcc/config/aarch64/aarch64-sve.md | 61 ++++++++++--------- .../sve/unpacked_cond_ternary_bf16_1.C | 35 +++++++++++ .../sve/unpacked_cond_ternary_bf16_2.C | 14 +++++ .../aarch64/sve/unpacked_cond_fmla_1.c | 4 ++ .../aarch64/sve/unpacked_cond_fmla_2.c | 18 ++++++ .../aarch64/sve/unpacked_cond_fmls_1.c | 4 ++ .../aarch64/sve/unpacked_cond_fmls_2.c | 18 ++++++ .../aarch64/sve/unpacked_cond_fnmla_1.c | 4 ++ .../aarch64/sve/unpacked_cond_fnmla_2.c | 18 ++++++ .../aarch64/sve/unpacked_cond_fnmls_1.c | 4 ++ .../aarch64/sve/unpacked_cond_fnmls_2.c | 18 ++++++ 11 files changed, 169 insertions(+), 29 deletions(-) create mode 100644 gcc/testsuite/g++.target/aarch64/sve/unpacked_cond_ternary_bf16_1.C create mode 100644 gcc/testsuite/g++.target/aarch64/sve/unpacked_cond_ternary_bf16_2.C create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fmla_2.c create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fmls_2.c create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fnmla_2.c create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fnmls_2.c diff --git a/gcc/config/aarch64/aarch64-sve.md b/gcc/config/aarch64/aarch64-sve.md index e5443980e8b..278f78960a6 100644 --- a/gcc/config/aarch64/aarch64-sve.md +++ b/gcc/config/aarch64/aarch64-sve.md @@ -7599,17 +7599,17 @@ ;; Predicated floating-point ternary operations with merging. 
(define_expand "@cond_<optab><mode>" - [(set (match_operand:SVE_FULL_F_B16B16 0 "register_operand") - (unspec:SVE_FULL_F_B16B16 + [(set (match_operand:SVE_F_B16B16 0 "register_operand") + (unspec:SVE_F_B16B16 [(match_operand:<VPRED> 1 "register_operand") - (unspec:SVE_FULL_F_B16B16 + (unspec:SVE_F_B16B16 [(match_dup 1) (const_int SVE_STRICT_GP) - (match_operand:SVE_FULL_F_B16B16 2 "register_operand") - (match_operand:SVE_FULL_F_B16B16 3 "register_operand") - (match_operand:SVE_FULL_F_B16B16 4 "register_operand")] + (match_operand:SVE_F_B16B16 2 "register_operand") + (match_operand:SVE_F_B16B16 3 "register_operand") + (match_operand:SVE_F_B16B16 4 "register_operand")] SVE_COND_FP_TERNARY) - (match_operand:SVE_FULL_F_B16B16 5 "aarch64_simd_reg_or_zero")] + (match_operand:SVE_F_B16B16 5 "aarch64_simd_reg_or_zero")] UNSPEC_SEL))] "TARGET_SVE && (<supports_bf16> || !<is_bf16>)" { @@ -7617,6 +7617,9 @@ second of the two. */ if (rtx_equal_p (operands[3], operands[5])) std::swap (operands[2], operands[3]); + + if (rtx pred = aarch64_sve_emit_masked_fp_pred (<MODE>mode, operands[1])) + operands[1] = pred; }) ;; Predicated floating-point ternary operations, merging with the @@ -7646,15 +7649,15 @@ ) (define_insn "*cond_<optab><mode>_2_strict" - [(set (match_operand:SVE_FULL_F 0 "register_operand") - (unspec:SVE_FULL_F - [(match_operand:<VPRED> 1 "register_operand") - (unspec:SVE_FULL_F + [(set (match_operand:SVE_F 0 "register_operand") + (unspec:SVE_F + [(match_operand:<VPRED> 1 "aarch64_predicate_operand") + (unspec:SVE_F [(match_dup 1) (const_int SVE_STRICT_GP) - (match_operand:SVE_FULL_F 2 "register_operand") - (match_operand:SVE_FULL_F 3 "register_operand") - (match_operand:SVE_FULL_F 4 "register_operand")] + (match_operand:SVE_F 2 "register_operand") + (match_operand:SVE_F 3 "register_operand") + (match_operand:SVE_F 4 "register_operand")] SVE_COND_FP_TERNARY) (match_dup 2)] UNSPEC_SEL))] @@ -7692,15 +7695,15 @@ ) (define_insn "*cond_<optab><mode>_4_strict" - [(set 
(match_operand:SVE_FULL_F_B16B16 0 "register_operand") - (unspec:SVE_FULL_F_B16B16 - [(match_operand:<VPRED> 1 "register_operand") - (unspec:SVE_FULL_F_B16B16 + [(set (match_operand:SVE_F_B16B16 0 "register_operand") + (unspec:SVE_F_B16B16 + [(match_operand:<VPRED> 1 "aarch64_predicate_operand") + (unspec:SVE_F_B16B16 [(match_dup 1) (const_int SVE_STRICT_GP) - (match_operand:SVE_FULL_F_B16B16 2 "register_operand") - (match_operand:SVE_FULL_F_B16B16 3 "register_operand") - (match_operand:SVE_FULL_F_B16B16 4 "register_operand")] + (match_operand:SVE_F_B16B16 2 "register_operand") + (match_operand:SVE_F_B16B16 3 "register_operand") + (match_operand:SVE_F_B16B16 4 "register_operand")] SVE_COND_FP_TERNARY) (match_dup 4)] UNSPEC_SEL))] @@ -7760,17 +7763,17 @@ ) (define_insn_and_rewrite "*cond_<optab><mode>_any_strict" - [(set (match_operand:SVE_FULL_F_B16B16 0 "register_operand") - (unspec:SVE_FULL_F_B16B16 - [(match_operand:<VPRED> 1 "register_operand") - (unspec:SVE_FULL_F_B16B16 + [(set (match_operand:SVE_F_B16B16 0 "register_operand") + (unspec:SVE_F_B16B16 + [(match_operand:<VPRED> 1 "aarch64_predicate_operand") + (unspec:SVE_F_B16B16 [(match_dup 1) (const_int SVE_STRICT_GP) - (match_operand:SVE_FULL_F_B16B16 2 "register_operand") - (match_operand:SVE_FULL_F_B16B16 3 "register_operand") - (match_operand:SVE_FULL_F_B16B16 4 "register_operand")] + (match_operand:SVE_F_B16B16 2 "register_operand") + (match_operand:SVE_F_B16B16 3 "register_operand") + (match_operand:SVE_F_B16B16 4 "register_operand")] SVE_COND_FP_TERNARY) - (match_operand:SVE_FULL_F_B16B16 5 "aarch64_simd_reg_or_zero")] + (match_operand:SVE_F_B16B16 5 "aarch64_simd_reg_or_zero")] UNSPEC_SEL))] "TARGET_SVE && (<supports_bf16> || !<is_bf16>) diff --git a/gcc/testsuite/g++.target/aarch64/sve/unpacked_cond_ternary_bf16_1.C b/gcc/testsuite/g++.target/aarch64/sve/unpacked_cond_ternary_bf16_1.C new file mode 100644 index 00000000000..3a203184dab --- /dev/null +++ 
b/gcc/testsuite/g++.target/aarch64/sve/unpacked_cond_ternary_bf16_1.C @@ -0,0 +1,35 @@ +/* { dg-do compile }*/ +/* { dg-options "-O2 -fno-trapping-math -msve-vector-bits=2048 " } */ + +#include <stdint.h> +#pragma GCC target "arch=armv9-a+sve-b16b16" + +#define COND_BFMLA(TYPE, PRED_TYPE, MERGE) \ + TYPE test_bfmla_##TYPE##_##MERGE (TYPE a, TYPE b, TYPE c, PRED_TYPE p) \ + {return p ? a * b + c : MERGE; } + +#define COND_BFMLS(TYPE, PRED_TYPE, MERGE) \ + TYPE test_bfmls_##TYPE##_##MERGE (TYPE a, TYPE b, TYPE c, PRED_TYPE p) \ + {return p ? a * -b + c : MERGE; } + +#define TEST_OP(TYPE, PRED_TYPE, T) \ + T (TYPE, PRED_TYPE, c) \ + T (TYPE, PRED_TYPE, 5) + +#define TEST(TYPE, PTYPE, SIZE) \ + typedef TYPE TYPE##SIZE __attribute__ ((vector_size (SIZE))); \ + typedef PTYPE PTYPE##SIZE __attribute__ ((vector_size (SIZE))); \ + TEST_OP (TYPE##SIZE, PTYPE##SIZE, COND_BFMLA) \ + TEST_OP (TYPE##SIZE, PTYPE##SIZE, COND_BFMLS) + +TEST (__bf16, uint16_t, 128) + +TEST (__bf16, uint16_t, 64) + +/* { dg-final { scan-assembler-times {\tptrue} 8 } } */ + +/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h\n} 4 } } */ +/* { dg-final { scan-assembler-times {\tbfmla\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 4 } } */ +/* { dg-final { scan-assembler-times {\tbfmls\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 4 } } */ + +/* { dg-final { scan-assembler-not {\tsel\t} } } */ diff --git a/gcc/testsuite/g++.target/aarch64/sve/unpacked_cond_ternary_bf16_2.C b/gcc/testsuite/g++.target/aarch64/sve/unpacked_cond_ternary_bf16_2.C new file mode 100644 index 00000000000..bc47ece4c59 --- /dev/null +++ b/gcc/testsuite/g++.target/aarch64/sve/unpacked_cond_ternary_bf16_2.C @@ -0,0 +1,14 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -msve-vector-bits=2048" } */ + +#include "unpacked_cond_ternary_bf16_1.C" + +/* { dg-final { scan-assembler-times {\tptrue\tp[0-9]+\.s} 4 } } */ +/* { dg-final { scan-assembler-times {\tptrue\tp[0-9]+\.d} 4 } } */ +/* { dg-final { 
scan-assembler-times {\tand} 8 } } */ + +/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h\n} 4 } } */ +/* { dg-final { scan-assembler-times {\tbfmla\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 4 } } */ +/* { dg-final { scan-assembler-times {\tbfmls\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 4 } } */ + +/* { dg-final { scan-assembler-not {\tsel\t} } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fmla_1.c b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fmla_1.c index e2a5bdab1dd..411ed821811 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fmla_1.c +++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fmla_1.c @@ -25,6 +25,8 @@ } #define TEST_ALL(FN, TYPE0, TYPE1, COUNT) \ + TEST_FN (FN, TYPE0, TYPE1, COUNT, a_i) \ + TEST_FN (FN, TYPE0, TYPE1, COUNT, b_i) \ TEST_FN (FN, TYPE0, TYPE1, COUNT, c_i) \ TEST_FN (FN, TYPE0, TYPE1, COUNT, 5) @@ -35,9 +37,11 @@ TEST_ALL (FMLA (f16), _Float16, uint32_t, 64) TEST_ALL (FMLA (f32), float, uint64_t, 32) /* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tfmad\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */ /* { dg-final { scan-assembler-times {\tfmla\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */ /* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tfmad\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 4 } } */ /* { dg-final { scan-assembler-times {\tfmla\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 4 } } */ /* { dg-final { scan-assembler-not {\tsel\t} } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fmla_2.c b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fmla_2.c new file mode 100644 index 00000000000..afa01902c35 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fmla_2.c @@ -0,0 +1,18 @@ +/* { dg-do compile 
} */ +/* { dg-options "-O2 -ftree-vectorize -msve-vector-bits=2048" } */ + +#include "unpacked_cond_fmla_1.c" + +/* { dg-final { scan-assembler-times {\tptrue\tp[0-9]+\.s} 4 } } */ +/* { dg-final { scan-assembler-times {\tptrue\tp[0-9]+\.d} 8 } } */ +/* { dg-final { scan-assembler-times {\tand} 12 } } */ + +/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tfmad\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tfmla\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */ + +/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tfmad\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 4 } } */ +/* { dg-final { scan-assembler-times {\tfmla\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 4 } } */ + +/* { dg-final { scan-assembler-not {\tsel\t} } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fmls_1.c b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fmls_1.c index 5a83fab8ec7..5a9122d207f 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fmls_1.c +++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fmls_1.c @@ -25,6 +25,8 @@ } #define TEST_ALL(FN, TYPE0, TYPE1, COUNT) \ + TEST_FN (FN, TYPE0, TYPE1, COUNT, a_i) \ + TEST_FN (FN, TYPE0, TYPE1, COUNT, b_i) \ TEST_FN (FN, TYPE0, TYPE1, COUNT, c_i) \ TEST_FN (FN, TYPE0, TYPE1, COUNT, 5) @@ -35,9 +37,11 @@ TEST_ALL (FMLS (f16), _Float16, uint32_t, 64) TEST_ALL (FMLS (f32), float, uint64_t, 32) /* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tfmsb\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */ /* { dg-final { scan-assembler-times {\tfmls\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */ /* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h\n} 2 
} } */ +/* { dg-final { scan-assembler-times {\tfmsb\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 4 } } */ /* { dg-final { scan-assembler-times {\tfmls\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 4 } } */ /* { dg-final { scan-assembler-not {\tsel\t} } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fmls_2.c b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fmls_2.c new file mode 100644 index 00000000000..2c0407af0f0 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fmls_2.c @@ -0,0 +1,18 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize -msve-vector-bits=2048" } */ + +#include "unpacked_cond_fmls_1.c" + +/* { dg-final { scan-assembler-times {\tptrue\tp[0-9]+\.s} 4 } } */ +/* { dg-final { scan-assembler-times {\tptrue\tp[0-9]+\.d} 8 } } */ +/* { dg-final { scan-assembler-times {\tand} 12 } } */ + +/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tfmsb\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tfmls\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */ + +/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tfmsb\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 4 } } */ +/* { dg-final { scan-assembler-times {\tfmls\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 4 } } */ + +/* { dg-final { scan-assembler-not {\tsel\t} } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fnmla_1.c b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fnmla_1.c index 5d4eff806dd..8219c007dec 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fnmla_1.c +++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fnmla_1.c @@ -25,6 +25,8 @@ } #define TEST_ALL(FN, TYPE0, TYPE1, COUNT) \ + TEST_FN (FN, TYPE0, TYPE1, COUNT, a_i) \ + TEST_FN (FN, TYPE0, TYPE1, COUNT, b_i) \ 
TEST_FN (FN, TYPE0, TYPE1, COUNT, c_i) \ TEST_FN (FN, TYPE0, TYPE1, COUNT, 5) @@ -35,9 +37,11 @@ TEST_ALL (FNMLA (f16), _Float16, uint32_t, 64) TEST_ALL (FNMLA (f32), float, uint64_t, 32) /* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tfnmad\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */ /* { dg-final { scan-assembler-times {\tfnmla\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */ /* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tfnmad\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 4 } } */ /* { dg-final { scan-assembler-times {\tfnmla\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 4 } } */ /* { dg-final { scan-assembler-not {\tsel\t} } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fnmla_2.c b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fnmla_2.c new file mode 100644 index 00000000000..847a5be28c3 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fnmla_2.c @@ -0,0 +1,18 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize -msve-vector-bits=2048" } */ + +#include "unpacked_cond_fnmla_1.c" + +/* { dg-final { scan-assembler-times {\tptrue\tp[0-9]+\.s} 4 } } */ +/* { dg-final { scan-assembler-times {\tptrue\tp[0-9]+\.d} 8 } } */ +/* { dg-final { scan-assembler-times {\tand} 12 } } */ + +/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tfnmad\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tfnmla\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */ + +/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tfnmad\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 4 } } */ +/* { dg-final { 
scan-assembler-times {\tfnmla\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 4 } } */ + +/* { dg-final { scan-assembler-not {\tsel\t} } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fnmls_1.c b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fnmls_1.c index 5569bbabd7d..811d6145fdd 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fnmls_1.c +++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fnmls_1.c @@ -25,6 +25,8 @@ } #define TEST_ALL(FN, TYPE0, TYPE1, COUNT) \ + TEST_FN (FN, TYPE0, TYPE1, COUNT, a_i) \ + TEST_FN (FN, TYPE0, TYPE1, COUNT, b_i) \ TEST_FN (FN, TYPE0, TYPE1, COUNT, c_i) \ TEST_FN (FN, TYPE0, TYPE1, COUNT, 5) @@ -35,9 +37,11 @@ TEST_ALL (FNMLS (f16), _Float16, uint32_t, 64) TEST_ALL (FNMLS (f32), float, uint64_t, 32) /* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tfnmsb\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */ /* { dg-final { scan-assembler-times {\tfnmls\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */ /* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tfnmsb\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 4 } } */ /* { dg-final { scan-assembler-times {\tfnmls\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 4 } } */ /* { dg-final { scan-assembler-not {\tsel\t} } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fnmls_2.c b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fnmls_2.c new file mode 100644 index 00000000000..423e0732573 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fnmls_2.c @@ -0,0 +1,18 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize -msve-vector-bits=2048" } */ + +#include "unpacked_cond_fnmls_1.c" + +/* { dg-final { scan-assembler-times {\tptrue\tp[0-9]+\.s} 4 } } */ +/* { dg-final { scan-assembler-times {\tptrue\tp[0-9]+\.d} 8 
} } */ +/* { dg-final { scan-assembler-times {\tand} 12 } } */ + +/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tfnmsb\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tfnmls\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */ + +/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tfnmsb\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 4 } } */ +/* { dg-final { scan-assembler-times {\tfnmls\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 4 } } */ + +/* { dg-final { scan-assembler-not {\tsel\t} } } */ -- 2.34.1