This patch extends the expander for unconditional fma, fnma, fms, and fnms, so that it supports partial SVE FP modes.
gcc/ChangeLog: * config/aarch64/aarch64-sve.md (<optab><mode>4): Extend from SVE_FULL_F_B16B16 to SVE_F_B16B16. Use sve_fp_pred instead of aarch64_ptrue_reg. (@aarch64_pred_<optab><mode>): Extend from SVE_FULL_F_B16B16 to SVE_F_B16B16. Use aarch64_predicate_operand. gcc/testsuite/ChangeLog: * g++.target/aarch64/sve/unpacked_ternary_bf16_1.C: New test. * g++.target/aarch64/sve/unpacked_ternary_bf16_2.C: Likewise. * gcc.target/aarch64/sve/unpacked_fmla_1.c: Likewise. * gcc.target/aarch64/sve/unpacked_fmla_2.c: Likewise. * gcc.target/aarch64/sve/unpacked_fmls_1.c: Likewise. * gcc.target/aarch64/sve/unpacked_fmls_2.c: Likewise. * gcc.target/aarch64/sve/unpacked_fnmla_1.c: Likeiwse. * gcc.target/aarch64/sve/unpacked_fnmla_2.c: Likewise. * gcc.target/aarch64/sve/unpacked_fnmls_1.c: Likewise. * gcc.target/aarch64/sve/unpacked_fnmls_2.c: Likewise. --- gcc/config/aarch64/aarch64-sve.md | 26 +++++++------- .../aarch64/sve/unpacked_ternary_bf16_1.C | 27 +++++++++++++++ .../aarch64/sve/unpacked_ternary_bf16_2.C | 11 ++++++ .../gcc.target/aarch64/sve/unpacked_fmla_1.c | 34 +++++++++++++++++++ .../gcc.target/aarch64/sve/unpacked_fmla_2.c | 11 ++++++ .../gcc.target/aarch64/sve/unpacked_fmls_1.c | 34 +++++++++++++++++++ .../gcc.target/aarch64/sve/unpacked_fmls_2.c | 11 ++++++ .../gcc.target/aarch64/sve/unpacked_fnmla_1.c | 34 +++++++++++++++++++ .../gcc.target/aarch64/sve/unpacked_fnmla_2.c | 11 ++++++ .../gcc.target/aarch64/sve/unpacked_fnmls_1.c | 34 +++++++++++++++++++ .../gcc.target/aarch64/sve/unpacked_fnmls_2.c | 11 ++++++ 11 files changed, 231 insertions(+), 13 deletions(-) create mode 100644 gcc/testsuite/g++.target/aarch64/sve/unpacked_ternary_bf16_1.C create mode 100644 gcc/testsuite/g++.target/aarch64/sve/unpacked_ternary_bf16_2.C create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/unpacked_fmla_1.c create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/unpacked_fmla_2.c create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/unpacked_fmls_1.c create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/unpacked_fmls_2.c create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/unpacked_fnmla_1.c create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/unpacked_fnmla_2.c create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/unpacked_fnmls_1.c create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/unpacked_fnmls_2.c diff --git a/gcc/config/aarch64/aarch64-sve.md b/gcc/config/aarch64/aarch64-sve.md index 1ed2d065c15..8c1921ddf5c 100644 --- a/gcc/config/aarch64/aarch64-sve.md +++ b/gcc/config/aarch64/aarch64-sve.md @@ -7563,29 +7563,29 @@ ;; Unpredicated floating-point ternary operations. (define_expand "<optab><mode>4" - [(set (match_operand:SVE_FULL_F_B16B16 0 "register_operand") - (unspec:SVE_FULL_F_B16B16 + [(set (match_operand:SVE_F_B16B16 0 "register_operand") + (unspec:SVE_F_B16B16 [(match_dup 4) - (const_int SVE_RELAXED_GP) - (match_operand:SVE_FULL_F_B16B16 1 "register_operand") - (match_operand:SVE_FULL_F_B16B16 2 "register_operand") - (match_operand:SVE_FULL_F_B16B16 3 "register_operand")] + (match_dup 5) + (match_operand:SVE_F_B16B16 1 "register_operand") + (match_operand:SVE_F_B16B16 2 "register_operand") + (match_operand:SVE_F_B16B16 3 "register_operand")] SVE_COND_FP_TERNARY))] "TARGET_SVE && (<supports_bf16> || !<is_bf16>)" { - operands[4] = aarch64_ptrue_reg (<VPRED>mode); + operands[4] = aarch64_sve_fp_pred (<MODE>mode, &operands[5]); } ) ;; Predicated floating-point ternary operations. (define_insn "@aarch64_pred_<optab><mode>" - [(set (match_operand:SVE_FULL_F_B16B16 0 "register_operand") - (unspec:SVE_FULL_F_B16B16 - [(match_operand:<VPRED> 1 "register_operand") + [(set (match_operand:SVE_F_B16B16 0 "register_operand") + (unspec:SVE_F_B16B16 + [(match_operand:<VPRED> 1 "aarch64_predicate_operand") (match_operand:SI 5 "aarch64_sve_gp_strictness") - (match_operand:SVE_FULL_F_B16B16 2 "register_operand") - (match_operand:SVE_FULL_F_B16B16 3 "register_operand") - (match_operand:SVE_FULL_F_B16B16 4 "register_operand")] + (match_operand:SVE_F_B16B16 2 "register_operand") + (match_operand:SVE_F_B16B16 3 "register_operand") + (match_operand:SVE_F_B16B16 4 "register_operand")] SVE_COND_FP_TERNARY))] "TARGET_SVE && (<supports_bf16> || !<is_bf16>)" {@ [ cons: =0 , 1 , %2 , 3 , 4 ; attrs: movprfx , is_rev ] diff --git a/gcc/testsuite/g++.target/aarch64/sve/unpacked_ternary_bf16_1.C b/gcc/testsuite/g++.target/aarch64/sve/unpacked_ternary_bf16_1.C new file mode 100644 index 00000000000..19bfe95f298 --- /dev/null +++ b/gcc/testsuite/g++.target/aarch64/sve/unpacked_ternary_bf16_1.C @@ -0,0 +1,27 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -msve-vector-bits=2048" } */ + +#define BFMLA(TYPE) \ + TYPE test_bfmla_##TYPE (TYPE a, TYPE b, TYPE c) \ + { return a * b + c; } + +#define BFMLS(TYPE) \ + TYPE test_bfmls_##TYPE (TYPE a, TYPE b, TYPE c) \ + { return a * -b + c; } + +#define TEST_TYPE(TYPE, SIZE) \ + typedef TYPE TYPE##SIZE __attribute__((vector_size(SIZE))); \ + BFMLA (TYPE##SIZE) \ + BFMLS (TYPE##SIZE) + +#pragma GCC target "arch=armv9-a+sve-b16b16" + +TEST_TYPE (__bf16, 128) + +TEST_TYPE (__bf16, 64) + +/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.s} 2 } } */ +/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.d} 2 } } */ + +/* { dg-final { scan-assembler-times {\tbfmla\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tbfmls\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */ diff --git a/gcc/testsuite/g++.target/aarch64/sve/unpacked_ternary_bf16_2.C b/gcc/testsuite/g++.target/aarch64/sve/unpacked_ternary_bf16_2.C new file mode 100644 index 00000000000..ef37400dc3b --- /dev/null +++ b/gcc/testsuite/g++.target/aarch64/sve/unpacked_ternary_bf16_2.C @@ -0,0 +1,11 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -msve-vector-bits=2048 -fno-trapping-math" } */ + +#include "unpacked_ternary_bf16_1.C" + +/* { dg-final { scan-assembler-not {\tptrue\tp[0-7]\.s} } } */ +/* { dg-final { scan-assembler-not {\tptrue\tp[0-7]\.d} } } */ +/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.b} 4 } } */ + +/* { dg-final { scan-assembler-times {\tbfmla\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tbfmls\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_fmla_1.c b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_fmla_1.c new file mode 100644 index 00000000000..f435689e297 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_fmla_1.c @@ -0,0 +1,34 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -msve-vector-bits=2048 -ftree-vectorize" } */ + +#include <stdint.h> + +#define FMLA(SUFF) __builtin_fma##SUFF (a[i], b[i], c[i]) +#define FMLS(SUFF) __builtin_fma##SUFF (a[i], -b[i], c[i]) +#define FNMLA(SUFF) -FMLA (SUFF) +#define FNMLS(SUFF) -FMLS (SUFF) + +#define TEST_FN(FN, TYPE0, TYPE1, COUNT) \ + void \ + f_##TYPE0##_##TYPE1 (TYPE1 *__restrict out, \ + TYPE0 *__restrict a, \ + TYPE0 *__restrict b, \ + TYPE0 *__restrict c, \ + TYPE0 *__restrict d) \ + { \ + for (unsigned int i = 0; i < COUNT; i++) \ + if (FN > d[i]) \ + out[i] = 3; \ + } + +TEST_FN (FMLA (f16), _Float16, uint64_t, 32) + +TEST_FN (FMLA (f16), _Float16, uint32_t, 64) + +TEST_FN (FMLA (f32), float, uint64_t, 32) + +/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.s} 1 } } */ +/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.d} 2 } } */ + +/* { dg-final { scan-assembler-times {\t(fmla|fmad)\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */ +/* { dg-final { scan-assembler-times {\t(fmla|fmad)\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_fmla_2.c b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_fmla_2.c new file mode 100644 index 00000000000..d32615d8766 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_fmla_2.c @@ -0,0 +1,11 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize -msve-vector-bits=2048 -fno-trapping-math" } */ + +#include "unpacked_fmla_1.c" + +/* { dg-final { scan-assembler-not {\tptrue\tp[0-7]\.s} } } */ +/* { dg-final { scan-assembler-not {\tptrue\tp[0-7]\.d} } } */ +/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.b} 3 } } */ + +/* { dg-final { scan-assembler-times {\t(fmla|fmad)\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */ +/* { dg-final { scan-assembler-times {\t(fmla|fmad)\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_fmls_1.c b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_fmls_1.c new file mode 100644 index 00000000000..67cd088bd61 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_fmls_1.c @@ -0,0 +1,34 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -msve-vector-bits=2048 -ftree-vectorize" } */ + +#include <stdint.h> + +#define FMLA(SUFF) __builtin_fma##SUFF (a[i], b[i], c[i]) +#define FMLS(SUFF) __builtin_fma##SUFF (a[i], -b[i], c[i]) +#define FNMLA(SUFF) -FMLA (SUFF) +#define FNMLS(SUFF) -FMLS (SUFF) + +#define TEST_FN(FN, TYPE0, TYPE1, COUNT) \ + void \ + f_##TYPE0##_##TYPE1 (TYPE1 *__restrict out, \ + TYPE0 *__restrict a, \ + TYPE0 *__restrict b, \ + TYPE0 *__restrict c, \ + TYPE0 *__restrict d) \ + { \ + for (unsigned int i = 0; i < COUNT; i++) \ + if (FN > d[i]) \ + out[i] = 3; \ + } + +TEST_FN (FMLS (f16), _Float16, uint64_t, 32) + +TEST_FN (FMLS (f16), _Float16, uint32_t, 64) + +TEST_FN (FMLS (f32), float, uint64_t, 32) + +/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.s} 1 } } */ +/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.d} 2 } } */ + +/* { dg-final { scan-assembler-times {\t(fmls|fmsb)\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */ +/* { dg-final { scan-assembler-times {\t(fmls|fmsb)\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_fmls_2.c b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_fmls_2.c new file mode 100644 index 00000000000..f1f2200593b --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_fmls_2.c @@ -0,0 +1,11 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize -msve-vector-bits=2048 -fno-trapping-math" } */ + +#include "unpacked_fmls_1.c" + +/* { dg-final { scan-assembler-not {\tptrue\tp[0-7]\.s} } } */ +/* { dg-final { scan-assembler-not {\tptrue\tp[0-7]\.d} } } */ +/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.b} 3 } } */ + +/* { dg-final { scan-assembler-times {\t(fmls|fmsb)\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */ +/* { dg-final { scan-assembler-times {\t(fmls|fmsb)\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_fnmla_1.c b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_fnmla_1.c new file mode 100644 index 00000000000..90e92069649 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_fnmla_1.c @@ -0,0 +1,34 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -msve-vector-bits=2048 -ftree-vectorize" } */ + +#include <stdint.h> + +#define FMLA(SUFF) __builtin_fma##SUFF (a[i], b[i], c[i]) +#define FMLS(SUFF) __builtin_fma##SUFF (a[i], -b[i], c[i]) +#define FNMLA(SUFF) -FMLA (SUFF) +#define FNMLS(SUFF) -FMLS (SUFF) + +#define TEST_FN(FN, TYPE0, TYPE1, COUNT) \ + void \ + f_##TYPE0##_##TYPE1 (TYPE1 *__restrict out, \ + TYPE0 *__restrict a, \ + TYPE0 *__restrict b, \ + TYPE0 *__restrict c, \ + TYPE0 *__restrict d) \ + { \ + for (unsigned int i = 0; i < COUNT; i++) \ + if (FN > d[i]) \ + out[i] = 3; \ + } + +TEST_FN (FNMLA (f16), _Float16, uint64_t, 32) + +TEST_FN (FNMLA (f16), _Float16, uint32_t, 64) + +TEST_FN (FNMLA (f32), float, uint64_t, 32) + +/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.s} 1 } } */ +/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.d} 2 } } */ + +/* { dg-final { scan-assembler-times {\t(fnmla|fnmad)\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */ +/* { dg-final { scan-assembler-times {\t(fnmla|fnmad)\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_fnmla_2.c b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_fnmla_2.c new file mode 100644 index 00000000000..71b51f3a6dd --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_fnmla_2.c @@ -0,0 +1,11 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize -msve-vector-bits=2048 -fno-trapping-math" } */ + +#include "unpacked_fnmla_1.c" + +/* { dg-final { scan-assembler-not {\tptrue\tp[0-7]\.s} } } */ +/* { dg-final { scan-assembler-not {\tptrue\tp[0-7]\.d} } } */ +/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.b} 3 } } */ + +/* { dg-final { scan-assembler-times {\t(fnmla|fnmad)\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */ +/* { dg-final { scan-assembler-times {\t(fnmla|fnmad)\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_fnmls_1.c b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_fnmls_1.c new file mode 100644 index 00000000000..dbf58d014a2 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_fnmls_1.c @@ -0,0 +1,34 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -msve-vector-bits=2048 -ftree-vectorize" } */ + +#include <stdint.h> + +#define FMLA(SUFF) __builtin_fma##SUFF (a[i], b[i], c[i]) +#define FMLS(SUFF) __builtin_fma##SUFF (a[i], -b[i], c[i]) +#define FNMLA(SUFF) -FMLA (SUFF) +#define FNMLS(SUFF) -FMLS (SUFF) + +#define TEST_FN(FN, TYPE0, TYPE1, COUNT) \ + void \ + f_##TYPE0##_##TYPE1 (TYPE1 *__restrict out, \ + TYPE0 *__restrict a, \ + TYPE0 *__restrict b, \ + TYPE0 *__restrict c, \ + TYPE0 *__restrict d) \ + { \ + for (unsigned int i = 0; i < COUNT; i++) \ + if (FN > d[i]) \ + out[i] = 3; \ + } + +TEST_FN (FNMLS (f16), _Float16, uint64_t, 32) + +TEST_FN (FNMLS (f16), _Float16, uint32_t, 64) + +TEST_FN (FNMLS (f32), float, uint64_t, 32) + +/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.s} 1 } } */ +/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.d} 2 } } */ + +/* { dg-final { scan-assembler-times {\t(fnmls|fnmsb)\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */ +/* { dg-final { scan-assembler-times {\t(fnmls|fnmsb)\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_fnmls_2.c b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_fnmls_2.c new file mode 100644 index 00000000000..d904e1b6422 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_fnmls_2.c @@ -0,0 +1,11 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize -msve-vector-bits=2048 -fno-trapping-math" } */ + +#include "unpacked_fnmls_1.c" + +/* { dg-final { scan-assembler-not {\tptrue\tp[0-7]\.s} } } */ +/* { dg-final { scan-assembler-not {\tptrue\tp[0-7]\.d} } } */ +/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.b} 3 } } */ + +/* { dg-final { scan-assembler-times {\t(fnmls|fnmsb)\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */ +/* { dg-final { scan-assembler-times {\t(fnmls|fnmsb)\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */ -- 2.34.1