Spencer Abson <spencer.ab...@arm.com> writes: > This patch extends the expander for unconditional fma, fnma, fms, and > fnms, so that it supports partial SVE FP modes. > > gcc/ChangeLog: > > * config/aarch64/aarch64-sve.md (<optab><mode>4): Extend from > SVE_FULL_F_B16B16 to SVE_F_B16B16. Use aarch64_sve_fp_pred instead > of aarch64_ptrue_reg. > (@aarch64_pred_<optab><mode>): Extend from SVE_FULL_F_B16B16 > to SVE_F_B16B16. Use aarch64_predicate_operand. > > gcc/testsuite/ChangeLog: > > * g++.target/aarch64/sve/unpacked_ternary_bf16_1.C: New test. > * g++.target/aarch64/sve/unpacked_ternary_bf16_2.C: Likewise. > * gcc.target/aarch64/sve/unpacked_fmla_1.c: Likewise. > * gcc.target/aarch64/sve/unpacked_fmla_2.c: Likewise. > * gcc.target/aarch64/sve/unpacked_fmls_1.c: Likewise. > * gcc.target/aarch64/sve/unpacked_fmls_2.c: Likewise. > * gcc.target/aarch64/sve/unpacked_fnmla_1.c: Likewise. > * gcc.target/aarch64/sve/unpacked_fnmla_2.c: Likewise. > * gcc.target/aarch64/sve/unpacked_fnmls_1.c: Likewise. > * gcc.target/aarch64/sve/unpacked_fnmls_2.c: Likewise.
OK, thanks. Richard > --- > gcc/config/aarch64/aarch64-sve.md | 26 +++++++------- > .../aarch64/sve/unpacked_ternary_bf16_1.C | 27 +++++++++++++++ > .../aarch64/sve/unpacked_ternary_bf16_2.C | 11 ++++++ > .../gcc.target/aarch64/sve/unpacked_fmla_1.c | 34 +++++++++++++++++++ > .../gcc.target/aarch64/sve/unpacked_fmla_2.c | 11 ++++++ > .../gcc.target/aarch64/sve/unpacked_fmls_1.c | 34 +++++++++++++++++++ > .../gcc.target/aarch64/sve/unpacked_fmls_2.c | 11 ++++++ > .../gcc.target/aarch64/sve/unpacked_fnmla_1.c | 34 +++++++++++++++++++ > .../gcc.target/aarch64/sve/unpacked_fnmla_2.c | 11 ++++++ > .../gcc.target/aarch64/sve/unpacked_fnmls_1.c | 34 +++++++++++++++++++ > .../gcc.target/aarch64/sve/unpacked_fnmls_2.c | 11 ++++++ > 11 files changed, 231 insertions(+), 13 deletions(-) > create mode 100644 > gcc/testsuite/g++.target/aarch64/sve/unpacked_ternary_bf16_1.C > create mode 100644 > gcc/testsuite/g++.target/aarch64/sve/unpacked_ternary_bf16_2.C > create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/unpacked_fmla_1.c > create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/unpacked_fmla_2.c > create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/unpacked_fmls_1.c > create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/unpacked_fmls_2.c > create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/unpacked_fnmla_1.c > create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/unpacked_fnmla_2.c > create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/unpacked_fnmls_1.c > create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/unpacked_fnmls_2.c > > diff --git a/gcc/config/aarch64/aarch64-sve.md > b/gcc/config/aarch64/aarch64-sve.md > index 1ed2d065c15..8c1921ddf5c 100644 > --- a/gcc/config/aarch64/aarch64-sve.md > +++ b/gcc/config/aarch64/aarch64-sve.md > @@ -7563,29 +7563,29 @@ > > ;; Unpredicated floating-point ternary operations. 
> (define_expand "<optab><mode>4" > - [(set (match_operand:SVE_FULL_F_B16B16 0 "register_operand") > - (unspec:SVE_FULL_F_B16B16 > + [(set (match_operand:SVE_F_B16B16 0 "register_operand") > + (unspec:SVE_F_B16B16 > [(match_dup 4) > - (const_int SVE_RELAXED_GP) > - (match_operand:SVE_FULL_F_B16B16 1 "register_operand") > - (match_operand:SVE_FULL_F_B16B16 2 "register_operand") > - (match_operand:SVE_FULL_F_B16B16 3 "register_operand")] > + (match_dup 5) > + (match_operand:SVE_F_B16B16 1 "register_operand") > + (match_operand:SVE_F_B16B16 2 "register_operand") > + (match_operand:SVE_F_B16B16 3 "register_operand")] > SVE_COND_FP_TERNARY))] > "TARGET_SVE && (<supports_bf16> || !<is_bf16>)" > { > - operands[4] = aarch64_ptrue_reg (<VPRED>mode); > + operands[4] = aarch64_sve_fp_pred (<MODE>mode, &operands[5]); > } > ) > > ;; Predicated floating-point ternary operations. > (define_insn "@aarch64_pred_<optab><mode>" > - [(set (match_operand:SVE_FULL_F_B16B16 0 "register_operand") > - (unspec:SVE_FULL_F_B16B16 > - [(match_operand:<VPRED> 1 "register_operand") > + [(set (match_operand:SVE_F_B16B16 0 "register_operand") > + (unspec:SVE_F_B16B16 > + [(match_operand:<VPRED> 1 "aarch64_predicate_operand") > (match_operand:SI 5 "aarch64_sve_gp_strictness") > - (match_operand:SVE_FULL_F_B16B16 2 "register_operand") > - (match_operand:SVE_FULL_F_B16B16 3 "register_operand") > - (match_operand:SVE_FULL_F_B16B16 4 "register_operand")] > + (match_operand:SVE_F_B16B16 2 "register_operand") > + (match_operand:SVE_F_B16B16 3 "register_operand") > + (match_operand:SVE_F_B16B16 4 "register_operand")] > SVE_COND_FP_TERNARY))] > "TARGET_SVE && (<supports_bf16> || !<is_bf16>)" > {@ [ cons: =0 , 1 , %2 , 3 , 4 ; attrs: movprfx , is_rev ] > diff --git a/gcc/testsuite/g++.target/aarch64/sve/unpacked_ternary_bf16_1.C > b/gcc/testsuite/g++.target/aarch64/sve/unpacked_ternary_bf16_1.C > new file mode 100644 > index 00000000000..19bfe95f298 > --- /dev/null > +++ 
b/gcc/testsuite/g++.target/aarch64/sve/unpacked_ternary_bf16_1.C > @@ -0,0 +1,27 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O2 -msve-vector-bits=2048" } */ > + > +#define BFMLA(TYPE) \ > + TYPE test_bfmla_##TYPE (TYPE a, TYPE b, TYPE c) \ > + { return a * b + c; } > + > +#define BFMLS(TYPE) \ > + TYPE test_bfmls_##TYPE (TYPE a, TYPE b, TYPE c) \ > + { return a * -b + c; } > + > +#define TEST_TYPE(TYPE, SIZE) \ > + typedef TYPE TYPE##SIZE __attribute__((vector_size(SIZE))); \ > + BFMLA (TYPE##SIZE) \ > + BFMLS (TYPE##SIZE) > + > +#pragma GCC target "arch=armv9-a+sve-b16b16" > + > +TEST_TYPE (__bf16, 128) > + > +TEST_TYPE (__bf16, 64) > + > +/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.s} 2 } } */ > +/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.d} 2 } } */ > + > +/* { dg-final { scan-assembler-times {\tbfmla\tz[0-9]+\.h, p[0-7]/m, > z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */ > +/* { dg-final { scan-assembler-times {\tbfmls\tz[0-9]+\.h, p[0-7]/m, > z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */ > diff --git a/gcc/testsuite/g++.target/aarch64/sve/unpacked_ternary_bf16_2.C > b/gcc/testsuite/g++.target/aarch64/sve/unpacked_ternary_bf16_2.C > new file mode 100644 > index 00000000000..ef37400dc3b > --- /dev/null > +++ b/gcc/testsuite/g++.target/aarch64/sve/unpacked_ternary_bf16_2.C > @@ -0,0 +1,11 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O2 -msve-vector-bits=2048 -fno-trapping-math" } */ > + > +#include "unpacked_ternary_bf16_1.C" > + > +/* { dg-final { scan-assembler-not {\tptrue\tp[0-7]\.s} } } */ > +/* { dg-final { scan-assembler-not {\tptrue\tp[0-7]\.d} } } */ > +/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.b} 4 } } */ > + > +/* { dg-final { scan-assembler-times {\tbfmla\tz[0-9]+\.h, p[0-7]/m, > z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */ > +/* { dg-final { scan-assembler-times {\tbfmls\tz[0-9]+\.h, p[0-7]/m, > z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */ > diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_fmla_1.c > 
b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_fmla_1.c > new file mode 100644 > index 00000000000..f435689e297 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_fmla_1.c > @@ -0,0 +1,34 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O2 -msve-vector-bits=2048 -ftree-vectorize" } */ > + > +#include <stdint.h> > + > +#define FMLA(SUFF) __builtin_fma##SUFF (a[i], b[i], c[i]) > +#define FMLS(SUFF) __builtin_fma##SUFF (a[i], -b[i], c[i]) > +#define FNMLA(SUFF) -FMLA (SUFF) > +#define FNMLS(SUFF) -FMLS (SUFF) > + > +#define TEST_FN(FN, TYPE0, TYPE1, COUNT) \ > + void \ > + f_##TYPE0##_##TYPE1 (TYPE1 *__restrict out, \ > + TYPE0 *__restrict a, \ > + TYPE0 *__restrict b, \ > + TYPE0 *__restrict c, \ > + TYPE0 *__restrict d) \ > + { \ > + for (unsigned int i = 0; i < COUNT; i++) \ > + if (FN > d[i]) \ > + out[i] = 3; \ > + } > + > +TEST_FN (FMLA (f16), _Float16, uint64_t, 32) > + > +TEST_FN (FMLA (f16), _Float16, uint32_t, 64) > + > +TEST_FN (FMLA (f32), float, uint64_t, 32) > + > +/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.s} 1 } } */ > +/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.d} 2 } } */ > + > +/* { dg-final { scan-assembler-times {\t(fmla|fmad)\tz[0-9]+\.s, p[0-7]/m, > z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */ > +/* { dg-final { scan-assembler-times {\t(fmla|fmad)\tz[0-9]+\.h, p[0-7]/m, > z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */ > diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_fmla_2.c > b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_fmla_2.c > new file mode 100644 > index 00000000000..d32615d8766 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_fmla_2.c > @@ -0,0 +1,11 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O2 -ftree-vectorize -msve-vector-bits=2048 > -fno-trapping-math" } */ > + > +#include "unpacked_fmla_1.c" > + > +/* { dg-final { scan-assembler-not {\tptrue\tp[0-7]\.s} } } */ > +/* { dg-final { scan-assembler-not {\tptrue\tp[0-7]\.d} } } */ > +/* { dg-final { scan-assembler-times 
{\tptrue\tp[0-7]\.b} 3 } } */ > + > +/* { dg-final { scan-assembler-times {\t(fmla|fmad)\tz[0-9]+\.s, p[0-7]/m, > z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */ > +/* { dg-final { scan-assembler-times {\t(fmla|fmad)\tz[0-9]+\.h, p[0-7]/m, > z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */ > diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_fmls_1.c > b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_fmls_1.c > new file mode 100644 > index 00000000000..67cd088bd61 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_fmls_1.c > @@ -0,0 +1,34 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O2 -msve-vector-bits=2048 -ftree-vectorize" } */ > + > +#include <stdint.h> > + > +#define FMLA(SUFF) __builtin_fma##SUFF (a[i], b[i], c[i]) > +#define FMLS(SUFF) __builtin_fma##SUFF (a[i], -b[i], c[i]) > +#define FNMLA(SUFF) -FMLA (SUFF) > +#define FNMLS(SUFF) -FMLS (SUFF) > + > +#define TEST_FN(FN, TYPE0, TYPE1, COUNT) \ > + void \ > + f_##TYPE0##_##TYPE1 (TYPE1 *__restrict out, \ > + TYPE0 *__restrict a, \ > + TYPE0 *__restrict b, \ > + TYPE0 *__restrict c, \ > + TYPE0 *__restrict d) \ > + { \ > + for (unsigned int i = 0; i < COUNT; i++) \ > + if (FN > d[i]) \ > + out[i] = 3; \ > + } > + > +TEST_FN (FMLS (f16), _Float16, uint64_t, 32) > + > +TEST_FN (FMLS (f16), _Float16, uint32_t, 64) > + > +TEST_FN (FMLS (f32), float, uint64_t, 32) > + > +/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.s} 1 } } */ > +/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.d} 2 } } */ > + > +/* { dg-final { scan-assembler-times {\t(fmls|fmsb)\tz[0-9]+\.s, p[0-7]/m, > z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */ > +/* { dg-final { scan-assembler-times {\t(fmls|fmsb)\tz[0-9]+\.h, p[0-7]/m, > z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */ > diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_fmls_2.c > b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_fmls_2.c > new file mode 100644 > index 00000000000..f1f2200593b > --- /dev/null > +++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_fmls_2.c > @@ -0,0 +1,11 
@@ > +/* { dg-do compile } */ > +/* { dg-options "-O2 -ftree-vectorize -msve-vector-bits=2048 > -fno-trapping-math" } */ > + > +#include "unpacked_fmls_1.c" > + > +/* { dg-final { scan-assembler-not {\tptrue\tp[0-7]\.s} } } */ > +/* { dg-final { scan-assembler-not {\tptrue\tp[0-7]\.d} } } */ > +/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.b} 3 } } */ > + > +/* { dg-final { scan-assembler-times {\t(fmls|fmsb)\tz[0-9]+\.s, p[0-7]/m, > z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */ > +/* { dg-final { scan-assembler-times {\t(fmls|fmsb)\tz[0-9]+\.h, p[0-7]/m, > z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */ > diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_fnmla_1.c > b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_fnmla_1.c > new file mode 100644 > index 00000000000..90e92069649 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_fnmla_1.c > @@ -0,0 +1,34 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O2 -msve-vector-bits=2048 -ftree-vectorize" } */ > + > +#include <stdint.h> > + > +#define FMLA(SUFF) __builtin_fma##SUFF (a[i], b[i], c[i]) > +#define FMLS(SUFF) __builtin_fma##SUFF (a[i], -b[i], c[i]) > +#define FNMLA(SUFF) -FMLA (SUFF) > +#define FNMLS(SUFF) -FMLS (SUFF) > + > +#define TEST_FN(FN, TYPE0, TYPE1, COUNT) \ > + void \ > + f_##TYPE0##_##TYPE1 (TYPE1 *__restrict out, \ > + TYPE0 *__restrict a, \ > + TYPE0 *__restrict b, \ > + TYPE0 *__restrict c, \ > + TYPE0 *__restrict d) \ > + { \ > + for (unsigned int i = 0; i < COUNT; i++) \ > + if (FN > d[i]) \ > + out[i] = 3; \ > + } > + > +TEST_FN (FNMLA (f16), _Float16, uint64_t, 32) > + > +TEST_FN (FNMLA (f16), _Float16, uint32_t, 64) > + > +TEST_FN (FNMLA (f32), float, uint64_t, 32) > + > +/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.s} 1 } } */ > +/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.d} 2 } } */ > + > +/* { dg-final { scan-assembler-times {\t(fnmla|fnmad)\tz[0-9]+\.s, p[0-7]/m, > z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */ > +/* { dg-final { scan-assembler-times 
{\t(fnmla|fnmad)\tz[0-9]+\.h, p[0-7]/m, > z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */ > diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_fnmla_2.c > b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_fnmla_2.c > new file mode 100644 > index 00000000000..71b51f3a6dd > --- /dev/null > +++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_fnmla_2.c > @@ -0,0 +1,11 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O2 -ftree-vectorize -msve-vector-bits=2048 > -fno-trapping-math" } */ > + > +#include "unpacked_fnmla_1.c" > + > +/* { dg-final { scan-assembler-not {\tptrue\tp[0-7]\.s} } } */ > +/* { dg-final { scan-assembler-not {\tptrue\tp[0-7]\.d} } } */ > +/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.b} 3 } } */ > + > +/* { dg-final { scan-assembler-times {\t(fnmla|fnmad)\tz[0-9]+\.s, p[0-7]/m, > z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */ > +/* { dg-final { scan-assembler-times {\t(fnmla|fnmad)\tz[0-9]+\.h, p[0-7]/m, > z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */ > diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_fnmls_1.c > b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_fnmls_1.c > new file mode 100644 > index 00000000000..dbf58d014a2 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_fnmls_1.c > @@ -0,0 +1,34 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O2 -msve-vector-bits=2048 -ftree-vectorize" } */ > + > +#include <stdint.h> > + > +#define FMLA(SUFF) __builtin_fma##SUFF (a[i], b[i], c[i]) > +#define FMLS(SUFF) __builtin_fma##SUFF (a[i], -b[i], c[i]) > +#define FNMLA(SUFF) -FMLA (SUFF) > +#define FNMLS(SUFF) -FMLS (SUFF) > + > +#define TEST_FN(FN, TYPE0, TYPE1, COUNT) \ > + void \ > + f_##TYPE0##_##TYPE1 (TYPE1 *__restrict out, \ > + TYPE0 *__restrict a, \ > + TYPE0 *__restrict b, \ > + TYPE0 *__restrict c, \ > + TYPE0 *__restrict d) \ > + { \ > + for (unsigned int i = 0; i < COUNT; i++) \ > + if (FN > d[i]) \ > + out[i] = 3; \ > + } > + > +TEST_FN (FNMLS (f16), _Float16, uint64_t, 32) > + > +TEST_FN (FNMLS (f16), _Float16, uint32_t, 64) > + 
> +TEST_FN (FNMLS (f32), float, uint64_t, 32) > + > +/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.s} 1 } } */ > +/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.d} 2 } } */ > + > +/* { dg-final { scan-assembler-times {\t(fnmls|fnmsb)\tz[0-9]+\.s, p[0-7]/m, > z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */ > +/* { dg-final { scan-assembler-times {\t(fnmls|fnmsb)\tz[0-9]+\.h, p[0-7]/m, > z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */ > diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_fnmls_2.c > b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_fnmls_2.c > new file mode 100644 > index 00000000000..d904e1b6422 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_fnmls_2.c > @@ -0,0 +1,11 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O2 -ftree-vectorize -msve-vector-bits=2048 > -fno-trapping-math" } */ > + > +#include "unpacked_fnmls_1.c" > + > +/* { dg-final { scan-assembler-not {\tptrue\tp[0-7]\.s} } } */ > +/* { dg-final { scan-assembler-not {\tptrue\tp[0-7]\.d} } } */ > +/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.b} 3 } } */ > + > +/* { dg-final { scan-assembler-times {\t(fnmls|fnmsb)\tz[0-9]+\.s, p[0-7]/m, > z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */ > +/* { dg-final { scan-assembler-times {\t(fnmls|fnmsb)\tz[0-9]+\.h, p[0-7]/m, > z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */