This patch extends the MLS/MSB patterns to support unpacked integer vectors. The type suffix could be either the element size or the container size, but using the element size should be more efficient.
Tested on aarch64-linux-gnu and aarch64_be-elf, pushed to trunk. Richard gcc/ * config/aarch64/aarch64-sve.md (fnma<mode>4): Extend from SVE_FULL_I to SVE_I. (@aarch64_pred_fnma<mode>, cond_fnma<mode>, *cond_fnma<mode>_2) (*cond_fnma<mode>_4, *cond_fnma<mode>_any): Likewise. gcc/testsuite/ * gcc.target/aarch64/sve/mls_2.c: New test. * g++.target/aarch64/sve/cond_mls_1.C: Likewise. * g++.target/aarch64/sve/cond_mls_2.C: Likewise. * g++.target/aarch64/sve/cond_mls_3.C: Likewise. * g++.target/aarch64/sve/cond_mls_4.C: Likewise. * g++.target/aarch64/sve/cond_mls_5.C: Likewise. --- gcc/config/aarch64/aarch64-sve.md | 88 +++++++++---------- .../g++.target/aarch64/sve/cond_mls_1.C | 33 +++++++ .../g++.target/aarch64/sve/cond_mls_2.C | 33 +++++++ .../g++.target/aarch64/sve/cond_mls_3.C | 33 +++++++ .../g++.target/aarch64/sve/cond_mls_4.C | 36 ++++++++ .../g++.target/aarch64/sve/cond_mls_5.C | 33 +++++++ gcc/testsuite/gcc.target/aarch64/sve/mls_2.c | 34 +++++++ 7 files changed, 246 insertions(+), 44 deletions(-) create mode 100644 gcc/testsuite/g++.target/aarch64/sve/cond_mls_1.C create mode 100644 gcc/testsuite/g++.target/aarch64/sve/cond_mls_2.C create mode 100644 gcc/testsuite/g++.target/aarch64/sve/cond_mls_3.C create mode 100644 gcc/testsuite/g++.target/aarch64/sve/cond_mls_4.C create mode 100644 gcc/testsuite/g++.target/aarch64/sve/cond_mls_5.C create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/mls_2.c diff --git a/gcc/config/aarch64/aarch64-sve.md b/gcc/config/aarch64/aarch64-sve.md index ac8a9b4b167..da15bd87885 100644 --- a/gcc/config/aarch64/aarch64-sve.md +++ b/gcc/config/aarch64/aarch64-sve.md @@ -6695,14 +6695,14 @@ (define_insn_and_rewrite "*cond_fma<mode>_any" ;; Unpredicated integer subtraction of product. (define_expand "fnma<mode>4" - [(set (match_operand:SVE_FULL_I 0 "register_operand") - (minus:SVE_FULL_I - (match_operand:SVE_FULL_I 3 "register_operand") - (unspec:SVE_FULL_I + [(set (match_operand:SVE_I 0 "register_operand") + (minus:SVE_I + (match_operand:SVE_I 3 "register_operand") + (unspec:SVE_I [(match_dup 4) - (mult:SVE_FULL_I - (match_operand:SVE_FULL_I 1 "register_operand") - (match_operand:SVE_FULL_I 2 "general_operand"))] + (mult:SVE_I + (match_operand:SVE_I 1 "register_operand") + (match_operand:SVE_I 2 "general_operand"))] UNSPEC_PRED_X)))] "TARGET_SVE" { @@ -6714,14 +6714,14 @@ (define_expand "fnma<mode>4" ;; Predicated integer subtraction of product. (define_insn "@aarch64_pred_fnma<mode>" - [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, w, ?&w") - (minus:SVE_FULL_I - (match_operand:SVE_FULL_I 4 "register_operand" "w, 0, w") - (unspec:SVE_FULL_I + [(set (match_operand:SVE_I 0 "register_operand" "=w, w, ?&w") + (minus:SVE_I + (match_operand:SVE_I 4 "register_operand" "w, 0, w") + (unspec:SVE_I [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl") - (mult:SVE_FULL_I - (match_operand:SVE_FULL_I 2 "register_operand" "%0, w, w") - (match_operand:SVE_FULL_I 3 "register_operand" "w, w, w"))] + (mult:SVE_I + (match_operand:SVE_I 2 "register_operand" "%0, w, w") + (match_operand:SVE_I 3 "register_operand" "w, w, w"))] UNSPEC_PRED_X)))] "TARGET_SVE" "@ @@ -6733,15 +6733,15 @@ (define_insn "@aarch64_pred_fnma<mode>" ;; Predicated integer subtraction of product with merging. (define_expand "cond_fnma<mode>" - [(set (match_operand:SVE_FULL_I 0 "register_operand") - (unspec:SVE_FULL_I + [(set (match_operand:SVE_I 0 "register_operand") + (unspec:SVE_I [(match_operand:<VPRED> 1 "register_operand") - (minus:SVE_FULL_I - (match_operand:SVE_FULL_I 4 "register_operand") - (mult:SVE_FULL_I - (match_operand:SVE_FULL_I 2 "register_operand") - (match_operand:SVE_FULL_I 3 "general_operand"))) - (match_operand:SVE_FULL_I 5 "aarch64_simd_reg_or_zero")] + (minus:SVE_I + (match_operand:SVE_I 4 "register_operand") + (mult:SVE_I + (match_operand:SVE_I 2 "register_operand") + (match_operand:SVE_I 3 "general_operand"))) + (match_operand:SVE_I 5 "aarch64_simd_reg_or_zero")] UNSPEC_SEL))] "TARGET_SVE" { @@ -6756,14 +6756,14 @@ (define_expand "cond_fnma<mode>" ;; Predicated integer subtraction of product, merging with the first input. (define_insn "*cond_fnma<mode>_2" - [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w") - (unspec:SVE_FULL_I + [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w") + (unspec:SVE_I [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl") - (minus:SVE_FULL_I - (match_operand:SVE_FULL_I 4 "register_operand" "w, w") - (mult:SVE_FULL_I - (match_operand:SVE_FULL_I 2 "register_operand" "0, w") - (match_operand:SVE_FULL_I 3 "register_operand" "w, w"))) + (minus:SVE_I + (match_operand:SVE_I 4 "register_operand" "w, w") + (mult:SVE_I + (match_operand:SVE_I 2 "register_operand" "0, w") + (match_operand:SVE_I 3 "register_operand" "w, w"))) (match_dup 2)] UNSPEC_SEL))] "TARGET_SVE" @@ -6775,14 +6775,14 @@ (define_insn "*cond_fnma<mode>_2" ;; Predicated integer subtraction of product, merging with the third input. (define_insn "*cond_fnma<mode>_4" - [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w") - (unspec:SVE_FULL_I + [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w") + (unspec:SVE_I [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl") - (minus:SVE_FULL_I - (match_operand:SVE_FULL_I 4 "register_operand" "0, w") - (mult:SVE_FULL_I - (match_operand:SVE_FULL_I 2 "register_operand" "w, w") - (match_operand:SVE_FULL_I 3 "register_operand" "w, w"))) + (minus:SVE_I + (match_operand:SVE_I 4 "register_operand" "0, w") + (mult:SVE_I + (match_operand:SVE_I 2 "register_operand" "w, w") + (match_operand:SVE_I 3 "register_operand" "w, w"))) (match_dup 4)] UNSPEC_SEL))] "TARGET_SVE" @@ -6795,15 +6795,15 @@ (define_insn "*cond_fnma<mode>_4" ;; Predicated integer subtraction of product, merging with an ;; independent value. (define_insn_and_rewrite "*cond_fnma<mode>_any" - [(set (match_operand:SVE_FULL_I 0 "register_operand" "=&w, &w, &w, &w, &w, ?&w") - (unspec:SVE_FULL_I + [(set (match_operand:SVE_I 0 "register_operand" "=&w, &w, &w, &w, &w, ?&w") + (unspec:SVE_I [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl") - (minus:SVE_FULL_I - (match_operand:SVE_FULL_I 4 "register_operand" "w, 0, w, w, w, w") - (mult:SVE_FULL_I - (match_operand:SVE_FULL_I 2 "register_operand" "w, w, 0, w, w, w") - (match_operand:SVE_FULL_I 3 "register_operand" "w, w, w, 0, w, w"))) - (match_operand:SVE_FULL_I 5 "aarch64_simd_reg_or_zero" "Dz, Dz, Dz, Dz, 0, w")] + (minus:SVE_I + (match_operand:SVE_I 4 "register_operand" "w, 0, w, w, w, w") + (mult:SVE_I + (match_operand:SVE_I 2 "register_operand" "w, w, 0, w, w, w") + (match_operand:SVE_I 3 "register_operand" "w, w, w, 0, w, w"))) + (match_operand:SVE_I 5 "aarch64_simd_reg_or_zero" "Dz, Dz, Dz, Dz, 0, w")] UNSPEC_SEL))] "TARGET_SVE && !rtx_equal_p (operands[2], operands[5]) diff --git a/gcc/testsuite/g++.target/aarch64/sve/cond_mls_1.C b/gcc/testsuite/g++.target/aarch64/sve/cond_mls_1.C new file mode 100644 index 00000000000..f472db4743a --- /dev/null +++ b/gcc/testsuite/g++.target/aarch64/sve/cond_mls_1.C @@ -0,0 +1,33 @@ +/* { dg-do assemble { target aarch64_asm_sve_ok } } */ +/* { dg-options "-O -msve-vector-bits=2048 -save-temps" } */ + +#include <stdint.h> + +#define TEST_OP(TYPE) \ + TYPE \ + test##_##TYPE##_reg (TYPE a, TYPE b, TYPE c, TYPE d) \ + { \ + return d == 0 ? a - b * c : a; \ + } + +#define TEST_TYPE(TYPE, SIZE) \ + typedef TYPE TYPE##SIZE __attribute__((vector_size(SIZE))); \ + TEST_OP (TYPE##SIZE) + +TEST_TYPE (uint8_t, 32) + +TEST_TYPE (uint8_t, 64) +TEST_TYPE (uint16_t, 64) + +TEST_TYPE (uint8_t, 128) +TEST_TYPE (uint16_t, 128) +TEST_TYPE (uint32_t, 128) + +/* { dg-final { scan-assembler {\tld1b\t(z[0-9]+)\.h, p[0-7]/z, \[x0\][^L]*\tmls\t\1\.b,} } } */ +/* { dg-final { scan-assembler {\tld1b\t(z[0-9]+)\.s, p[0-7]/z, \[x0\][^L]*\tmls\t\1\.b,} } } */ +/* { dg-final { scan-assembler {\tld1b\t(z[0-9]+)\.d, p[0-7]/z, \[x0\][^L]*\tmls\t\1\.b,} } } */ +/* { dg-final { scan-assembler {\tld1h\t(z[0-9]+)\.s, p[0-7]/z, \[x0\][^L]*\tmls\t\1\.h,} } } */ +/* { dg-final { scan-assembler {\tld1h\t(z[0-9]+)\.d, p[0-7]/z, \[x0\][^L]*\tmls\t\1\.h,} } } */ +/* { dg-final { scan-assembler {\tld1w\t(z[0-9]+)\.d, p[0-7]/z, \[x0\][^L]*\tmls\t\1\.s,} } } */ + +/* { dg-final { scan-assembler-not {\tsel\t} } } */ diff --git a/gcc/testsuite/g++.target/aarch64/sve/cond_mls_2.C b/gcc/testsuite/g++.target/aarch64/sve/cond_mls_2.C new file mode 100644 index 00000000000..f10b4615c6e --- /dev/null +++ b/gcc/testsuite/g++.target/aarch64/sve/cond_mls_2.C @@ -0,0 +1,33 @@ +/* { dg-do assemble { target aarch64_asm_sve_ok } } */ +/* { dg-options "-O -msve-vector-bits=2048 -save-temps" } */ + +#include <stdint.h> + +#define TEST_OP(TYPE) \ + TYPE \ + test##_##TYPE##_reg (TYPE a, TYPE b, TYPE c, TYPE d) \ + { \ + return d == 0 ? a - b * c : b; \ + } + +#define TEST_TYPE(TYPE, SIZE) \ + typedef TYPE TYPE##SIZE __attribute__((vector_size(SIZE))); \ + TEST_OP (TYPE##SIZE) + +TEST_TYPE (uint8_t, 32) + +TEST_TYPE (uint8_t, 64) +TEST_TYPE (uint16_t, 64) + +TEST_TYPE (uint8_t, 128) +TEST_TYPE (uint16_t, 128) +TEST_TYPE (uint32_t, 128) + +/* { dg-final { scan-assembler {\tld1b\t(z[0-9]+)\.h, p[0-7]/z, \[x1\][^L]*\tmsb\t\1\.b,} } } */ +/* { dg-final { scan-assembler {\tld1b\t(z[0-9]+)\.s, p[0-7]/z, \[x1\][^L]*\tmsb\t\1\.b,} } } */ +/* { dg-final { scan-assembler {\tld1b\t(z[0-9]+)\.d, p[0-7]/z, \[x1\][^L]*\tmsb\t\1\.b,} } } */ +/* { dg-final { scan-assembler {\tld1h\t(z[0-9]+)\.s, p[0-7]/z, \[x1\][^L]*\tmsb\t\1\.h,} } } */ +/* { dg-final { scan-assembler {\tld1h\t(z[0-9]+)\.d, p[0-7]/z, \[x1\][^L]*\tmsb\t\1\.h,} } } */ +/* { dg-final { scan-assembler {\tld1w\t(z[0-9]+)\.d, p[0-7]/z, \[x1\][^L]*\tmsb\t\1\.s,} } } */ + +/* { dg-final { scan-assembler-not {\tsel\t} } } */ diff --git a/gcc/testsuite/g++.target/aarch64/sve/cond_mls_3.C b/gcc/testsuite/g++.target/aarch64/sve/cond_mls_3.C new file mode 100644 index 00000000000..770d963dfee --- /dev/null +++ b/gcc/testsuite/g++.target/aarch64/sve/cond_mls_3.C @@ -0,0 +1,33 @@ +/* { dg-do assemble { target aarch64_asm_sve_ok } } */ +/* { dg-options "-O2 -msve-vector-bits=2048 -save-temps" } */ + +#include <stdint.h> + +#define TEST_OP(TYPE) \ + TYPE \ + test##_##TYPE##_reg (TYPE a, TYPE b, TYPE c, TYPE d) \ + { \ + return d == 0 ? a - b * c : c; \ + } + +#define TEST_TYPE(TYPE, SIZE) \ + typedef TYPE TYPE##SIZE __attribute__((vector_size(SIZE))); \ + TEST_OP (TYPE##SIZE) + +TEST_TYPE (uint8_t, 32) + +TEST_TYPE (uint8_t, 64) +TEST_TYPE (uint16_t, 64) + +TEST_TYPE (uint8_t, 128) +TEST_TYPE (uint16_t, 128) +TEST_TYPE (uint32_t, 128) + +/* { dg-final { scan-assembler {\tld1b\t(z[0-9]+)\.h, p[0-7]/z, \[x2\][^L]*\tmsb\t\1\.b,} } } */ +/* { dg-final { scan-assembler {\tld1b\t(z[0-9]+)\.s, p[0-7]/z, \[x2\][^L]*\tmsb\t\1\.b,} } } */ +/* { dg-final { scan-assembler {\tld1b\t(z[0-9]+)\.d, p[0-7]/z, \[x2\][^L]*\tmsb\t\1\.b,} } } */ +/* { dg-final { scan-assembler {\tld1h\t(z[0-9]+)\.s, p[0-7]/z, \[x2\][^L]*\tmsb\t\1\.h,} } } */ +/* { dg-final { scan-assembler {\tld1h\t(z[0-9]+)\.d, p[0-7]/z, \[x2\][^L]*\tmsb\t\1\.h,} } } */ +/* { dg-final { scan-assembler {\tld1w\t(z[0-9]+)\.d, p[0-7]/z, \[x2\][^L]*\tmsb\t\1\.s,} } } */ + +/* { dg-final { scan-assembler-not {\tsel\t} } } */ diff --git a/gcc/testsuite/g++.target/aarch64/sve/cond_mls_4.C b/gcc/testsuite/g++.target/aarch64/sve/cond_mls_4.C new file mode 100644 index 00000000000..fac8d95f95f --- /dev/null +++ b/gcc/testsuite/g++.target/aarch64/sve/cond_mls_4.C @@ -0,0 +1,36 @@ +/* { dg-do assemble { target aarch64_asm_sve_ok } } */ +/* { dg-options "-O -msve-vector-bits=2048 -save-temps" } */ + +#include <stdint.h> + +#define TEST_OP(TYPE) \ + TYPE \ + test##_##TYPE##_reg (TYPE a, TYPE b, TYPE c, TYPE d) \ + { \ + return d == 0 ? a - b * c : d; \ + } + +#define TEST_TYPE(TYPE, SIZE) \ + typedef TYPE TYPE##SIZE __attribute__((vector_size(SIZE))); \ + TEST_OP (TYPE##SIZE) + +TEST_TYPE (uint8_t, 32) + +TEST_TYPE (uint8_t, 64) +TEST_TYPE (uint16_t, 64) + +TEST_TYPE (uint8_t, 128) +TEST_TYPE (uint16_t, 128) +TEST_TYPE (uint32_t, 128) + +/* { dg-final { scan-assembler {\tld1b\t(z[0-9]+)\.h, p[0-7]/z, \[x3\][^L]*\tmls\t\1\.b,} } } */ +/* { dg-final { scan-assembler {\tld1b\t(z[0-9]+)\.s, p[0-7]/z, \[x3\][^L]*\tmls\t\1\.b,} } } */ +/* { dg-final { scan-assembler {\tld1b\t(z[0-9]+)\.d, p[0-7]/z, \[x3\][^L]*\tmls\t\1\.b,} } } */ +/* { dg-final { scan-assembler {\tld1h\t(z[0-9]+)\.s, p[0-7]/z, \[x3\][^L]*\tmls\t\1\.h,} } } */ +/* { dg-final { scan-assembler {\tld1h\t(z[0-9]+)\.d, p[0-7]/z, \[x3\][^L]*\tmls\t\1\.h,} } } */ +/* { dg-final { scan-assembler {\tld1w\t(z[0-9]+)\.d, p[0-7]/z, \[x3\][^L]*\tmls\t\1\.s,} } } */ + +/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.b, p[0-7]/m, z[0-9]+\.b\n} 3 } } */ +/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s\n} 1 } } */ +/* { dg-final { scan-assembler-not {\tsel\t} } } */ diff --git a/gcc/testsuite/g++.target/aarch64/sve/cond_mls_5.C b/gcc/testsuite/g++.target/aarch64/sve/cond_mls_5.C new file mode 100644 index 00000000000..82b89b4911b --- /dev/null +++ b/gcc/testsuite/g++.target/aarch64/sve/cond_mls_5.C @@ -0,0 +1,33 @@ +/* { dg-do assemble { target aarch64_asm_sve_ok } } */ +/* { dg-options "-O -msve-vector-bits=2048 -save-temps" } */ + +#include <stdint.h> + +#define TEST_OP(TYPE) \ + TYPE \ + test##_##TYPE##_reg (TYPE a, TYPE b, TYPE c, TYPE d) \ + { \ + return d == 0 ? a - b * c : 0; \ + } + +#define TEST_TYPE(TYPE, SIZE) \ + typedef TYPE TYPE##SIZE __attribute__((vector_size(SIZE))); \ + TEST_OP (TYPE##SIZE) + +TEST_TYPE (uint8_t, 32) + +TEST_TYPE (uint8_t, 64) +TEST_TYPE (uint16_t, 64) + +TEST_TYPE (uint8_t, 128) +TEST_TYPE (uint16_t, 128) +TEST_TYPE (uint32_t, 128) + +/* { dg-final { scan-assembler-times {\t(?:mls|msb)\tz[0-9]+\.b,} 3 } } */ +/* { dg-final { scan-assembler-times {\t(?:mls|msb)\tz[0-9]+\.h,} 2 } } */ +/* { dg-final { scan-assembler-times {\t(?:mls|msb)\tz[0-9]+\.s,} 1 } } */ + +/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.b, p[0-7]/z, z[0-9]+\.b\n} 3 } } */ +/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.h, p[0-7]/z, z[0-9]+\.h\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.s, p[0-7]/z, z[0-9]+\.s\n} 1 } } */ +/* { dg-final { scan-assembler-not {\tsel\t} } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/mls_2.c b/gcc/testsuite/gcc.target/aarch64/sve/mls_2.c new file mode 100644 index 00000000000..b08812e06ca --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/mls_2.c @@ -0,0 +1,34 @@ +/* { dg-do assemble { target aarch64_asm_sve_ok } } */ +/* { dg-options "-O -msve-vector-bits=2048 -save-temps" } */ + +#include <stdint.h> + +#define TEST_OP(TYPE) \ + TYPE \ + test##_##TYPE##_##AMT (TYPE a, TYPE b, TYPE c) \ + { \ + return a - b * c; \ + } + +#define TEST_TYPE(TYPE, SIZE) \ + typedef TYPE TYPE##SIZE __attribute__((vector_size(SIZE))); \ + TEST_OP (TYPE##SIZE) + +TEST_TYPE (int8_t, 32) +TEST_TYPE (uint8_t, 32) + +TEST_TYPE (int8_t, 64) +TEST_TYPE (uint8_t, 64) +TEST_TYPE (int16_t, 64) +TEST_TYPE (uint16_t, 64) + +TEST_TYPE (int8_t, 128) +TEST_TYPE (uint8_t, 128) +TEST_TYPE (int16_t, 128) +TEST_TYPE (uint16_t, 128) +TEST_TYPE (int32_t, 128) +TEST_TYPE (uint32_t, 128) + +/* { dg-final { scan-assembler-times {\t(?:mls|msb)\tz[0-9]+\.b,} 6 } } */ +/* { dg-final { scan-assembler-times {\t(?:mls|msb)\tz[0-9]+\.h,} 4 } } */ +/* { dg-final { scan-assembler-times {\t(?:mls|msb)\tz[0-9]+\.s,} 2 } } */