This patch extends the expander for unpredicated round, nearbyint, floor, ceil, rint, and trunc, so that it can handle partial SVE FP modes.
We move fabs and fneg to a separate expander, since they are not trapping instructions. gcc/ChangeLog: * config/aarch64/aarch64-sve.md (<optab><mode>2): Replace use of aarch64_ptrue_reg with aarch64_sve_fp_pred. (@aarch64_pred_<optab><mode>) Extend from SVE_FULL_F to SVE_F, use aarch64_predicate_operand. * config/aarch64/iterators.md: Split FABS/FNEG out of SVE_COND_FP_UNARY (into SVE_COND_FP_UNARY_BITWISE). gcc/testsuite/ChangeLog: * gcc.target/aarch64/sve/unpacked_fabs_1.c: New test. * gcc.target/aarch64/sve/unpacked_fneg_1.c: Likewise. * gcc.target/aarch64/sve/unpacked_frinta_1.c: Likewise. * gcc.target/aarch64/sve/unpacked_frinta_2.c: Likewise. * gcc.target/aarch64/sve/unpacked_frinti_1.c: Likewise. * gcc.target/aarch64/sve/unpacked_frinti_2.c: Likewise. * gcc.target/aarch64/sve/unpacked_frintm_1.c: Likewise. * gcc.target/aarch64/sve/unpacked_frintm_2.c: Likewise. * gcc.target/aarch64/sve/unpacked_frintp_1.c: Likewiss. * gcc.target/aarch64/sve/unpacked_frintp_2.c: Likewise. * gcc.target/aarch64/sve/unpacked_frintx_1.c: Likewise. * gcc.target/aarch64/sve/unpacked_frintx_2.c: Likewise. * gcc.target/aarch64/sve/unpacked_frintz_1.c: Likewise. * gcc.target/aarch64/sve/unpacked_frintz_2.c: Likewise. --- gcc/config/aarch64/aarch64-sve.md | 31 ++++++++++++++----- gcc/config/aarch64/iterators.md | 14 ++++----- .../gcc.target/aarch64/sve/unpacked_fabs_1.c | 24 ++++++++++++++ .../gcc.target/aarch64/sve/unpacked_fneg_1.c | 26 ++++++++++++++++ .../aarch64/sve/unpacked_frinta_1.c | 27 ++++++++++++++++ .../aarch64/sve/unpacked_frinta_2.c | 11 +++++++ .../aarch64/sve/unpacked_frinti_1.c | 27 ++++++++++++++++ .../aarch64/sve/unpacked_frinti_2.c | 11 +++++++ .../aarch64/sve/unpacked_frintm_1.c | 27 ++++++++++++++++ .../aarch64/sve/unpacked_frintm_2.c | 11 +++++++ .../aarch64/sve/unpacked_frintp_1.c | 27 ++++++++++++++++ .../aarch64/sve/unpacked_frintp_2.c | 11 +++++++ .../aarch64/sve/unpacked_frintx_1.c | 27 ++++++++++++++++ .../aarch64/sve/unpacked_frintx_2.c | 11 +++++++ .../aarch64/sve/unpacked_frintz_1.c | 27 ++++++++++++++++ .../aarch64/sve/unpacked_frintz_2.c | 11 +++++++ 16 files changed, 308 insertions(+), 15 deletions(-) create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/unpacked_fabs_1.c create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/unpacked_fneg_1.c create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/unpacked_frinta_1.c create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/unpacked_frinta_2.c create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/unpacked_frinti_1.c create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/unpacked_frinti_2.c create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/unpacked_frintm_1.c create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/unpacked_frintm_2.c create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/unpacked_frintp_1.c create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/unpacked_frintp_2.c create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/unpacked_frintx_1.c create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/unpacked_frintx_2.c create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/unpacked_frintz_1.c create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/unpacked_frintz_2.c diff --git a/gcc/config/aarch64/aarch64-sve.md b/gcc/config/aarch64/aarch64-sve.md index 399d147c9a5..1a705e153cb 100644 --- a/gcc/config/aarch64/aarch64-sve.md +++ b/gcc/config/aarch64/aarch64-sve.md @@ -3762,13 +3762,28 @@ ;; Unpredicated floating-point unary operations. (define_expand "<optab><mode>2" - [(set (match_operand:SVE_FULL_F 0 "register_operand") - (unspec:SVE_FULL_F + [(set (match_operand:SVE_F 0 "register_operand") + (unspec:SVE_F [(match_dup 2) - (const_int SVE_RELAXED_GP) - (match_operand:SVE_FULL_F 1 "register_operand")] + (match_dup 3) + (match_operand:SVE_F 1 "register_operand")] SVE_COND_FP_UNARY_OPTAB))] "TARGET_SVE" + { + operands[2] = aarch64_sve_fp_pred (<MODE>mode, &operands[3]); + } +) + +;; FABS and FNEG are non-trapping, we can always expand with their +;; natural PTRUE. +(define_expand "<optab><mode>2" + [(set (match_operand:SVE_F 0 "register_operand") + (unspec:SVE_F + [(match_dup 2) + (const_int SVE_RELAXED_GP) + (match_operand:SVE_F 1 "register_operand")] + SVE_COND_FP_UNARY_BITWISE))] + "TARGET_SVE" { operands[2] = aarch64_ptrue_reg (<VPRED>mode); } @@ -3776,11 +3791,11 @@ ;; Predicated floating-point unary operations. (define_insn "@aarch64_pred_<optab><mode>" - [(set (match_operand:SVE_FULL_F 0 "register_operand") - (unspec:SVE_FULL_F - [(match_operand:<VPRED> 1 "register_operand") + [(set (match_operand:SVE_F 0 "register_operand") + (unspec:SVE_F + [(match_operand:<VPRED> 1 "aarch64_predicate_operand") (match_operand:SI 3 "aarch64_sve_gp_strictness") - (match_operand:SVE_FULL_F 2 "register_operand")] + (match_operand:SVE_F 2 "register_operand")] SVE_COND_FP_UNARY))] "TARGET_SVE" {@ [ cons: =0 , 1 , 2 ; attrs: movprfx ] diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md index 41e483bb80e..7d7d0732d29 100644 --- a/gcc/config/aarch64/iterators.md +++ b/gcc/config/aarch64/iterators.md @@ -3352,9 +3352,10 @@ UNSPEC_FMINQV UNSPEC_FMINNMQV]) -(define_int_iterator SVE_COND_FP_UNARY [UNSPEC_COND_FABS - UNSPEC_COND_FNEG - UNSPEC_COND_FRECPX +(define_int_iterator SVE_COND_FP_UNARY_BITWISE [UNSPEC_COND_FABS + UNSPEC_COND_FNEG]) + +(define_int_iterator SVE_COND_FP_UNARY [UNSPEC_COND_FRECPX UNSPEC_COND_FRINTA UNSPEC_COND_FRINTI UNSPEC_COND_FRINTM @@ -3362,13 +3363,12 @@ UNSPEC_COND_FRINTP UNSPEC_COND_FRINTX UNSPEC_COND_FRINTZ - UNSPEC_COND_FSQRT]) + UNSPEC_COND_FSQRT + SVE_COND_FP_UNARY_BITWISE]) ;; Same as SVE_COND_FP_UNARY, but without codes that have a dedicated ;; <optab><mode>2 expander. -(define_int_iterator SVE_COND_FP_UNARY_OPTAB [UNSPEC_COND_FABS - UNSPEC_COND_FNEG - UNSPEC_COND_FRECPX +(define_int_iterator SVE_COND_FP_UNARY_OPTAB [UNSPEC_COND_FRECPX UNSPEC_COND_FRINTA UNSPEC_COND_FRINTI UNSPEC_COND_FRINTM diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_fabs_1.c b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_fabs_1.c new file mode 100644 index 00000000000..b5d62de00da --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_fabs_1.c @@ -0,0 +1,24 @@ +/* { dg-do compile } */ +/* { dg-options "-O -msve-vector-bits=2048 -ftree-vectorize" } */ + +#include <stdint.h> + +#define TEST_FN(FN, TYPE0, TYPE1, COUNT) \ + void \ + f_##FN##_##TYPE0##_##TYPE1 (TYPE1 *__restrict out, \ + TYPE0 *__restrict a, \ + TYPE0 *__restrict b) \ + { \ + for (unsigned int i = 0; i < COUNT; i++) \ + if (FN (a[i]) > b[i]) \ + out[i] = 3; \ + } + +TEST_FN (__builtin_fabsf16, _Float16, uint64_t, 32) + +TEST_FN (__builtin_fabsf16, _Float16, uint32_t, 64) + +TEST_FN (__builtin_fabsf32, float, uint64_t, 32) + +/* { dg-final { scan-assembler-times {\tfabs\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tfabs\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h\n} 2 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_fneg_1.c b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_fneg_1.c new file mode 100644 index 00000000000..55d8c5b522c --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_fneg_1.c @@ -0,0 +1,26 @@ +/* { dg-do compile } */ +/* { dg-options "-O -msve-vector-bits=2048 -ftree-vectorize" } */ + +#include <stdint.h> + +#define NEG(X) -X + +#define TEST_FN(FN, TYPE0, TYPE1, COUNT) \ + void \ + f_##FN##_##TYPE0##_##TYPE1 (TYPE1 *__restrict out, \ + TYPE0 *__restrict a, \ + TYPE0 *__restrict b) \ + { \ + for (unsigned int i = 0; i < COUNT; i++) \ + if (FN (a[i]) > b[i]) \ + out[i] = 3; \ + } + +TEST_FN (NEG, _Float16, uint64_t, 32) + +TEST_FN (NEG, _Float16, uint32_t, 64) + +TEST_FN (NEG, float, uint64_t, 32) + +/* { dg-final { scan-assembler-times {\tfneg\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tfneg\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h\n} 2 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_frinta_1.c b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_frinta_1.c new file mode 100644 index 00000000000..e1e611f2144 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_frinta_1.c @@ -0,0 +1,27 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -msve-vector-bits=2048 -ftree-vectorize" } */ + +#include <stdint.h> + +#define TEST_FN(FN, TYPE0, TYPE1, COUNT) \ + void \ + f_##FN##_##TYPE0##_##TYPE1 (TYPE1 *__restrict out, \ + TYPE0 *__restrict a, \ + TYPE0 *__restrict b) \ + { \ + for (unsigned int i = 0; i < COUNT; i++) \ + if (FN (a[i]) > b[i]) \ + out[i] = 3; \ + } + +TEST_FN (__builtin_roundf16, _Float16, uint64_t, 32) + +TEST_FN (__builtin_roundf16, _Float16, uint32_t, 64) + +TEST_FN (__builtin_roundf32, float, uint64_t, 32) + +/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.s} 1 } } */ +/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.d} 2 } } */ + +/* { dg-final { scan-assembler-times {\tfrinta\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tfrinta\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h\n} 2 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_frinta_2.c b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_frinta_2.c new file mode 100644 index 00000000000..b810a177115 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_frinta_2.c @@ -0,0 +1,11 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -msve-vector-bits=2048 -ftree-vectorize -fno-trapping-math" } */ + +#include "unpacked_frinta_1.c" + +/* { dg-final { scan-assembler-not {\tptrue\tp[0-7]\.s} } } */ +/* { dg-final { scan-assembler-not {\tptrue\tp[0-7]\.d} } } */ +/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.b} 3 } } */ + +/* { dg-final { scan-assembler-times {\tfrinta\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tfrinta\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h\n} 2 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_frinti_1.c b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_frinti_1.c new file mode 100644 index 00000000000..d1d9c48da27 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_frinti_1.c @@ -0,0 +1,27 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -msve-vector-bits=2048 -ftree-vectorize" } */ + +#include <stdint.h> + +#define TEST_FN(FN, TYPE0, TYPE1, COUNT) \ + void \ + f_##FN##_##TYPE0##_##TYPE1 (TYPE1 *__restrict out, \ + TYPE0 *__restrict a, \ + TYPE0 *__restrict b) \ + { \ + for (unsigned int i = 0; i < COUNT; i++) \ + if (FN (a[i]) > b[i]) \ + out[i] = 3; \ + } + +TEST_FN (__builtin_nearbyintf16, _Float16, uint64_t, 32) + +TEST_FN (__builtin_nearbyintf16, _Float16, uint32_t, 64) + +TEST_FN (__builtin_nearbyintf32, float, uint64_t, 32) + +/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.s} 1 } } */ +/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.d} 2 } } */ + +/* { dg-final { scan-assembler-times {\tfrinti\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tfrinti\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h\n} 2 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_frinti_2.c b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_frinti_2.c new file mode 100644 index 00000000000..ca599fa7e2d --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_frinti_2.c @@ -0,0 +1,11 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -msve-vector-bits=2048 -ftree-vectorize -fno-trapping-math" } */ + +#include "unpacked_frinti_1.c" + +/* { dg-final { scan-assembler-not {\tptrue\tp[0-7]\.s} } } */ +/* { dg-final { scan-assembler-not {\tptrue\tp[0-7]\.d} } } */ +/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.b} 3 } } */ + +/* { dg-final { scan-assembler-times {\tfrinti\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tfrinti\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h\n} 2 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_frintm_1.c b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_frintm_1.c new file mode 100644 index 00000000000..df50ba79645 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_frintm_1.c @@ -0,0 +1,27 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -msve-vector-bits=2048 -ftree-vectorize" } */ + +#include <stdint.h> + +#define TEST_FN(FN, TYPE0, TYPE1, COUNT) \ + void \ + f_##FN##_##TYPE0##_##TYPE1 (TYPE1 *__restrict out, \ + TYPE0 *__restrict a, \ + TYPE0 *__restrict b) \ + { \ + for (unsigned int i = 0; i < COUNT; i++) \ + if (FN (a[i]) > b[i]) \ + out[i] = 3; \ + } + +TEST_FN (__builtin_floorf16, _Float16, uint64_t, 32) + +TEST_FN (__builtin_floorf16, _Float16, uint32_t, 64) + +TEST_FN (__builtin_floorf32, float, uint64_t, 32) + +/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.s} 1 } } */ +/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.d} 2 } } */ + +/* { dg-final { scan-assembler-times {\tfrintm\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tfrintm\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h\n} 2 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_frintm_2.c b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_frintm_2.c new file mode 100644 index 00000000000..ad025992f5c --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_frintm_2.c @@ -0,0 +1,11 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -msve-vector-bits=2048 -ftree-vectorize -fno-trapping-math" } */ + +#include "unpacked_frintm_1.c" + +/* { dg-final { scan-assembler-not {\tptrue\tp[0-7]\.s} } } */ +/* { dg-final { scan-assembler-not {\tptrue\tp[0-7]\.d} } } */ +/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.b} 3 } } */ + +/* { dg-final { scan-assembler-times {\tfrintm\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tfrintm\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h\n} 2 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_frintp_1.c b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_frintp_1.c new file mode 100644 index 00000000000..4769addc4e9 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_frintp_1.c @@ -0,0 +1,27 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -msve-vector-bits=2048 -ftree-vectorize" } */ + +#include <stdint.h> + +#define TEST_FN(FN, TYPE0, TYPE1, COUNT) \ + void \ + f_##FN##_##TYPE0##_##TYPE1 (TYPE1 *__restrict out, \ + TYPE0 *__restrict a, \ + TYPE0 *__restrict b) \ + { \ + for (unsigned int i = 0; i < COUNT; i++) \ + if (FN (a[i]) > b[i]) \ + out[i] = 3; \ + } + +TEST_FN (__builtin_ceilf16, _Float16, uint64_t, 32) + +TEST_FN (__builtin_ceilf16, _Float16, uint32_t, 64) + +TEST_FN (__builtin_ceilf32, float, uint64_t, 32) + +/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.s} 1 } } */ +/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.d} 2 } } */ + +/* { dg-final { scan-assembler-times {\tfrintp\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tfrintp\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h\n} 2 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_frintp_2.c b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_frintp_2.c new file mode 100644 index 00000000000..ce3033f055f --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_frintp_2.c @@ -0,0 +1,11 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -msve-vector-bits=2048 -ftree-vectorize -fno-trapping-math" } */ + +#include "unpacked_frintp_1.c" + +/* { dg-final { scan-assembler-not {\tptrue\tp[0-7]\.s} } } */ +/* { dg-final { scan-assembler-not {\tptrue\tp[0-7]\.d} } } */ +/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.b} 3 } } */ + +/* { dg-final { scan-assembler-times {\tfrintp\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tfrintp\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h\n} 2 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_frintx_1.c b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_frintx_1.c new file mode 100644 index 00000000000..103122055d1 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_frintx_1.c @@ -0,0 +1,27 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -msve-vector-bits=2048 -ftree-vectorize" } */ + +#include <stdint.h> + +#define TEST_FN(FN, TYPE0, TYPE1, COUNT) \ + void \ + f_##FN##_##TYPE0##_##TYPE1 (TYPE1 *__restrict out, \ + TYPE0 *__restrict a, \ + TYPE0 *__restrict b) \ + { \ + for (unsigned int i = 0; i < COUNT; i++) \ + if (FN (a[i]) > b[i]) \ + out[i] = 3; \ + } + +TEST_FN (__builtin_rintf16, _Float16, uint64_t, 32) + +TEST_FN (__builtin_rintf16, _Float16, uint32_t, 64) + +TEST_FN (__builtin_rintf32, float, uint64_t, 32) + +/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.s} 1 } } */ +/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.d} 2 } } */ + +/* { dg-final { scan-assembler-times {\tfrintx\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tfrintx\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h\n} 2 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_frintx_2.c b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_frintx_2.c new file mode 100644 index 00000000000..5600f864273 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_frintx_2.c @@ -0,0 +1,11 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -msve-vector-bits=2048 -ftree-vectorize -fno-trapping-math" } */ + +#include "unpacked_frintx_1.c" + +/* { dg-final { scan-assembler-not {\tptrue\tp[0-7]\.s} } } */ +/* { dg-final { scan-assembler-not {\tptrue\tp[0-7]\.d} } } */ +/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.b} 3 } } */ + +/* { dg-final { scan-assembler-times {\tfrintx\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tfrintx\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h\n} 2 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_frintz_1.c b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_frintz_1.c new file mode 100644 index 00000000000..31ce6f3c022 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_frintz_1.c @@ -0,0 +1,27 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -msve-vector-bits=2048 -ftree-vectorize" } */ + +#include <stdint.h> + +#define TEST_FN(FN, TYPE0, TYPE1, COUNT) \ + void \ + f_##FN##_##TYPE0##_##TYPE1 (TYPE1 *__restrict out, \ + TYPE0 *__restrict a, \ + TYPE0 *__restrict b) \ + { \ + for (unsigned int i = 0; i < COUNT; i++) \ + if (FN (a[i]) > b[i]) \ + out[i] = 3; \ + } + +TEST_FN (__builtin_truncf16, _Float16, uint64_t, 32) + +TEST_FN (__builtin_truncf16, _Float16, uint32_t, 64) + +TEST_FN (__builtin_truncf32, float, uint64_t, 32) + +/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.s} 1 } } */ +/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.d} 2 } } */ + +/* { dg-final { scan-assembler-times {\tfrintz\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tfrintz\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h\n} 2 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_frintz_2.c b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_frintz_2.c new file mode 100644 index 00000000000..eadc8529522 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_frintz_2.c @@ -0,0 +1,11 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -msve-vector-bits=2048 -ftree-vectorize -fno-trapping-math" } */ + +#include "unpacked_frintz_1.c" + +/* { dg-final { scan-assembler-not {\tptrue\tp[0-7]\.s} } } */ +/* { dg-final { scan-assembler-not {\tptrue\tp[0-7]\.d} } } */ +/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.b} 3 } } */ + +/* { dg-final { scan-assembler-times {\tfrintz\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tfrintz\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h\n} 2 } } */ -- 2.34.1