https://gcc.gnu.org/g:556ed247adc9857ebd89a5bdbcdc8f929f73bd1e
commit r16-2456-g556ed247adc9857ebd89a5bdbcdc8f929f73bd1e
Author: Spencer Abson <spencer.ab...@arm.com>
Date:   Mon Jul 7 16:49:17 2025 +0000

    aarch64: Add support for unpacked SVE FP unary operations

    This patch extends the expander for unpredicated round, nearbyint, floor,
    ceil, rint, and trunc, so that it can handle partial SVE FP modes.

    We move fabs and fneg to a separate expander, since they are not trapping
    instructions.

    gcc/ChangeLog:

            * config/aarch64/aarch64-sve.md (<optab><mode>2): Replace use of
            aarch64_ptrue_reg with aarch64_sve_fp_pred.
            (@aarch64_pred_<optab><mode>): Extend from SVE_FULL_F to SVE_F,
            and use aarch64_predicate_operand.
            * config/aarch64/iterators.md: Split FABS/FNEG out of
            SVE_COND_FP_UNARY (into new SVE_COND_FP_UNARY_BITWISE).

    gcc/testsuite/ChangeLog:

            * gcc.target/aarch64/sve/unpacked_fabs_1.c: New test.
            * gcc.target/aarch64/sve/unpacked_fneg_1.c: Likewise.
            * gcc.target/aarch64/sve/unpacked_frinta_1.c: Likewise.
            * gcc.target/aarch64/sve/unpacked_frinta_2.c: Likewise.
            * gcc.target/aarch64/sve/unpacked_frinti_1.c: Likewise.
            * gcc.target/aarch64/sve/unpacked_frinti_2.c: Likewise.
            * gcc.target/aarch64/sve/unpacked_frintm_1.c: Likewise.
            * gcc.target/aarch64/sve/unpacked_frintm_2.c: Likewise.
            * gcc.target/aarch64/sve/unpacked_frintp_1.c: Likewise.
            * gcc.target/aarch64/sve/unpacked_frintp_2.c: Likewise.
            * gcc.target/aarch64/sve/unpacked_frintx_1.c: Likewise.
            * gcc.target/aarch64/sve/unpacked_frintx_2.c: Likewise.
            * gcc.target/aarch64/sve/unpacked_frintz_1.c: Likewise.
            * gcc.target/aarch64/sve/unpacked_frintz_2.c: Likewise.

Diff: --- gcc/config/aarch64/aarch64-sve.md | 32 ++++++++++++++++------ gcc/config/aarch64/iterators.md | 14 +++++----- .../gcc.target/aarch64/sve/unpacked_fabs_1.c | 28 +++++++++++++++++++ .../gcc.target/aarch64/sve/unpacked_fneg_1.c | 30 ++++++++++++++++++++ .../gcc.target/aarch64/sve/unpacked_frinta_1.c | 31 +++++++++++++++++++++ .../gcc.target/aarch64/sve/unpacked_frinta_2.c | 15 ++++++++++ .../gcc.target/aarch64/sve/unpacked_frinti_1.c | 31 +++++++++++++++++++++ .../gcc.target/aarch64/sve/unpacked_frinti_2.c | 15 ++++++++++ .../gcc.target/aarch64/sve/unpacked_frintm_1.c | 31 +++++++++++++++++++++ .../gcc.target/aarch64/sve/unpacked_frintm_2.c | 15 ++++++++++ .../gcc.target/aarch64/sve/unpacked_frintp_1.c | 31 +++++++++++++++++++++ .../gcc.target/aarch64/sve/unpacked_frintp_2.c | 15 ++++++++++ .../gcc.target/aarch64/sve/unpacked_frintx_1.c | 31 +++++++++++++++++++++ .../gcc.target/aarch64/sve/unpacked_frintx_2.c | 15 ++++++++++ .../gcc.target/aarch64/sve/unpacked_frintz_1.c | 31 +++++++++++++++++++++ .../gcc.target/aarch64/sve/unpacked_frintz_2.c | 15 ++++++++++ 16 files changed, 365 insertions(+), 15 deletions(-) diff --git a/gcc/config/aarch64/aarch64-sve.md b/gcc/config/aarch64/aarch64-sve.md index d53d297e7e4e..9a8ff216999f 100644 --- a/gcc/config/aarch64/aarch64-sve.md +++ b/gcc/config/aarch64/aarch64-sve.md @@ -3762,13 +3762,29 @@ ;; Unpredicated floating-point unary operations. (define_expand "<optab><mode>2" - [(set (match_operand:SVE_FULL_F 0 "register_operand") - (unspec:SVE_FULL_F + [(set (match_operand:SVE_F 0 "register_operand") + (unspec:SVE_F [(match_dup 2) - (const_int SVE_RELAXED_GP) - (match_operand:SVE_FULL_F 1 "register_operand")] + (match_dup 3) + (match_operand:SVE_F 1 "register_operand")] SVE_COND_FP_UNARY_OPTAB))] "TARGET_SVE" + { + operands[2] = aarch64_sve_fp_pred (<MODE>mode, &operands[3]); + } +) + +;; FABS and FNEG are non-trapping, so we can always expand with a <VPRED> +;; predicate. 
It doesn't matter whether the padding bits of a partial +;; vector mode are active or inactive. +(define_expand "<optab><mode>2" + [(set (match_operand:SVE_F 0 "register_operand") + (unspec:SVE_F + [(match_dup 2) + (const_int SVE_RELAXED_GP) + (match_operand:SVE_F 1 "register_operand")] + SVE_COND_FP_UNARY_BITWISE))] + "TARGET_SVE" { operands[2] = aarch64_ptrue_reg (<VPRED>mode); } @@ -3776,11 +3792,11 @@ ;; Predicated floating-point unary operations. (define_insn "@aarch64_pred_<optab><mode>" - [(set (match_operand:SVE_FULL_F 0 "register_operand") - (unspec:SVE_FULL_F - [(match_operand:<VPRED> 1 "register_operand") + [(set (match_operand:SVE_F 0 "register_operand") + (unspec:SVE_F + [(match_operand:<VPRED> 1 "aarch64_predicate_operand") (match_operand:SI 3 "aarch64_sve_gp_strictness") - (match_operand:SVE_FULL_F 2 "register_operand")] + (match_operand:SVE_F 2 "register_operand")] SVE_COND_FP_UNARY))] "TARGET_SVE" {@ [ cons: =0 , 1 , 2 ; attrs: movprfx ] diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md index 22d7aa9cf9d7..795c4ac7a579 100644 --- a/gcc/config/aarch64/iterators.md +++ b/gcc/config/aarch64/iterators.md @@ -3430,9 +3430,10 @@ UNSPEC_FMINQV UNSPEC_FMINNMQV]) -(define_int_iterator SVE_COND_FP_UNARY [UNSPEC_COND_FABS - UNSPEC_COND_FNEG - UNSPEC_COND_FRECPX +(define_int_iterator SVE_COND_FP_UNARY_BITWISE [UNSPEC_COND_FABS + UNSPEC_COND_FNEG]) + +(define_int_iterator SVE_COND_FP_UNARY [UNSPEC_COND_FRECPX UNSPEC_COND_FRINTA UNSPEC_COND_FRINTI UNSPEC_COND_FRINTM @@ -3440,13 +3441,12 @@ UNSPEC_COND_FRINTP UNSPEC_COND_FRINTX UNSPEC_COND_FRINTZ - UNSPEC_COND_FSQRT]) + UNSPEC_COND_FSQRT + SVE_COND_FP_UNARY_BITWISE]) ;; Same as SVE_COND_FP_UNARY, but without codes that have a dedicated ;; <optab><mode>2 expander. 
-(define_int_iterator SVE_COND_FP_UNARY_OPTAB [UNSPEC_COND_FABS - UNSPEC_COND_FNEG - UNSPEC_COND_FRECPX +(define_int_iterator SVE_COND_FP_UNARY_OPTAB [UNSPEC_COND_FRECPX UNSPEC_COND_FRINTA UNSPEC_COND_FRINTI UNSPEC_COND_FRINTM diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_fabs_1.c b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_fabs_1.c new file mode 100644 index 000000000000..f09cfe84065d --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_fabs_1.c @@ -0,0 +1,28 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -moverride=sve_width=2048 -ftree-vectorize" } */ + +#include <stdint.h> + +#define TEST_FN(FN, TYPE0, TYPE1, COUNT) \ + void \ + f_##FN##_##TYPE0##_##TYPE1 (TYPE1 *__restrict out, \ + TYPE0 *__restrict a, \ + TYPE0 *__restrict b) \ + { \ + for (unsigned int i = 0; i < COUNT; i++) \ + if (FN (a[i]) > b[i]) \ + out[i] = 3; \ + } + +TEST_FN (__builtin_fabsf16, _Float16, uint64_t, 32) + +TEST_FN (__builtin_fabsf16, _Float16, uint32_t, 64) + +TEST_FN (__builtin_fabsf32, float, uint64_t, 32) + +/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.d} 2 } } */ +/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.s} 2 } } */ +/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.d} 2 } } */ + +/* { dg-final { scan-assembler-times {\tfabs\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tfabs\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h\n} 2 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_fneg_1.c b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_fneg_1.c new file mode 100644 index 000000000000..d489ecb67617 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_fneg_1.c @@ -0,0 +1,30 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -moverride=sve_width=2048 -ftree-vectorize" } */ + +#include <stdint.h> + +#define NEG(X) -X + +#define TEST_FN(FN, TYPE0, TYPE1, COUNT) \ + void \ + f_##FN##_##TYPE0##_##TYPE1 (TYPE1 *__restrict out, \ + TYPE0 *__restrict a, \ + TYPE0 *__restrict b) \ 
+ { \ + for (unsigned int i = 0; i < COUNT; i++) \ + if (FN (a[i]) > b[i]) \ + out[i] = 3; \ + } + +TEST_FN (NEG, _Float16, uint64_t, 32) + +TEST_FN (NEG, _Float16, uint32_t, 64) + +TEST_FN (NEG, float, uint64_t, 32) + +/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.d} 2 } } */ +/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.s} 2 } } */ +/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.d} 2 } } */ + +/* { dg-final { scan-assembler-times {\tfneg\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tfneg\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h\n} 2 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_frinta_1.c b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_frinta_1.c new file mode 100644 index 000000000000..3cbdef3d99be --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_frinta_1.c @@ -0,0 +1,31 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -moverride=sve_width=2048 -ftree-vectorize" } */ + +#include <stdint.h> + +#define TEST_FN(FN, TYPE0, TYPE1, COUNT) \ + void \ + f_##FN##_##TYPE0##_##TYPE1 (TYPE1 *__restrict out, \ + TYPE0 *__restrict a, \ + TYPE0 *__restrict b) \ + { \ + for (unsigned int i = 0; i < COUNT; i++) \ + if (FN (a[i]) > b[i]) \ + out[i] = 3; \ + } + +TEST_FN (__builtin_roundf16, _Float16, uint64_t, 32) + +TEST_FN (__builtin_roundf16, _Float16, uint32_t, 64) + +TEST_FN (__builtin_roundf32, float, uint64_t, 32) + +/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.s} 1 } } */ +/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.d} 2 } } */ + +/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.d} 2 } } */ +/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.s} 2 } } */ +/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.d} 2 } } */ + +/* { dg-final { scan-assembler-times {\tfrinta\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tfrinta\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h\n} 2 } } */ diff --git 
a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_frinta_2.c b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_frinta_2.c new file mode 100644 index 000000000000..4564686636bc --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_frinta_2.c @@ -0,0 +1,15 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -moverride=sve_width=2048 -ftree-vectorize -fno-trapping-math" } */ + +#include "unpacked_frinta_1.c" + +/* { dg-final { scan-assembler-not {\tptrue\tp[0-7]\.s} } } */ +/* { dg-final { scan-assembler-not {\tptrue\tp[0-7]\.d} } } */ +/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.b} 3 } } */ + +/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.d} 2 } } */ +/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.s} 2 } } */ +/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.d} 2 } } */ + +/* { dg-final { scan-assembler-times {\tfrinta\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tfrinta\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h\n} 2 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_frinti_1.c b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_frinti_1.c new file mode 100644 index 000000000000..7645fed5136a --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_frinti_1.c @@ -0,0 +1,31 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -moverride=sve_width=2048 -ftree-vectorize" } */ + +#include <stdint.h> + +#define TEST_FN(FN, TYPE0, TYPE1, COUNT) \ + void \ + f_##FN##_##TYPE0##_##TYPE1 (TYPE1 *__restrict out, \ + TYPE0 *__restrict a, \ + TYPE0 *__restrict b) \ + { \ + for (unsigned int i = 0; i < COUNT; i++) \ + if (FN (a[i]) > b[i]) \ + out[i] = 3; \ + } + +TEST_FN (__builtin_nearbyintf16, _Float16, uint64_t, 32) + +TEST_FN (__builtin_nearbyintf16, _Float16, uint32_t, 64) + +TEST_FN (__builtin_nearbyintf32, float, uint64_t, 32) + +/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.s} 1 } } */ +/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.d} 2 } } */ + +/* { dg-final { 
scan-assembler-times {\tld1w\tz[0-9]+\.d} 2 } } */ +/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.s} 2 } } */ +/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.d} 2 } } */ + +/* { dg-final { scan-assembler-times {\tfrinti\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tfrinti\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h\n} 2 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_frinti_2.c b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_frinti_2.c new file mode 100644 index 000000000000..eadce07cf1c8 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_frinti_2.c @@ -0,0 +1,15 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -moverride=sve_width=2048 -ftree-vectorize -fno-trapping-math" } */ + +#include "unpacked_frinti_1.c" + +/* { dg-final { scan-assembler-not {\tptrue\tp[0-7]\.s} } } */ +/* { dg-final { scan-assembler-not {\tptrue\tp[0-7]\.d} } } */ +/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.b} 3 } } */ + +/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.d} 2 } } */ +/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.s} 2 } } */ +/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.d} 2 } } */ + +/* { dg-final { scan-assembler-times {\tfrinti\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tfrinti\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h\n} 2 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_frintm_1.c b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_frintm_1.c new file mode 100644 index 000000000000..98f85fb5bfee --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_frintm_1.c @@ -0,0 +1,31 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -moverride=sve_width=2048 -ftree-vectorize" } */ + +#include <stdint.h> + +#define TEST_FN(FN, TYPE0, TYPE1, COUNT) \ + void \ + f_##FN##_##TYPE0##_##TYPE1 (TYPE1 *__restrict out, \ + TYPE0 *__restrict a, \ + TYPE0 *__restrict b) \ + { \ + for (unsigned int i = 0; i < COUNT; 
i++) \ + if (FN (a[i]) > b[i]) \ + out[i] = 3; \ + } + +TEST_FN (__builtin_floorf16, _Float16, uint64_t, 32) + +TEST_FN (__builtin_floorf16, _Float16, uint32_t, 64) + +TEST_FN (__builtin_floorf32, float, uint64_t, 32) + +/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.s} 1 } } */ +/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.d} 2 } } */ + +/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.d} 2 } } */ +/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.s} 2 } } */ +/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.d} 2 } } */ + +/* { dg-final { scan-assembler-times {\tfrintm\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tfrintm\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h\n} 2 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_frintm_2.c b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_frintm_2.c new file mode 100644 index 000000000000..56988be786d1 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_frintm_2.c @@ -0,0 +1,15 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -moverride=sve_width=2048 -ftree-vectorize -fno-trapping-math" } */ + +#include "unpacked_frintm_1.c" + +/* { dg-final { scan-assembler-not {\tptrue\tp[0-7]\.s} } } */ +/* { dg-final { scan-assembler-not {\tptrue\tp[0-7]\.d} } } */ +/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.b} 3 } } */ + +/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.d} 2 } } */ +/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.s} 2 } } */ +/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.d} 2 } } */ + +/* { dg-final { scan-assembler-times {\tfrintm\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tfrintm\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h\n} 2 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_frintp_1.c b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_frintp_1.c new file mode 100644 index 000000000000..f2336979ad0f --- /dev/null +++ 
b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_frintp_1.c @@ -0,0 +1,31 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -moverride=sve_width=2048 -ftree-vectorize" } */ + +#include <stdint.h> + +#define TEST_FN(FN, TYPE0, TYPE1, COUNT) \ + void \ + f_##FN##_##TYPE0##_##TYPE1 (TYPE1 *__restrict out, \ + TYPE0 *__restrict a, \ + TYPE0 *__restrict b) \ + { \ + for (unsigned int i = 0; i < COUNT; i++) \ + if (FN (a[i]) > b[i]) \ + out[i] = 3; \ + } + +TEST_FN (__builtin_ceilf16, _Float16, uint64_t, 32) + +TEST_FN (__builtin_ceilf16, _Float16, uint32_t, 64) + +TEST_FN (__builtin_ceilf32, float, uint64_t, 32) + +/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.s} 1 } } */ +/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.d} 2 } } */ + +/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.d} 2 } } */ +/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.s} 2 } } */ +/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.d} 2 } } */ + +/* { dg-final { scan-assembler-times {\tfrintp\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tfrintp\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h\n} 2 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_frintp_2.c b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_frintp_2.c new file mode 100644 index 000000000000..c24c6326d1ee --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_frintp_2.c @@ -0,0 +1,15 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -moverride=sve_width=2048 -ftree-vectorize -fno-trapping-math" } */ + +#include "unpacked_frintp_1.c" + +/* { dg-final { scan-assembler-not {\tptrue\tp[0-7]\.s} } } */ +/* { dg-final { scan-assembler-not {\tptrue\tp[0-7]\.d} } } */ +/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.b} 3 } } */ + +/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.d} 2 } } */ +/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.s} 2 } } */ +/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.d} 2 } } */ + +/* { dg-final { 
scan-assembler-times {\tfrintp\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tfrintp\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h\n} 2 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_frintx_1.c b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_frintx_1.c new file mode 100644 index 000000000000..73403a54ecbb --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_frintx_1.c @@ -0,0 +1,31 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -moverride=sve_width=2048 -ftree-vectorize" } */ + +#include <stdint.h> + +#define TEST_FN(FN, TYPE0, TYPE1, COUNT) \ + void \ + f_##FN##_##TYPE0##_##TYPE1 (TYPE1 *__restrict out, \ + TYPE0 *__restrict a, \ + TYPE0 *__restrict b) \ + { \ + for (unsigned int i = 0; i < COUNT; i++) \ + if (FN (a[i]) > b[i]) \ + out[i] = 3; \ + } + +TEST_FN (__builtin_rintf16, _Float16, uint64_t, 32) + +TEST_FN (__builtin_rintf16, _Float16, uint32_t, 64) + +TEST_FN (__builtin_rintf32, float, uint64_t, 32) + +/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.s} 1 } } */ +/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.d} 2 } } */ + +/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.d} 2 } } */ +/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.s} 2 } } */ +/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.d} 2 } } */ + +/* { dg-final { scan-assembler-times {\tfrintx\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tfrintx\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h\n} 2 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_frintx_2.c b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_frintx_2.c new file mode 100644 index 000000000000..e8b8924537f6 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_frintx_2.c @@ -0,0 +1,15 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -moverride=sve_width=2048 -ftree-vectorize -fno-trapping-math" } */ + +#include "unpacked_frintx_1.c" + +/* { dg-final { scan-assembler-not 
{\tptrue\tp[0-7]\.s} } } */ +/* { dg-final { scan-assembler-not {\tptrue\tp[0-7]\.d} } } */ +/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.b} 3 } } */ + +/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.d} 2 } } */ +/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.s} 2 } } */ +/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.d} 2 } } */ + +/* { dg-final { scan-assembler-times {\tfrintx\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tfrintx\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h\n} 2 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_frintz_1.c b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_frintz_1.c new file mode 100644 index 000000000000..73778431c2ee --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_frintz_1.c @@ -0,0 +1,31 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -moverride=sve_width=2048 -ftree-vectorize" } */ + +#include <stdint.h> + +#define TEST_FN(FN, TYPE0, TYPE1, COUNT) \ + void \ + f_##FN##_##TYPE0##_##TYPE1 (TYPE1 *__restrict out, \ + TYPE0 *__restrict a, \ + TYPE0 *__restrict b) \ + { \ + for (unsigned int i = 0; i < COUNT; i++) \ + if (FN (a[i]) > b[i]) \ + out[i] = 3; \ + } + +TEST_FN (__builtin_truncf16, _Float16, uint64_t, 32) + +TEST_FN (__builtin_truncf16, _Float16, uint32_t, 64) + +TEST_FN (__builtin_truncf32, float, uint64_t, 32) + +/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.s} 1 } } */ +/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.d} 2 } } */ + +/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.d} 2 } } */ +/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.s} 2 } } */ +/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.d} 2 } } */ + +/* { dg-final { scan-assembler-times {\tfrintz\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tfrintz\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h\n} 2 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_frintz_2.c 
b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_frintz_2.c new file mode 100644 index 000000000000..177912296635 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_frintz_2.c @@ -0,0 +1,15 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -moverride=sve_width=2048 -ftree-vectorize -fno-trapping-math" } */ + +#include "unpacked_frintz_1.c" + +/* { dg-final { scan-assembler-not {\tptrue\tp[0-7]\.s} } } */ +/* { dg-final { scan-assembler-not {\tptrue\tp[0-7]\.d} } } */ +/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.b} 3 } } */ + +/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.d} 2 } } */ +/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.s} 2 } } */ +/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.d} 2 } } */ + +/* { dg-final { scan-assembler-times {\tfrintz\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tfrintz\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h\n} 2 } } */