This patch extends the unpredicated FP division expander to support partial FP modes. It extends the existing patterns used to implement UNSPEC_COND_FDIV and its approximation as needed.
gcc/ChangeLog: * config/aarch64/aarch64-sve.md: (@aarch64_sve_<optab><mode>): Extend from SVE_FULL_F to SVE_F, use aarch64_predicate_operand. (div<mode>3): Extend from SVE_FULL_F to SVE_F. (@aarch64_frecpe<mode>): Likewise. (@aarch64_frecps<mode>): Likewise. gcc/testsuite/ChangeLog: * gcc.target/aarch64/sve/unpacked_fdiv_1.c: New test. * gcc.target/aarch64/sve/unpacked_fdiv_2.c: Likewise. * gcc.target/aarch64/sve/unpacked_fdiv_3.c: Likewise. --- gcc/config/aarch64/aarch64-sve.md | 50 +++++++++---------- .../gcc.target/aarch64/sve/unpacked_fdiv_1.c | 34 +++++++++++++ .../gcc.target/aarch64/sve/unpacked_fdiv_2.c | 11 ++++ .../gcc.target/aarch64/sve/unpacked_fdiv_3.c | 11 ++++ 4 files changed, 81 insertions(+), 25 deletions(-) create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/unpacked_fdiv_1.c create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/unpacked_fdiv_2.c create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/unpacked_fdiv_3.c diff --git a/gcc/config/aarch64/aarch64-sve.md b/gcc/config/aarch64/aarch64-sve.md index cdad900d9cf..79a087837de 100644 --- a/gcc/config/aarch64/aarch64-sve.md +++ b/gcc/config/aarch64/aarch64-sve.md @@ -3752,9 +3752,9 @@ ;; Unpredicated floating-point unary operations. (define_insn "@aarch64_sve_<optab><mode>" - [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w") - (unspec:SVE_FULL_F - [(match_operand:SVE_FULL_F 1 "register_operand" "w")] + [(set (match_operand:SVE_F 0 "register_operand" "=w") + (unspec:SVE_F + [(match_operand:SVE_F 1 "register_operand" "w")] SVE_FP_UNARY))] "TARGET_SVE" "<sve_fp_op>\t%0.<Vetype>, %1.<Vetype>" @@ -5525,10 +5525,10 @@ ;; Unpredicated floating-point binary operations. 
(define_insn "@aarch64_sve_<optab><mode>" - [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w") - (unspec:SVE_FULL_F - [(match_operand:SVE_FULL_F 1 "register_operand" "w") - (match_operand:SVE_FULL_F 2 "register_operand" "w")] + [(set (match_operand:SVE_F 0 "register_operand" "=w") + (unspec:SVE_F + [(match_operand:SVE_F 1 "register_operand" "w") + (match_operand:SVE_F 2 "register_operand" "w")] SVE_FP_BINARY))] "TARGET_SVE" "<sve_fp_op>\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>" @@ -5552,12 +5552,12 @@ ;; Predicated floating-point binary operations that have no immediate forms. (define_insn "@aarch64_pred_<optab><mode>" - [(set (match_operand:SVE_FULL_F 0 "register_operand") - (unspec:SVE_FULL_F - [(match_operand:<VPRED> 1 "register_operand") + [(set (match_operand:SVE_F 0 "register_operand") + (unspec:SVE_F + [(match_operand:<VPRED> 1 "aarch64_predicate_operand") (match_operand:SI 4 "aarch64_sve_gp_strictness") - (match_operand:SVE_FULL_F 2 "register_operand") - (match_operand:SVE_FULL_F 3 "register_operand")] + (match_operand:SVE_F 2 "register_operand") + (match_operand:SVE_F 3 "register_operand")] SVE_COND_FP_BINARY_REG))] "TARGET_SVE" {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ] @@ -6649,12 +6649,12 @@ ;; ------------------------------------------------------------------------- (define_expand "div<mode>3" - [(set (match_operand:SVE_FULL_F 0 "register_operand") - (unspec:SVE_FULL_F + [(set (match_operand:SVE_F 0 "register_operand") + (unspec:SVE_F [(match_dup 3) - (const_int SVE_RELAXED_GP) - (match_operand:SVE_FULL_F 1 "nonmemory_operand") - (match_operand:SVE_FULL_F 2 "register_operand")] + (match_dup 4) + (match_operand:SVE_F 1 "nonmemory_operand") + (match_operand:SVE_F 2 "register_operand")] UNSPEC_COND_FDIV))] "TARGET_SVE" { @@ -6662,23 +6662,23 @@ DONE; operands[1] = force_reg (<MODE>mode, operands[1]); - operands[3] = aarch64_ptrue_reg (<VPRED>mode); + operands[3] = aarch64_sve_fp_pred (<MODE>mode, &operands[4]); } ) (define_expand 
"@aarch64_frecpe<mode>" - [(set (match_operand:SVE_FULL_F 0 "register_operand") - (unspec:SVE_FULL_F - [(match_operand:SVE_FULL_F 1 "register_operand")] + [(set (match_operand:SVE_F 0 "register_operand") + (unspec:SVE_F + [(match_operand:SVE_F 1 "register_operand")] UNSPEC_FRECPE))] "TARGET_SVE" ) (define_expand "@aarch64_frecps<mode>" - [(set (match_operand:SVE_FULL_F 0 "register_operand") - (unspec:SVE_FULL_F - [(match_operand:SVE_FULL_F 1 "register_operand") - (match_operand:SVE_FULL_F 2 "register_operand")] + [(set (match_operand:SVE_F 0 "register_operand") + (unspec:SVE_F + [(match_operand:SVE_F 1 "register_operand") + (match_operand:SVE_F 2 "register_operand")] UNSPEC_FRECPS))] "TARGET_SVE" ) diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_fdiv_1.c b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_fdiv_1.c new file mode 100644 index 00000000000..64514264b0a --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_fdiv_1.c @@ -0,0 +1,34 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -msve-vector-bits=2048 -ftree-vectorize" } */ + +#include <stdint.h> + +#define b_i b[i] +#define DIV(A, B) A / B + +#define TEST_FN(FN, TYPE0, TYPE1, COUNT, RHS) \ + void \ + f_##FN##_##TYPE0##_##TYPE1##_##RHS (TYPE1 *__restrict out, \ + TYPE0 *__restrict a, \ + TYPE0 *__restrict b, \ + TYPE0 *__restrict c) \ + { \ + for (unsigned int i = 0; i < COUNT; i++) \ + if (FN (a[i], (TYPE0)RHS) > c[i]) \ + out[i] = 3; \ + } + +#define TEST_ALL(FN, TYPE0, TYPE1, COUNT) \ + TEST_FN (FN, TYPE0, TYPE1, COUNT, b_i) + +TEST_ALL (DIV, _Float16, uint64_t, 32) + +TEST_ALL (DIV, _Float16, uint32_t, 64) + +TEST_ALL (DIV, float, uint64_t, 32) + +/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.s} 1 } } */ +/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.d} 2 } } */ + +/* { dg-final { scan-assembler-times {\tfdivr\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tfdivr\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 2 
} } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_fdiv_2.c b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_fdiv_2.c new file mode 100644 index 00000000000..17821305685 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_fdiv_2.c @@ -0,0 +1,11 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize -msve-vector-bits=2048 -fno-trapping-math" } */ + +#include "unpacked_fdiv_1.c" + +/* { dg-final { scan-assembler-not {\tptrue\tp[0-7]\.s} } } */ +/* { dg-final { scan-assembler-not {\tptrue\tp[0-7]\.d} } } */ +/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.b} 3 } } */ + +/* { dg-final { scan-assembler-times {\tfdivr\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tfdivr\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_fdiv_3.c b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_fdiv_3.c new file mode 100644 index 00000000000..57f4aef4f13 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_fdiv_3.c @@ -0,0 +1,11 @@ +/* { dg-do compile } */ +/* { dg-options "-ftree-vectorize -msve-vector-bits=2048 -Ofast -mlow-precision-div" } */ + +#include "unpacked_fdiv_1.c" + +/* { dg-final { scan-assembler-not {\tfrecpe\tz[0-9]+\.h} } } */ +/* { dg-final { scan-assembler-not {\tfrecps\tz[0-9]+\.h} } } */ + +/* { dg-final { scan-assembler-times {\tfmul\tz[0-9]+\.s} 2 } } */ +/* { dg-final { scan-assembler-times {\tfrecpe\tz[0-9]+\.s} 1 } } */ +/* { dg-final { scan-assembler-times {\tfrecps\tz[0-9]+\.s} 1 } } */ -- 2.34.1