[PATCH 06/14] aarch64: Add support for unpacked SVE FP unary operations

Spencer Abson Mon, 02 Jun 2025 03:12:09 -0700

This patch extends the expander for unpredicated round, nearbyint, floor,
ceil, rint, and trunc, so that it can handle partial SVE FP modes.


We move fabs and fneg to a separate expander, since they are not trapping
instructions.

gcc/ChangeLog:

        * config/aarch64/aarch64-sve.md (<optab><mode>2): Replace use of
        aarch64_ptrue_reg with aarch64_sve_fp_pred.
        (@aarch64_pred_<optab><mode>): Extend from SVE_FULL_F to SVE_F,
        use aarch64_predicate_operand.
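        * config/aarch64/iterators.md (SVE_COND_FP_UNARY_BITWISE): New int
        iterator, containing FABS/FNEG.
        (SVE_COND_FP_UNARY): Include SVE_COND_FP_UNARY_BITWISE instead of
        listing FABS/FNEG directly.
        (SVE_COND_FP_UNARY_OPTAB): Remove FABS/FNEG.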

gcc/testsuite/ChangeLog:

        * gcc.target/aarch64/sve/unpacked_fabs_1.c: New test.
        * gcc.target/aarch64/sve/unpacked_fneg_1.c: Likewise.
        * gcc.target/aarch64/sve/unpacked_frinta_1.c: Likewise.
        * gcc.target/aarch64/sve/unpacked_frinta_2.c: Likewise.
        * gcc.target/aarch64/sve/unpacked_frinti_1.c: Likewise.
        * gcc.target/aarch64/sve/unpacked_frinti_2.c: Likewise.
        * gcc.target/aarch64/sve/unpacked_frintm_1.c: Likewise.
        * gcc.target/aarch64/sve/unpacked_frintm_2.c: Likewise.
        * gcc.target/aarch64/sve/unpacked_frintp_1.c: Likewise.
        * gcc.target/aarch64/sve/unpacked_frintp_2.c: Likewise.
        * gcc.target/aarch64/sve/unpacked_frintx_1.c: Likewise.
        * gcc.target/aarch64/sve/unpacked_frintx_2.c: Likewise.
        * gcc.target/aarch64/sve/unpacked_frintz_1.c: Likewise.
        * gcc.target/aarch64/sve/unpacked_frintz_2.c: Likewise.
---
 gcc/config/aarch64/aarch64-sve.md             | 31 ++++++++++++++-----
 gcc/config/aarch64/iterators.md               | 14 ++++-----
 .../gcc.target/aarch64/sve/unpacked_fabs_1.c  | 24 ++++++++++++++
 .../gcc.target/aarch64/sve/unpacked_fneg_1.c  | 26 ++++++++++++++++
 .../aarch64/sve/unpacked_frinta_1.c           | 27 ++++++++++++++++
 .../aarch64/sve/unpacked_frinta_2.c           | 11 +++++++
 .../aarch64/sve/unpacked_frinti_1.c           | 27 ++++++++++++++++
 .../aarch64/sve/unpacked_frinti_2.c           | 11 +++++++
 .../aarch64/sve/unpacked_frintm_1.c           | 27 ++++++++++++++++
 .../aarch64/sve/unpacked_frintm_2.c           | 11 +++++++
 .../aarch64/sve/unpacked_frintp_1.c           | 27 ++++++++++++++++
 .../aarch64/sve/unpacked_frintp_2.c           | 11 +++++++
 .../aarch64/sve/unpacked_frintx_1.c           | 27 ++++++++++++++++
 .../aarch64/sve/unpacked_frintx_2.c           | 11 +++++++
 .../aarch64/sve/unpacked_frintz_1.c           | 27 ++++++++++++++++
 .../aarch64/sve/unpacked_frintz_2.c           | 11 +++++++
 16 files changed, 308 insertions(+), 15 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/unpacked_fabs_1.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/unpacked_fneg_1.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/unpacked_frinta_1.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/unpacked_frinta_2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/unpacked_frinti_1.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/unpacked_frinti_2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/unpacked_frintm_1.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/unpacked_frintm_2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/unpacked_frintp_1.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/unpacked_frintp_2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/unpacked_frintx_1.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/unpacked_frintx_2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/unpacked_frintz_1.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/unpacked_frintz_2.c

diff --git a/gcc/config/aarch64/aarch64-sve.md b/gcc/config/aarch64/aarch64-sve.md
index 399d147c9a5..1a705e153cb 100644
--- a/gcc/config/aarch64/aarch64-sve.md
+++ b/gcc/config/aarch64/aarch64-sve.md
@@ -3762,13 +3762,28 @@
 
 ;; Unpredicated floating-point unary operations.
 (define_expand "<optab><mode>2"
-  [(set (match_operand:SVE_FULL_F 0 "register_operand")
-       (unspec:SVE_FULL_F
+  [(set (match_operand:SVE_F 0 "register_operand")
+       (unspec:SVE_F
          [(match_dup 2)
-          (const_int SVE_RELAXED_GP)
-          (match_operand:SVE_FULL_F 1 "register_operand")]
+          (match_dup 3)
+          (match_operand:SVE_F 1 "register_operand")]
          SVE_COND_FP_UNARY_OPTAB))]
   "TARGET_SVE"
+  {
+    operands[2] = aarch64_sve_fp_pred (<MODE>mode, &operands[3]);
+  }
+)
+
+;; FABS and FNEG are non-trapping, so we can always expand them with
+;; their natural PTRUE.
+(define_expand "<optab><mode>2"
+  [(set (match_operand:SVE_F 0 "register_operand")
+       (unspec:SVE_F
+         [(match_dup 2)
+          (const_int SVE_RELAXED_GP)
+          (match_operand:SVE_F 1 "register_operand")]
+         SVE_COND_FP_UNARY_BITWISE))]
+  "TARGET_SVE"
   {
     operands[2] = aarch64_ptrue_reg (<VPRED>mode);
   }
@@ -3776,11 +3791,11 @@
 
 ;; Predicated floating-point unary operations.
 (define_insn "@aarch64_pred_<optab><mode>"
-  [(set (match_operand:SVE_FULL_F 0 "register_operand")
-       (unspec:SVE_FULL_F
-         [(match_operand:<VPRED> 1 "register_operand")
+  [(set (match_operand:SVE_F 0 "register_operand")
+       (unspec:SVE_F
+         [(match_operand:<VPRED> 1 "aarch64_predicate_operand")
           (match_operand:SI 3 "aarch64_sve_gp_strictness")
-          (match_operand:SVE_FULL_F 2 "register_operand")]
+          (match_operand:SVE_F 2 "register_operand")]
          SVE_COND_FP_UNARY))]
   "TARGET_SVE"
   {@ [ cons: =0 , 1   , 2 ; attrs: movprfx ]
diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
index 41e483bb80e..7d7d0732d29 100644
--- a/gcc/config/aarch64/iterators.md
+++ b/gcc/config/aarch64/iterators.md
@@ -3352,9 +3352,10 @@
                                           UNSPEC_FMINQV
                                           UNSPEC_FMINNMQV])
 
-(define_int_iterator SVE_COND_FP_UNARY [UNSPEC_COND_FABS
-                                       UNSPEC_COND_FNEG
-                                       UNSPEC_COND_FRECPX
+(define_int_iterator SVE_COND_FP_UNARY_BITWISE [UNSPEC_COND_FABS
+                                               UNSPEC_COND_FNEG])
+
+(define_int_iterator SVE_COND_FP_UNARY [UNSPEC_COND_FRECPX
                                        UNSPEC_COND_FRINTA
                                        UNSPEC_COND_FRINTI
                                        UNSPEC_COND_FRINTM
@@ -3362,13 +3363,12 @@
                                        UNSPEC_COND_FRINTP
                                        UNSPEC_COND_FRINTX
                                        UNSPEC_COND_FRINTZ
-                                       UNSPEC_COND_FSQRT])
+                                       UNSPEC_COND_FSQRT
+                                       SVE_COND_FP_UNARY_BITWISE])
 
 ;; Same as SVE_COND_FP_UNARY, but without codes that have a dedicated
 ;; <optab><mode>2 expander.
-(define_int_iterator SVE_COND_FP_UNARY_OPTAB [UNSPEC_COND_FABS
-                                             UNSPEC_COND_FNEG
-                                             UNSPEC_COND_FRECPX
+(define_int_iterator SVE_COND_FP_UNARY_OPTAB [UNSPEC_COND_FRECPX
                                              UNSPEC_COND_FRINTA
                                              UNSPEC_COND_FRINTI
                                              UNSPEC_COND_FRINTM
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_fabs_1.c b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_fabs_1.c
new file mode 100644
index 00000000000..b5d62de00da
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_fabs_1.c
@@ -0,0 +1,24 @@
+/* { dg-do compile } */
+/* { dg-options "-O -msve-vector-bits=2048 -ftree-vectorize" } */
+
+#include <stdint.h>
+
+#define TEST_FN(FN, TYPE0, TYPE1, COUNT)               \
+  void                                                 \
+  f_##FN##_##TYPE0##_##TYPE1 (TYPE1 *__restrict out,   \
+                             TYPE0 *__restrict a,      \
+                             TYPE0 *__restrict b)      \
+  {                                                    \
+    for (unsigned int i = 0; i < COUNT; i++)           \
+      if (FN (a[i]) > b[i])                            \
+       out[i] = 3;                                     \
+  }
+
+TEST_FN (__builtin_fabsf16, _Float16, uint64_t, 32)
+
+TEST_FN (__builtin_fabsf16, _Float16, uint32_t, 64)
+
+TEST_FN (__builtin_fabsf32, float, uint64_t, 32)
+
+/* { dg-final { scan-assembler-times {\tfabs\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tfabs\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h\n} 2 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_fneg_1.c b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_fneg_1.c
new file mode 100644
index 00000000000..55d8c5b522c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_fneg_1.c
@@ -0,0 +1,26 @@
+/* { dg-do compile } */
+/* { dg-options "-O -msve-vector-bits=2048 -ftree-vectorize" } */
+
+#include <stdint.h>
+
+#define NEG(X) -X
+
+#define TEST_FN(FN, TYPE0, TYPE1, COUNT)               \
+  void                                                 \
+  f_##FN##_##TYPE0##_##TYPE1 (TYPE1 *__restrict out,   \
+                             TYPE0 *__restrict a,      \
+                             TYPE0 *__restrict b)      \
+  {                                                    \
+    for (unsigned int i = 0; i < COUNT; i++)           \
+      if (FN (a[i]) > b[i])                            \
+       out[i] = 3;                                     \
+  }
+
+TEST_FN (NEG, _Float16, uint64_t, 32)
+
+TEST_FN (NEG, _Float16, uint32_t, 64)
+
+TEST_FN (NEG, float, uint64_t, 32)
+
+/* { dg-final { scan-assembler-times {\tfneg\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tfneg\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h\n} 2 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_frinta_1.c b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_frinta_1.c
new file mode 100644
index 00000000000..e1e611f2144
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_frinta_1.c
@@ -0,0 +1,27 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -msve-vector-bits=2048 -ftree-vectorize" } */
+
+#include <stdint.h>
+
+#define TEST_FN(FN, TYPE0, TYPE1, COUNT)               \
+  void                                                 \
+  f_##FN##_##TYPE0##_##TYPE1 (TYPE1 *__restrict out,   \
+                             TYPE0 *__restrict a,      \
+                             TYPE0 *__restrict b)      \
+  {                                                    \
+    for (unsigned int i = 0; i < COUNT; i++)           \
+      if (FN (a[i]) > b[i])                            \
+       out[i] = 3;                                     \
+  }
+
+TEST_FN (__builtin_roundf16, _Float16, uint64_t, 32)
+
+TEST_FN (__builtin_roundf16, _Float16, uint32_t, 64)
+
+TEST_FN (__builtin_roundf32, float, uint64_t, 32)
+
+/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.s} 1 } } */
+/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.d} 2 } } */
+
+/* { dg-final { scan-assembler-times {\tfrinta\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tfrinta\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h\n} 2 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_frinta_2.c b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_frinta_2.c
new file mode 100644
index 00000000000..b810a177115
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_frinta_2.c
@@ -0,0 +1,11 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -msve-vector-bits=2048 -ftree-vectorize -fno-trapping-math" } */
+
+#include "unpacked_frinta_1.c"
+
+/* { dg-final { scan-assembler-not {\tptrue\tp[0-7]\.s} } } */
+/* { dg-final { scan-assembler-not {\tptrue\tp[0-7]\.d} } } */
+/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.b} 3 } } */
+
+/* { dg-final { scan-assembler-times {\tfrinta\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tfrinta\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h\n} 2 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_frinti_1.c b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_frinti_1.c
new file mode 100644
index 00000000000..d1d9c48da27
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_frinti_1.c
@@ -0,0 +1,27 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -msve-vector-bits=2048 -ftree-vectorize" } */
+
+#include <stdint.h>
+
+#define TEST_FN(FN, TYPE0, TYPE1, COUNT)               \
+  void                                                 \
+  f_##FN##_##TYPE0##_##TYPE1 (TYPE1 *__restrict out,   \
+                             TYPE0 *__restrict a,      \
+                             TYPE0 *__restrict b)      \
+  {                                                    \
+    for (unsigned int i = 0; i < COUNT; i++)           \
+      if (FN (a[i]) > b[i])                            \
+       out[i] = 3;                                     \
+  }
+
+TEST_FN (__builtin_nearbyintf16, _Float16, uint64_t, 32)
+
+TEST_FN (__builtin_nearbyintf16, _Float16, uint32_t, 64)
+
+TEST_FN (__builtin_nearbyintf32, float, uint64_t, 32)
+
+/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.s} 1 } } */
+/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.d} 2 } } */
+
+/* { dg-final { scan-assembler-times {\tfrinti\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tfrinti\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h\n} 2 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_frinti_2.c b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_frinti_2.c
new file mode 100644
index 00000000000..ca599fa7e2d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_frinti_2.c
@@ -0,0 +1,11 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -msve-vector-bits=2048 -ftree-vectorize -fno-trapping-math" } */
+
+#include "unpacked_frinti_1.c"
+
+/* { dg-final { scan-assembler-not {\tptrue\tp[0-7]\.s} } } */
+/* { dg-final { scan-assembler-not {\tptrue\tp[0-7]\.d} } } */
+/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.b} 3 } } */
+
+/* { dg-final { scan-assembler-times {\tfrinti\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tfrinti\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h\n} 2 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_frintm_1.c b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_frintm_1.c
new file mode 100644
index 00000000000..df50ba79645
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_frintm_1.c
@@ -0,0 +1,27 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -msve-vector-bits=2048 -ftree-vectorize" } */
+
+#include <stdint.h>
+
+#define TEST_FN(FN, TYPE0, TYPE1, COUNT)               \
+  void                                                 \
+  f_##FN##_##TYPE0##_##TYPE1 (TYPE1 *__restrict out,   \
+                             TYPE0 *__restrict a,      \
+                             TYPE0 *__restrict b)      \
+  {                                                    \
+    for (unsigned int i = 0; i < COUNT; i++)           \
+      if (FN (a[i]) > b[i])                            \
+       out[i] = 3;                                     \
+  }
+
+TEST_FN (__builtin_floorf16, _Float16, uint64_t, 32)
+
+TEST_FN (__builtin_floorf16, _Float16, uint32_t, 64)
+
+TEST_FN (__builtin_floorf32, float, uint64_t, 32)
+
+/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.s} 1 } } */
+/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.d} 2 } } */
+
+/* { dg-final { scan-assembler-times {\tfrintm\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tfrintm\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h\n} 2 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_frintm_2.c b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_frintm_2.c
new file mode 100644
index 00000000000..ad025992f5c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_frintm_2.c
@@ -0,0 +1,11 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -msve-vector-bits=2048 -ftree-vectorize -fno-trapping-math" } */
+
+#include "unpacked_frintm_1.c"
+
+/* { dg-final { scan-assembler-not {\tptrue\tp[0-7]\.s} } } */
+/* { dg-final { scan-assembler-not {\tptrue\tp[0-7]\.d} } } */
+/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.b} 3 } } */
+
+/* { dg-final { scan-assembler-times {\tfrintm\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tfrintm\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h\n} 2 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_frintp_1.c b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_frintp_1.c
new file mode 100644
index 00000000000..4769addc4e9
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_frintp_1.c
@@ -0,0 +1,27 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -msve-vector-bits=2048 -ftree-vectorize" } */
+
+#include <stdint.h>
+
+#define TEST_FN(FN, TYPE0, TYPE1, COUNT)               \
+  void                                                 \
+  f_##FN##_##TYPE0##_##TYPE1 (TYPE1 *__restrict out,   \
+                             TYPE0 *__restrict a,      \
+                             TYPE0 *__restrict b)      \
+  {                                                    \
+    for (unsigned int i = 0; i < COUNT; i++)           \
+      if (FN (a[i]) > b[i])                            \
+       out[i] = 3;                                     \
+  }
+
+TEST_FN (__builtin_ceilf16, _Float16, uint64_t, 32)
+
+TEST_FN (__builtin_ceilf16, _Float16, uint32_t, 64)
+
+TEST_FN (__builtin_ceilf32, float, uint64_t, 32)
+
+/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.s} 1 } } */
+/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.d} 2 } } */
+
+/* { dg-final { scan-assembler-times {\tfrintp\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tfrintp\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h\n} 2 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_frintp_2.c b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_frintp_2.c
new file mode 100644
index 00000000000..ce3033f055f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_frintp_2.c
@@ -0,0 +1,11 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -msve-vector-bits=2048 -ftree-vectorize -fno-trapping-math" } */
+
+#include "unpacked_frintp_1.c"
+
+/* { dg-final { scan-assembler-not {\tptrue\tp[0-7]\.s} } } */
+/* { dg-final { scan-assembler-not {\tptrue\tp[0-7]\.d} } } */
+/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.b} 3 } } */
+
+/* { dg-final { scan-assembler-times {\tfrintp\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tfrintp\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h\n} 2 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_frintx_1.c b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_frintx_1.c
new file mode 100644
index 00000000000..103122055d1
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_frintx_1.c
@@ -0,0 +1,27 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -msve-vector-bits=2048 -ftree-vectorize" } */
+
+#include <stdint.h>
+
+#define TEST_FN(FN, TYPE0, TYPE1, COUNT)               \
+  void                                                 \
+  f_##FN##_##TYPE0##_##TYPE1 (TYPE1 *__restrict out,   \
+                             TYPE0 *__restrict a,      \
+                             TYPE0 *__restrict b)      \
+  {                                                    \
+    for (unsigned int i = 0; i < COUNT; i++)           \
+      if (FN (a[i]) > b[i])                            \
+       out[i] = 3;                                     \
+  }
+
+TEST_FN (__builtin_rintf16, _Float16, uint64_t, 32)
+
+TEST_FN (__builtin_rintf16, _Float16, uint32_t, 64)
+
+TEST_FN (__builtin_rintf32, float, uint64_t, 32)
+
+/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.s} 1 } } */
+/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.d} 2 } } */
+
+/* { dg-final { scan-assembler-times {\tfrintx\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tfrintx\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h\n} 2 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_frintx_2.c b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_frintx_2.c
new file mode 100644
index 00000000000..5600f864273
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_frintx_2.c
@@ -0,0 +1,11 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -msve-vector-bits=2048 -ftree-vectorize -fno-trapping-math" } */
+
+#include "unpacked_frintx_1.c"
+
+/* { dg-final { scan-assembler-not {\tptrue\tp[0-7]\.s} } } */
+/* { dg-final { scan-assembler-not {\tptrue\tp[0-7]\.d} } } */
+/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.b} 3 } } */
+
+/* { dg-final { scan-assembler-times {\tfrintx\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tfrintx\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h\n} 2 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_frintz_1.c b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_frintz_1.c
new file mode 100644
index 00000000000..31ce6f3c022
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_frintz_1.c
@@ -0,0 +1,27 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -msve-vector-bits=2048 -ftree-vectorize" } */
+
+#include <stdint.h>
+
+#define TEST_FN(FN, TYPE0, TYPE1, COUNT)               \
+  void                                                 \
+  f_##FN##_##TYPE0##_##TYPE1 (TYPE1 *__restrict out,   \
+                             TYPE0 *__restrict a,      \
+                             TYPE0 *__restrict b)      \
+  {                                                    \
+    for (unsigned int i = 0; i < COUNT; i++)           \
+      if (FN (a[i]) > b[i])                            \
+       out[i] = 3;                                     \
+  }
+
+TEST_FN (__builtin_truncf16, _Float16, uint64_t, 32)
+
+TEST_FN (__builtin_truncf16, _Float16, uint32_t, 64)
+
+TEST_FN (__builtin_truncf32, float, uint64_t, 32)
+
+/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.s} 1 } } */
+/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.d} 2 } } */
+
+/* { dg-final { scan-assembler-times {\tfrintz\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tfrintz\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h\n} 2 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_frintz_2.c b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_frintz_2.c
new file mode 100644
index 00000000000..eadc8529522
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_frintz_2.c
@@ -0,0 +1,11 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -msve-vector-bits=2048 -ftree-vectorize -fno-trapping-math" } */
+
+#include "unpacked_frintz_1.c"
+
+/* { dg-final { scan-assembler-not {\tptrue\tp[0-7]\.s} } } */
+/* { dg-final { scan-assembler-not {\tptrue\tp[0-7]\.d} } } */
+/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.b} 3 } } */
+
+/* { dg-final { scan-assembler-times {\tfrintz\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tfrintz\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h\n} 2 } } */
-- 
2.34.1

[PATCH 06/14] aarch64: Add support for unpacked SVE FP unary operations

Reply via email to