https://gcc.gnu.org/g:8d588dbde026b1b96a921b81e80b2de60fd9f8fd

commit r16-2457-g8d588dbde026b1b96a921b81e80b2de60fd9f8fd
Author: Spencer Abson <spencer.ab...@arm.com>
Date:   Mon Jul 7 18:26:35 2025 +0000

    aarch64: Relaxed SEL combiner patterns for unpacked SVE FP unary operations
    
    Extend the unary op/UNSPEC_SEL combiner patterns from SVE_FULL_F to SVE_F,
    where the strictness value is SVE_RELAXED_GP.
    
    gcc/ChangeLog:
    
            * config/aarch64/aarch64-sve.md (*cond_<optab><mode>_2_relaxed):
            Extend from SVE_FULL_F to SVE_F.
            (*cond_<optab><mode>_any_relaxed): Likewise.
    
    gcc/testsuite/ChangeLog:
    
            * gcc.target/aarch64/sve/unpacked_cond_fabs_1.c: New test.
            * gcc.target/aarch64/sve/unpacked_cond_fneg_1.c: Likewise.
            * gcc.target/aarch64/sve/unpacked_cond_frinta_1.c: Likewise.
            * gcc.target/aarch64/sve/unpacked_cond_frinta_2.c: Likewise.
            * gcc.target/aarch64/sve/unpacked_cond_frinti_1.c: Likewise.
            * gcc.target/aarch64/sve/unpacked_cond_frintm_1.c: Likewise.
            * gcc.target/aarch64/sve/unpacked_cond_frintp_1.c: Likewise.
            * gcc.target/aarch64/sve/unpacked_cond_frintx_1.c: Likewise.
            * gcc.target/aarch64/sve/unpacked_cond_frintz_1.c: Likewise.

Diff:
---
 gcc/config/aarch64/aarch64-sve.md                  | 18 +++++-----
 .../gcc.target/aarch64/sve/unpacked_cond_fabs_1.c  | 37 ++++++++++++++++++++
 .../gcc.target/aarch64/sve/unpacked_cond_fneg_1.c  | 39 ++++++++++++++++++++++
 .../aarch64/sve/unpacked_cond_frinta_1.c           | 37 ++++++++++++++++++++
 .../aarch64/sve/unpacked_cond_frinta_2.c           | 18 ++++++++++
 .../aarch64/sve/unpacked_cond_frinti_1.c           | 37 ++++++++++++++++++++
 .../aarch64/sve/unpacked_cond_frintm_1.c           | 37 ++++++++++++++++++++
 .../aarch64/sve/unpacked_cond_frintp_1.c           | 37 ++++++++++++++++++++
 .../aarch64/sve/unpacked_cond_frintx_1.c           | 37 ++++++++++++++++++++
 .../aarch64/sve/unpacked_cond_frintz_1.c           | 37 ++++++++++++++++++++
 10 files changed, 325 insertions(+), 9 deletions(-)

diff --git a/gcc/config/aarch64/aarch64-sve.md b/gcc/config/aarch64/aarch64-sve.md
index 9a8ff216999f..66dd5809bcda 100644
--- a/gcc/config/aarch64/aarch64-sve.md
+++ b/gcc/config/aarch64/aarch64-sve.md
@@ -3822,13 +3822,13 @@
 
 ;; Predicated floating-point unary arithmetic, merging with the first input.
 (define_insn_and_rewrite "*cond_<optab><mode>_2_relaxed"
-  [(set (match_operand:SVE_FULL_F 0 "register_operand")
-       (unspec:SVE_FULL_F
+  [(set (match_operand:SVE_F 0 "register_operand")
+       (unspec:SVE_F
          [(match_operand:<VPRED> 1 "register_operand")
-          (unspec:SVE_FULL_F
+          (unspec:SVE_F
             [(match_operand 3)
              (const_int SVE_RELAXED_GP)
-             (match_operand:SVE_FULL_F 2 "register_operand")]
+             (match_operand:SVE_F 2 "register_operand")]
             SVE_COND_FP_UNARY)
           (match_dup 2)]
          UNSPEC_SEL))]
@@ -3870,15 +3870,15 @@
 ;; as earlyclobber helps to make the instruction more regular to the
 ;; register allocator.
 (define_insn_and_rewrite "*cond_<optab><mode>_any_relaxed"
-  [(set (match_operand:SVE_FULL_F 0 "register_operand")
-       (unspec:SVE_FULL_F
+  [(set (match_operand:SVE_F 0 "register_operand")
+       (unspec:SVE_F
          [(match_operand:<VPRED> 1 "register_operand")
-          (unspec:SVE_FULL_F
+          (unspec:SVE_F
             [(match_operand 4)
              (const_int SVE_RELAXED_GP)
-             (match_operand:SVE_FULL_F 2 "register_operand")]
+             (match_operand:SVE_F 2 "register_operand")]
             SVE_COND_FP_UNARY)
-          (match_operand:SVE_FULL_F 3 "aarch64_simd_reg_or_zero")]
+          (match_operand:SVE_F 3 "aarch64_simd_reg_or_zero")]
          UNSPEC_SEL))]
   "TARGET_SVE && !rtx_equal_p (operands[2], operands[3])"
   {@ [ cons: =0 , 1   , 2 , 3  ; attrs: movprfx ]
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fabs_1.c b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fabs_1.c
new file mode 100644
index 000000000000..d959aa972521
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fabs_1.c
@@ -0,0 +1,37 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -moverride=sve_width=2048 -ftree-vectorize" } */
+
+#include <stdint.h>
+
+#define a_i a[i]
+#define b_i b[i]
+
+#define TEST_FN(FN, TYPE0, TYPE1, COUNT, MERGE)                        \
+  void                                                         \
+  f_##FN##_##TYPE0##_##TYPE1##_##MERGE (TYPE1 *__restrict p,   \
+                                       TYPE0 *__restrict out,  \
+                                       TYPE0 *__restrict a,    \
+                                       TYPE0 *__restrict b)    \
+  {                                                            \
+    for (unsigned int i = 0; i < COUNT; i++)                   \
+      out[i] = p[i] ? FN (a[i]) : MERGE;                       \
+  }
+
+#define TEST_ALL(FN, TYPE0, TYPE1, COUNT) \
+  TEST_FN (FN, TYPE0, TYPE1, COUNT, a_i)  \
+  TEST_FN (FN, TYPE0, TYPE1, COUNT, b_i)
+
+TEST_ALL (__builtin_fabsf16, _Float16, uint64_t, 32)
+
+TEST_ALL (__builtin_fabsf16, _Float16, uint32_t, 64)
+
+TEST_ALL (__builtin_fabsf32, float, uint64_t, 32)
+
+/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.d} 3 } } */
+/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.s} 3 } } */
+/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.d} 3 } } */
+
+/* { dg-final { scan-assembler-times {\tfabs\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tfabs\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h\n} 4 } } */
+
+/* { dg-final { scan-assembler-not {\tsel\t} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fneg_1.c b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fneg_1.c
new file mode 100644
index 000000000000..7280f4e9fa87
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fneg_1.c
@@ -0,0 +1,39 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -moverride=sve_width=2048 -ftree-vectorize" } */
+
+#include <stdint.h>
+
+#define a_i a[i]
+#define b_i b[i]
+
+#define NEG(X) -X
+
+#define TEST_FN(FN, TYPE0, TYPE1, COUNT, MERGE)                        \
+  void                                                         \
+  f_##FN##_##TYPE0##_##TYPE1##_##MERGE (TYPE1 *__restrict p,   \
+                                       TYPE0 *__restrict out,  \
+                                       TYPE0 *__restrict a,    \
+                                       TYPE0 *__restrict b)    \
+  {                                                            \
+    for (unsigned int i = 0; i < COUNT; i++)                   \
+      out[i] = p[i] ? FN (a[i]) : MERGE;                       \
+  }
+
+#define TEST_ALL(FN, TYPE0, TYPE1, COUNT) \
+  TEST_FN (FN, TYPE0, TYPE1, COUNT, a_i)  \
+  TEST_FN (FN, TYPE0, TYPE1, COUNT, b_i)
+
+TEST_ALL (NEG, _Float16, uint64_t, 32)
+
+TEST_ALL (NEG, _Float16, uint32_t, 64)
+
+TEST_ALL (NEG, float, uint64_t, 32)
+
+/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.d} 3 } } */
+/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.s} 3 } } */
+/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.d} 3 } } */
+
+/* { dg-final { scan-assembler-times {\tfneg\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tfneg\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h\n} 4 } } */
+
+/* { dg-final { scan-assembler-not {\tsel\t} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_frinta_1.c b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_frinta_1.c
new file mode 100644
index 000000000000..ed4efb69f435
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_frinta_1.c
@@ -0,0 +1,37 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -moverride=sve_width=2048 -ftree-vectorize -fno-trapping-math" } */
+
+#include <stdint.h>
+
+#define a_i a[i]
+#define b_i b[i]
+
+#define TEST_FN(FN, TYPE0, TYPE1, COUNT, MERGE)                        \
+  void                                                         \
+  f_##FN##_##TYPE0##_##TYPE1##_##MERGE (TYPE1 *__restrict p,   \
+                                       TYPE0 *__restrict out,  \
+                                       TYPE0 *__restrict a,    \
+                                       TYPE0 *__restrict b)    \
+  {                                                            \
+    for (unsigned int i = 0; i < COUNT; i++)                   \
+      out[i] = p[i] ? FN (a[i]) : MERGE;                       \
+  }
+
+#define TEST_ALL(FN, TYPE0, TYPE1, COUNT) \
+  TEST_FN (FN, TYPE0, TYPE1, COUNT, a_i)  \
+  TEST_FN (FN, TYPE0, TYPE1, COUNT, b_i)
+
+TEST_ALL (__builtin_roundf16, _Float16, uint64_t, 32)
+
+TEST_ALL (__builtin_roundf16, _Float16, uint32_t, 64)
+
+TEST_ALL (__builtin_roundf32, float, uint64_t, 32)
+
+/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.d} 3 } } */
+/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.s} 3 } } */
+/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.d} 3 } } */
+
+/* { dg-final { scan-assembler-times {\tfrinta\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tfrinta\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h\n} 4 } } */
+
+/* { dg-final { scan-assembler-not {\tsel\t} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_frinta_2.c b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_frinta_2.c
new file mode 100644
index 000000000000..f20e2e6d4453
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_frinta_2.c
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -moverride=sve_width=2048 -mtune=generic -ftree-vectorize" } */
+
+#include "unpacked_cond_frinta_1.c"
+
+/* Test that we don't drop SELs without -fno-trapping-math.  */
+
+/* { dg-final { scan-assembler-times {\tptrue\tp[0-9]+\.s} 2 } } */
+/* { dg-final { scan-assembler-times {\tptrue\tp[0-9]+\.d} 4 } } */
+
+/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.d} 3 } } */
+/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.s} 3 } } */
+/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.d} 3 } } */
+
+/* { dg-final { scan-assembler-times {\tfrinta\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tfrinta\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h\n} 4 } } */
+
+/* { dg-final { scan-assembler-times {\tsel\t} 6 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_frinti_1.c b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_frinti_1.c
new file mode 100644
index 000000000000..d682d150a36b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_frinti_1.c
@@ -0,0 +1,37 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -moverride=sve_width=2048 -ftree-vectorize -fno-trapping-math" } */
+
+#include <stdint.h>
+
+#define a_i a[i]
+#define b_i b[i]
+
+#define TEST_FN(FN, TYPE0, TYPE1, COUNT, MERGE)                        \
+  void                                                         \
+  f_##FN##_##TYPE0##_##TYPE1##_##MERGE (TYPE1 *__restrict p,   \
+                                       TYPE0 *__restrict out,  \
+                                       TYPE0 *__restrict a,    \
+                                       TYPE0 *__restrict b)    \
+  {                                                            \
+    for (unsigned int i = 0; i < COUNT; i++)                   \
+      out[i] = p[i] ? FN (a[i]) : MERGE;                       \
+  }
+
+#define TEST_ALL(FN, TYPE0, TYPE1, COUNT) \
+  TEST_FN (FN, TYPE0, TYPE1, COUNT, a_i)  \
+  TEST_FN (FN, TYPE0, TYPE1, COUNT, b_i)
+
+TEST_ALL (__builtin_nearbyintf16, _Float16, uint64_t, 32)
+
+TEST_ALL (__builtin_nearbyintf16, _Float16, uint32_t, 64)
+
+TEST_ALL (__builtin_nearbyintf32, float, uint64_t, 32)
+
+/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.d} 3 } } */
+/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.s} 3 } } */
+/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.d} 3 } } */
+
+/* { dg-final { scan-assembler-times {\tfrinti\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tfrinti\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h\n} 4 } } */
+
+/* { dg-final { scan-assembler-not {\tsel\t} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_frintm_1.c b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_frintm_1.c
new file mode 100644
index 000000000000..7d429b391160
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_frintm_1.c
@@ -0,0 +1,37 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -moverride=sve_width=2048 -ftree-vectorize -fno-trapping-math" } */
+
+#include <stdint.h>
+
+#define a_i a[i]
+#define b_i b[i]
+
+#define TEST_FN(FN, TYPE0, TYPE1, COUNT, MERGE)                        \
+  void                                                         \
+  f_##FN##_##TYPE0##_##TYPE1##_##MERGE (TYPE1 *__restrict p,   \
+                                       TYPE0 *__restrict out,  \
+                                       TYPE0 *__restrict a,    \
+                                       TYPE0 *__restrict b)    \
+  {                                                            \
+    for (unsigned int i = 0; i < COUNT; i++)                   \
+      out[i] = p[i] ? FN (a[i]) : MERGE;                       \
+  }
+
+#define TEST_ALL(FN, TYPE0, TYPE1, COUNT) \
+  TEST_FN (FN, TYPE0, TYPE1, COUNT, a_i)  \
+  TEST_FN (FN, TYPE0, TYPE1, COUNT, b_i)
+
+TEST_ALL (__builtin_floorf16, _Float16, uint64_t, 32)
+
+TEST_ALL (__builtin_floorf16, _Float16, uint32_t, 64)
+
+TEST_ALL (__builtin_floorf32, float, uint64_t, 32)
+
+/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.d} 3 } } */
+/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.s} 3 } } */
+/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.d} 3 } } */
+
+/* { dg-final { scan-assembler-times {\tfrintm\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tfrintm\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h\n} 4 } } */
+
+/* { dg-final { scan-assembler-not {\tsel\t} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_frintp_1.c b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_frintp_1.c
new file mode 100644
index 000000000000..c6d0c8c98bb0
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_frintp_1.c
@@ -0,0 +1,37 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -moverride=sve_width=2048 -ftree-vectorize -fno-trapping-math" } */
+
+#include <stdint.h>
+
+#define a_i a[i]
+#define b_i b[i]
+
+#define TEST_FN(FN, TYPE0, TYPE1, COUNT, MERGE)                        \
+  void                                                         \
+  f_##FN##_##TYPE0##_##TYPE1##_##MERGE (TYPE1 *__restrict p,   \
+                                       TYPE0 *__restrict out,  \
+                                       TYPE0 *__restrict a,    \
+                                       TYPE0 *__restrict b)    \
+  {                                                            \
+    for (unsigned int i = 0; i < COUNT; i++)                   \
+      out[i] = p[i] ? FN (a[i]) : MERGE;                       \
+  }
+
+#define TEST_ALL(FN, TYPE0, TYPE1, COUNT) \
+  TEST_FN (FN, TYPE0, TYPE1, COUNT, a_i)  \
+  TEST_FN (FN, TYPE0, TYPE1, COUNT, b_i)
+
+TEST_ALL (__builtin_ceilf16, _Float16, uint64_t, 32)
+
+TEST_ALL (__builtin_ceilf16, _Float16, uint32_t, 64)
+
+TEST_ALL (__builtin_ceilf32, float, uint64_t, 32)
+
+/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.d} 3 } } */
+/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.s} 3 } } */
+/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.d} 3 } } */
+
+/* { dg-final { scan-assembler-times {\tfrintp\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tfrintp\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h\n} 4 } } */
+
+/* { dg-final { scan-assembler-not {\tsel\t} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_frintx_1.c b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_frintx_1.c
new file mode 100644
index 000000000000..b8afef1c11ff
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_frintx_1.c
@@ -0,0 +1,37 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -moverride=sve_width=2048 -ftree-vectorize -fno-trapping-math" } */
+
+#include <stdint.h>
+
+#define a_i a[i]
+#define b_i b[i]
+
+#define TEST_FN(FN, TYPE0, TYPE1, COUNT, MERGE)                        \
+  void                                                         \
+  f_##FN##_##TYPE0##_##TYPE1##_##MERGE (TYPE1 *__restrict p,   \
+                                       TYPE0 *__restrict out,  \
+                                       TYPE0 *__restrict a,    \
+                                       TYPE0 *__restrict b)    \
+  {                                                            \
+    for (unsigned int i = 0; i < COUNT; i++)                   \
+      out[i] = p[i] ? FN (a[i]) : MERGE;                       \
+  }
+
+#define TEST_ALL(FN, TYPE0, TYPE1, COUNT) \
+  TEST_FN (FN, TYPE0, TYPE1, COUNT, a_i)  \
+  TEST_FN (FN, TYPE0, TYPE1, COUNT, b_i)
+
+TEST_ALL (__builtin_rintf16, _Float16, uint64_t, 32)
+
+TEST_ALL (__builtin_rintf16, _Float16, uint32_t, 64)
+
+TEST_ALL (__builtin_rintf32, float, uint64_t, 32)
+
+/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.d} 3 } } */
+/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.s} 3 } } */
+/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.d} 3 } } */
+
+/* { dg-final { scan-assembler-times {\tfrintx\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tfrintx\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h\n} 4 } } */
+
+/* { dg-final { scan-assembler-not {\tsel\t} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_frintz_1.c b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_frintz_1.c
new file mode 100644
index 000000000000..d55279bbaea4
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_frintz_1.c
@@ -0,0 +1,37 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -moverride=sve_width=2048 -ftree-vectorize -fno-trapping-math" } */
+
+#include <stdint.h>
+
+#define a_i a[i]
+#define b_i b[i]
+
+#define TEST_FN(FN, TYPE0, TYPE1, COUNT, MERGE)                        \
+  void                                                         \
+  f_##FN##_##TYPE0##_##TYPE1##_##MERGE (TYPE1 *__restrict p,   \
+                                       TYPE0 *__restrict out,  \
+                                       TYPE0 *__restrict a,    \
+                                       TYPE0 *__restrict b)    \
+  {                                                            \
+    for (unsigned int i = 0; i < COUNT; i++)                   \
+      out[i] = p[i] ? FN (a[i]) : MERGE;                       \
+  }
+
+#define TEST_ALL(FN, TYPE0, TYPE1, COUNT) \
+  TEST_FN (FN, TYPE0, TYPE1, COUNT, a_i)  \
+  TEST_FN (FN, TYPE0, TYPE1, COUNT, b_i)
+
+TEST_ALL (__builtin_truncf16, _Float16, uint64_t, 32)
+
+TEST_ALL (__builtin_truncf16, _Float16, uint32_t, 64)
+
+TEST_ALL (__builtin_truncf32, float, uint64_t, 32)
+
+/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.d} 3 } } */
+/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.s} 3 } } */
+/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.d} 3 } } */
+
+/* { dg-final { scan-assembler-times {\tfrintz\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tfrintz\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h\n} 4 } } */
+
+/* { dg-final { scan-assembler-not {\tsel\t} } } */

Reply via email to