Re: [PATCH 10/14] aarch64: Relaxed SEL combiner patterns for unpacked SVE FP binary arithmetic

Richard Sandiford Fri, 06 Jun 2025 06:23:57 -0700

Spencer Abson <spencer.ab...@arm.com> writes:
> Extend the binary op/UNSPEC_SEL combiner patterns from SVE_FULL_F/
> SVE_FULL_F_B16B16 to SVE_F/SVE_F_B16B16, where the strictness value
> is SVE_RELAXED_GP.
>
> gcc/ChangeLog:
>
>       * config/aarch64/aarch64-sve.md (*cond_<optab><mode>_2_relaxed):
>       Extend from SVE_FULL_F_B16B16 to SVE_F_B16B16.
>       (*cond_<optab><mode>_3_relaxed): Likewise.
>       (*cond_<optab><mode>_any_relaxed): Likewise.
>       (*cond_<optab><mode>_any_const_relaxed): Extend from SVE_FULL_F
>       to SVE_F.
>       (*cond_<optab><mode>_2_const_relaxed): Likewise.
>       (*cond_add<mode>_2_const_relaxed): Likewise.
>       (*cond_add<mode>_any_const_relaxed): Likewise.
>       (*cond_sub<mode>_3_const_relaxed): Likewise.
>       (*cond_sub<mode>_const_relaxed): Likewise.
>
> gcc/testsuite/ChangeLog:
>
>       * g++.target/aarch64/sve/unpacked_cond_binary_bf16_1.C: New test.
>       * gcc.target/aarch64/sve/unpacked_cond_builtin_fmax_1.c: Likewise.
>       * gcc.target/aarch64/sve/unpacked_cond_builtin_fmin_1.c: Likewise.
>       * gcc.target/aarch64/sve/unpacked_cond_fadd_1.c: Likewise.
>       * gcc.target/aarch64/sve/unpacked_cond_fdiv_1.c: Likewise.
>       * gcc.target/aarch64/sve/unpacked_cond_fmaxnm_1.c: Likewise.
>       * gcc.target/aarch64/sve/unpacked_cond_fminnm_1.c: Likewise.
>       * gcc.target/aarch64/sve/unpacked_cond_fmul_1.c: Likewise.
>       * gcc.target/aarch64/sve/unpacked_cond_fsubr_1.c: Likewise.


Here too it might be good to have a token test for the default
trapping-math case, to make sure that the SELs are still present.

OK with that change, thanks.

Richard

> ---
>  gcc/config/aarch64/aarch64-sve.md             | 98 +++++++++----------
>  .../aarch64/sve/unpacked_cond_binary_bf16_1.C | 46 +++++++++
>  .../sve/unpacked_cond_builtin_fmax_1.c        | 47 +++++++++
>  .../sve/unpacked_cond_builtin_fmin_1.c        | 47 +++++++++
>  .../aarch64/sve/unpacked_cond_fadd_1.c        | 58 +++++++++++
>  .../aarch64/sve/unpacked_cond_fdiv_1.c        | 43 ++++++++
>  .../aarch64/sve/unpacked_cond_fmaxnm_1.c      | 49 ++++++++++
>  .../aarch64/sve/unpacked_cond_fminnm_1.c      | 49 ++++++++++
>  .../aarch64/sve/unpacked_cond_fmul_1.c        | 46 +++++++++
>  .../aarch64/sve/unpacked_cond_fsubr_1.c       | 53 ++++++++++
>  10 files changed, 487 insertions(+), 49 deletions(-)
>  create mode 100644 
> gcc/testsuite/g++.target/aarch64/sve/unpacked_cond_binary_bf16_1.C
>  create mode 100644 
> gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_builtin_fmax_1.c
>  create mode 100644 
> gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_builtin_fmin_1.c
>  create mode 100644 
> gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fadd_1.c
>  create mode 100644 
> gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fdiv_1.c
>  create mode 100644 
> gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fmaxnm_1.c
>  create mode 100644 
> gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fminnm_1.c
>  create mode 100644 
> gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fmul_1.c
>  create mode 100644 
> gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fsubr_1.c
>
> diff --git a/gcc/config/aarch64/aarch64-sve.md 
> b/gcc/config/aarch64/aarch64-sve.md
> index 79a087837de..d111e0b9261 100644
> --- a/gcc/config/aarch64/aarch64-sve.md
> +++ b/gcc/config/aarch64/aarch64-sve.md
> @@ -5585,14 +5585,14 @@
>  
>  ;; Predicated floating-point operations, merging with the first input.
>  (define_insn_and_rewrite "*cond_<optab><mode>_2_relaxed"
> -  [(set (match_operand:SVE_FULL_F_B16B16 0 "register_operand")
> -     (unspec:SVE_FULL_F_B16B16
> +  [(set (match_operand:SVE_F_B16B16 0 "register_operand")
> +     (unspec:SVE_F_B16B16
>         [(match_operand:<VPRED> 1 "register_operand")
> -        (unspec:SVE_FULL_F_B16B16
> +        (unspec:SVE_F_B16B16
>            [(match_operand 4)
>             (const_int SVE_RELAXED_GP)
> -           (match_operand:SVE_FULL_F_B16B16 2 "register_operand")
> -           (match_operand:SVE_FULL_F_B16B16 3 "register_operand")]
> +           (match_operand:SVE_F_B16B16 2 "register_operand")
> +           (match_operand:SVE_F_B16B16 3 "register_operand")]
>            SVE_COND_FP_BINARY)
>          (match_dup 2)]
>         UNSPEC_SEL))]
> @@ -5628,14 +5628,14 @@
>  
>  ;; Same for operations that take a 1-bit constant.
>  (define_insn_and_rewrite "*cond_<optab><mode>_2_const_relaxed"
> -  [(set (match_operand:SVE_FULL_F 0 "register_operand")
> -     (unspec:SVE_FULL_F
> +  [(set (match_operand:SVE_F 0 "register_operand")
> +     (unspec:SVE_F
>         [(match_operand:<VPRED> 1 "register_operand")
> -        (unspec:SVE_FULL_F
> +        (unspec:SVE_F
>            [(match_operand 4)
>             (const_int SVE_RELAXED_GP)
> -           (match_operand:SVE_FULL_F 2 "register_operand")
> -           (match_operand:SVE_FULL_F 3 "<sve_pred_fp_rhs2_immediate>")]
> +           (match_operand:SVE_F 2 "register_operand")
> +           (match_operand:SVE_F 3 "<sve_pred_fp_rhs2_immediate>")]
>            SVE_COND_FP_BINARY_I1)
>          (match_dup 2)]
>         UNSPEC_SEL))]
> @@ -5671,14 +5671,14 @@
>  
>  ;; Predicated floating-point operations, merging with the second input.
>  (define_insn_and_rewrite "*cond_<optab><mode>_3_relaxed"
> -  [(set (match_operand:SVE_FULL_F_B16B16 0 "register_operand")
> -     (unspec:SVE_FULL_F_B16B16
> +  [(set (match_operand:SVE_F_B16B16 0 "register_operand")
> +     (unspec:SVE_F_B16B16
>         [(match_operand:<VPRED> 1 "register_operand")
> -        (unspec:SVE_FULL_F_B16B16
> +        (unspec:SVE_F_B16B16
>            [(match_operand 4)
>             (const_int SVE_RELAXED_GP)
> -           (match_operand:SVE_FULL_F_B16B16 2 "register_operand")
> -           (match_operand:SVE_FULL_F_B16B16 3 "register_operand")]
> +           (match_operand:SVE_F_B16B16 2 "register_operand")
> +           (match_operand:SVE_F_B16B16 3 "register_operand")]
>            SVE_COND_FP_BINARY)
>          (match_dup 3)]
>         UNSPEC_SEL))]
> @@ -5714,16 +5714,16 @@
>  
>  ;; Predicated floating-point operations, merging with an independent value.
>  (define_insn_and_rewrite "*cond_<optab><mode>_any_relaxed"
> -  [(set (match_operand:SVE_FULL_F_B16B16 0 "register_operand")
> -     (unspec:SVE_FULL_F_B16B16
> +  [(set (match_operand:SVE_F_B16B16 0 "register_operand")
> +     (unspec:SVE_F_B16B16
>         [(match_operand:<VPRED> 1 "register_operand")
> -        (unspec:SVE_FULL_F_B16B16
> +        (unspec:SVE_F_B16B16
>            [(match_operand 5)
>             (const_int SVE_RELAXED_GP)
> -           (match_operand:SVE_FULL_F_B16B16 2 "register_operand")
> -           (match_operand:SVE_FULL_F_B16B16 3 "register_operand")]
> +           (match_operand:SVE_F_B16B16 2 "register_operand")
> +           (match_operand:SVE_F_B16B16 3 "register_operand")]
>            SVE_COND_FP_BINARY)
> -        (match_operand:SVE_FULL_F_B16B16 4 "aarch64_simd_reg_or_zero")]
> +        (match_operand:SVE_F_B16B16 4 "aarch64_simd_reg_or_zero")]
>         UNSPEC_SEL))]
>    "TARGET_SVE
>     && (<supports_bf16> || !<is_bf16>)
> @@ -5796,16 +5796,16 @@
>  
>  ;; Same for operations that take a 1-bit constant.
>  (define_insn_and_rewrite "*cond_<optab><mode>_any_const_relaxed"
> -  [(set (match_operand:SVE_FULL_F 0 "register_operand")
> -     (unspec:SVE_FULL_F
> +  [(set (match_operand:SVE_F 0 "register_operand")
> +     (unspec:SVE_F
>         [(match_operand:<VPRED> 1 "register_operand")
> -        (unspec:SVE_FULL_F
> +        (unspec:SVE_F
>            [(match_operand 5)
>             (const_int SVE_RELAXED_GP)
> -           (match_operand:SVE_FULL_F 2 "register_operand")
> -           (match_operand:SVE_FULL_F 3 "<sve_pred_fp_rhs2_immediate>")]
> +           (match_operand:SVE_F 2 "register_operand")
> +           (match_operand:SVE_F 3 "<sve_pred_fp_rhs2_immediate>")]
>            SVE_COND_FP_BINARY_I1)
> -        (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero")]
> +        (match_operand:SVE_F 4 "aarch64_simd_reg_or_zero")]
>         UNSPEC_SEL))]
>    "TARGET_SVE && !rtx_equal_p (operands[2], operands[4])"
>    {@ [ cons: =0 , 1   , 2 , 4   ]
> @@ -5892,14 +5892,14 @@
>  ;; Predicated floating-point addition of a constant, merging with the
>  ;; first input.
>  (define_insn_and_rewrite "*cond_add<mode>_2_const_relaxed"
> -  [(set (match_operand:SVE_FULL_F 0 "register_operand")
> -     (unspec:SVE_FULL_F
> +  [(set (match_operand:SVE_F 0 "register_operand")
> +     (unspec:SVE_F
>         [(match_operand:<VPRED> 1 "register_operand")
> -        (unspec:SVE_FULL_F
> +        (unspec:SVE_F
>            [(match_operand 4)
>             (const_int SVE_RELAXED_GP)
> -           (match_operand:SVE_FULL_F 2 "register_operand")
> -           (match_operand:SVE_FULL_F 3 
> "aarch64_sve_float_arith_with_sub_immediate")]
> +           (match_operand:SVE_F 2 "register_operand")
> +           (match_operand:SVE_F 3 
> "aarch64_sve_float_arith_with_sub_immediate")]
>            UNSPEC_COND_FADD)
>          (match_dup 2)]
>         UNSPEC_SEL))]
> @@ -5940,16 +5940,16 @@
>  ;; Predicated floating-point addition of a constant, merging with an
>  ;; independent value.
>  (define_insn_and_rewrite "*cond_add<mode>_any_const_relaxed"
> -  [(set (match_operand:SVE_FULL_F 0 "register_operand")
> -     (unspec:SVE_FULL_F
> +  [(set (match_operand:SVE_F 0 "register_operand")
> +     (unspec:SVE_F
>         [(match_operand:<VPRED> 1 "register_operand")
> -        (unspec:SVE_FULL_F
> +        (unspec:SVE_F
>            [(match_operand 5)
>             (const_int SVE_RELAXED_GP)
> -           (match_operand:SVE_FULL_F 2 "register_operand")
> -           (match_operand:SVE_FULL_F 3 
> "aarch64_sve_float_arith_with_sub_immediate")]
> +           (match_operand:SVE_F 2 "register_operand")
> +           (match_operand:SVE_F 3 
> "aarch64_sve_float_arith_with_sub_immediate")]
>            UNSPEC_COND_FADD)
> -        (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero")]
> +        (match_operand:SVE_F 4 "aarch64_simd_reg_or_zero")]
>         UNSPEC_SEL))]
>    "TARGET_SVE && !rtx_equal_p (operands[2], operands[4])"
>    {@ [ cons: =0 , 1   , 2 , 3   , 4   ]
> @@ -6207,14 +6207,14 @@
>  ;; Predicated floating-point subtraction from a constant, merging with the
>  ;; second input.
>  (define_insn_and_rewrite "*cond_sub<mode>_3_const_relaxed"
> -  [(set (match_operand:SVE_FULL_F 0 "register_operand")
> -     (unspec:SVE_FULL_F
> +  [(set (match_operand:SVE_F 0 "register_operand")
> +     (unspec:SVE_F
>         [(match_operand:<VPRED> 1 "register_operand")
> -        (unspec:SVE_FULL_F
> +        (unspec:SVE_F
>            [(match_operand 4)
>             (const_int SVE_RELAXED_GP)
> -           (match_operand:SVE_FULL_F 2 "aarch64_sve_float_arith_immediate")
> -           (match_operand:SVE_FULL_F 3 "register_operand")]
> +           (match_operand:SVE_F 2 "aarch64_sve_float_arith_immediate")
> +           (match_operand:SVE_F 3 "register_operand")]
>            UNSPEC_COND_FSUB)
>          (match_dup 3)]
>         UNSPEC_SEL))]
> @@ -6251,16 +6251,16 @@
>  ;; Predicated floating-point subtraction from a constant, merging with an
>  ;; independent value.
>  (define_insn_and_rewrite "*cond_sub<mode>_const_relaxed"
> -  [(set (match_operand:SVE_FULL_F 0 "register_operand")
> -     (unspec:SVE_FULL_F
> +  [(set (match_operand:SVE_F 0 "register_operand")
> +     (unspec:SVE_F
>         [(match_operand:<VPRED> 1 "register_operand")
> -        (unspec:SVE_FULL_F
> +        (unspec:SVE_F
>            [(match_operand 5)
>             (const_int SVE_RELAXED_GP)
> -           (match_operand:SVE_FULL_F 2 "aarch64_sve_float_arith_immediate")
> -           (match_operand:SVE_FULL_F 3 "register_operand")]
> +           (match_operand:SVE_F 2 "aarch64_sve_float_arith_immediate")
> +           (match_operand:SVE_F 3 "register_operand")]
>            UNSPEC_COND_FSUB)
> -        (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero")]
> +        (match_operand:SVE_F 4 "aarch64_simd_reg_or_zero")]
>         UNSPEC_SEL))]
>    "TARGET_SVE && !rtx_equal_p (operands[3], operands[4])"
>    {@ [ cons: =0 , 1   , 3 , 4   ]
> diff --git 
> a/gcc/testsuite/g++.target/aarch64/sve/unpacked_cond_binary_bf16_1.C 
> b/gcc/testsuite/g++.target/aarch64/sve/unpacked_cond_binary_bf16_1.C
> new file mode 100644
> index 00000000000..560d874cff7
> --- /dev/null
> +++ b/gcc/testsuite/g++.target/aarch64/sve/unpacked_cond_binary_bf16_1.C
> @@ -0,0 +1,46 @@
> +/* { dg-do compile }*/
> +/* { dg-options "-O -ffinite-math-only -fno-signed-zeros -fno-trapping-math 
> -msve-vector-bits=2048 " } */
> +
> +#include <stdint.h>
> +#pragma GCC target "arch=armv9-a+sve-b16b16"
> +
> +#define ADD(a, b) a + b
> +#define SUB(a, b) a - b
> +#define MUL(a, b) a * b
> +#define MAX(a, b) (a > b) ? a : b
> +#define MIN(a, b) (a > b) ? b : a
> +
> +#define COND_OP(OP, TYPE, PRED_TYPE, ARG2, MERGE)                            
>       \
> +  TYPE test_##OP##_##TYPE##_##ARG2##_##MERGE (TYPE a, TYPE b, TYPE c, 
> PRED_TYPE p) \
> +  {return p ? OP (a, ARG2) : MERGE; }
> +
> +#define TEST_OP(OP, TYPE, PRED_TYPE, T)     \
> +  T (OP, TYPE, PRED_TYPE, b, a)             \
> +  T (OP, TYPE, PRED_TYPE, b, b)             \
> +  T (OP, TYPE, PRED_TYPE, b, c)
> +
> +#define TEST_ALL(TYPE, PRED_TYPE, T) \
> +  TEST_OP (ADD, TYPE, PRED_TYPE, T)  \
> +  TEST_OP (SUB, TYPE, PRED_TYPE, T)  \
> +  TEST_OP (MUL, TYPE, PRED_TYPE, T)  \
> +  TEST_OP (MAX, TYPE, PRED_TYPE, T)  \
> +  TEST_OP (MIN, TYPE, PRED_TYPE, T)
> +
> +#define TEST(TYPE, PTYPE, SIZE)                                   \
> +  typedef TYPE TYPE##SIZE __attribute__ ((vector_size (SIZE)));   \
> +  typedef PTYPE PTYPE##SIZE __attribute__ ((vector_size (SIZE))); \
> +  TEST_ALL (TYPE##SIZE, PTYPE##SIZE, COND_OP)
> +
> +TEST (__bf16, uint16_t, 128)
> +
> +TEST (__bf16, uint16_t, 64)
> +
> +/* { dg-final { scan-assembler-times {\tbfadd\tz[0-9]+\.h, p[0-7]/m, 
> z[0-9]+\.h, z[0-9]+\.h\n} 6 } } */
> +/* { dg-final { scan-assembler-times {\tbfsub\tz[0-9]+\.h, p[0-7]/m, 
> z[0-9]+\.h, z[0-9]+\.h\n} 6 } } */
> +/* { dg-final { scan-assembler-times {\tbfmul\tz[0-9]+\.h, p[0-7]/m, 
> z[0-9]+\.h, z[0-9]+\.h\n} 6 } } */
> +
> +/* { dg-final { scan-assembler-times {\tbfminnm\tz[0-9]+\.h, p[0-7]/m, 
> z[0-9]+\.h, z[0-9]+\.h\n} 6 } } */
> +/* { dg-final { scan-assembler-times {\tbfmaxnm\tz[0-9]+\.h, p[0-7]/m, 
> z[0-9]+\.h, z[0-9]+\.h\n} 6 } } */
> +
> +// There's no BFSUBR.
> +/* { dg-final { scan-assembler-times {\tsel\t} 2 } } */
> diff --git 
> a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_builtin_fmax_1.c 
> b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_builtin_fmax_1.c
> new file mode 100644
> index 00000000000..50ad22b81bd
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_builtin_fmax_1.c
> @@ -0,0 +1,47 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -msve-vector-bits=2048 -ftree-vectorize 
> -fno-trapping-math" } */
> +
> +#include <stdint.h>
> +
> +#define a_i a[i]
> +#define b_i b[i]
> +#define c_i c[i]
> +
> +#define TEST_FN(FN, TYPE0, TYPE1, COUNT, RHS, MERGE)         \
> +  void                                                               \
> +  f_##TYPE0##_##TYPE1##_##RHS##_##MERGE (TYPE0 *__restrict out,      \
> +                                      TYPE0 *__restrict a,   \
> +                                      TYPE0 *__restrict b,   \
> +                                      TYPE0 *__restrict c,   \
> +                                      TYPE1 *__restrict p)   \
> +  {                                                          \
> +    for (unsigned int i = 0; i < COUNT; i++)                 \
> +      out[i] = p[i] ? FN (a[i], (TYPE0)RHS) : MERGE;         \
> +  }
> +
> +#define TEST_ALL(FN, TYPE0, TYPE1, COUNT)     \
> +  TEST_FN (FN, TYPE0, TYPE1, COUNT, b_i, a_i) \
> +  TEST_FN (FN, TYPE0, TYPE1, COUNT, b_i, b_i) \
> +  TEST_FN (FN, TYPE0, TYPE1, COUNT, b_i, c_i) \
> +  TEST_FN (FN, TYPE0, TYPE1, COUNT, 0, a_i)   \
> +  TEST_FN (FN, TYPE0, TYPE1, COUNT, 0, b_i)   \
> +  TEST_FN (FN, TYPE0, TYPE1, COUNT, 1, a_i)   \
> +  TEST_FN (FN, TYPE0, TYPE1, COUNT, 1, b_i)
> +
> +TEST_ALL (__builtin_fmaxf16, _Float16, uint64_t, 32)
> +
> +TEST_ALL (__builtin_fmaxf16, _Float16, uint32_t, 64)
> +
> +TEST_ALL (__builtin_fmaxf32, float, uint64_t, 32)
> +
> +/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.s, p[0-7]/m, 
> z[0-9]+\.s\n} 3 } } */
> +/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.s, p[0-7]/m, 
> z[0-9]+\.s, z[0-9]+\.s\n} 3 } } */
> +/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.s, p[0-7]/m, 
> z[0-9]+\.s, #0.0\n} 2 } } */
> +/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.s, p[0-7]/m, 
> z[0-9]+\.s, #1.0\n} 2 } } */
> +
> +/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.h, p[0-7]/m, 
> z[0-9]+\.h\n} 6 } } */
> +/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.h, p[0-7]/m, 
> z[0-9]+\.h, z[0-9]+\.h\n} 6 } } */
> +/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.h, p[0-7]/m, 
> z[0-9]+\.h, #0.0\n} 4 } } */
> +/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.h, p[0-7]/m, 
> z[0-9]+\.h, #1.0\n} 4 } } */
> +
> +/* { dg-final { scan-assembler-not {\tsel\t} } } */
> diff --git 
> a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_builtin_fmin_1.c 
> b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_builtin_fmin_1.c
> new file mode 100644
> index 00000000000..df860333291
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_builtin_fmin_1.c
> @@ -0,0 +1,47 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -msve-vector-bits=2048 -ftree-vectorize 
> -fno-trapping-math" } */
> +
> +#include <stdint.h>
> +
> +#define a_i a[i]
> +#define b_i b[i]
> +#define c_i c[i]
> +
> +#define TEST_FN(FN, TYPE0, TYPE1, COUNT, RHS, MERGE)         \
> +  void                                                               \
> +  f_##TYPE0##_##TYPE1##_##RHS##_##MERGE (TYPE0 *__restrict out,      \
> +                                      TYPE0 *__restrict a,   \
> +                                      TYPE0 *__restrict b,   \
> +                                      TYPE0 *__restrict c,   \
> +                                      TYPE1 *__restrict p)   \
> +  {                                                          \
> +    for (unsigned int i = 0; i < COUNT; i++)                 \
> +      out[i] = p[i] ? FN (a[i], (TYPE0)RHS) : MERGE;         \
> +  }
> +
> +#define TEST_ALL(FN, TYPE0, TYPE1, COUNT)     \
> +  TEST_FN (FN, TYPE0, TYPE1, COUNT, b_i, a_i) \
> +  TEST_FN (FN, TYPE0, TYPE1, COUNT, b_i, b_i) \
> +  TEST_FN (FN, TYPE0, TYPE1, COUNT, b_i, c_i) \
> +  TEST_FN (FN, TYPE0, TYPE1, COUNT, 0, a_i)   \
> +  TEST_FN (FN, TYPE0, TYPE1, COUNT, 0, b_i)   \
> +  TEST_FN (FN, TYPE0, TYPE1, COUNT, 1, a_i)   \
> +  TEST_FN (FN, TYPE0, TYPE1, COUNT, 1, b_i)
> +
> +TEST_ALL (__builtin_fminf16, _Float16, uint64_t, 32)
> +
> +TEST_ALL (__builtin_fminf16, _Float16, uint32_t, 64)
> +
> +TEST_ALL (__builtin_fminf32, float, uint64_t, 32)
> +
> +/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.s, p[0-7]/m, 
> z[0-9]+\.s\n} 3 } } */
> +/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.s, p[0-7]/m, 
> z[0-9]+\.s, z[0-9]+\.s\n} 3 } } */
> +/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.s, p[0-7]/m, 
> z[0-9]+\.s, #0.0\n} 2 } } */
> +/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.s, p[0-7]/m, 
> z[0-9]+\.s, #1.0\n} 2 } } */
> +
> +/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.h, p[0-7]/m, 
> z[0-9]+\.h\n} 6 } } */
> +/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.h, p[0-7]/m, 
> z[0-9]+\.h, z[0-9]+\.h\n} 6 } } */
> +/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.h, p[0-7]/m, 
> z[0-9]+\.h, #0.0\n} 4 } } */
> +/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.h, p[0-7]/m, 
> z[0-9]+\.h, #1.0\n} 4 } } */
> +
> +/* { dg-final { scan-assembler-not {\tsel\t} } } */
> diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fadd_1.c 
> b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fadd_1.c
> new file mode 100644
> index 00000000000..0fc820fc39a
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fadd_1.c
> @@ -0,0 +1,58 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -ftree-vectorize -msve-vector-bits=2048 
> -fno-trapping-math" } */
> +
> +#include <stdint.h>
> +
> +#define a_i a[i]
> +#define b_i b[i]
> +#define c_i c[i]
> +#define imm_p5 0.5
> +
> +#define ADD(A, B) A + B
> +
> +#define TEST_FN(FN, TYPE0, TYPE1, COUNT, NAME, RHS, MERGE)    \
> +  void                                                                \
> +  f_##TYPE0##_##TYPE1##_##NAME##_##MERGE (TYPE0 *__restrict out, \
> +                                      TYPE0 *__restrict a,    \
> +                                      TYPE0 *__restrict b,    \
> +                                      TYPE0 *__restrict c,    \
> +                                      TYPE1 *__restrict p)    \
> +  {                                                           \
> +    for (unsigned int i = 0; i < COUNT; i++)                  \
> +      out[i] = p[i] ? FN (a[i], (TYPE0)RHS) : MERGE;          \
> +  }
> +
> +#define TEST_ALL(FN, TYPE0, TYPE1, COUNT)         \
> +  TEST_FN (FN, TYPE0, TYPE1, COUNT, b_i, b[i], a_i)  \
> +  TEST_FN (FN, TYPE0, TYPE1, COUNT, b_i, b[i], b_i)  \
> +  TEST_FN (FN, TYPE0, TYPE1, COUNT, b_i, b[i], c_i)  \
> +  TEST_FN (FN, TYPE0, TYPE1, COUNT, one, 1, a_i)     \
> +  TEST_FN (FN, TYPE0, TYPE1, COUNT, one, 1, b_i)     \
> +  TEST_FN (FN, TYPE0, TYPE1, COUNT, none, -1, a_i)   \
> +  TEST_FN (FN, TYPE0, TYPE1, COUNT, none, -1, b_i)   \
> +  TEST_FN (FN, TYPE0, TYPE1, COUNT, p5, 0.5, a_i)    \
> +  TEST_FN (FN, TYPE0, TYPE1, COUNT, p5, 0.5, b_i)    \
> +  TEST_FN (FN, TYPE0, TYPE1, COUNT, np5, -0.5, a_i)  \
> +  TEST_FN (FN, TYPE0, TYPE1, COUNT, np5, -0.5, b_i)
> +
> +TEST_ALL (ADD, _Float16, uint64_t, 32)
> +
> +TEST_ALL (ADD, _Float16, uint32_t, 64)
> +
> +TEST_ALL (ADD, float, uint64_t, 32)
> +
> +/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.s, p[0-7]/m, 
> z[0-9]+\.s\n} 5 } } */
> +/* { dg-final { scan-assembler-times {\tfadd\tz[0-9]+\.s, p[0-7]/m, 
> z[0-9]+\.s, z[0-9]+\.s\n} 3 } } */
> +/* { dg-final { scan-assembler-times {\tfadd\tz[0-9]+\.s, p[0-7]/m, 
> z[0-9]+\.s, #0.5\n} 2 } } */
> +/* { dg-final { scan-assembler-times {\tfadd\tz[0-9]+\.s, p[0-7]/m, 
> z[0-9]+\.s, #1.0\n} 2 } } */
> +/* { dg-final { scan-assembler-times {\tfsub\tz[0-9]+\.s, p[0-7]/m, 
> z[0-9]+\.s, #0.5\n} 2 } } */
> +/* { dg-final { scan-assembler-times {\tfsub\tz[0-9]+\.s, p[0-7]/m, 
> z[0-9]+\.s, #1.0\n} 2 } } */
> +
> +/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.h, p[0-7]/m, 
> z[0-9]+\.h\n} 10 } } */
> +/* { dg-final { scan-assembler-times {\tfadd\tz[0-9]+\.h, p[0-7]/m, 
> z[0-9]+\.h, z[0-9]+\.h\n} 6 } } */
> +/* { dg-final { scan-assembler-times {\tfadd\tz[0-9]+\.h, p[0-7]/m, 
> z[0-9]+\.h, #0.5\n} 4 } } */
> +/* { dg-final { scan-assembler-times {\tfadd\tz[0-9]+\.h, p[0-7]/m, 
> z[0-9]+\.h, #1.0\n} 4 } } */
> +/* { dg-final { scan-assembler-times {\tfsub\tz[0-9]+\.h, p[0-7]/m, 
> z[0-9]+\.h, #0.5\n} 4 } } */
> +/* { dg-final { scan-assembler-times {\tfsub\tz[0-9]+\.h, p[0-7]/m, 
> z[0-9]+\.h, #1.0\n} 4 } } */
> +
> +/* { dg-final { scan-assembler-not {\tsel\t} } } */
> diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fdiv_1.c 
> b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fdiv_1.c
> new file mode 100644
> index 00000000000..0339562d0c3
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fdiv_1.c
> @@ -0,0 +1,43 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -ftree-vectorize -msve-vector-bits=2048 
> -fno-trapping-math" } */
> +
> +#include <stdint.h>
> +
> +#define a_i a[i]
> +#define b_i b[i]
> +#define c_i c[i]
> +
> +#define DIV(A, B) A / B
> +
> +#define TEST_FN(FN, TYPE0, TYPE1, COUNT, RHS, MERGE)         \
> +  void                                                               \
> +  f_##TYPE0##_##TYPE1##_##RHS##_##MERGE (TYPE0 *__restrict out,      \
> +                                      TYPE0 *__restrict a,   \
> +                                      TYPE0 *__restrict b,   \
> +                                      TYPE0 *__restrict c,   \
> +                                      TYPE1 *__restrict p)   \
> +  {                                                          \
> +    for (unsigned int i = 0; i < COUNT; i++)                 \
> +      out[i] = p[i] ? FN (a[i], (TYPE0)RHS) : MERGE;         \
> +  }
> +
> +#define TEST_ALL(FN, TYPE0, TYPE1, COUNT)     \
> +  TEST_FN (FN, TYPE0, TYPE1, COUNT, b_i, a_i)    \
> +  TEST_FN (FN, TYPE0, TYPE1, COUNT, b_i, b_i)    \
> +  TEST_FN (FN, TYPE0, TYPE1, COUNT, b_i, c_i)
> +
> +TEST_ALL (DIV, _Float16, uint64_t, 32)
> +
> +TEST_ALL (DIV, _Float16, uint32_t, 64)
> +
> +TEST_ALL (DIV, float, uint64_t, 32)
> +
> +/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.s, p[0-7]/m, 
> z[0-9]+\.s\n} 1 } } */
> +/* { dg-final { scan-assembler-times {\tfdivr\tz[0-9]+\.s, p[0-7]/m, 
> z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */
> +/* { dg-final { scan-assembler-times {\tfdiv\tz[0-9]+\.s, p[0-7]/m, 
> z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */
> +
> +/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.h, p[0-7]/m, 
> z[0-9]+\.h\n} 2 } } */
> +/* { dg-final { scan-assembler-times {\tfdivr\tz[0-9]+\.h, p[0-7]/m, 
> z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */
> +/* { dg-final { scan-assembler-times {\tfdiv\tz[0-9]+\.h, p[0-7]/m, 
> z[0-9]+\.h, z[0-9]+\.h\n} 4 } } */
> +
> +/* { dg-final { scan-assembler-not {\tsel\t} } } */
> diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fmaxnm_1.c 
> b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fmaxnm_1.c
> new file mode 100644
> index 00000000000..e7328b4fa9d
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fmaxnm_1.c
> @@ -0,0 +1,49 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -ftree-vectorize -msve-vector-bits=2048 
> -fno-signed-zeros -ffinite-math-only -fno-trapping-math" } */
> +
> +#include <stdint.h>
> +
> +#define a_i a[i]
> +#define b_i b[i]
> +#define c_i c[i]
> +
> +#define MAX(A, B) (A > B) ? A : B
> +
> +#define TEST_FN(FN, TYPE0, TYPE1, COUNT, RHS, MERGE)         \
> +  void                                                               \
> +  f_##TYPE0##_##TYPE1##_##RHS##_##MERGE (TYPE0 *__restrict out,      \
> +                                      TYPE0 *__restrict a,   \
> +                                      TYPE0 *__restrict b,   \
> +                                      TYPE0 *__restrict c,   \
> +                                      TYPE1 *__restrict p)   \
> +  {                                                          \
> +    for (unsigned int i = 0; i < COUNT; i++)                 \
> +      out[i] = p[i] ? FN (a[i], (TYPE0)RHS) : MERGE;         \
> +  }
> +
> +#define TEST_ALL(FN, TYPE0, TYPE1, COUNT)     \
> +  TEST_FN (FN, TYPE0, TYPE1, COUNT, b_i, a_i) \
> +  TEST_FN (FN, TYPE0, TYPE1, COUNT, b_i, b_i) \
> +  TEST_FN (FN, TYPE0, TYPE1, COUNT, b_i, c_i) \
> +  TEST_FN (FN, TYPE0, TYPE1, COUNT, 0, a_i)   \
> +  TEST_FN (FN, TYPE0, TYPE1, COUNT, 0, b_i)   \
> +  TEST_FN (FN, TYPE0, TYPE1, COUNT, 1, a_i)   \
> +  TEST_FN (FN, TYPE0, TYPE1, COUNT, 1, b_i)
> +
> +TEST_ALL (MAX, _Float16, uint64_t, 32)
> +
> +TEST_ALL (MAX, _Float16, uint32_t, 64)
> +
> +TEST_ALL (MAX, float, uint64_t, 32)
> +
> +/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.s, p[0-7]/m, 
> z[0-9]+\.s\n} 3 } } */
> +/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.s, p[0-7]/m, 
> z[0-9]+\.s, z[0-9]+\.s\n} 3 } } */
> +/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.s, p[0-7]/m, 
> z[0-9]+\.s, #0.0\n} 2 } } */
> +/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.s, p[0-7]/m, 
> z[0-9]+\.s, #1.0\n} 2 } } */
> +
> +/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.h, p[0-7]/m, 
> z[0-9]+\.h\n} 6 } } */
> +/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.h, p[0-7]/m, 
> z[0-9]+\.h, z[0-9]+\.h\n} 6 } } */
> +/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.h, p[0-7]/m, 
> z[0-9]+\.h, #0.0\n} 4 } } */
> +/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.h, p[0-7]/m, 
> z[0-9]+\.h, #1.0\n} 4 } } */
> +
> +/* { dg-final { scan-assembler-not {\tsel\t} } } */
> diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fminnm_1.c 
> b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fminnm_1.c
> new file mode 100644
> index 00000000000..43bf97439ab
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fminnm_1.c
> @@ -0,0 +1,49 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -ftree-vectorize -msve-vector-bits=2048 
> -fno-signed-zeros -ffinite-math-only -fno-trapping-math" } */
> +
> +#include <stdint.h>
> +
> +#define a_i a[i]
> +#define b_i b[i]
> +#define c_i c[i]
> +
> +#define MIN(A, B) (A < B) ? A : B
> +
> +#define TEST_FN(FN, TYPE0, TYPE1, COUNT, RHS, MERGE)         \
> +  void                                                               \
> +  f_##TYPE0##_##TYPE1##_##RHS##_##MERGE (TYPE0 *__restrict out,      \
> +                                      TYPE0 *__restrict a,   \
> +                                      TYPE0 *__restrict b,   \
> +                                      TYPE0 *__restrict c,   \
> +                                      TYPE1 *__restrict p)   \
> +  {                                                          \
> +    for (unsigned int i = 0; i < COUNT; i++)                 \
> +      out[i] = p[i] ? FN (a[i], (TYPE0)RHS) : MERGE;         \
> +  }
> +
> +#define TEST_ALL(FN, TYPE0, TYPE1, COUNT)     \
> +  TEST_FN (FN, TYPE0, TYPE1, COUNT, b_i, a_i) \
> +  TEST_FN (FN, TYPE0, TYPE1, COUNT, b_i, b_i) \
> +  TEST_FN (FN, TYPE0, TYPE1, COUNT, b_i, c_i) \
> +  TEST_FN (FN, TYPE0, TYPE1, COUNT, 0, a_i)   \
> +  TEST_FN (FN, TYPE0, TYPE1, COUNT, 0, b_i)   \
> +  TEST_FN (FN, TYPE0, TYPE1, COUNT, 1, a_i)   \
> +  TEST_FN (FN, TYPE0, TYPE1, COUNT, 1, b_i)
> +
> +TEST_ALL (MIN, _Float16, uint64_t, 32)
> +
> +TEST_ALL (MIN, _Float16, uint32_t, 64)
> +
> +TEST_ALL (MIN, float, uint64_t, 32)
> +
> +/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.s, p[0-7]/m, 
> z[0-9]+\.s\n} 3 } } */
> +/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.s, p[0-7]/m, 
> z[0-9]+\.s, z[0-9]+\.s\n} 3 } } */
> +/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.s, p[0-7]/m, 
> z[0-9]+\.s, #0.0\n} 2 } } */
> +/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.s, p[0-7]/m, 
> z[0-9]+\.s, #1.0\n} 2 } } */
> +
> +/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.h, p[0-7]/m, 
> z[0-9]+\.h\n} 6 } } */
> +/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.h, p[0-7]/m, 
> z[0-9]+\.h, z[0-9]+\.h\n} 6 } } */
> +/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.h, p[0-7]/m, 
> z[0-9]+\.h, #0.0\n} 4 } } */
> +/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.h, p[0-7]/m, 
> z[0-9]+\.h, #1.0\n} 4 } } */
> +
> +/* { dg-final { scan-assembler-not {\tsel\t} } } */
> diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fmul_1.c 
> b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fmul_1.c
> new file mode 100644
> index 00000000000..181f1709eeb
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fmul_1.c
> @@ -0,0 +1,46 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -ftree-vectorize -msve-vector-bits=2048 
> -fno-trapping-math" } */
> +
> +#include <stdint.h>
> +
> +#define a_i a[i]
> +#define b_i b[i]
> +#define c_i c[i]
> +#define imm_p5 0.5
> +
> +#define MUL(A, B) A * B
> +
> +#define TEST_FN(FN, TYPE0, TYPE1, COUNT, RHS, MERGE)         \
> +  void                                                               \
> +  f_##TYPE0##_##TYPE1##_##RHS##_##MERGE (TYPE0 *__restrict out,      \
> +                                      TYPE0 *__restrict a,   \
> +                                      TYPE0 *__restrict b,   \
> +                                      TYPE0 *__restrict c,   \
> +                                      TYPE1 *__restrict p)   \
> +  {                                                          \
> +    for (unsigned int i = 0; i < COUNT; i++)                 \
> +      out[i] = p[i] ? FN (a[i], (TYPE0)RHS) : MERGE;         \
> +  }
> +
> +#define TEST_ALL(FN, TYPE0, TYPE1, COUNT)     \
> +  TEST_FN (FN, TYPE0, TYPE1, COUNT, b_i, a_i)    \
> +  TEST_FN (FN, TYPE0, TYPE1, COUNT, b_i, b_i)    \
> +  TEST_FN (FN, TYPE0, TYPE1, COUNT, b_i, c_i)    \
> +  TEST_FN (FN, TYPE0, TYPE1, COUNT, imm_p5, a_i) \
> +  TEST_FN (FN, TYPE0, TYPE1, COUNT, imm_p5, b_i)
> +
> +TEST_ALL (MUL, _Float16, uint64_t, 32)
> +
> +TEST_ALL (MUL, _Float16, uint32_t, 64)
> +
> +TEST_ALL (MUL, float, uint64_t, 32)
> +
> +/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s\n} 2 } } */
> +/* { dg-final { scan-assembler-times {\tfmul\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 3 } } */
> +/* { dg-final { scan-assembler-times {\tfmul\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #0.5\n} 2 } } */
> +
> +/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h\n} 4 } } */
> +/* { dg-final { scan-assembler-times {\tfmul\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 6 } } */
> +/* { dg-final { scan-assembler-times {\tfmul\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #0.5\n} 4 } } */
> +
> +/* { dg-final { scan-assembler-not {\tsel\t} } } */
> diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fsubr_1.c b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fsubr_1.c
> new file mode 100644
> index 00000000000..9dbea43c015
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fsubr_1.c
> @@ -0,0 +1,53 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -ftree-vectorize -msve-vector-bits=2048 -fno-trapping-math" } */
> +
> +#include <stdint.h>
> +
> +#define a_i a[i]
> +#define b_i b[i]
> +#define c_i c[i]
> +#define imm_p5 0.5
> +
> +#define SUBR(A, B) B - A
> +
> +#define TEST_FN(FN, TYPE0, TYPE1, COUNT, RHS, MERGE)         \
> +  void                                                               \
> +  f_##TYPE0##_##TYPE1##_##RHS##_##MERGE (TYPE0 *__restrict out,      \
> +                                      TYPE0 *__restrict a,   \
> +                                      TYPE0 *__restrict b,   \
> +                                      TYPE0 *__restrict c,   \
> +                                      TYPE1 *__restrict p)   \
> +  {                                                          \
> +    for (unsigned int i = 0; i < COUNT; i++)                 \
> +      out[i] = p[i] ? FN (a[i], (TYPE0)RHS) : MERGE;         \
> +  }
> +
> +#define TEST_ALL(FN, TYPE0, TYPE1, COUNT)     \
> +  TEST_FN (FN, TYPE0, TYPE1, COUNT, b_i, a_i)    \
> +  TEST_FN (FN, TYPE0, TYPE1, COUNT, b_i, b_i)    \
> +  TEST_FN (FN, TYPE0, TYPE1, COUNT, b_i, c_i)    \
> +  TEST_FN (FN, TYPE0, TYPE1, COUNT, 1, a_i)   \
> +  TEST_FN (FN, TYPE0, TYPE1, COUNT, 1, b_i)   \
> +  TEST_FN (FN, TYPE0, TYPE1, COUNT, imm_p5, a_i) \
> +  TEST_FN (FN, TYPE0, TYPE1, COUNT, imm_p5, b_i)
> +
> +TEST_ALL (SUBR, _Float16, uint64_t, 32)
> +
> +TEST_ALL (SUBR, _Float16, uint32_t, 64)
> +
> +TEST_ALL (SUBR, float, uint64_t, 32)
> +
> +/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s\n} 3 } } */
> +
> +/* { dg-final { scan-assembler-times {\tfsub\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */
> +/* { dg-final { scan-assembler-times {\tfsubr\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */
> +/* { dg-final { scan-assembler-times {\tfsubr\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #0.5\n} 2 } } */
> +/* { dg-final { scan-assembler-times {\tfsubr\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #1.0\n} 2 } } */
> +
> +/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h\n} 6 } } */
> +/* { dg-final { scan-assembler-times {\tfsub\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 4 } } */
> +/* { dg-final { scan-assembler-times {\tfsubr\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */
> +/* { dg-final { scan-assembler-times {\tfsubr\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #0.5\n} 4 } } */
> +/* { dg-final { scan-assembler-times {\tfsubr\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #1.0\n} 4 } } */
> +
> +/* { dg-final { scan-assembler-not {\tsel\t} } } */

Re: [PATCH 10/14] aarch64: Relaxed SEL combiner patterns for unpacked SVE FP binary arithmetic

Reply via email to