[PATCH 11/14] aarch64: Add support for unpacked SVE FP conditional binary arithmetic

Spencer Abson Mon, 02 Jun 2025 03:09:03 -0700

This patch extends the expander for conditional smax, smin, add, sub,
mul, min, max, and div to support partial SVE FP modes.


The natural mask supplied to the unpacked operation leaves the undefined
elements in each container unpredicated.  When trapping math is enabled
(flag_trapping_math), this expansion ANDs the mask with the packed
predicate for the data mode, explicitly disabling these elements.

gcc/ChangeLog:

        * config/aarch64/aarch64-protos.h (aarch64_sve_emit_masked_fp_pred):
        Declare.
        * config/aarch64/aarch64-sve.md (and<mode>3):  Change this to...
        (@and<mode>3): ...this, to have gen_and3.
        (@cond_<optab><mode>): Extend from SVE_FULL_F_B16B16 to SVE_F_B16B16,
        use aarch64_predicate_operand.
        (*cond_<optab><mode>_2_strict): Likewise.
        (*cond_<optab><mode>_3_strict): Likewise.
        (*cond_<optab><mode>_any_strict): Likewise.
        (*cond_<optab><mode>_2_const_strict): Extend from SVE_FULL_F to SVE_F,
        use aarch64_predicate_operand.
        (*cond_<optab><mode>_any_const_strict): Likewise.
        (*cond_add<mode>_2_const_strict): Likewise.
        (*cond_add<mode>_any_const_strict): Likewise.
        (*cond_sub<mode>_3_const_strict): Likewise.
        (*cond_sub<mode>_const_strict): Likewise.
        (*vcond_mask_<mode><vpred>): Use aarch64_predicate_operand, and update
        the comment here.
        * config/aarch64/aarch64.cc (aarch64_sve_emit_masked_fp_pred):
        New function.  Helper to mask the predicate in conditional expanders.

gcc/testsuite/ChangeLog:

        * g++.target/aarch64/sve/unpacked_cond_binary_bf16_2.C: New test.
        * gcc.target/aarch64/sve/unpacked_cond_builtin_fmax_2.c: Likewise.
        * gcc.target/aarch64/sve/unpacked_cond_builtin_fmin_2.c: Likewise.
        * gcc.target/aarch64/sve/unpacked_cond_fadd_2.c: Likewise.
        * gcc.target/aarch64/sve/unpacked_cond_fdiv_2.c: Likewise.
        * gcc.target/aarch64/sve/unpacked_cond_fmaxnm_2.c: Likewise.
        * gcc.target/aarch64/sve/unpacked_cond_fminnm_2.c: Likewise.
        * gcc.target/aarch64/sve/unpacked_cond_fmul_2.c: Likewise.
        * gcc.target/aarch64/sve/unpacked_cond_fsubr_2.c: Likewise.
---
 gcc/config/aarch64/aarch64-protos.h           |   1 +
 gcc/config/aarch64/aarch64-sve.md             | 153 +++++++++---------
 gcc/config/aarch64/aarch64.cc                 |  27 ++++
 .../aarch64/sve/unpacked_cond_binary_bf16_2.C |  18 +++
 .../sve/unpacked_cond_builtin_fmax_2.c        |  20 +++
 .../sve/unpacked_cond_builtin_fmin_2.c        |  20 +++
 .../aarch64/sve/unpacked_cond_fadd_2.c        |  24 +++
 .../aarch64/sve/unpacked_cond_fdiv_2.c        |  18 +++
 .../aarch64/sve/unpacked_cond_fmaxnm_2.c      |  20 +++
 .../aarch64/sve/unpacked_cond_fminnm_2.c      |  20 +++
 .../aarch64/sve/unpacked_cond_fmul_2.c        |  18 +++
 .../aarch64/sve/unpacked_cond_fsubr_2.c       |  22 +++
 12 files changed, 289 insertions(+), 72 deletions(-)
 create mode 100644 
gcc/testsuite/g++.target/aarch64/sve/unpacked_cond_binary_bf16_2.C
 create mode 100644 
gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_builtin_fmax_2.c
 create mode 100644 
gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_builtin_fmin_2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fadd_2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fdiv_2.c
 create mode 100644 
gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fmaxnm_2.c
 create mode 100644 
gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fminnm_2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fmul_2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fsubr_2.c

diff --git a/gcc/config/aarch64/aarch64-protos.h 
b/gcc/config/aarch64/aarch64-protos.h
index 1e3ed80e10b..3a7169dc626 100644
--- a/gcc/config/aarch64/aarch64-protos.h
+++ b/gcc/config/aarch64/aarch64-protos.h
@@ -1031,6 +1031,7 @@ rtx aarch64_pfalse_reg (machine_mode);
 bool aarch64_sve_same_pred_for_ptest_p (rtx *, rtx *);
 rtx aarch64_sve_packed_pred (machine_mode);
 rtx aarch64_sve_fp_pred (machine_mode, rtx *);
+rtx aarch64_sve_emit_masked_fp_pred (machine_mode, rtx);
 void aarch64_emit_load_store_through_mode (rtx, rtx, machine_mode);
 bool aarch64_expand_maskloadstore (rtx *, machine_mode);
 void aarch64_emit_sve_pred_move (rtx, rtx, rtx);
diff --git a/gcc/config/aarch64/aarch64-sve.md 
b/gcc/config/aarch64/aarch64-sve.md
index d111e0b9261..1ed2d065c15 100644
--- a/gcc/config/aarch64/aarch64-sve.md
+++ b/gcc/config/aarch64/aarch64-sve.md
@@ -5569,18 +5569,22 @@
 
 ;; Predicated floating-point operations with merging.
 (define_expand "@cond_<optab><mode>"
-  [(set (match_operand:SVE_FULL_F_B16B16 0 "register_operand")
-       (unspec:SVE_FULL_F_B16B16
+  [(set (match_operand:SVE_F_B16B16 0 "register_operand")
+       (unspec:SVE_F_B16B16
          [(match_operand:<VPRED> 1 "register_operand")
-          (unspec:SVE_FULL_F_B16B16
+          (unspec:SVE_F_B16B16
             [(match_dup 1)
              (const_int SVE_STRICT_GP)
-             (match_operand:SVE_FULL_F_B16B16 2 "<sve_pred_fp_rhs1_operand>")
-             (match_operand:SVE_FULL_F_B16B16 3 "<sve_pred_fp_rhs2_operand>")]
+             (match_operand:SVE_F_B16B16 2 "<sve_pred_fp_rhs1_operand>")
+             (match_operand:SVE_F_B16B16 3 "<sve_pred_fp_rhs2_operand>")]
             SVE_COND_FP_BINARY)
-          (match_operand:SVE_FULL_F_B16B16 4 "aarch64_simd_reg_or_zero")]
+          (match_operand:SVE_F_B16B16 4 "aarch64_simd_reg_or_zero")]
          UNSPEC_SEL))]
   "TARGET_SVE && (<supports_bf16> || !<is_bf16>)"
+  {
+    if (rtx pred = aarch64_sve_emit_masked_fp_pred (<MODE>mode, operands[1]))
+      operands[1] = pred;
+  }
 )
 
 ;; Predicated floating-point operations, merging with the first input.
@@ -5608,14 +5612,14 @@
 )
 
 (define_insn "*cond_<optab><mode>_2_strict"
-  [(set (match_operand:SVE_FULL_F_B16B16 0 "register_operand")
-       (unspec:SVE_FULL_F_B16B16
-         [(match_operand:<VPRED> 1 "register_operand")
-          (unspec:SVE_FULL_F_B16B16
+  [(set (match_operand:SVE_F_B16B16 0 "register_operand")
+       (unspec:SVE_F_B16B16
+         [(match_operand:<VPRED> 1 "aarch64_predicate_operand")
+          (unspec:SVE_F_B16B16
             [(match_dup 1)
              (const_int SVE_STRICT_GP)
-             (match_operand:SVE_FULL_F_B16B16 2 "register_operand")
-             (match_operand:SVE_FULL_F_B16B16 3 "register_operand")]
+             (match_operand:SVE_F_B16B16 2 "register_operand")
+             (match_operand:SVE_F_B16B16 3 "register_operand")]
             SVE_COND_FP_BINARY)
           (match_dup 2)]
          UNSPEC_SEL))]
@@ -5651,14 +5655,14 @@
 )
 
 (define_insn "*cond_<optab><mode>_2_const_strict"
-  [(set (match_operand:SVE_FULL_F 0 "register_operand")
-       (unspec:SVE_FULL_F
-         [(match_operand:<VPRED> 1 "register_operand")
-          (unspec:SVE_FULL_F
+  [(set (match_operand:SVE_F 0 "register_operand")
+       (unspec:SVE_F
+         [(match_operand:<VPRED> 1 "aarch64_predicate_operand")
+          (unspec:SVE_F
             [(match_dup 1)
              (const_int SVE_STRICT_GP)
-             (match_operand:SVE_FULL_F 2 "register_operand")
-             (match_operand:SVE_FULL_F 3 "<sve_pred_fp_rhs2_immediate>")]
+             (match_operand:SVE_F 2 "register_operand")
+             (match_operand:SVE_F 3 "<sve_pred_fp_rhs2_immediate>")]
             SVE_COND_FP_BINARY_I1)
           (match_dup 2)]
          UNSPEC_SEL))]
@@ -5694,14 +5698,14 @@
 )
 
 (define_insn "*cond_<optab><mode>_3_strict"
-  [(set (match_operand:SVE_FULL_F_B16B16 0 "register_operand")
-       (unspec:SVE_FULL_F_B16B16
-         [(match_operand:<VPRED> 1 "register_operand")
-          (unspec:SVE_FULL_F_B16B16
+  [(set (match_operand:SVE_F_B16B16 0 "register_operand")
+       (unspec:SVE_F_B16B16
+         [(match_operand:<VPRED> 1 "aarch64_predicate_operand")
+          (unspec:SVE_F_B16B16
             [(match_dup 1)
              (const_int SVE_STRICT_GP)
-             (match_operand:SVE_FULL_F_B16B16 2 "register_operand")
-             (match_operand:SVE_FULL_F_B16B16 3 "register_operand")]
+             (match_operand:SVE_F_B16B16 2 "register_operand")
+             (match_operand:SVE_F_B16B16 3 "register_operand")]
             SVE_COND_FP_BINARY)
           (match_dup 3)]
          UNSPEC_SEL))]
@@ -5758,16 +5762,16 @@
 )
 
 (define_insn_and_rewrite "*cond_<optab><mode>_any_strict"
-  [(set (match_operand:SVE_FULL_F_B16B16 0 "register_operand")
-       (unspec:SVE_FULL_F_B16B16
-         [(match_operand:<VPRED> 1 "register_operand")
-          (unspec:SVE_FULL_F_B16B16
+  [(set (match_operand:SVE_F_B16B16 0 "register_operand")
+       (unspec:SVE_F_B16B16
+         [(match_operand:<VPRED> 1 "aarch64_predicate_operand")
+          (unspec:SVE_F_B16B16
             [(match_dup 1)
              (const_int SVE_STRICT_GP)
-             (match_operand:SVE_FULL_F_B16B16 2 "register_operand")
-             (match_operand:SVE_FULL_F_B16B16 3 "register_operand")]
+             (match_operand:SVE_F_B16B16 2 "register_operand")
+             (match_operand:SVE_F_B16B16 3 "register_operand")]
             SVE_COND_FP_BINARY)
-          (match_operand:SVE_FULL_F_B16B16 4 "aarch64_simd_reg_or_zero")]
+          (match_operand:SVE_F_B16B16 4 "aarch64_simd_reg_or_zero")]
          UNSPEC_SEL))]
   "TARGET_SVE
    && (<supports_bf16> || !<is_bf16>)
@@ -5832,16 +5836,16 @@
 )
 
 (define_insn_and_rewrite "*cond_<optab><mode>_any_const_strict"
-  [(set (match_operand:SVE_FULL_F 0 "register_operand")
-       (unspec:SVE_FULL_F
-         [(match_operand:<VPRED> 1 "register_operand")
-          (unspec:SVE_FULL_F
+  [(set (match_operand:SVE_F 0 "register_operand")
+       (unspec:SVE_F
+         [(match_operand:<VPRED> 1 "aarch64_predicate_operand")
+          (unspec:SVE_F
             [(match_dup 1)
              (const_int SVE_STRICT_GP)
-             (match_operand:SVE_FULL_F 2 "register_operand")
-             (match_operand:SVE_FULL_F 3 "<sve_pred_fp_rhs2_immediate>")]
+             (match_operand:SVE_F 2 "register_operand")
+             (match_operand:SVE_F 3 "<sve_pred_fp_rhs2_immediate>")]
             SVE_COND_FP_BINARY_I1)
-          (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero")]
+          (match_operand:SVE_F 4 "aarch64_simd_reg_or_zero")]
          UNSPEC_SEL))]
   "TARGET_SVE && !rtx_equal_p (operands[2], operands[4])"
   {@ [ cons: =0 , 1   , 2 , 4   ]
@@ -5917,14 +5921,14 @@
 )
 
 (define_insn "*cond_add<mode>_2_const_strict"
-  [(set (match_operand:SVE_FULL_F 0 "register_operand")
-       (unspec:SVE_FULL_F
-         [(match_operand:<VPRED> 1 "register_operand")
-          (unspec:SVE_FULL_F
+  [(set (match_operand:SVE_F 0 "register_operand")
+       (unspec:SVE_F
+         [(match_operand:<VPRED> 1 "aarch64_predicate_operand")
+          (unspec:SVE_F
             [(match_dup 1)
              (const_int SVE_STRICT_GP)
-             (match_operand:SVE_FULL_F 2 "register_operand")
-             (match_operand:SVE_FULL_F 3 
"aarch64_sve_float_arith_with_sub_immediate")]
+             (match_operand:SVE_F 2 "register_operand")
+             (match_operand:SVE_F 3 
"aarch64_sve_float_arith_with_sub_immediate")]
             UNSPEC_COND_FADD)
           (match_dup 2)]
          UNSPEC_SEL))]
@@ -5979,16 +5983,16 @@
 )
 
 (define_insn_and_rewrite "*cond_add<mode>_any_const_strict"
-  [(set (match_operand:SVE_FULL_F 0 "register_operand")
-       (unspec:SVE_FULL_F
-         [(match_operand:<VPRED> 1 "register_operand")
-          (unspec:SVE_FULL_F
+  [(set (match_operand:SVE_F 0 "register_operand")
+       (unspec:SVE_F
+         [(match_operand:<VPRED> 1 "aarch64_predicate_operand")
+          (unspec:SVE_F
             [(match_dup 1)
              (const_int SVE_STRICT_GP)
-             (match_operand:SVE_FULL_F 2 "register_operand")
-             (match_operand:SVE_FULL_F 3 
"aarch64_sve_float_arith_with_sub_immediate")]
+             (match_operand:SVE_F 2 "register_operand")
+             (match_operand:SVE_F 3 
"aarch64_sve_float_arith_with_sub_immediate")]
             UNSPEC_COND_FADD)
-          (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero")]
+          (match_operand:SVE_F 4 "aarch64_simd_reg_or_zero")]
          UNSPEC_SEL))]
   "TARGET_SVE && !rtx_equal_p (operands[2], operands[4])"
   {@ [ cons: =0 , 1   , 2 , 3   , 4   ]
@@ -6230,14 +6234,14 @@
 )
 
 (define_insn "*cond_sub<mode>_3_const_strict"
-  [(set (match_operand:SVE_FULL_F 0 "register_operand")
-       (unspec:SVE_FULL_F
-         [(match_operand:<VPRED> 1 "register_operand")
-          (unspec:SVE_FULL_F
+  [(set (match_operand:SVE_F 0 "register_operand")
+       (unspec:SVE_F
+         [(match_operand:<VPRED> 1 "aarch64_predicate_operand")
+          (unspec:SVE_F
             [(match_dup 1)
              (const_int SVE_STRICT_GP)
-             (match_operand:SVE_FULL_F 2 "aarch64_sve_float_arith_immediate")
-             (match_operand:SVE_FULL_F 3 "register_operand")]
+             (match_operand:SVE_F 2 "aarch64_sve_float_arith_immediate")
+             (match_operand:SVE_F 3 "register_operand")]
             UNSPEC_COND_FSUB)
           (match_dup 3)]
          UNSPEC_SEL))]
@@ -6287,16 +6291,16 @@
 )
 
 (define_insn_and_rewrite "*cond_sub<mode>_const_strict"
-  [(set (match_operand:SVE_FULL_F 0 "register_operand")
-       (unspec:SVE_FULL_F
-         [(match_operand:<VPRED> 1 "register_operand")
-          (unspec:SVE_FULL_F
+  [(set (match_operand:SVE_F 0 "register_operand")
+       (unspec:SVE_F
+         [(match_operand:<VPRED> 1 "aarch64_predicate_operand")
+          (unspec:SVE_F
             [(match_dup 1)
              (const_int SVE_STRICT_GP)
-             (match_operand:SVE_FULL_F 2 "aarch64_sve_float_arith_immediate")
-             (match_operand:SVE_FULL_F 3 "register_operand")]
+             (match_operand:SVE_F 2 "aarch64_sve_float_arith_immediate")
+             (match_operand:SVE_F 3 "register_operand")]
             UNSPEC_COND_FSUB)
-          (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero")]
+          (match_operand:SVE_F 4 "aarch64_simd_reg_or_zero")]
          UNSPEC_SEL))]
   "TARGET_SVE && !rtx_equal_p (operands[3], operands[4])"
   {@ [ cons: =0 , 1   , 3 , 4   ]
@@ -6877,7 +6881,7 @@
 ;; Predicate AND.  We can reuse one of the inputs as the GP.
 ;; Doubling the second operand is the preferred implementation
 ;; of the MOV alias, so we use that instead of %1/z, %1, %2.
-(define_insn "and<mode>3"
+(define_insn "@and<mode>3"
   [(set (match_operand:PRED_ALL 0 "register_operand")
        (and:PRED_ALL (match_operand:PRED_ALL 1 "register_operand")
                      (match_operand:PRED_ALL 2 "register_operand")))]
@@ -8165,20 +8169,25 @@
 ;;
 ;; For unpacked vectors, it doesn't really matter whether SEL uses the
 ;; the container size or the element size.  If SEL used the container size,
-;; it would ignore undefined bits of the predicate but would copy the
-;; upper (undefined) bits of each container along with the defined bits.
-;; If SEL used the element size, it would use undefined bits of the predicate
-;; to select between undefined elements in each input vector.  Thus the only
-;; difference is whether the undefined bits in a container always come from
-;; the same input as the defined bits, or whether the choice can vary
+;; it would ignore bits of the predicate that can be undefined, but would copy
+;; the upper (undefined) bits of each container along with the defined bits.
+;; If SEL used the element size, it would use bits from the predicate that can
+;; be undefined to select between undefined elements in each input vector.
+;; Thus the only difference is whether the undefined bits in a container always
+;; come from the same input as the defined bits, or whether the choice can vary
 ;; independently of the defined bits.
 ;;
 ;; For the other instructions, using the element size is more natural,
 ;; so we do that for SEL as well.
+;;
+;; The use of 'aarch64_predicate_operand' here is only to support the FP arithmetic/
+;; UNSPEC_SEL combiner patterns.  As with those operations, any predicate bits that
+;; are insignificant to the data mode will have no effect on the operation's result.
+;;
 (define_insn "*vcond_mask_<mode><vpred>"
   [(set (match_operand:SVE_ALL 0 "register_operand")
        (unspec:SVE_ALL
-         [(match_operand:<VPRED> 3 "register_operand")
+         [(match_operand:<VPRED> 3 "aarch64_predicate_operand")
           (match_operand:SVE_ALL 1 "aarch64_sve_reg_or_dup_imm")
           (match_operand:SVE_ALL 2 "aarch64_simd_reg_or_zero")]
          UNSPEC_SEL))]
diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index 287de0f5ae4..d38b108c5f4 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -3893,6 +3893,33 @@ aarch64_sve_fp_pred (machine_mode data_mode, rtx 
*strictness)
    return aarch64_ptrue_reg (aarch64_sve_pred_mode (data_mode));
 }
 
+/* If DATA_MODE is a partial vector mode, emit a sequence of insns to
+   zero out the predicate bits of an existing natural GP, PRED, associated
+   with the undefined elements in each container.
+
+   Return NULL_RTX if no insns were emitted.  That is, if DATA_MODE is not
+   a partial vector mode, or if we don't need to prevent the operation from
+   interpreting undefined elements.  Otherwise, return the new predicate.  */
+rtx
+aarch64_sve_emit_masked_fp_pred (machine_mode data_mode, rtx pred)
+{
+  unsigned int vec_flags = aarch64_classify_vector_mode (data_mode);
+  if (flag_trapping_math && (vec_flags & VEC_PARTIAL))
+    {
+      /* Control the data as if the vector was fully packed.  */
+      rtx mask = aarch64_sve_packed_pred (data_mode);
+      machine_mode pmode = GET_MODE (mask);
+
+      /* Mask the existing predicate.  */
+      rtx dst = gen_reg_rtx (pmode);
+      emit_insn (gen_and3 (pmode, dst, mask,
+                          gen_lowpart (pmode, pred)));
+      return dst;
+    }
+
+  return NULL_RTX;
+}
+
 /* Emit a comparison CMP between OP0 and OP1, both of which have mode
    DATA_MODE, and return the result in a predicate of mode PRED_MODE.
    Use TARGET as the target register if nonnull and convenient.  */
diff --git a/gcc/testsuite/g++.target/aarch64/sve/unpacked_cond_binary_bf16_2.C 
b/gcc/testsuite/g++.target/aarch64/sve/unpacked_cond_binary_bf16_2.C
new file mode 100644
index 00000000000..02880efa333
--- /dev/null
+++ b/gcc/testsuite/g++.target/aarch64/sve/unpacked_cond_binary_bf16_2.C
@@ -0,0 +1,18 @@
+/* { dg-do compile }*/
+/* { dg-options "-O -ffinite-math-only -fno-signed-zeros 
-msve-vector-bits=2048 " } */
+
+#include "unpacked_cond_binary_bf16_1.C"
+
+/* { dg-final { scan-assembler-times {\tptrue\tp[0-9]+\.s} 15 } } */
+/* { dg-final { scan-assembler-times {\tptrue\tp[0-9]+\.d} 15 } } */
+/* { dg-final { scan-assembler-times {\tand} 30 } } */
+
+/* { dg-final { scan-assembler-times {\tbfadd\tz[0-9]+\.h, p[0-7]/m, 
z[0-9]+\.h, z[0-9]+\.h\n} 6 } } */
+/* { dg-final { scan-assembler-times {\tbfsub\tz[0-9]+\.h, p[0-7]/m, 
z[0-9]+\.h, z[0-9]+\.h\n} 6 } } */
+/* { dg-final { scan-assembler-times {\tbfmul\tz[0-9]+\.h, p[0-7]/m, 
z[0-9]+\.h, z[0-9]+\.h\n} 6 } } */
+
+/* { dg-final { scan-assembler-times {\tbfminnm\tz[0-9]+\.h, p[0-7]/m, 
z[0-9]+\.h, z[0-9]+\.h\n} 6 } } */
+/* { dg-final { scan-assembler-times {\tbfmaxnm\tz[0-9]+\.h, p[0-7]/m, 
z[0-9]+\.h, z[0-9]+\.h\n} 6 } } */
+
+// There's no BFSUBR.
+/* { dg-final { scan-assembler-times {\tsel\t} 2 } } */
diff --git 
a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_builtin_fmax_2.c 
b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_builtin_fmax_2.c
new file mode 100644
index 00000000000..8c8f082562f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_builtin_fmax_2.c
@@ -0,0 +1,20 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -msve-vector-bits=2048 -ftree-vectorize" } */
+
+#include "unpacked_cond_builtin_fmax_1.c"
+
+/* { dg-final { scan-assembler-times {\tptrue\tp[0-9]+\.s} 7 } } */
+/* { dg-final { scan-assembler-times {\tptrue\tp[0-9]+\.d} 14 } } */
+/* { dg-final { scan-assembler-times {\tand} 21 } } */
+
+/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.s, p[0-7]/m, 
z[0-9]+\.s\n} 3 } } */
+/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.s, p[0-7]/m, 
z[0-9]+\.s, z[0-9]+\.s\n} 3 } } */
+/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.s, p[0-7]/m, 
z[0-9]+\.s, #0.0\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.s, p[0-7]/m, 
z[0-9]+\.s, #1.0\n} 2 } } */
+
+/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.h, p[0-7]/m, 
z[0-9]+\.h\n} 6 } } */
+/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.h, p[0-7]/m, 
z[0-9]+\.h, z[0-9]+\.h\n} 6 } } */
+/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.h, p[0-7]/m, 
z[0-9]+\.h, #0.0\n} 4 } } */
+/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.h, p[0-7]/m, 
z[0-9]+\.h, #1.0\n} 4 } } */
+
+/* { dg-final { scan-assembler-not {\tsel\t} } } */
diff --git 
a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_builtin_fmin_2.c 
b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_builtin_fmin_2.c
new file mode 100644
index 00000000000..eda27eedf1f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_builtin_fmin_2.c
@@ -0,0 +1,20 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -msve-vector-bits=2048 -ftree-vectorize" } */
+
+#include "unpacked_cond_builtin_fmin_1.c"
+
+/* { dg-final { scan-assembler-times {\tptrue\tp[0-9]+\.s} 7 } } */
+/* { dg-final { scan-assembler-times {\tptrue\tp[0-9]+\.d} 14 } } */
+/* { dg-final { scan-assembler-times {\tand} 21 } } */
+
+/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.s, p[0-7]/m, 
z[0-9]+\.s\n} 3 } } */
+/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.s, p[0-7]/m, 
z[0-9]+\.s, z[0-9]+\.s\n} 3 } } */
+/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.s, p[0-7]/m, 
z[0-9]+\.s, #0.0\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.s, p[0-7]/m, 
z[0-9]+\.s, #1.0\n} 2 } } */
+
+/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.h, p[0-7]/m, 
z[0-9]+\.h\n} 6 } } */
+/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.h, p[0-7]/m, 
z[0-9]+\.h, z[0-9]+\.h\n} 6 } } */
+/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.h, p[0-7]/m, 
z[0-9]+\.h, #0.0\n} 4 } } */
+/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.h, p[0-7]/m, 
z[0-9]+\.h, #1.0\n} 4 } } */
+
+/* { dg-final { scan-assembler-not {\tsel\t} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fadd_2.c 
b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fadd_2.c
new file mode 100644
index 00000000000..c972c7d20b8
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fadd_2.c
@@ -0,0 +1,24 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize -msve-vector-bits=2048" } */
+
+#include "unpacked_cond_fadd_1.c"
+
+/* { dg-final { scan-assembler-times {\tptrue\tp[0-9]+\.s} 11 } } */
+/* { dg-final { scan-assembler-times {\tptrue\tp[0-9]+\.d} 22 } } */
+/* { dg-final { scan-assembler-times {\tand} 33 } } */
+
+/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.s, p[0-7]/m, 
z[0-9]+\.s\n} 5 } } */
+/* { dg-final { scan-assembler-times {\tfadd\tz[0-9]+\.s, p[0-7]/m, 
z[0-9]+\.s, z[0-9]+\.s\n} 3 } } */
+/* { dg-final { scan-assembler-times {\tfadd\tz[0-9]+\.s, p[0-7]/m, 
z[0-9]+\.s, #0.5\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tfadd\tz[0-9]+\.s, p[0-7]/m, 
z[0-9]+\.s, #1.0\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tfsub\tz[0-9]+\.s, p[0-7]/m, 
z[0-9]+\.s, #0.5\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tfsub\tz[0-9]+\.s, p[0-7]/m, 
z[0-9]+\.s, #1.0\n} 2 } } */
+
+/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.h, p[0-7]/m, 
z[0-9]+\.h\n} 10 } } */
+/* { dg-final { scan-assembler-times {\tfadd\tz[0-9]+\.h, p[0-7]/m, 
z[0-9]+\.h, z[0-9]+\.h\n} 6 } } */
+/* { dg-final { scan-assembler-times {\tfadd\tz[0-9]+\.h, p[0-7]/m, 
z[0-9]+\.h, #0.5\n} 4 } } */
+/* { dg-final { scan-assembler-times {\tfadd\tz[0-9]+\.h, p[0-7]/m, 
z[0-9]+\.h, #1.0\n} 4 } } */
+/* { dg-final { scan-assembler-times {\tfsub\tz[0-9]+\.h, p[0-7]/m, 
z[0-9]+\.h, #0.5\n} 4 } } */
+/* { dg-final { scan-assembler-times {\tfsub\tz[0-9]+\.h, p[0-7]/m, 
z[0-9]+\.h, #1.0\n} 4 } } */
+
+/* { dg-final { scan-assembler-not {\tsel\t} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fdiv_2.c 
b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fdiv_2.c
new file mode 100644
index 00000000000..de70fabef7e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fdiv_2.c
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize -msve-vector-bits=2048" } */
+
+#include "unpacked_cond_fdiv_1.c"
+
+/* { dg-final { scan-assembler-times {\tptrue\tp[0-9]+\.s} 3 } } */
+/* { dg-final { scan-assembler-times {\tptrue\tp[0-9]+\.d} 6 } } */
+/* { dg-final { scan-assembler-times {\tand} 9 } } */
+
+/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.s, p[0-7]/m, 
z[0-9]+\.s\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tfdivr\tz[0-9]+\.s, p[0-7]/m, 
z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tfdiv\tz[0-9]+\.s, p[0-7]/m, 
z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */
+
+/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.h, p[0-7]/m, 
z[0-9]+\.h\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tfdivr\tz[0-9]+\.h, p[0-7]/m, 
z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tfdiv\tz[0-9]+\.h, p[0-7]/m, 
z[0-9]+\.h, z[0-9]+\.h\n} 4 } } */
+
+/* { dg-final { scan-assembler-not {\tsel\t} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fmaxnm_2.c 
b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fmaxnm_2.c
new file mode 100644
index 00000000000..1c9a110e09b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fmaxnm_2.c
@@ -0,0 +1,20 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize -msve-vector-bits=2048 -fno-signed-zeros 
-ffinite-math-only" } */
+
+#include "unpacked_cond_fmaxnm_1.c"
+
+/* { dg-final { scan-assembler-times {\tptrue\tp[0-9]+\.s} 7 } } */
+/* { dg-final { scan-assembler-times {\tptrue\tp[0-9]+\.d} 14 } } */
+/* { dg-final { scan-assembler-times {\tand} 21 } } */
+
+/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.s, p[0-7]/m, 
z[0-9]+\.s\n} 3 } } */
+/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.s, p[0-7]/m, 
z[0-9]+\.s, z[0-9]+\.s\n} 3 } } */
+/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.s, p[0-7]/m, 
z[0-9]+\.s, #0.0\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.s, p[0-7]/m, 
z[0-9]+\.s, #1.0\n} 2 } } */
+
+/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.h, p[0-7]/m, 
z[0-9]+\.h\n} 6 } } */
+/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.h, p[0-7]/m, 
z[0-9]+\.h, z[0-9]+\.h\n} 6 } } */
+/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.h, p[0-7]/m, 
z[0-9]+\.h, #0.0\n} 4 } } */
+/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.h, p[0-7]/m, 
z[0-9]+\.h, #1.0\n} 4 } } */
+
+/* { dg-final { scan-assembler-not {\tsel\t} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fminnm_2.c 
b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fminnm_2.c
new file mode 100644
index 00000000000..4aa913450f8
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fminnm_2.c
@@ -0,0 +1,20 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize -msve-vector-bits=2048 -fno-signed-zeros 
-ffinite-math-only" } */
+
+#include "unpacked_cond_fminnm_1.c"
+
+/* { dg-final { scan-assembler-times {\tptrue\tp[0-9]+\.s} 7 } } */
+/* { dg-final { scan-assembler-times {\tptrue\tp[0-9]+\.d} 14 } } */
+/* { dg-final { scan-assembler-times {\tand} 21 } } */
+
+/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.s, p[0-7]/m, 
z[0-9]+\.s\n} 3 } } */
+/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.s, p[0-7]/m, 
z[0-9]+\.s, z[0-9]+\.s\n} 3 } } */
+/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.s, p[0-7]/m, 
z[0-9]+\.s, #0.0\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.s, p[0-7]/m, 
z[0-9]+\.s, #1.0\n} 2 } } */
+
+/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.h, p[0-7]/m, 
z[0-9]+\.h\n} 6 } } */
+/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.h, p[0-7]/m, 
z[0-9]+\.h, z[0-9]+\.h\n} 6 } } */
+/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.h, p[0-7]/m, 
z[0-9]+\.h, #0.0\n} 4 } } */
+/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.h, p[0-7]/m, 
z[0-9]+\.h, #1.0\n} 4 } } */
+
+/* { dg-final { scan-assembler-not {\tsel\t} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fmul_2.c 
b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fmul_2.c
new file mode 100644
index 00000000000..a45c570eaad
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fmul_2.c
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize -msve-vector-bits=2048" } */
+
+#include "unpacked_cond_fmul_1.c"
+
+/* { dg-final { scan-assembler-times {\tptrue\tp[0-9]+\.s} 5 } } */
+/* { dg-final { scan-assembler-times {\tptrue\tp[0-9]+\.d} 10 } } */
+/* { dg-final { scan-assembler-times {\tand} 15 } } */
+
+/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.s, p[0-7]/m, 
z[0-9]+\.s\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tfmul\tz[0-9]+\.s, p[0-7]/m, 
z[0-9]+\.s, z[0-9]+\.s\n} 3 } } */
+/* { dg-final { scan-assembler-times {\tfmul\tz[0-9]+\.s, p[0-7]/m, 
z[0-9]+\.s, #0.5\n} 2 } } */
+
+/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.h, p[0-7]/m, 
z[0-9]+\.h\n} 4 } } */
+/* { dg-final { scan-assembler-times {\tfmul\tz[0-9]+\.h, p[0-7]/m, 
z[0-9]+\.h, z[0-9]+\.h\n} 6 } } */
+/* { dg-final { scan-assembler-times {\tfmul\tz[0-9]+\.h, p[0-7]/m, 
z[0-9]+\.h, #0.5\n} 4 } } */
+
+/* { dg-final { scan-assembler-not {\tsel\t} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fsubr_2.c 
b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fsubr_2.c
new file mode 100644
index 00000000000..b154fa5697d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fsubr_2.c
@@ -0,0 +1,22 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize -msve-vector-bits=2048" } */
+
+#include "unpacked_cond_fsubr_1.c"
+
+/* { dg-final { scan-assembler-times {\tptrue\tp[0-9]+\.s} 7 } } */
+/* { dg-final { scan-assembler-times {\tptrue\tp[0-9]+\.d} 14 } } */
+/* { dg-final { scan-assembler-times {\tand} 21 } } */
+
+/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.s, p[0-7]/m, 
z[0-9]+\.s\n} 3 } } */
+/* { dg-final { scan-assembler-times {\tfsub\tz[0-9]+\.s, p[0-7]/m, 
z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tfsubr\tz[0-9]+\.s, p[0-7]/m, 
z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tfsubr\tz[0-9]+\.s, p[0-7]/m, 
z[0-9]+\.s, #0.5\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tfsubr\tz[0-9]+\.s, p[0-7]/m, 
z[0-9]+\.s, #1.0\n} 2 } } */
+
+/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.h, p[0-7]/m, 
z[0-9]+\.h\n} 6 } } */
+/* { dg-final { scan-assembler-times {\tfsub\tz[0-9]+\.h, p[0-7]/m, 
z[0-9]+\.h, z[0-9]+\.h\n} 4 } } */
+/* { dg-final { scan-assembler-times {\tfsubr\tz[0-9]+\.h, p[0-7]/m, 
z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tfsubr\tz[0-9]+\.h, p[0-7]/m, 
z[0-9]+\.h, #0.5\n} 4 } } */
+/* { dg-final { scan-assembler-times {\tfsubr\tz[0-9]+\.h, p[0-7]/m, 
z[0-9]+\.h, #1.0\n} 4 } } */
+
+/* { dg-final { scan-assembler-not {\tsel\t} } } */
-- 
2.34.1

[PATCH 11/14] aarch64: Add support for unpacked SVE FP conditional binary arithmetic

Reply via email to