Some instructions that were previously restricted to streaming mode
can also be used in non-streaming mode with SVE2.1.  This patch adds
support for those, as well as the usual new-extension boilerplate.
A later patch will add the feature macro.

gcc/
        * config/aarch64/aarch64-option-extensions.def (sve2p1): New extension.
        * config/aarch64/aarch64-sve-builtins-sve2.def: Mark instructions
        that are common to both SVE2p1 and SME.
        * config/aarch64/aarch64.h (TARGET_SVE2p1): New macro.
        (TARGET_SVE2p1_OR_SME): Likewise.
        * config/aarch64/aarch64-sve2.md
        (@aarch64_sve_psel<BHSD_BITS>): Require TARGET_SVE2p1_OR_SME
        instead of TARGET_STREAMING.
        (*aarch64_sve_psel<BHSD_BITS>_plus): Likewise.
        (@aarch64_sve_<su>clamp<mode>): Likewise.
        (*aarch64_sve_<su>clamp<mode>_x): Likewise.
        (@aarch64_pred_<optab><mode>): Likewise.
        (@cond_<optab><mode>): Likewise.

gcc/testsuite/
        * lib/target-supports.exp
        (check_effective_target_aarch64_asm_sve2p1_ok): New procedure.
        * gcc.target/aarch64/sve/clamp_1.c: New test.
        * gcc.target/aarch64/sve/clamp_2.c: Likewise.
        * gcc.target/aarch64/sve2/acle/asm/clamp_s16.c: Likewise.
        * gcc.target/aarch64/sve2/acle/asm/clamp_s32.c: Likewise.
        * gcc.target/aarch64/sve2/acle/asm/clamp_s64.c: Likewise.
        * gcc.target/aarch64/sve2/acle/asm/clamp_s8.c: Likewise.
        * gcc.target/aarch64/sve2/acle/asm/clamp_u16.c: Likewise.
        * gcc.target/aarch64/sve2/acle/asm/clamp_u32.c: Likewise.
        * gcc.target/aarch64/sve2/acle/asm/clamp_u64.c: Likewise.
        * gcc.target/aarch64/sve2/acle/asm/clamp_u8.c: Likewise.
        * gcc.target/aarch64/sve2/acle/asm/psel_lane_b16.c: Likewise.
        * gcc.target/aarch64/sve2/acle/asm/psel_lane_b32.c: Likewise.
        * gcc.target/aarch64/sve2/acle/asm/psel_lane_b64.c: Likewise.
        * gcc.target/aarch64/sve2/acle/asm/psel_lane_b8.c: Likewise.
        * gcc.target/aarch64/sve2/acle/asm/psel_lane_c16.c: Likewise.
        * gcc.target/aarch64/sve2/acle/asm/psel_lane_c32.c: Likewise.
        * gcc.target/aarch64/sve2/acle/asm/psel_lane_c64.c: Likewise.
        * gcc.target/aarch64/sve2/acle/asm/psel_lane_c8.c: Likewise.
        * gcc.target/aarch64/sve2/acle/asm/revd_bf16.c: Likewise.
        * gcc.target/aarch64/sve2/acle/asm/revd_f16.c: Likewise.
        * gcc.target/aarch64/sve2/acle/asm/revd_f32.c: Likewise.
        * gcc.target/aarch64/sve2/acle/asm/revd_f64.c: Likewise.
        * gcc.target/aarch64/sve2/acle/asm/revd_s16.c: Likewise.
        * gcc.target/aarch64/sve2/acle/asm/revd_s32.c: Likewise.
        * gcc.target/aarch64/sve2/acle/asm/revd_s64.c: Likewise.
        * gcc.target/aarch64/sve2/acle/asm/revd_s8.c: Likewise.
        * gcc.target/aarch64/sve2/acle/asm/revd_u16.c: Likewise.
        * gcc.target/aarch64/sve2/acle/asm/revd_u32.c: Likewise.
        * gcc.target/aarch64/sve2/acle/asm/revd_u64.c: Likewise.
        * gcc.target/aarch64/sve2/acle/asm/revd_u8.c: Likewise.
---
 .../aarch64/aarch64-option-extensions.def     |  2 +
 .../aarch64/aarch64-sve-builtins-sve2.def     |  2 +-
 gcc/config/aarch64/aarch64-sve2.md            | 12 +--
 gcc/config/aarch64/aarch64.h                  |  9 ++
 .../gcc.target/aarch64/sve/clamp_1.c          | 40 ++++++++
 .../gcc.target/aarch64/sve/clamp_2.c          | 34 +++++++
 .../aarch64/sve2/acle/asm/clamp_s16.c         | 46 +++++++++
 .../aarch64/sve2/acle/asm/clamp_s32.c         | 46 +++++++++
 .../aarch64/sve2/acle/asm/clamp_s64.c         | 46 +++++++++
 .../aarch64/sve2/acle/asm/clamp_s8.c          | 46 +++++++++
 .../aarch64/sve2/acle/asm/clamp_u16.c         | 46 +++++++++
 .../aarch64/sve2/acle/asm/clamp_u32.c         | 46 +++++++++
 .../aarch64/sve2/acle/asm/clamp_u64.c         | 46 +++++++++
 .../aarch64/sve2/acle/asm/clamp_u8.c          | 46 +++++++++
 .../aarch64/sve2/acle/asm/psel_lane_b16.c     | 93 +++++++++++++++++++
 .../aarch64/sve2/acle/asm/psel_lane_b32.c     | 93 +++++++++++++++++++
 .../aarch64/sve2/acle/asm/psel_lane_b64.c     | 84 +++++++++++++++++
 .../aarch64/sve2/acle/asm/psel_lane_b8.c      | 93 +++++++++++++++++++
 .../aarch64/sve2/acle/asm/psel_lane_c16.c     | 93 +++++++++++++++++++
 .../aarch64/sve2/acle/asm/psel_lane_c32.c     | 93 +++++++++++++++++++
 .../aarch64/sve2/acle/asm/psel_lane_c64.c     | 84 +++++++++++++++++
 .../aarch64/sve2/acle/asm/psel_lane_c8.c      | 93 +++++++++++++++++++
 .../aarch64/sve2/acle/asm/revd_bf16.c         | 80 ++++++++++++++++
 .../aarch64/sve2/acle/asm/revd_f16.c          | 80 ++++++++++++++++
 .../aarch64/sve2/acle/asm/revd_f32.c          | 80 ++++++++++++++++
 .../aarch64/sve2/acle/asm/revd_f64.c          | 80 ++++++++++++++++
 .../aarch64/sve2/acle/asm/revd_s16.c          | 80 ++++++++++++++++
 .../aarch64/sve2/acle/asm/revd_s32.c          | 80 ++++++++++++++++
 .../aarch64/sve2/acle/asm/revd_s64.c          | 80 ++++++++++++++++
 .../aarch64/sve2/acle/asm/revd_s8.c           | 80 ++++++++++++++++
 .../aarch64/sve2/acle/asm/revd_u16.c          | 80 ++++++++++++++++
 .../aarch64/sve2/acle/asm/revd_u32.c          | 80 ++++++++++++++++
 .../aarch64/sve2/acle/asm/revd_u64.c          | 80 ++++++++++++++++
 .../aarch64/sve2/acle/asm/revd_u8.c           | 80 ++++++++++++++++
 gcc/testsuite/lib/target-supports.exp         | 10 ++
 35 files changed, 2156 insertions(+), 7 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/clamp_1.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/clamp_2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/clamp_s16.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/clamp_s32.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/clamp_s64.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/clamp_s8.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/clamp_u16.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/clamp_u32.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/clamp_u64.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/clamp_u8.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/psel_lane_b16.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/psel_lane_b32.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/psel_lane_b64.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/psel_lane_b8.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/psel_lane_c16.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/psel_lane_c32.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/psel_lane_c64.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/psel_lane_c8.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/revd_bf16.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/revd_f16.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/revd_f32.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/revd_f64.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/revd_s16.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/revd_s32.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/revd_s64.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/revd_s8.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/revd_u16.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/revd_u32.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/revd_u64.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/revd_u8.c

diff --git a/gcc/config/aarch64/aarch64-option-extensions.def b/gcc/config/aarch64/aarch64-option-extensions.def
index 8279f5a76ea..c9d419afc8f 100644
--- a/gcc/config/aarch64/aarch64-option-extensions.def
+++ b/gcc/config/aarch64/aarch64-option-extensions.def
@@ -192,6 +192,8 @@ AARCH64_OPT_EXTENSION("sve2-sm4", SVE2_SM4, (SVE2, SM4), (), (), "svesm4")
 
 AARCH64_FMV_FEATURE("sve2-sm4", SVE_SM4, (SVE2_SM4))
 
+AARCH64_OPT_EXTENSION("sve2p1", SVE2p1, (SVE2), (), (), "")
+
 AARCH64_OPT_FMV_EXTENSION("sme", SME, (BF16, SVE2), (), (), "sme")
 
 AARCH64_OPT_EXTENSION("memtag", MEMTAG, (), (), (), "")
diff --git a/gcc/config/aarch64/aarch64-sve-builtins-sve2.def b/gcc/config/aarch64/aarch64-sve-builtins-sve2.def
index 12548fe39cb..5cc32aa8871 100644
--- a/gcc/config/aarch64/aarch64-sve-builtins-sve2.def
+++ b/gcc/config/aarch64/aarch64-sve-builtins-sve2.def
@@ -220,7 +220,7 @@ DEF_SVE_FUNCTION (svsm4e, binary, s_unsigned, none)
 DEF_SVE_FUNCTION (svsm4ekey, binary, s_unsigned, none)
 #undef REQUIRED_EXTENSIONS
 
-#define REQUIRED_EXTENSIONS streaming_only (0)
+#define REQUIRED_EXTENSIONS sve_and_sme (AARCH64_FL_SVE2p1, 0)
 DEF_SVE_FUNCTION (svclamp, clamp, all_integer, none)
 DEF_SVE_FUNCTION (svpsel_lane, select_pred, all_pred_count, none)
 DEF_SVE_FUNCTION (svrevd, unary, all_data, mxz)
diff --git a/gcc/config/aarch64/aarch64-sve2.md b/gcc/config/aarch64/aarch64-sve2.md
index a7b29daeba4..fd4bd42b6d9 100644
--- a/gcc/config/aarch64/aarch64-sve2.md
+++ b/gcc/config/aarch64/aarch64-sve2.md
@@ -418,7 +418,7 @@ (define_insn "@aarch64_sve_psel<BHSD_BITS>"
           (match_operand:SI 3 "register_operand" "Ucj")
           (const_int BHSD_BITS)]
          UNSPEC_PSEL))]
-  "TARGET_STREAMING"
+  "TARGET_SVE2p1_OR_SME"
   "psel\t%0, %1, %2.<bits_etype>[%w3, 0]"
 )
 
@@ -432,7 +432,7 @@ (define_insn "*aarch64_sve_psel<BHSD_BITS>_plus"
             (match_operand:SI 4 "const_int_operand"))
           (const_int BHSD_BITS)]
          UNSPEC_PSEL))]
-  "TARGET_STREAMING
+  "TARGET_SVE2p1_OR_SME
    && UINTVAL (operands[4]) < 128 / <BHSD_BITS>"
   "psel\t%0, %1, %2.<bits_etype>[%w3, %4]"
 )
@@ -560,7 +560,7 @@ (define_insn "@aarch64_sve_<su>clamp<mode>"
            (match_operand:SVE_FULL_I 1 "register_operand")
            (match_operand:SVE_FULL_I 2 "register_operand"))
          (match_operand:SVE_FULL_I 3 "register_operand")))]
-  "TARGET_STREAMING"
+  "TARGET_SVE2p1_OR_SME"
   {@ [cons: =0,  1, 2, 3; attrs: movprfx]
     [       w, %0, w, w; *             ] <su>clamp\t%0.<Vetype>, %2.<Vetype>, %3.<Vetype>
     [     ?&w,  w, w, w; yes           ] movprfx\t%0, %1\;<su>clamp\t%0.<Vetype>, %2.<Vetype>, %3.<Vetype>
@@ -580,7 +580,7 @@ (define_insn_and_split "*aarch64_sve_<su>clamp<mode>_x"
               UNSPEC_PRED_X)
             (match_operand:SVE_FULL_I 3 "register_operand"))]
          UNSPEC_PRED_X))]
-  "TARGET_STREAMING"
+  "TARGET_SVE2p1_OR_SME"
   {@ [cons: =0,  1, 2, 3; attrs: movprfx]
      [       w, %0, w, w; *             ] #
      [     ?&w,  w, w, w; yes           ] #
@@ -3182,7 +3182,7 @@ (define_insn "@aarch64_pred_<optab><mode>"
             [(match_operand:SVE_FULL 2 "register_operand")]
             UNSPEC_REVD_ONLY)]
          UNSPEC_PRED_X))]
-  "TARGET_STREAMING"
+  "TARGET_SVE2p1_OR_SME"
   {@ [ cons: =0 , 1   , 2 ; attrs: movprfx ]
      [ w        , Upl , 0 ; *              ] revd\t%0.q, %1/m, %2.q
     [ ?&w      , Upl , w ; yes            ] movprfx\t%0, %2\;revd\t%0.q, %1/m, %2.q
@@ -3198,7 +3198,7 @@ (define_insn "@cond_<optab><mode>"
             UNSPEC_REVD_ONLY)
           (match_operand:SVE_FULL 3 "register_operand")]
          UNSPEC_SEL))]
-  "TARGET_STREAMING"
+  "TARGET_SVE2p1_OR_SME"
   {@ [ cons: =0 , 1   , 2 , 3  ; attrs: movprfx ]
      [ w        , Upl , w , 0  ; *              ] revd\t%0.q, %1/m, %2.q
     [ ?&w      , Upl , w , w  ; yes            ] movprfx\t%0, %3\;revd\t%0.q, %1/m, %2.q
diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
index d17f40ce22e..404efa16c28 100644
--- a/gcc/config/aarch64/aarch64.h
+++ b/gcc/config/aarch64/aarch64.h
@@ -338,6 +338,9 @@ constexpr auto AARCH64_FL_DEFAULT_ISA_MODE ATTRIBUTE_UNUSED
 /* SVE2 SM4 instructions, enabled through +sve2-sm4.  */
 #define TARGET_SVE2_SM4 (AARCH64_HAVE_ISA (SVE2_SM4) && TARGET_NON_STREAMING)
 
+/* SVE2p1 instructions, enabled through +sve2p1.  */
+#define TARGET_SVE2p1 AARCH64_HAVE_ISA (SVE2p1)
+
 /* SME instructions, enabled through +sme.  Note that this does not
    imply anything about the state of PSTATE.SM; instructions that require
    SME and streaming mode should use TARGET_STREAMING instead.  */
@@ -481,6 +484,12 @@ constexpr auto AARCH64_FL_DEFAULT_ISA_MODE ATTRIBUTE_UNUSED
 /* fp8 instructions are enabled through +fp8.  */
 #define TARGET_FP8 AARCH64_HAVE_ISA (FP8)
 
+/* Combinatorial tests.  */
+
+/* There's no need to check TARGET_SME for streaming or streaming-compatible
+   functions, since streaming mode itself implies SME.  */
+#define TARGET_SVE2p1_OR_SME (TARGET_SVE2p1 || TARGET_STREAMING)
+
 /* Standard register usage.  */
 
 /* 31 64-bit general purpose registers R0-R30:
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/clamp_1.c b/gcc/testsuite/gcc.target/aarch64/sve/clamp_1.c
new file mode 100644
index 00000000000..92fef098865
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/clamp_1.c
@@ -0,0 +1,40 @@
+// { dg-options "-O" }
+
+#include <arm_sve.h>
+
+#pragma GCC target "+sve2p1"
+
+#define TEST(TYPE)                                                     \
+  TYPE                                                                 \
+  tied1_##TYPE(TYPE a, TYPE b, TYPE c)                                 \
+  {                                                                    \
+    return svmin_x(svptrue_b8(), svmax_x(svptrue_b8(), a, b), c);      \
+  }                                                                    \
+                                                                       \
+  TYPE                                                                 \
+  tied2_##TYPE(TYPE a, TYPE b, TYPE c)                                 \
+  {                                                                    \
+    return svmin_x(svptrue_b8(), svmax_x(svptrue_b8(), b, a), c);      \
+  }
+
+TEST(svint8_t)
+TEST(svint16_t)
+TEST(svint32_t)
+TEST(svint64_t)
+
+TEST(svuint8_t)
+TEST(svuint16_t)
+TEST(svuint32_t)
+TEST(svuint64_t)
+
+/* { dg-final { scan-assembler-times {\tsclamp\tz0\.b, z1\.b, z2\.b\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tsclamp\tz0\.h, z1\.h, z2\.h\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tsclamp\tz0\.s, z1\.s, z2\.s\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tsclamp\tz0\.d, z1\.d, z2\.d\n} 2 } } */
+
+/* { dg-final { scan-assembler-times {\tuclamp\tz0\.b, z1\.b, z2\.b\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tuclamp\tz0\.h, z1\.h, z2\.h\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tuclamp\tz0\.s, z1\.s, z2\.s\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tuclamp\tz0\.d, z1\.d, z2\.d\n} 2 } } */
+
+/* { dg-final { scan-assembler-not {\tmovprfx\t} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/clamp_2.c b/gcc/testsuite/gcc.target/aarch64/sve/clamp_2.c
new file mode 100644
index 00000000000..f96c0046465
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/clamp_2.c
@@ -0,0 +1,34 @@
+// { dg-options "-O" }
+
+#include <arm_sve.h>
+
+#pragma GCC target "+sve2p1"
+
+#define TEST(TYPE)                                                     \
+  TYPE                                                                 \
+  untied_##TYPE(TYPE a, TYPE b, TYPE c, TYPE d)                        \
+  {                                                                    \
+    return svmin_x(svptrue_b8(), svmax_x(svptrue_b8(), b, c), d);      \
+  }
+
+TEST(svint8_t)
+TEST(svint16_t)
+TEST(svint32_t)
+TEST(svint64_t)
+
+TEST(svuint8_t)
+TEST(svuint16_t)
+TEST(svuint32_t)
+TEST(svuint64_t)
+
+/* { dg-final { scan-assembler-times {\tsclamp\tz0\.b, z2\.b, z3\.b\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tsclamp\tz0\.h, z2\.h, z3\.h\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tsclamp\tz0\.s, z2\.s, z3\.s\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tsclamp\tz0\.d, z2\.d, z3\.d\n} 1 } } */
+
+/* { dg-final { scan-assembler-times {\tuclamp\tz0\.b, z2\.b, z3\.b\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tuclamp\tz0\.h, z2\.h, z3\.h\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tuclamp\tz0\.s, z2\.s, z3\.s\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tuclamp\tz0\.d, z2\.d, z3\.d\n} 1 } } */
+
+/* { dg-final { scan-assembler-times {\tmovprfx\tz0, z1\n} 8 } } */
diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp
index 75703ddca60..a8833d585c6 100644
--- a/gcc/testsuite/lib/target-supports.exp
+++ b/gcc/testsuite/lib/target-supports.exp
@@ -12100,6 +12100,16 @@ foreach { aarch64_ext } { "fp" "simd" "crypto" "crc" "lse" "dotprod" "sve"
     }]
 }
 
+proc check_effective_target_aarch64_asm_sve2p1_ok { } {
+    if { [istarget aarch64*-*-*] } {
+       return [check_no_compiler_messages aarch64_sve2p1_assembler object {
+           __asm__ (".arch_extension sve2p1; ld1w {z0.q},p7/z,[x0]");
+       } "-march=armv8-a+sve2p1"]
+    } else {
+       return 0
+    }
+}
+
 proc check_effective_target_aarch64_small { } {
     if { [istarget aarch64*-*-*] } {
        return [check_no_compiler_messages aarch64_small object {
-- 
2.25.1

Attachment: tests.diff.xz
Description: application/xz

Reply via email to