[committed][AArch64] Add scatter stores for partial SVE modes

Richard Sandiford Sat, 16 Nov 2019 03:32:05 -0800

This patch adds support for scatter stores of partial vectors,
where the vector base or offset elements can be wider than the
elements being stored.


Tested on aarch64-linux-gnu and applied as r278347.

Richard


2019-11-16  Richard Sandiford  <richard.sandif...@arm.com>

gcc/
        * config/aarch64/aarch64-sve.md
        (scatter_store<SVE_FULL_SD:mode><v_int_equiv>): Extend to...
        (scatter_store<SVE_24:mode><v_int_container>): ...this.
        (mask_scatter_store<SVE_FULL_S:mode><v_int_equiv>): Extend to...
        (mask_scatter_store<SVE_4:mode><v_int_container>): ...this.
        (mask_scatter_store<SVE_FULL_D:mode><v_int_equiv>): Extend to...
        (mask_scatter_store<SVE_2:mode><v_int_container>): ...this.
        (*mask_scatter_store<mode><v_int_container>_<su>xtw_unpacked): New
        pattern.
        (*mask_scatter_store<SVE_FULL_D:mode><v_int_equiv>_sxtw): Extend to...
        (*mask_scatter_store<SVE_2:mode><v_int_container>_sxtw): ...this.
        (*mask_scatter_store<SVE_FULL_D:mode><v_int_equiv>_uxtw): Extend to...
        (*mask_scatter_store<SVE_2:mode><v_int_container>_uxtw): ...this.

gcc/testsuite/
        * gcc.target/aarch64/sve/scatter_store_1.c (TEST_LOOP): Start at 0.
        (TEST_ALL): Add tests for 8-bit and 16-bit elements.
        * gcc.target/aarch64/sve/scatter_store_2.c: Update accordingly.
        * gcc.target/aarch64/sve/scatter_store_3.c (TEST_LOOP): Start at 0.
        (TEST_ALL): Add tests for 8-bit and 16-bit elements.
        * gcc.target/aarch64/sve/scatter_store_4.c: Update accordingly.
        * gcc.target/aarch64/sve/scatter_store_5.c (TEST_LOOP): Start at 0.
        (TEST_ALL): Add tests for 8-bit, 16-bit and 32-bit elements.
        * gcc.target/aarch64/sve/scatter_store_8.c: New test.
        * gcc.target/aarch64/sve/scatter_store_9.c: Likewise.

Index: gcc/config/aarch64/aarch64-sve.md
===================================================================
--- gcc/config/aarch64/aarch64-sve.md   2019-11-16 11:26:06.895163107 +0000
+++ gcc/config/aarch64/aarch64-sve.md   2019-11-16 11:28:29.386158694 +0000
@@ -2135,15 +2135,15 @@ (define_insn "@aarch64_stnt1<mode>"
 ;; -------------------------------------------------------------------------
 
 ;; Unpredicated scatter stores.
-(define_expand "scatter_store<mode><v_int_equiv>"
+(define_expand "scatter_store<mode><v_int_container>"
   [(set (mem:BLK (scratch))
        (unspec:BLK
          [(match_dup 5)
           (match_operand:DI 0 "aarch64_sve_gather_offset_<Vesize>")
-          (match_operand:<V_INT_EQUIV> 1 "register_operand")
+          (match_operand:<V_INT_CONTAINER> 1 "register_operand")
           (match_operand:DI 2 "const_int_operand")
           (match_operand:DI 3 "aarch64_gather_scale_operand_<Vesize>")
-          (match_operand:SVE_FULL_SD 4 "register_operand")]
+          (match_operand:SVE_24 4 "register_operand")]
          UNSPEC_ST1_SCATTER))]
   "TARGET_SVE"
   {
@@ -2153,48 +2153,74 @@ (define_expand "scatter_store<mode><v_in
 
 ;; Predicated scatter stores for 32-bit elements.  Operand 2 is true for
 ;; unsigned extension and false for signed extension.
-(define_insn "mask_scatter_store<mode><v_int_equiv>"
+(define_insn "mask_scatter_store<mode><v_int_container>"
   [(set (mem:BLK (scratch))
        (unspec:BLK
          [(match_operand:VNx4BI 5 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl")
-          (match_operand:DI 0 "aarch64_sve_gather_offset_w" "Z, vgw, rk, rk, rk, rk")
+          (match_operand:DI 0 "aarch64_sve_gather_offset_<Vesize>" "Z, vgw, rk, rk, rk, rk")
           (match_operand:VNx4SI 1 "register_operand" "w, w, w, w, w, w")
           (match_operand:DI 2 "const_int_operand" "Ui1, Ui1, Z, Ui1, Z, Ui1")
-          (match_operand:DI 3 "aarch64_gather_scale_operand_w" "Ui1, Ui1, Ui1, Ui1, i, i")
-          (match_operand:SVE_FULL_S 4 "register_operand" "w, w, w, w, w, w")]
+          (match_operand:DI 3 "aarch64_gather_scale_operand_<Vesize>" "Ui1, Ui1, Ui1, Ui1, i, i")
+          (match_operand:SVE_4 4 "register_operand" "w, w, w, w, w, w")]
          UNSPEC_ST1_SCATTER))]
   "TARGET_SVE"
   "@
-   st1w\t%4.s, %5, [%1.s]
-   st1w\t%4.s, %5, [%1.s, #%0]
-   st1w\t%4.s, %5, [%0, %1.s, sxtw]
-   st1w\t%4.s, %5, [%0, %1.s, uxtw]
-   st1w\t%4.s, %5, [%0, %1.s, sxtw %p3]
-   st1w\t%4.s, %5, [%0, %1.s, uxtw %p3]"
+   st1<Vesize>\t%4.s, %5, [%1.s]
+   st1<Vesize>\t%4.s, %5, [%1.s, #%0]
+   st1<Vesize>\t%4.s, %5, [%0, %1.s, sxtw]
+   st1<Vesize>\t%4.s, %5, [%0, %1.s, uxtw]
+   st1<Vesize>\t%4.s, %5, [%0, %1.s, sxtw %p3]
+   st1<Vesize>\t%4.s, %5, [%0, %1.s, uxtw %p3]"
 )
 
 ;; Predicated scatter stores for 64-bit elements.  The value of operand 2
 ;; doesn't matter in this case.
-(define_insn "mask_scatter_store<mode><v_int_equiv>"
+(define_insn "mask_scatter_store<mode><v_int_container>"
   [(set (mem:BLK (scratch))
        (unspec:BLK
          [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl, Upl, Upl")
-          (match_operand:DI 0 "aarch64_sve_gather_offset_d" "Z, vgd, rk, rk")
+          (match_operand:DI 0 "aarch64_sve_gather_offset_<Vesize>" "Z, vgd, rk, rk")
           (match_operand:VNx2DI 1 "register_operand" "w, w, w, w")
           (match_operand:DI 2 "const_int_operand")
-          (match_operand:DI 3 "aarch64_gather_scale_operand_d" "Ui1, Ui1, Ui1, i")
-          (match_operand:SVE_FULL_D 4 "register_operand" "w, w, w, w")]
+          (match_operand:DI 3 "aarch64_gather_scale_operand_<Vesize>" "Ui1, Ui1, Ui1, i")
+          (match_operand:SVE_2 4 "register_operand" "w, w, w, w")]
          UNSPEC_ST1_SCATTER))]
   "TARGET_SVE"
   "@
-   st1d\t%4.d, %5, [%1.d]
-   st1d\t%4.d, %5, [%1.d, #%0]
-   st1d\t%4.d, %5, [%0, %1.d]
-   st1d\t%4.d, %5, [%0, %1.d, lsl %p3]"
+   st1<Vesize>\t%4.d, %5, [%1.d]
+   st1<Vesize>\t%4.d, %5, [%1.d, #%0]
+   st1<Vesize>\t%4.d, %5, [%0, %1.d]
+   st1<Vesize>\t%4.d, %5, [%0, %1.d, lsl %p3]"
 )
 
-;; Likewise, but with the offset being sign-extended from 32 bits.
-(define_insn_and_rewrite "*mask_scatter_store<mode><v_int_equiv>_sxtw"
+;; Likewise, but with the offset being extended from 32 bits.
+(define_insn_and_rewrite "*mask_scatter_store<mode><v_int_container>_<su>xtw_unpacked"
+  [(set (mem:BLK (scratch))
+       (unspec:BLK
+         [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl")
+          (match_operand:DI 0 "register_operand" "rk, rk")
+          (unspec:VNx2DI
+            [(match_operand 6)
+             (ANY_EXTEND:VNx2DI
+               (match_operand:VNx2SI 1 "register_operand" "w, w"))]
+            UNSPEC_PRED_X)
+          (match_operand:DI 2 "const_int_operand")
+          (match_operand:DI 3 "aarch64_gather_scale_operand_<Vesize>" "Ui1, i")
+          (match_operand:SVE_2 4 "register_operand" "w, w")]
+         UNSPEC_ST1_SCATTER))]
+  "TARGET_SVE"
+  "@
+   st1<Vesize>\t%4.d, %5, [%0, %1.d, <su>xtw]
+   st1<Vesize>\t%4.d, %5, [%0, %1.d, <su>xtw %p3]"
+  "&& !CONSTANT_P (operands[6])"
+  {
+    operands[6] = CONSTM1_RTX (<VPRED>mode);
+  }
+)
+
+;; Likewise, but with the offset being truncated to 32 bits and then
+;; sign-extended.
+(define_insn_and_rewrite "*mask_scatter_store<mode><v_int_container>_sxtw"
   [(set (mem:BLK (scratch))
        (unspec:BLK
          [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl")
@@ -2206,21 +2232,22 @@ (define_insn_and_rewrite "*mask_scatter_
                  (match_operand:VNx2DI 1 "register_operand" "w, w")))]
             UNSPEC_PRED_X)
           (match_operand:DI 2 "const_int_operand")
-          (match_operand:DI 3 "aarch64_gather_scale_operand_d" "Ui1, i")
-          (match_operand:SVE_FULL_D 4 "register_operand" "w, w")]
+          (match_operand:DI 3 "aarch64_gather_scale_operand_<Vesize>" "Ui1, i")
+          (match_operand:SVE_2 4 "register_operand" "w, w")]
          UNSPEC_ST1_SCATTER))]
   "TARGET_SVE"
   "@
-   st1d\t%4.d, %5, [%0, %1.d, sxtw]
-   st1d\t%4.d, %5, [%0, %1.d, sxtw %p3]"
-  "&& !rtx_equal_p (operands[5], operands[6])"
+   st1<Vesize>\t%4.d, %5, [%0, %1.d, sxtw]
+   st1<Vesize>\t%4.d, %5, [%0, %1.d, sxtw %p3]"
+  "&& !CONSTANT_P (operands[6])"
   {
-    operands[6] = copy_rtx (operands[5]);
+    operands[6] = CONSTM1_RTX (<VPRED>mode);
   }
 )
 
-;; Likewise, but with the offset being zero-extended from 32 bits.
-(define_insn "*mask_scatter_store<mode><v_int_equiv>_uxtw"
+;; Likewise, but with the offset being truncated to 32 bits and then
+;; zero-extended.
+(define_insn "*mask_scatter_store<mode><v_int_container>_uxtw"
   [(set (mem:BLK (scratch))
        (unspec:BLK
          [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl")
@@ -2229,13 +2256,13 @@ (define_insn "*mask_scatter_store<mode><
             (match_operand:VNx2DI 1 "register_operand" "w, w")
             (match_operand:VNx2DI 6 "aarch64_sve_uxtw_immediate"))
           (match_operand:DI 2 "const_int_operand")
-          (match_operand:DI 3 "aarch64_gather_scale_operand_d" "Ui1, i")
-          (match_operand:SVE_FULL_D 4 "register_operand" "w, w")]
+          (match_operand:DI 3 "aarch64_gather_scale_operand_<Vesize>" "Ui1, i")
+          (match_operand:SVE_2 4 "register_operand" "w, w")]
          UNSPEC_ST1_SCATTER))]
   "TARGET_SVE"
   "@
-   st1d\t%4.d, %5, [%0, %1.d, uxtw]
-   st1d\t%4.d, %5, [%0, %1.d, uxtw %p3]"
+   st1<Vesize>\t%4.d, %5, [%0, %1.d, uxtw]
+   st1<Vesize>\t%4.d, %5, [%0, %1.d, uxtw %p3]"
 )
 
 ;; -------------------------------------------------------------------------
Index: gcc/testsuite/gcc.target/aarch64/sve/scatter_store_1.c
===================================================================
--- gcc/testsuite/gcc.target/aarch64/sve/scatter_store_1.c      2019-03-08 18:14:29.792994691 +0000
+++ gcc/testsuite/gcc.target/aarch64/sve/scatter_store_1.c      2019-11-16 11:28:29.386158694 +0000
@@ -13,11 +13,15 @@ #define TEST_LOOP(DATA_TYPE, BITS)                                  \
   f_##DATA_TYPE (DATA_TYPE *restrict dest, DATA_TYPE *restrict src,    \
                 INDEX##BITS *indices, int n)                           \
   {                                                                    \
-    for (int i = 9; i < n; ++i)                                                \
+    for (int i = 0; i < n; ++i)                                                \
       dest[indices[i]] = src[i] + 1;                                   \
   }
 
 #define TEST_ALL(T)                            \
+  T (int8_t, 32)                               \
+  T (uint8_t, 32)                              \
+  T (int16_t, 32)                              \
+  T (uint16_t, 32)                             \
   T (int32_t, 32)                              \
   T (uint32_t, 32)                             \
   T (float, 32)                                        \
@@ -27,5 +31,7 @@ #define TEST_ALL(T)                           \
 
 TEST_ALL (TEST_LOOP)
 
+/* { dg-final { scan-assembler-times {\tst1b\tz[0-9]+\.s, p[0-7], \[x[0-9]+, z[0-9]+.s, sxtw\]\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tst1h\tz[0-9]+\.s, p[0-7], \[x[0-9]+, z[0-9]+.s, sxtw 1\]\n} 2 } } */
 /* { dg-final { scan-assembler-times {\tst1w\tz[0-9]+\.s, p[0-7], \[x[0-9]+, z[0-9]+.s, sxtw 2\]\n} 3 } } */
 /* { dg-final { scan-assembler-times {\tst1d\tz[0-9]+\.d, p[0-7], \[x[0-9]+, z[0-9]+.d, lsl 3\]\n} 3 } } */
Index: gcc/testsuite/gcc.target/aarch64/sve/scatter_store_2.c
===================================================================
--- gcc/testsuite/gcc.target/aarch64/sve/scatter_store_2.c      2019-03-08 18:14:29.764994797 +0000
+++ gcc/testsuite/gcc.target/aarch64/sve/scatter_store_2.c      2019-11-16 11:28:29.386158694 +0000
@@ -6,5 +6,7 @@ #define INDEX64 uint64_t
 
 #include "scatter_store_1.c"
 
+/* { dg-final { scan-assembler-times {\tst1b\tz[0-9]+\.s, p[0-7], \[x[0-9]+, z[0-9]+.s, uxtw\]\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tst1h\tz[0-9]+\.s, p[0-7], \[x[0-9]+, z[0-9]+.s, uxtw 1\]\n} 2 } } */
 /* { dg-final { scan-assembler-times {\tst1w\tz[0-9]+\.s, p[0-7], \[x[0-9]+, z[0-9]+.s, uxtw 2\]\n} 3 } } */
 /* { dg-final { scan-assembler-times {\tst1d\tz[0-9]+\.d, p[0-7], \[x[0-9]+, z[0-9]+.d, lsl 3\]\n} 3 } } */
Index: gcc/testsuite/gcc.target/aarch64/sve/scatter_store_3.c
===================================================================
--- gcc/testsuite/gcc.target/aarch64/sve/scatter_store_3.c      2019-03-08 18:14:29.768994780 +0000
+++ gcc/testsuite/gcc.target/aarch64/sve/scatter_store_3.c      2019-11-16 11:28:29.386158694 +0000
@@ -8,17 +8,20 @@ #define INDEX32 int32_t
 #define INDEX64 int64_t
 #endif
 
-/* Invoked 18 times for each data size.  */
 #define TEST_LOOP(DATA_TYPE, BITS)                                     \
   void __attribute__ ((noinline, noclone))                             \
   f_##DATA_TYPE (DATA_TYPE *restrict dest, DATA_TYPE *restrict src,    \
                 INDEX##BITS *indices, int n)                           \
   {                                                                    \
-    for (int i = 9; i < n; ++i)                                                \
+    for (int i = 0; i < n; ++i)                                                \
       *(DATA_TYPE *) ((char *) dest + indices[i]) = src[i] + 1;                \
   }
 
 #define TEST_ALL(T)                            \
+  T (int8_t, 32)                               \
+  T (uint8_t, 32)                              \
+  T (int16_t, 32)                              \
+  T (uint16_t, 32)                             \
   T (int32_t, 32)                              \
   T (uint32_t, 32)                             \
   T (float, 32)                                        \
@@ -28,5 +31,7 @@ #define TEST_ALL(T)                           \
 
 TEST_ALL (TEST_LOOP)
 
+/* { dg-final { scan-assembler-times {\tst1b\tz[0-9]+\.s, p[0-7], \[x[0-9]+, z[0-9]+.s, sxtw\]\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tst1h\tz[0-9]+\.s, p[0-7], \[x[0-9]+, z[0-9]+.s, sxtw\]\n} 2 } } */
 /* { dg-final { scan-assembler-times {\tst1w\tz[0-9]+\.s, p[0-7], \[x[0-9]+, z[0-9]+.s, sxtw\]\n} 3 } } */
 /* { dg-final { scan-assembler-times {\tst1d\tz[0-9]+\.d, p[0-7], \[x[0-9]+, z[0-9]+.d\]\n} 3 } } */
Index: gcc/testsuite/gcc.target/aarch64/sve/scatter_store_4.c
===================================================================
--- gcc/testsuite/gcc.target/aarch64/sve/scatter_store_4.c      2019-03-08 18:14:29.772994767 +0000
+++ gcc/testsuite/gcc.target/aarch64/sve/scatter_store_4.c      2019-11-16 11:28:29.386158694 +0000
@@ -6,5 +6,7 @@ #define INDEX64 uint64_t
 
 #include "scatter_store_3.c"
 
+/* { dg-final { scan-assembler-times {\tst1b\tz[0-9]+\.s, p[0-7], \[x[0-9]+, z[0-9]+.s, uxtw\]\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tst1h\tz[0-9]+\.s, p[0-7], \[x[0-9]+, z[0-9]+.s, uxtw\]\n} 2 } } */
 /* { dg-final { scan-assembler-times {\tst1w\tz[0-9]+\.s, p[0-7], \[x[0-9]+, z[0-9]+.s, uxtw\]\n} 3 } } */
 /* { dg-final { scan-assembler-times {\tst1d\tz[0-9]+\.d, p[0-7], \[x[0-9]+, z[0-9]+.d\]\n} 3 } } */
Index: gcc/testsuite/gcc.target/aarch64/sve/scatter_store_5.c
===================================================================
--- gcc/testsuite/gcc.target/aarch64/sve/scatter_store_5.c      2019-03-08 18:14:29.776994751 +0000
+++ gcc/testsuite/gcc.target/aarch64/sve/scatter_store_5.c      2019-11-16 11:28:29.386158694 +0000
@@ -3,21 +3,29 @@
 
 #include <stdint.h>
 
-/* Invoked 18 times for each data size.  */
 #define TEST_LOOP(DATA_TYPE)                                           \
   void __attribute__ ((noinline, noclone))                             \
   f_##DATA_TYPE (DATA_TYPE *restrict *dest, DATA_TYPE *restrict src,   \
                 int n)                                                 \
   {                                                                    \
-    for (int i = 9; i < n; ++i)                                                \
+    for (int i = 0; i < n; ++i)                                                \
       *dest[i] = src[i] + 1;                                           \
   }
 
 #define TEST_ALL(T)                            \
+  T (int8_t)                                   \
+  T (uint8_t)                                  \
+  T (int16_t)                                  \
+  T (uint16_t)                                 \
+  T (int32_t)                                  \
+  T (uint32_t)                                 \
   T (int64_t)                                  \
   T (uint64_t)                                 \
   T (double)
 
 TEST_ALL (TEST_LOOP)
 
+/* We assume this isn't profitable for bytes.  */
+/* { dg-final { scan-assembler-times {\tst1h\tz[0-9]+\.d, p[0-7], \[z[0-9]+.d\]\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tst1w\tz[0-9]+\.d, p[0-7], \[z[0-9]+.d\]\n} 2 } } */
 /* { dg-final { scan-assembler-times {\tst1d\tz[0-9]+\.d, p[0-7], \[z[0-9]+.d\]\n} 3 } } */
Index: gcc/testsuite/gcc.target/aarch64/sve/scatter_store_8.c
===================================================================
--- /dev/null   2019-09-17 11:41:18.176664108 +0100
+++ gcc/testsuite/gcc.target/aarch64/sve/scatter_store_8.c      2019-11-16 11:28:29.386158694 +0000
@@ -0,0 +1,46 @@
+/* { dg-do assemble { target aarch64_asm_sve_ok } } */
+/* { dg-options "-O2 -ftree-vectorize -fwrapv --save-temps" } */
+
+#include <stdint.h>
+
+#ifndef INDEX32
+#define INDEX16 int16_t
+#define INDEX32 int32_t
+#endif
+
+#define TEST_LOOP(DATA_TYPE, BITS)                                     \
+  void __attribute__ ((noinline, noclone))                             \
+  f_##DATA_TYPE (DATA_TYPE *restrict dest, DATA_TYPE *restrict src,    \
+                INDEX##BITS *indices, INDEX##BITS mask, int n)         \
+  {                                                                    \
+    for (int i = 0; i < n; ++i)                                                \
+      dest[(INDEX##BITS) (indices[i] + mask)] = src[i];                        \
+  }
+
+#define TEST_ALL(T)                            \
+  T (int8_t, 16)                               \
+  T (uint8_t, 16)                              \
+  T (int16_t, 16)                              \
+  T (uint16_t, 16)                             \
+  T (_Float16, 16)                             \
+  T (int32_t, 16)                              \
+  T (uint32_t, 16)                             \
+  T (float, 16)                                        \
+  T (int64_t, 32)                              \
+  T (uint64_t, 32)                             \
+  T (double, 32)
+
+TEST_ALL (TEST_LOOP)
+
+/* { dg-final { scan-assembler-times {\tst1b\tz[0-9]+\.s, p[0-7], \[x[0-9]+, z[0-9]+.s, sxtw\]\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tst1h\tz[0-9]+\.s, p[0-7], \[x[0-9]+, z[0-9]+.s, sxtw 1\]\n} 3 } } */
+/* { dg-final { scan-assembler-times {\tst1w\tz[0-9]+\.s, p[0-7], \[x[0-9]+, z[0-9]+.s, sxtw 2\]\n} 3 } } */
+/* { dg-final { scan-assembler-times {\tst1d\tz[0-9]+\.d, p[0-7], \[x[0-9]+, z[0-9]+.d, sxtw 3\]\n} 3 } } */
+
+/* { dg-final { scan-assembler-times {\tsxt.\tz} 8 } } */
+/* { dg-final { scan-assembler-times {\tsxth\tz[0-9]+\.s,} 8 } } */
+
+/* { dg-final { scan-assembler-times {\tst1b\tz[0-9]+\.s,} 2 } } */
+/* { dg-final { scan-assembler-times {\tst1h\tz[0-9]+\.s,} 3 } } */
+/* { dg-final { scan-assembler-times {\tst1w\tz[0-9]+\.s,} 3 } } */
+/* { dg-final { scan-assembler-times {\tst1d\tz[0-9]+\.d,} 3 } } */
Index: gcc/testsuite/gcc.target/aarch64/sve/scatter_store_9.c
===================================================================
--- /dev/null   2019-09-17 11:41:18.176664108 +0100
+++ gcc/testsuite/gcc.target/aarch64/sve/scatter_store_9.c      2019-11-16 11:28:29.386158694 +0000
@@ -0,0 +1,20 @@
+/* { dg-do assemble { target aarch64_asm_sve_ok } } */
+/* { dg-options "-O2 -ftree-vectorize -fwrapv --save-temps" } */
+
+#define INDEX16 uint16_t
+#define INDEX32 uint32_t
+
+#include "scatter_store_8.c"
+
+/* { dg-final { scan-assembler-times {\tst1b\tz[0-9]+\.s, p[0-7], \[x[0-9]+, z[0-9]+.s, uxtw\]\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tst1h\tz[0-9]+\.s, p[0-7], \[x[0-9]+, z[0-9]+.s, uxtw 1\]\n} 3 } } */
+/* { dg-final { scan-assembler-times {\tst1w\tz[0-9]+\.s, p[0-7], \[x[0-9]+, z[0-9]+.s, uxtw 2\]\n} 3 } } */
+/* { dg-final { scan-assembler-times {\tst1d\tz[0-9]+\.d, p[0-7], \[x[0-9]+, z[0-9]+.d, uxtw 3\]\n} 3 } } */
+
+/* { dg-final { scan-assembler-times {\tuxt.\tz} 8 } } */
+/* { dg-final { scan-assembler-times {\tuxth\tz[0-9]+\.s,} 8 } } */
+
+/* { dg-final { scan-assembler-times {\tst1b\tz[0-9]+\.s,} 2 } } */
+/* { dg-final { scan-assembler-times {\tst1h\tz[0-9]+\.s,} 3 } } */
+/* { dg-final { scan-assembler-times {\tst1w\tz[0-9]+\.s,} 3 } } */
+/* { dg-final { scan-assembler-times {\tst1d\tz[0-9]+\.d,} 3 } } */

[committed][AArch64] Add scatter stores for partial SVE modes

Reply via email to