[PATCH][AArch64] Add STP pattern to store a vec_concat of two 64-bit registers

Kyrill Tkachov Tue, 06 Jun 2017 01:41:29 -0700

Hi all,

On top of the previous vec_merge simplifications [1] we can add this pattern to 
perform
a store of a vec_concat of two 64-bit values in distinct registers as an STP.
This avoids constructing such a vector explicitly in a register and storing it 
as
a Q register.
This way for the code in the testcase we can generate:


construct_lane_1:
        ldp     d1, d0, [x0]
        fmov    d3, 1.0e+0
        fmov    d2, 2.0e+0
        fadd    d4, d1, d3
        fadd    d5, d0, d2
        stp     d4, d5, [x1, 32]
        ret

construct_lane_2:
        ldp     x2, x0, [x0]
        add     x3, x2, 1
        add     x4, x0, 2
        stp     x3, x4, [x1, 32]
        ret

instead of the current:
construct_lane_1:
        ldp     d0, d1, [x0]
        fmov    d3, 1.0e+0
        fmov    d2, 2.0e+0
        fadd    d0, d0, d3
        fadd    d1, d1, d2
        dup     v0.2d, v0.d[0]
        ins     v0.d[1], v1.d[0]
        str     q0, [x1, 32]
        ret

construct_lane_2:
        ldp     x2, x3, [x0]
        add     x0, x2, 1
        add     x2, x3, 2
        dup     v0.2d, x0
        ins     v0.d[1], x2
        str     q0, [x1, 32]
        ret

Bootstrapped and tested on aarch64-none-linux-gnu.
Ok for GCC 8?

Thanks,
Kyrill

[1] https://gcc.gnu.org/ml/gcc-patches/2017-06/msg00272.html
    https://gcc.gnu.org/ml/gcc-patches/2017-06/msg00273.html
    https://gcc.gnu.org/ml/gcc-patches/2017-06/msg00274.html

2017-06-06  Kyrylo Tkachov  <kyrylo.tkac...@arm.com>

    * config/aarch64/aarch64-simd.md (store_pair_lanes<mode>):
    New pattern.
    * config/aarch64/constraints.md (Uml): New constraint.
    * config/aarch64/predicates.md (aarch64_mem_pair_lanes_operand): New
    predicate.

2017-06-06  Kyrylo Tkachov  <kyrylo.tkac...@arm.com>

    * gcc.target/aarch64/store_v2vec_lanes.c: New test.

diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index b78affe9b06ffc973888822a4fcf1ec8e80ecdf6..0847e7aff230782874565e565929c10053807839 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -2817,6 +2817,18 @@ (define_insn "load_pair_lanes<mode>"
   [(set_attr "type" "neon_load1_1reg_q")]
 )
 
+(define_insn "store_pair_lanes<mode>"
+  [(set (match_operand:<VDBL> 0 "aarch64_mem_pair_lanes_operand" "=Uml, Uml")
+	(vec_concat:<VDBL>
+	   (match_operand:VDC 1 "register_operand" "w, r")
+	   (match_operand:VDC 2 "register_operand" "w, r")))]
+  "TARGET_SIMD"
+  "@
+   stp\\t%d1, %d2, %0
+   stp\\t%x1, %x2, %0"
+  [(set_attr "type" "neon_stp, store2")]
+)
+
 ;; In this insn, operand 1 should be low, and operand 2 the high part of the
 ;; dest vector.
 
diff --git a/gcc/config/aarch64/constraints.md b/gcc/config/aarch64/constraints.md
index ab607b9f7488e903a14fe93e88d4c4e1fad762b3..6bdb93f07a7c8d19675971ccfc71e681fd2dae66 100644
--- a/gcc/config/aarch64/constraints.md
+++ b/gcc/config/aarch64/constraints.md
@@ -154,6 +154,15 @@ (define_memory_constraint "Ump"
        (match_test "aarch64_legitimate_address_p (GET_MODE (op), XEXP (op, 0),
 						  PARALLEL, 1)")))
 
+;; Used for storing two 64-bit values in an AdvSIMD register using an STP
+;; as a 128-bit vec_concat.
+(define_memory_constraint "Uml"
+  "@internal
+  A memory address suitable for a load/store pair operation."
+  (and (match_code "mem")
+       (match_test "aarch64_legitimate_address_p (DFmode, XEXP (op, 0),
+						   PARALLEL, 1)")))
+
 (define_memory_constraint "Utv"
   "@internal
    An address valid for loading/storing opaque structure
diff --git a/gcc/config/aarch64/predicates.md b/gcc/config/aarch64/predicates.md
index 16e864765cde3bf8f54a11e5fc8db5a53606db30..c175f9d9a5f969817782bede858b1834ac642e28 100644
--- a/gcc/config/aarch64/predicates.md
+++ b/gcc/config/aarch64/predicates.md
@@ -173,6 +173,13 @@ (define_predicate "aarch64_mem_pair_operand"
        (match_test "aarch64_legitimate_address_p (mode, XEXP (op, 0), PARALLEL,
 					       0)")))
 
+;; Used for storing two 64-bit values in an AdvSIMD register using an STP
+;; as a 128-bit vec_concat.
+(define_predicate "aarch64_mem_pair_lanes_operand"
+  (and (match_code "mem")
+       (match_test "aarch64_legitimate_address_p (DFmode, XEXP (op, 0),
+						   PARALLEL, 1)")))
+
 (define_predicate "aarch64_prefetch_operand"
   (match_test "aarch64_address_valid_for_prefetch_p (op, false)"))
 
diff --git a/gcc/testsuite/gcc.target/aarch64/store_v2vec_lanes.c b/gcc/testsuite/gcc.target/aarch64/store_v2vec_lanes.c
new file mode 100644
index 0000000000000000000000000000000000000000..6810db3c54dce81777caf062177facedb464d1d6
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/store_v2vec_lanes.c
@@ -0,0 +1,31 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+typedef long long v2di __attribute__ ((vector_size (16)));
+typedef double v2df __attribute__ ((vector_size (16)));
+
+void
+construct_lane_1 (double *y, v2df *z)
+{
+  double y0 = y[0] + 1;
+  double y1 = y[1] + 2;
+  v2df x = {y0, y1};
+  z[2] = x;
+}
+
+void
+construct_lane_2 (long long *y, v2di *z)
+{
+  long long y0 = y[0] + 1;
+  long long y1 = y[1] + 2;
+  v2di x = {y0, y1};
+  z[2] = x;
+}
+
+/* We can use the load_pair_lanes<mode> pattern to vec_concat two DI/DF
+   values from consecutive memory into a 2-element vector by using
+   a Q-reg LDR.  */
+
+/* { dg-final { scan-assembler-times "stp\td\[0-9\]+, d\[0-9\]+" 1 } } */
+/* { dg-final { scan-assembler-times "stp\tx\[0-9\]+, x\[0-9\]+" 1 } } */
+/* { dg-final { scan-assembler-not "ins\t" } } */

[PATCH][AArch64] Add STP pattern to store a vec_concat of two 64-bit registers

Reply via email to