Greetings,

This patch adds support for combining:

1) ushr and add into usra, example:

ushr    v0.16b, v0.16b, 2
add     v0.16b, v0.16b, v2.16b
---
usra    v2.16b, v0.16b, 2

2) sshr and add into ssra, example:

sshr    v1.16b, v1.16b, 2
add     v1.16b, v1.16b, v3.16b
---
ssra    v3.16b, v1.16b, 2
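
Both cases correspond to C loops of the shape below. A minimal
sketch (function and array names are illustrative, not from the
patch; the tests added here use similar loops). At -O3 on aarch64
these vectorize, and with this patch the shift and accumulate
should combine into usra/ssra:

#include <stdint.h>

uint8_t u1[16], u2[16];
int8_t s1[16], s2[16];

void
acc_unsigned (void)
{
  /* Unsigned shift right + accumulate: ushr + add -> usra.  */
  for (int i = 0; i < 16; i++)
    u1[i] += u2[i] >> 2;
}

void
acc_signed (void)
{
  /* Signed shift right + accumulate: sshr + add -> ssra.  */
  for (int i = 0; i < 16; i++)
    s1[i] += s2[i] >> 2;
}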

Bootstrapped and tested on aarch64-none-linux-gnu.

Ok for trunk? If so, could someone please commit it on my behalf,
as I don't have commit rights.

Cheers,
Syl

gcc/ChangeLog:

2019-05-30  Sylvia Taylor  <sylvia.tay...@arm.com>

        * config/aarch64/aarch64-simd.md
        (*aarch64_simd_sra<mode>): New.
        * config/aarch64/iterators.md
        (SHIFTRT): New iterator.
        (sra_op): New attribute.

gcc/testsuite/ChangeLog:

2019-05-30  Sylvia Taylor  <sylvia.tay...@arm.com>

        * gcc.target/aarch64/simd/ssra.c: New test.
        * gcc.target/aarch64/simd/usra.c: New test.
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index e3852c5d182b70978d7603225fce55c0b8ee2894..502ac5f3b45a1da059bb07701150a531091378ed 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -953,6 +953,18 @@
   [(set_attr "type" "neon_shift_imm<q>")]
 )
 
+(define_insn "*aarch64_simd_sra<mode>"
+ [(set (match_operand:VDQ_I 0 "register_operand" "=w")
+       (plus:VDQ_I
+          (SHIFTRT:VDQ_I
+               (match_operand:VDQ_I 1 "register_operand" "w")
+               (match_operand:VDQ_I 2 "aarch64_simd_rshift_imm" "Dr"))
+          (match_operand:VDQ_I 3 "register_operand" "0")))]
+  "TARGET_SIMD"
+  "<sra_op>sra\t%0.<Vtype>, %1.<Vtype>, %2"
+  [(set_attr "type" "neon_shift_acc<q>")]
+)
+
 (define_insn "aarch64_simd_imm_shl<mode>"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
        (ashift:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
@@ -3110,22 +3122,22 @@
     operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
     return "smov\\t%<GPI:w>0, %1.<VDQQH:Vetype>[%2]";
   }
-  [(set_attr "type" "neon_to_gp<q>")]
-)
-
-(define_insn "*aarch64_get_lane_zero_extend<GPI:mode><VDQQH:mode>"
-  [(set (match_operand:GPI 0 "register_operand" "=r")
-       (zero_extend:GPI
-         (vec_select:<VEL>
-           (match_operand:VDQQH 1 "register_operand" "w")
-           (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
-  "TARGET_SIMD"
-  {
-    operands[2] = aarch64_endian_lane_rtx (<VDQQH:MODE>mode,
-                                          INTVAL (operands[2]));
-    return "umov\\t%w0, %1.<Vetype>[%2]";
-  }
-  [(set_attr "type" "neon_to_gp<q>")]
+  [(set_attr "type" "neon_to_gp<q>")]
+)
+
+(define_insn "*aarch64_get_lane_zero_extend<GPI:mode><VDQQH:mode>"
+  [(set (match_operand:GPI 0 "register_operand" "=r")
+       (zero_extend:GPI
+         (vec_select:<VEL>
+           (match_operand:VDQQH 1 "register_operand" "w")
+           (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
+  "TARGET_SIMD"
+  {
+    operands[2] = aarch64_endian_lane_rtx (<VDQQH:MODE>mode,
+                                          INTVAL (operands[2]));
+    return "umov\\t%w0, %1.<Vetype>[%2]";
+  }
+  [(set_attr "type" "neon_to_gp<q>")]
 )
 
 ;; Lane extraction of a value, neither sign nor zero extension
diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
index 6caeeac80867edda29b5438efdcee475ed609ff6..6273b7be5932aef695d12e9f723a43cb6c50abe8 100644
--- a/gcc/config/aarch64/iterators.md
+++ b/gcc/config/aarch64/iterators.md
@@ -1160,6 +1160,8 @@
 ;; This code iterator allows the shifts supported in arithmetic instructions
 (define_code_iterator ASHIFT [ashift ashiftrt lshiftrt])
 
+(define_code_iterator SHIFTRT [ashiftrt lshiftrt])
+
 ;; Code iterator for logical operations
 (define_code_iterator LOGICAL [and ior xor])
 
@@ -1342,6 +1344,9 @@
 (define_code_attr shift [(ashift "lsl") (ashiftrt "asr")
                         (lshiftrt "lsr") (rotatert "ror")])
 
+;; Op prefix for shift right and accumulate.
+(define_code_attr sra_op [(ashiftrt "s") (lshiftrt "u")])
+
 ;; Map shift operators onto underlying bit-field instructions
 (define_code_attr bfshift [(ashift "ubfiz") (ashiftrt "sbfx")
                           (lshiftrt "ubfx") (rotatert "extr")])
diff --git a/gcc/testsuite/gcc.target/aarch64/simd/ssra.c b/gcc/testsuite/gcc.target/aarch64/simd/ssra.c
new file mode 100644
index 0000000000000000000000000000000000000000..e9c2e04c0b88ac18be81f4ee8a872e6829af9db2
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/simd/ssra.c
@@ -0,0 +1,36 @@
+/* { dg-do compile { target aarch64*-*-* } } */
+/* { dg-options "-O3" } */
+/* { dg-skip-if "" { *-*-* } {"*sve*"} {""} } */
+
+#include <stdint.h>
+
+#define SSRA(func, vtype, n)                           \
+       void func ()                                    \
+       {                                               \
+           int i;                                      \
+           for (i = 0; i < n; i++)                     \
+           {                                           \
+               s1##vtype[i] += s2##vtype[i] >> 2;      \
+           }                                           \
+       }
+
+#define TEST_VDQ_I_MODES(FUNC)                         \
+       FUNC (test_v8qi_v16qi, _char, 16)               \
+       FUNC (test_v4hi_v8hi, _short, 8)                \
+       FUNC (test_v2si_v4si, _int, 4)                  \
+       FUNC (test_v2di, _ll, 2)                        \
+
+int8_t s1_char[16], s2_char[16];
+int16_t s1_short[8], s2_short[8];
+int32_t s1_int[4], s2_int[4];
+int64_t s1_ll[2], s2_ll[2];
+
+TEST_VDQ_I_MODES(SSRA)
+
+/* { dg-final { scan-assembler "ssra" } } */
+/* { dg-final { scan-assembler-not "sshr" } } */
+
+/* { dg-final { scan-assembler-times {ssra\tv[0-9]+\.16b, v[0-9]+\.16b, [0-9]+} 1 } } */
+/* { dg-final { scan-assembler-times {ssra\tv[0-9]+\.8h, v[0-9]+\.8h, [0-9]+} 1 } } */
+/* { dg-final { scan-assembler-times {ssra\tv[0-9]+\.4s, v[0-9]+\.4s, [0-9]+} 1 } } */
+/* { dg-final { scan-assembler-times {ssra\tv[0-9]+\.2d, v[0-9]+\.2d, [0-9]+} 1 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/simd/usra.c b/gcc/testsuite/gcc.target/aarch64/simd/usra.c
new file mode 100644
index 0000000000000000000000000000000000000000..4e7446dfa4cbf48bd7154ab5fdb7ff114a2466fd
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/simd/usra.c
@@ -0,0 +1,36 @@
+/* { dg-do compile { target aarch64*-*-* } } */
+/* { dg-options "-O3" } */
+/* { dg-skip-if "" { *-*-* } {"*sve*"} {""} } */
+
+#include <stdint.h>
+
+#define USRA(func, vtype, n)                           \
+       void func ()                                    \
+       {                                               \
+           int i;                                      \
+           for (i = 0; i < n; i++)                     \
+           {                                           \
+               u1##vtype[i] += u2##vtype[i] >> 2;      \
+           }                                           \
+       }
+
+#define TEST_VDQ_I_MODES(FUNC)                         \
+       FUNC (test_v8qi_v16qi, _char, 16)               \
+       FUNC (test_v4hi_v8hi, _short, 8)                \
+       FUNC (test_v2si_v4si, _int, 4)                  \
+       FUNC (test_v2di, _ll, 2)                        \
+
+uint8_t u1_char[16], u2_char[16];
+uint16_t u1_short[8], u2_short[8];
+uint32_t u1_int[4], u2_int[4];
+uint64_t u1_ll[2], u2_ll[2];
+
+TEST_VDQ_I_MODES(USRA)
+
+/* { dg-final { scan-assembler "usra" } } */
+/* { dg-final { scan-assembler-not "ushr" } } */
+
+/* { dg-final { scan-assembler-times {usra\tv[0-9]+\.16b, v[0-9]+\.16b, [0-9]+} 1 } } */
+/* { dg-final { scan-assembler-times {usra\tv[0-9]+\.8h, v[0-9]+\.8h, [0-9]+} 1 } } */
+/* { dg-final { scan-assembler-times {usra\tv[0-9]+\.4s, v[0-9]+\.4s, [0-9]+} 1 } } */
+/* { dg-final { scan-assembler-times {usra\tv[0-9]+\.2d, v[0-9]+\.2d, [0-9]+} 1 } } */
