Hi James, I've managed to remove the odd redundant git diff change.
Regarding aarch64_<sur>sra_n<mode>, this patch shouldn't affect it. I am also not aware of any way of enabling this combine inside the pattern used for those intrinsics, so I kept them separate. Cheers, Syl -----Original Message----- From: James Greenhalgh <james.greenha...@arm.com> Sent: 03 June 2019 11:20 To: Sylvia Taylor <sylvia.tay...@arm.com> Cc: Richard Earnshaw <richard.earns...@arm.com>; Marcus Shawcroft <marcus.shawcr...@arm.com>; gcc-patches@gcc.gnu.org; nd <n...@arm.com> Subject: Re: [patch][aarch64]: add usra and ssra combine patterns On Thu, May 30, 2019 at 03:25:19PM +0100, Sylvia Taylor wrote: > Greetings, > > This patch adds support to combine: > > 1) ushr and add into usra, example: > > ushr v0.16b, v0.16b, 2 > add v0.16b, v0.16b, v2.16b > --- > usra v2.16b, v0.16b, 2 > > 2) sshr and add into ssra, example: > > sshr v1.16b, v1.16b, 2 > add v1.16b, v1.16b, v3.16b > --- > ssra v3.16b, v1.16b, 2 > > Bootstrapped and tested on aarch64-none-linux-gnu. > > Ok for trunk? If yes, I don't have any commit rights, so can someone > please commit it on my behalf. This patch has an unrelated change to aarch64_get_lane_zero_extend<GPI:mode><VDQQH:mode> Please revert that and resend. What changes (if any) should we make to aarch64_<sur>sra_n<mode> based on this patch, and to the vsra_n intrinsics in arm_neon.h ? Thanks, James > > Cheers, > Syl > > gcc/ChangeLog: > > 2019-05-30 Sylvia Taylor <sylvia.tay...@arm.com> > > * config/aarch64/aarch64-simd.md > (*aarch64_simd_sra<mode>): New. > * config/aarch64/iterators.md > (SHIFTRT): New iterator. > (sra_op): New attribute. > > gcc/testsuite/ChangeLog: > > 2019-05-30 Sylvia Taylor <sylvia.tay...@arm.com> > > * gcc.target/aarch64/simd/ssra.c: New test. > * gcc.target/aarch64/simd/usra.c: New test. 
> diff --git a/gcc/config/aarch64/aarch64-simd.md > b/gcc/config/aarch64/aarch64-simd.md > index > e3852c5d182b70978d7603225fce55c0b8ee2894..502ac5f3b45a1da059bb07701150 > a531091378ed 100644 > --- a/gcc/config/aarch64/aarch64-simd.md > +++ b/gcc/config/aarch64/aarch64-simd.md > @@ -3110,22 +3122,22 @@ > operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2])); > return "smov\\t%<GPI:w>0, %1.<VDQQH:Vetype>[%2]"; > } > - [(set_attr "type" "neon_to_gp<q>")] > -) > - > -(define_insn "*aarch64_get_lane_zero_extend<GPI:mode><VDQQH:mode>" > - [(set (match_operand:GPI 0 "register_operand" "=r") > - (zero_extend:GPI > - (vec_select:<VEL> > - (match_operand:VDQQH 1 "register_operand" "w") > - (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))] > - "TARGET_SIMD" > - { > - operands[2] = aarch64_endian_lane_rtx (<VDQQH:MODE>mode, > - INTVAL (operands[2])); > - return "umov\\t%w0, %1.<Vetype>[%2]"; > - } > - [(set_attr "type" "neon_to_gp<q>")] > + [(set_attr "type" "neon_to_gp<q>")] > +) > + > +(define_insn "*aarch64_get_lane_zero_extend<GPI:mode><VDQQH:mode>" > + [(set (match_operand:GPI 0 "register_operand" "=r") > + (zero_extend:GPI > + (vec_select:<VEL> > + (match_operand:VDQQH 1 "register_operand" "w") > + (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))] > + "TARGET_SIMD" > + { > + operands[2] = aarch64_endian_lane_rtx (<VDQQH:MODE>mode, > + INTVAL (operands[2])); > + return "umov\\t%w0, %1.<Vetype>[%2]"; > + } > + [(set_attr "type" "neon_to_gp<q>")] > ) > > ;; Lane extraction of a value, neither sign nor zero extension These changes should be dropped.
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md index eeed08e71ca0b96726cb28743ef38487a8287600..aba6af24eee1c29fe4524eb352747c94617b30c7 100644 --- a/gcc/config/aarch64/aarch64-simd.md +++ b/gcc/config/aarch64/aarch64-simd.md @@ -986,6 +986,18 @@ [(set_attr "type" "neon_shift_imm<q>")] ) +(define_insn "*aarch64_simd_sra<mode>" + [(set (match_operand:VDQ_I 0 "register_operand" "=w") + (plus:VDQ_I + (SHIFTRT:VDQ_I + (match_operand:VDQ_I 1 "register_operand" "w") + (match_operand:VDQ_I 2 "aarch64_simd_rshift_imm" "Dr")) + (match_operand:VDQ_I 3 "register_operand" "0")))] + "TARGET_SIMD" + "<sra_op>sra\t%0.<Vtype>, %1.<Vtype>, %2" + [(set_attr "type" "neon_shift_acc<q>")] +) + (define_insn "aarch64_simd_imm_shl<mode>" [(set (match_operand:VDQ_I 0 "register_operand" "=w") (ashift:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w") diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md index d0070b1a73218822976acb846638ee385d8a4f2c..9bc84c28bba1a6591fab2314753d5d43845b6e54 100644 --- a/gcc/config/aarch64/iterators.md +++ b/gcc/config/aarch64/iterators.md @@ -1168,6 +1168,8 @@ ;; This code iterator allows the shifts supported in arithmetic instructions (define_code_iterator ASHIFT [ashift ashiftrt lshiftrt]) +(define_code_iterator SHIFTRT [ashiftrt lshiftrt]) + ;; Code iterator for logical operations (define_code_iterator LOGICAL [and ior xor]) @@ -1350,6 +1352,9 @@ (define_code_attr shift [(ashift "lsl") (ashiftrt "asr") (lshiftrt "lsr") (rotatert "ror")]) +;; Op prefix for shift right and accumulate. 
+(define_code_attr sra_op [(ashiftrt "s") (lshiftrt "u")]) + ;; Map shift operators onto underlying bit-field instructions (define_code_attr bfshift [(ashift "ubfiz") (ashiftrt "sbfx") (lshiftrt "ubfx") (rotatert "extr")]) diff --git a/gcc/testsuite/gcc.target/aarch64/simd/ssra.c b/gcc/testsuite/gcc.target/aarch64/simd/ssra.c new file mode 100644 index 0000000000000000000000000000000000000000..5da82c247a4620d6ba4bbaea0f06422f1caf3385 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/simd/ssra.c @@ -0,0 +1,35 @@ +/* { dg-do compile { target aarch64*-*-* } } */ +/* { dg-options "-O3" } */ +/* { dg-skip-if "" { *-*-* } {"*sve*"} {""} } */ + +#include <stdint.h> + +#define SSRA(func, vtype, n) \ + void func () \ + { \ + int i; \ + for (i = 0; i < n; i++) \ + { \ + s1##vtype[i] += s2##vtype[i] >> 2; \ + } \ + } + +#define TEST_VDQ_I_MODES(FUNC) \ + FUNC (test_v8qi_v16qi, _char, 16) \ + FUNC (test_v4hi_v8hi, _short, 8) \ + FUNC (test_v2si_v4si, _int, 4) \ + FUNC (test_v2di, _ll, 2) \ + +int8_t s1_char[16], s2_char[16]; +int16_t s1_short[8], s2_short[8]; +int32_t s1_int[4], s2_int[4]; +int64_t s1_ll[2], s2_ll[2]; + +TEST_VDQ_I_MODES(SSRA) + +/* { dg-final { scan-assembler "ssra" } } */ +/* { dg-final { scan-assembler-not "sshr" } } */ + +/* { dg-final { scan-assembler-times {ssra\tv[0-9]+\.16b, v[0-9]+\.16b, [0-9]+} 1 } } */ +/* { dg-final { scan-assembler-times {ssra\tv[0-9]+\.8h, v[0-9]+\.8h, [0-9]+} 1 } } */ +/* { dg-final { scan-assembler-times {ssra\tv[0-9]+\.4s, v[0-9]+\.4s, [0-9]+} 1 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/simd/usra.c b/gcc/testsuite/gcc.target/aarch64/simd/usra.c new file mode 100644 index 0000000000000000000000000000000000000000..0400ef28070b376e2b5af8a9cfe75a0b65541c9c --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/simd/usra.c @@ -0,0 +1,35 @@ +/* { dg-do compile { target aarch64*-*-* } } */ +/* { dg-options "-O3" } */ +/* { dg-skip-if "" { *-*-* } {"*sve*"} {""} } */ + +#include <stdint.h> + +#define USRA(func, vtype, n) \ 
+ void func () \ + { \ + int i; \ + for (i = 0; i < n; i++) \ + { \ + u1##vtype[i] += u2##vtype[i] >> 2; \ + } \ + } + +#define TEST_VDQ_I_MODES(FUNC) \ + FUNC (test_v8qi_v16qi, _char, 16) \ + FUNC (test_v4hi_v8hi, _short, 8) \ + FUNC (test_v2si_v4si, _int, 4) \ + FUNC (test_v2di, _ll, 2) \ + +uint8_t u1_char[16], u2_char[16]; +uint16_t u1_short[8], u2_short[8]; +uint32_t u1_int[4], u2_int[4]; +uint64_t u1_ll[2], u2_ll[2]; + +TEST_VDQ_I_MODES(USRA) + +/* { dg-final { scan-assembler "usra" } } */ +/* { dg-final { scan-assembler-not "ushr" } } */ + +/* { dg-final { scan-assembler-times {usra\tv[0-9]+\.16b, v[0-9]+\.16b, [0-9]+} 1 } } */ +/* { dg-final { scan-assembler-times {usra\tv[0-9]+\.8h, v[0-9]+\.8h, [0-9]+} 1 } } */ +/* { dg-final { scan-assembler-times {usra\tv[0-9]+\.4s, v[0-9]+\.4s, [0-9]+} 1 } }