Hi James, I've managed to remove the odd redundant git diff change.
Regarding aarch64_<sur>sra_n<mode>, this patch shouldn't affect it. I am also not aware of any way of enabling this combine inside the pattern used for those intrinsics, so I kept them separate. Cheers, Syl -----Original Message----- From: James Greenhalgh <james.greenha...@arm.com> Sent: 03 June 2019 11:20 To: Sylvia Taylor <sylvia.tay...@arm.com> Cc: Richard Earnshaw <richard.earns...@arm.com>; Marcus Shawcroft <marcus.shawcr...@arm.com>; gcc-patches@gcc.gnu.org; nd <n...@arm.com> Subject: Re: [patch][aarch64]: add usra and ssra combine patterns On Thu, May 30, 2019 at 03:25:19PM +0100, Sylvia Taylor wrote: > Greetings, > > This patch adds support to combine: > > 1) ushr and add into usra, example: > > ushr v0.16b, v0.16b, 2 > add v0.16b, v0.16b, v2.16b > --- > usra v2.16b, v0.16b, 2 > > 2) sshr and add into ssra, example: > > sshr v1.16b, v1.16b, 2 > add v1.16b, v1.16b, v3.16b > --- > ssra v3.16b, v1.16b, 2 > > Bootstrapped and tested on aarch64-none-linux-gnu. > > Ok for trunk? If yes, I don't have any commit rights, so can someone > please commit it on my behalf. This patch has an unrelated change to aarch64_get_lane_zero_extend<GPI:mode><VDQQH:mode> Please revert that and resend. What changes (if any) should we make to aarch64_<sur>sra_n<mode> based on this patch, and to the vsra_n intrinsics in arm_neon.h ? Thanks, James > > Cheers, > Syl > > gcc/ChangeLog: > > 2019-05-30 Sylvia Taylor <sylvia.tay...@arm.com> > > * config/aarch64/aarch64-simd.md > (*aarch64_simd_sra<mode>): New. > * config/aarch64/iterators.md > (SHIFTRT): New iterator. > (sra_op): New attribute. > > gcc/testsuite/ChangeLog: > > 2019-05-30 Sylvia Taylor <sylvia.tay...@arm.com> > > * gcc.target/aarch64/simd/ssra.c: New test. > * gcc.target/aarch64/simd/usra.c: New test. 
> diff --git a/gcc/config/aarch64/aarch64-simd.md > b/gcc/config/aarch64/aarch64-simd.md > index > e3852c5d182b70978d7603225fce55c0b8ee2894..502ac5f3b45a1da059bb07701150 > a531091378ed 100644 > --- a/gcc/config/aarch64/aarch64-simd.md > +++ b/gcc/config/aarch64/aarch64-simd.md > @@ -3110,22 +3122,22 @@ > operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2])); > return "smov\\t%<GPI:w>0, %1.<VDQQH:Vetype>[%2]"; > } > - [(set_attr "type" "neon_to_gp<q>")] > -) > - > -(define_insn "*aarch64_get_lane_zero_extend<GPI:mode><VDQQH:mode>" > - [(set (match_operand:GPI 0 "register_operand" "=r") > - (zero_extend:GPI > - (vec_select:<VEL> > - (match_operand:VDQQH 1 "register_operand" "w") > - (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))] > - "TARGET_SIMD" > - { > - operands[2] = aarch64_endian_lane_rtx (<VDQQH:MODE>mode, > - INTVAL (operands[2])); > - return "umov\\t%w0, %1.<Vetype>[%2]"; > - } > - [(set_attr "type" "neon_to_gp<q>")] > + [(set_attr "type" "neon_to_gp<q>")] > +) > + > +(define_insn "*aarch64_get_lane_zero_extend<GPI:mode><VDQQH:mode>" > + [(set (match_operand:GPI 0 "register_operand" "=r") > + (zero_extend:GPI > + (vec_select:<VEL> > + (match_operand:VDQQH 1 "register_operand" "w") > + (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))] > + "TARGET_SIMD" > + { > + operands[2] = aarch64_endian_lane_rtx (<VDQQH:MODE>mode, > + INTVAL (operands[2])); > + return "umov\\t%w0, %1.<Vetype>[%2]"; > + } > + [(set_attr "type" "neon_to_gp<q>")] > ) > > ;; Lane extraction of a value, neither sign nor zero extension These changes should be dropped.
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md index eeed08e71ca0b96726cb28743ef38487a8287600..aba6af24eee1c29fe4524eb352747c94617b30c7 100644 --- a/gcc/config/aarch64/aarch64-simd.md +++ b/gcc/config/aarch64/aarch64-simd.md @@ -986,6 +986,18 @@ [(set_attr "type" "neon_shift_imm<q>")] ) +(define_insn "*aarch64_simd_sra<mode>" + [(set (match_operand:VDQ_I 0 "register_operand" "=w") + (plus:VDQ_I + (SHIFTRT:VDQ_I + (match_operand:VDQ_I 1 "register_operand" "w") + (match_operand:VDQ_I 2 "aarch64_simd_rshift_imm" "Dr")) + (match_operand:VDQ_I 3 "register_operand" "0")))] + "TARGET_SIMD" + "<sra_op>sra\t%0.<Vtype>, %1.<Vtype>, %2" + [(set_attr "type" "neon_shift_acc<q>")] +) + (define_insn "aarch64_simd_imm_shl<mode>" [(set (match_operand:VDQ_I 0 "register_operand" "=w") (ashift:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w") diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md index d0070b1a73218822976acb846638ee385d8a4f2c..9bc84c28bba1a6591fab2314753d5d43845b6e54 100644 --- a/gcc/config/aarch64/iterators.md +++ b/gcc/config/aarch64/iterators.md @@ -1168,6 +1168,8 @@ ;; This code iterator allows the shifts supported in arithmetic instructions (define_code_iterator ASHIFT [ashift ashiftrt lshiftrt]) +(define_code_iterator SHIFTRT [ashiftrt lshiftrt]) + ;; Code iterator for logical operations (define_code_iterator LOGICAL [and ior xor]) @@ -1350,6 +1352,9 @@ (define_code_attr shift [(ashift "lsl") (ashiftrt "asr") (lshiftrt "lsr") (rotatert "ror")]) +;; Op prefix for shift right and accumulate. 
+(define_code_attr sra_op [(ashiftrt "s") (lshiftrt "u")]) + ;; Map shift operators onto underlying bit-field instructions (define_code_attr bfshift [(ashift "ubfiz") (ashiftrt "sbfx") (lshiftrt "ubfx") (rotatert "extr")]) diff --git a/gcc/testsuite/gcc.target/aarch64/simd/ssra.c b/gcc/testsuite/gcc.target/aarch64/simd/ssra.c new file mode 100644 index 0000000000000000000000000000000000000000..5da82c247a4620d6ba4bbaea0f06422f1caf3385 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/simd/ssra.c @@ -0,0 +1,35 @@ +/* { dg-do compile { target aarch64*-*-* } } */ +/* { dg-options "-O3" } */ +/* { dg-skip-if "" { *-*-* } {"*sve*"} {""} } */ + +#include <stdint.h> + +#define SSRA(func, vtype, n) \ + void func () \ + { \ + int i; \ + for (i = 0; i < n; i++) \ + { \ + s1##vtype[i] += s2##vtype[i] >> 2; \ + } \ + } + +#define TEST_VDQ_I_MODES(FUNC) \ + FUNC (test_v8qi_v16qi, _char, 16) \ + FUNC (test_v4hi_v8hi, _short, 8) \ + FUNC (test_v2si_v4si, _int, 4) \ + FUNC (test_v2di, _ll, 2) \ + +int8_t s1_char[16], s2_char[16]; +int16_t s1_short[8], s2_short[8]; +int32_t s1_int[4], s2_int[4]; +int64_t s1_ll[2], s2_ll[2]; + +TEST_VDQ_I_MODES(SSRA) + +/* { dg-final { scan-assembler "ssra" } } */ +/* { dg-final { scan-assembler-not "sshr" } } */ + +/* { dg-final { scan-assembler-times {ssra\tv[0-9]+\.16b, v[0-9]+\.16b, [0-9]+} 1 } } */ +/* { dg-final { scan-assembler-times {ssra\tv[0-9]+\.8h, v[0-9]+\.8h, [0-9]+} 1 } } */ +/* { dg-final { scan-assembler-times {ssra\tv[0-9]+\.4s, v[0-9]+\.4s, [0-9]+} 1 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/simd/usra.c b/gcc/testsuite/gcc.target/aarch64/simd/usra.c new file mode 100644 index 0000000000000000000000000000000000000000..0400ef28070b376e2b5af8a9cfe75a0b65541c9c --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/simd/usra.c @@ -0,0 +1,35 @@ +/* { dg-do compile { target aarch64*-*-* } } */ +/* { dg-options "-O3" } */ +/* { dg-skip-if "" { *-*-* } {"*sve*"} {""} } */ + +#include <stdint.h> + +#define USRA(func, vtype, n) \ 
+ void func () \ + { \ + int i; \ + for (i = 0; i < n; i++) \ + { \ + u1##vtype[i] += u2##vtype[i] >> 2; \ + } \ + } + +#define TEST_VDQ_I_MODES(FUNC) \ + FUNC (test_v8qi_v16qi, _char, 16) \ + FUNC (test_v4hi_v8hi, _short, 8) \ + FUNC (test_v2si_v4si, _int, 4) \ + FUNC (test_v2di, _ll, 2) \ + +uint8_t u1_char[16], u2_char[16]; +uint16_t u1_short[8], u2_short[8]; +uint32_t u1_int[4], u2_int[4]; +uint64_t u1_ll[2], u2_ll[2]; + +TEST_VDQ_I_MODES(USRA) + +/* { dg-final { scan-assembler "usra" } } */ +/* { dg-final { scan-assembler-not "ushr" } } */ + +/* { dg-final { scan-assembler-times {usra\tv[0-9]+\.16b, v[0-9]+\.16b, [0-9]+} 1 } } */ +/* { dg-final { scan-assembler-times {usra\tv[0-9]+\.8h, v[0-9]+\.8h, [0-9]+} 1 } } */ +/* { dg-final { scan-assembler-times {usra\tv[0-9]+\.4s, v[0-9]+\.4s, [0-9]+} 1 } }