ping
> -----Original Message-----
> From: Tamar Christina
> Sent: Monday, October 31, 2022 11:35 AM
> To: Tamar Christina <tamar.christ...@arm.com>; gcc-patches@gcc.gnu.org
> Cc: Richard Earnshaw <richard.earns...@arm.com>; nd <n...@arm.com>; Richard Sandiford <richard.sandif...@arm.com>; Marcus Shawcroft <marcus.shawcr...@arm.com>
> Subject: RE: [PATCH 4/4] AArch64 sve2: rewrite pack + NARROWB + NARROWB to NARROWB + NARROWT
>
> Ping
>
> > -----Original Message-----
> > From: Gcc-patches <gcc-patches-bounces+tamar.christina=arm....@gcc.gnu.org> On Behalf Of Tamar Christina via Gcc-patches
> > Sent: Friday, September 23, 2022 10:34 AM
> > To: gcc-patches@gcc.gnu.org
> > Cc: Richard Earnshaw <richard.earns...@arm.com>; nd <n...@arm.com>; Richard Sandiford <richard.sandif...@arm.com>; Marcus Shawcroft <marcus.shawcr...@arm.com>
> > Subject: [PATCH 4/4] AArch64 sve2: rewrite pack + NARROWB + NARROWB to NARROWB + NARROWT
> >
> > Hi All,
> >
> > This adds an RTL pattern for the case where two NARROWB instructions are
> > combined with a PACK.  The second NARROWB is then rewritten into a
> > NARROWT.
> >
> > For the example:
> >
> > void draw_bitmap1(uint8_t* restrict pixel, uint8_t level, int n)
> > {
> >   for (int i = 0; i < (n & -16); i+=1)
> >     pixel[i] += (pixel[i] * level) / 0xff;
> > }
> >
> > we generate:
> >
> >         addhnb  z6.b, z0.h, z4.h
> >         addhnb  z5.b, z1.h, z4.h
> >         addhnb  z0.b, z0.h, z6.h
> >         addhnt  z0.b, z1.h, z5.h
> >         add     z0.b, z0.b, z2.b
> >
> > instead of:
> >
> >         addhnb  z6.b, z1.h, z4.h
> >         addhnb  z5.b, z0.h, z4.h
> >         addhnb  z1.b, z1.h, z6.h
> >         addhnb  z0.b, z0.h, z5.h
> >         uzp1    z0.b, z0.b, z1.b
> >         add     z0.b, z0.b, z2.b
> >
> > Bootstrapped and regtested on aarch64-none-linux-gnu with no issues.
> >
> > Ok for master?
> >
> > Thanks,
> > Tamar
> >
> > gcc/ChangeLog:
> >
> > 	* config/aarch64/aarch64-sve2.md (*aarch64_sve_pack_<sve_int_op><mode>):
> > 	New.
> > 	* config/aarch64/iterators.md (binary_top): New.
> >
> > gcc/testsuite/ChangeLog:
> >
> > 	* gcc.dg/vect/vect-div-bitmask-4.c: New test.
> > 	* gcc.target/aarch64/sve2/div-by-bitmask_2.c: New test.
> >
> > --- inline copy of patch --
> > diff --git a/gcc/config/aarch64/aarch64-sve2.md b/gcc/config/aarch64/aarch64-sve2.md
> > index ab5dcc369481311e5bd68a1581265e1ce99b4b0f..0ee46c8b0d43467da4a6b98ad3c41e5d05d8cf38 100644
> > --- a/gcc/config/aarch64/aarch64-sve2.md
> > +++ b/gcc/config/aarch64/aarch64-sve2.md
> > @@ -1600,6 +1600,25 @@ (define_insn "@aarch64_sve_<sve_int_op><mode>"
> >    "<sve_int_op>\t%0.<Ventype>, %2.<Vetype>, %3.<Vetype>"
> >  )
> >
> > +(define_insn_and_split "*aarch64_sve_pack_<sve_int_op><mode>"
> > +  [(set (match_operand:<VNARROW> 0 "register_operand" "=w")
> > +	(unspec:<VNARROW>
> > +	  [(match_operand:SVE_FULL_HSDI 1 "register_operand" "w")
> > +	   (subreg:SVE_FULL_HSDI (unspec:<VNARROW>
> > +	     [(match_operand:SVE_FULL_HSDI 2 "register_operand" "w")
> > +	      (match_operand:SVE_FULL_HSDI 3 "register_operand" "w")]
> > +	     SVE2_INT_BINARY_NARROWB) 0)]
> > +	  UNSPEC_PACK))]
> > +  "TARGET_SVE2"
> > +  "#"
> > +  "&& true"
> > +  [(const_int 0)]
> > +{
> > +  rtx tmp = lowpart_subreg (<VNARROW>mode, operands[1], <MODE>mode);
> > +  emit_insn (gen_aarch64_sve (<SVE2_INT_BINARY_NARROWB:binary_top>, <MODE>mode,
> > +			      operands[0], tmp, operands[2], operands[3]));
> > +})
> > +
> >  ;; -------------------------------------------------------------------------
> >  ;; ---- [INT] Narrowing right shifts
> >  ;; -------------------------------------------------------------------------
> > diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
> > index 0dd9dc66f7ccd78acacb759662d0cd561cd5b4ef..37d8161a33b1c399d80be82afa67613a087389d4 100644
> > --- a/gcc/config/aarch64/iterators.md
> > +++ b/gcc/config/aarch64/iterators.md
> > @@ -3589,6 +3589,11 @@ (define_int_attr brk_op [(UNSPEC_BRKA "a") (UNSPEC_BRKB "b")
> >
> >  (define_int_attr sve_pred_op [(UNSPEC_PFIRST "pfirst") (UNSPEC_PNEXT "pnext")])
> >
> > +(define_int_attr binary_top [(UNSPEC_ADDHNB "UNSPEC_ADDHNT")
> > +			     (UNSPEC_RADDHNB "UNSPEC_RADDHNT")
> > +			     (UNSPEC_RSUBHNB "UNSPEC_RSUBHNT")
> > +			     (UNSPEC_SUBHNB "UNSPEC_SUBHNT")])
> > +
> >  (define_int_attr sve_int_op [(UNSPEC_ADCLB "adclb")
> >  			     (UNSPEC_ADCLT "adclt")
> >  			     (UNSPEC_ADDHNB "addhnb")
> > diff --git a/gcc/testsuite/gcc.dg/vect/vect-div-bitmask-4.c b/gcc/testsuite/gcc.dg/vect/vect-div-bitmask-4.c
> > new file mode 100644
> > index 0000000000000000000000000000000000000000..0df08bda6fd3e33280307ea15c82dd9726897cfd
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.dg/vect/vect-div-bitmask-4.c
> > @@ -0,0 +1,26 @@
> > +/* { dg-require-effective-target vect_int } */
> > +/* { dg-additional-options "-fno-vect-cost-model" { target aarch64*-*-* } } */
> > +
> > +#include <stdint.h>
> > +#include "tree-vect.h"
> > +
> > +#define N 50
> > +#define TYPE uint32_t
> > +
> > +__attribute__((noipa, noinline, optimize("O1")))
> > +void fun1(TYPE* restrict pixel, TYPE level, int n)
> > +{
> > +  for (int i = 0; i < n; i+=1)
> > +    pixel[i] += (pixel[i] * (uint64_t)level) / 0xffffffffUL;
> > +}
> > +
> > +__attribute__((noipa, noinline, optimize("O3")))
> > +void fun2(TYPE* restrict pixel, TYPE level, int n)
> > +{
> > +  for (int i = 0; i < n; i+=1)
> > +    pixel[i] += (pixel[i] * (uint64_t)level) / 0xffffffffUL;
> > +}
> > +
> > +#include "vect-div-bitmask.h"
> > +
> > +/* { dg-final { scan-tree-dump-not "vect_recog_divmod_pattern: detected" "vect" { target aarch64*-*-* } } } */
> > diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/div-by-bitmask_2.c b/gcc/testsuite/gcc.target/aarch64/sve2/div-by-bitmask_2.c
> > new file mode 100644
> > index 0000000000000000000000000000000000000000..cddcebdf15ecaa9dc515f58cdbced36c8038db1b
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.target/aarch64/sve2/div-by-bitmask_2.c
> > @@ -0,0 +1,56 @@
> > +/* { dg-do compile } */
> > +/* { dg-additional-options "-O2 -std=c99" } */
> > +/* { dg-final { check-function-bodies "**" "" "" { target { le } } } } */
> > +
> > +#include <stdint.h>
> > +
> > +/*
> > +** draw_bitmap1:
> > +** ...
> > +**	addhnb	z6.b, z0.h, z4.h
> > +**	addhnb	z5.b, z1.h, z4.h
> > +**	addhnb	z0.b, z0.h, z6.h
> > +**	addhnt	z0.b, z1.h, z5.h
> > +** ...
> > +*/
> > +void draw_bitmap1(uint8_t* restrict pixel, uint8_t level, int n)
> > +{
> > +  for (int i = 0; i < (n & -16); i+=1)
> > +    pixel[i] += (pixel[i] * level) / 0xff;
> > +}
> > +
> > +void draw_bitmap2(uint8_t* restrict pixel, uint8_t level, int n)
> > +{
> > +  for (int i = 0; i < (n & -16); i+=1)
> > +    pixel[i] += (pixel[i] * level) / 0xfe;
> > +}
> > +
> > +/*
> > +** draw_bitmap3:
> > +** ...
> > +**	addhnb	z6.h, z0.s, z4.s
> > +**	addhnb	z5.h, z1.s, z4.s
> > +**	addhnb	z0.h, z0.s, z6.s
> > +**	addhnt	z0.h, z1.s, z5.s
> > +** ...
> > +*/
> > +void draw_bitmap3(uint16_t* restrict pixel, uint16_t level, int n)
> > +{
> > +  for (int i = 0; i < (n & -16); i+=1)
> > +    pixel[i] += (pixel[i] * level) / 0xffffU;
> > +}
> > +
> > +/*
> > +** draw_bitmap4:
> > +** ...
> > +**	addhnb	z6.s, z0.d, z4.d
> > +**	addhnb	z5.s, z1.d, z4.d
> > +**	addhnb	z0.s, z0.d, z6.d
> > +**	addhnt	z0.s, z1.d, z5.d
> > +** ...
> > +*/
> > +void draw_bitmap4(uint32_t* restrict pixel, uint32_t level, int n)
> > +{
> > +  for (int i = 0; i < (n & -16); i+=1)
> > +    pixel[i] += (pixel[i] * (uint64_t)level) / 0xffffffffUL;
> > +}
> >
> >
> >
> > --
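For readers who don't have the SVE2 narrowing instructions paged in, here is a rough scalar model of the lane placement the quoted patch relies on. It is an illustration only, not part of the patch, and the helper names (addhn_u16, addhnb_u16, addhnt_u16) are made up: ADDHNB takes the most significant half of each widened sum and writes it to the even-numbered ("bottom") narrow lanes while zeroing the odd lanes, whereas ADDHNT writes it to the odd-numbered ("top") lanes and leaves the even lanes untouched.

#include <stdint.h>
#include <stddef.h>

/* High half of the wrapped 16-bit sum, narrowed to 8 bits.  */
static uint8_t addhn_u16 (uint16_t a, uint16_t b)
{
  return (uint8_t) ((uint16_t) (a + b) >> 8);
}

/* Scalar model of ADDHNB (.b <- .h, .h): results go to the even byte
   lanes, the odd byte lanes are zeroed.  */
static void addhnb_u16 (uint8_t *dst, const uint16_t *a, const uint16_t *b,
                        size_t lanes)
{
  for (size_t i = 0; i < lanes; i++)
    {
      dst[2 * i] = addhn_u16 (a[i], b[i]);
      dst[2 * i + 1] = 0;
    }
}

/* Scalar model of ADDHNT (.b <- .h, .h): results go to the odd byte
   lanes, the even byte lanes keep their previous contents.  */
static void addhnt_u16 (uint8_t *dst, const uint16_t *a, const uint16_t *b,
                        size_t lanes)
{
  for (size_t i = 0; i < lanes; i++)
    dst[2 * i + 1] = addhn_u16 (a[i], b[i]);
}

With that picture, the original code in the example produced two bottom-only results and needed the extra UZP1 to combine them into one vector, while in the rewritten sequence the NARROWT fills the remaining lanes of the NARROWB result directly, which is why the UZP1 drops out of the generated code above.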