Ping

> -----Original Message-----
> From: Gcc-patches <gcc-patches-
> bounces+tamar.christina=arm....@gcc.gnu.org> On Behalf Of Tamar
> Christina via Gcc-patches
> Sent: Friday, September 23, 2022 10:34 AM
> To: gcc-patches@gcc.gnu.org
> Cc: Richard Earnshaw <richard.earns...@arm.com>; nd <n...@arm.com>;
> Richard Sandiford <richard.sandif...@arm.com>; Marcus Shawcroft
> <marcus.shawcr...@arm.com>
> Subject: [PATCH 4/4]AArch64 sve2: rewrite pack + NARROWB + NARROWB to
> NARROWB + NARROWT
> 
> Hi All,
> 
> This adds an RTL pattern for when two NARROWB instructions are being
> combined with a PACK.  The second NARROWB is then transformed into a
> NARROWT.
> 
> For the example:
> 
> void draw_bitmap1(uint8_t* restrict pixel, uint8_t level, int n) {
>   for (int i = 0; i < (n & -16); i+=1)
>     pixel[i] += (pixel[i] * level) / 0xff; }
> 
> we generate:
> 
>         addhnb  z6.b, z0.h, z4.h
>         addhnb  z5.b, z1.h, z4.h
>         addhnb  z0.b, z0.h, z6.h
>         addhnt  z0.b, z1.h, z5.h
>         add     z0.b, z0.b, z2.b
> 
> instead of:
> 
>         addhnb  z6.b, z1.h, z4.h
>         addhnb  z5.b, z0.h, z4.h
>         addhnb  z1.b, z1.h, z6.h
>         addhnb  z0.b, z0.h, z5.h
>         uzp1    z0.b, z0.b, z1.b
>         add     z0.b, z0.b, z2.b
> 
> Bootstrapped and regtested on aarch64-none-linux-gnu with no issues.
> 
> Ok for master?
> 
> Thanks,
> Tamar
> 
> gcc/ChangeLog:
> 
>       * config/aarch64/aarch64-sve2.md
>       (*aarch64_sve_pack_<sve_int_op><mode>): New.
>       * config/aarch64/iterators.md (binary_top): New.
> 
> gcc/testsuite/ChangeLog:
> 
>       * gcc.dg/vect/vect-div-bitmask-4.c: New test.
>       * gcc.target/aarch64/sve2/div-by-bitmask_2.c: New test.
> 
> --- inline copy of patch --
> diff --git a/gcc/config/aarch64/aarch64-sve2.md b/gcc/config/aarch64/aarch64-sve2.md
> index ab5dcc369481311e5bd68a1581265e1ce99b4b0f..0ee46c8b0d43467da4a6b98ad3c41e5d05d8cf38 100644
> --- a/gcc/config/aarch64/aarch64-sve2.md
> +++ b/gcc/config/aarch64/aarch64-sve2.md
> @@ -1600,6 +1600,25 @@ (define_insn "@aarch64_sve_<sve_int_op><mode>"
>    "<sve_int_op>\t%0.<Ventype>, %2.<Vetype>, %3.<Vetype>"
>  )
> 
> +(define_insn_and_split "*aarch64_sve_pack_<sve_int_op><mode>"
> +  [(set (match_operand:<VNARROW> 0 "register_operand" "=w")
> +     (unspec:<VNARROW>
> +       [(match_operand:SVE_FULL_HSDI 1 "register_operand" "w")
> +        (subreg:SVE_FULL_HSDI (unspec:<VNARROW>
> +          [(match_operand:SVE_FULL_HSDI 2 "register_operand" "w")
> +           (match_operand:SVE_FULL_HSDI 3 "register_operand" "w")]
> +          SVE2_INT_BINARY_NARROWB) 0)]
> +       UNSPEC_PACK))]
> +  "TARGET_SVE2"
> +  "#"
> +  "&& true"
> +  [(const_int 0)]
> +{
> +  rtx tmp = lowpart_subreg (<VNARROW>mode, operands[1], <MODE>mode);
> +  emit_insn (gen_aarch64_sve (<SVE2_INT_BINARY_NARROWB:binary_top>, <MODE>mode,
> +                           operands[0], tmp, operands[2], operands[3]));
> +})
> +
>  ;; -------------------------------------------------------------------------
>  ;; ---- [INT] Narrowing right shifts
>  ;; -------------------------------------------------------------------------
> diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
> index 0dd9dc66f7ccd78acacb759662d0cd561cd5b4ef..37d8161a33b1c399d80be82afa67613a087389d4 100644
> --- a/gcc/config/aarch64/iterators.md
> +++ b/gcc/config/aarch64/iterators.md
> @@ -3589,6 +3589,11 @@ (define_int_attr brk_op [(UNSPEC_BRKA "a") (UNSPEC_BRKB "b")
> 
>  (define_int_attr sve_pred_op [(UNSPEC_PFIRST "pfirst") (UNSPEC_PNEXT "pnext")])
> 
> +(define_int_attr binary_top [(UNSPEC_ADDHNB "UNSPEC_ADDHNT")
> +                          (UNSPEC_RADDHNB "UNSPEC_RADDHNT")
> +                          (UNSPEC_RSUBHNB "UNSPEC_RSUBHNT")
> +                          (UNSPEC_SUBHNB "UNSPEC_SUBHNT")])
> +
>  (define_int_attr sve_int_op [(UNSPEC_ADCLB "adclb")
>                            (UNSPEC_ADCLT "adclt")
>                            (UNSPEC_ADDHNB "addhnb")
> diff --git a/gcc/testsuite/gcc.dg/vect/vect-div-bitmask-4.c b/gcc/testsuite/gcc.dg/vect/vect-div-bitmask-4.c
> new file mode 100644
> index 0000000000000000000000000000000000000000..0df08bda6fd3e33280307ea15c82dd9726897cfd
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/vect/vect-div-bitmask-4.c
> @@ -0,0 +1,26 @@
> +/* { dg-require-effective-target vect_int } */
> +/* { dg-additional-options "-fno-vect-cost-model" { target aarch64*-*-* } } */
> +
> +#include <stdint.h>
> +#include "tree-vect.h"
> +
> +#define N 50
> +#define TYPE uint32_t
> +
> +__attribute__((noipa, noinline, optimize("O1"))) void fun1(TYPE*
> +restrict pixel, TYPE level, int n) {
> +  for (int i = 0; i < n; i+=1)
> +    pixel[i] += (pixel[i] * (uint64_t)level) / 0xffffffffUL; }
> +
> +__attribute__((noipa, noinline, optimize("O3"))) void fun2(TYPE*
> +restrict pixel, TYPE level, int n) {
> +  for (int i = 0; i < n; i+=1)
> +    pixel[i] += (pixel[i] * (uint64_t)level) / 0xffffffffUL; }
> +
> +#include "vect-div-bitmask.h"
> +
> +/* { dg-final { scan-tree-dump-not "vect_recog_divmod_pattern: detected" "vect" { target aarch64*-*-* } } } */
> diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/div-by-bitmask_2.c b/gcc/testsuite/gcc.target/aarch64/sve2/div-by-bitmask_2.c
> new file mode 100644
> index 0000000000000000000000000000000000000000..cddcebdf15ecaa9dc515f58cdbced36c8038db1b
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/sve2/div-by-bitmask_2.c
> @@ -0,0 +1,56 @@
> +/* { dg-do compile } */
> +/* { dg-additional-options "-O2 -std=c99" } */
> +/* { dg-final { check-function-bodies "**" "" "" { target { le } } } }
> +*/
> +
> +#include <stdint.h>
> +
> +/*
> +** draw_bitmap1:
> +** ...
> +**   addhnb  z6.b, z0.h, z4.h
> +**   addhnb  z5.b, z1.h, z4.h
> +**   addhnb  z0.b, z0.h, z6.h
> +**   addhnt  z0.b, z1.h, z5.h
> +** ...
> +*/
> +void draw_bitmap1(uint8_t* restrict pixel, uint8_t level, int n) {
> +  for (int i = 0; i < (n & -16); i+=1)
> +    pixel[i] += (pixel[i] * level) / 0xff; }
> +
> +void draw_bitmap2(uint8_t* restrict pixel, uint8_t level, int n) {
> +  for (int i = 0; i < (n & -16); i+=1)
> +    pixel[i] += (pixel[i] * level) / 0xfe; }
> +
> +/*
> +** draw_bitmap3:
> +** ...
> +**   addhnb  z6.h, z0.s, z4.s
> +**   addhnb  z5.h, z1.s, z4.s
> +**   addhnb  z0.h, z0.s, z6.s
> +**   addhnt  z0.h, z1.s, z5.s
> +** ...
> +*/
> +void draw_bitmap3(uint16_t* restrict pixel, uint16_t level, int n) {
> +  for (int i = 0; i < (n & -16); i+=1)
> +    pixel[i] += (pixel[i] * level) / 0xffffU; }
> +
> +/*
> +** draw_bitmap4:
> +** ...
> +**   addhnb  z6.s, z0.d, z4.d
> +**   addhnb  z5.s, z1.d, z4.d
> +**   addhnb  z0.s, z0.d, z6.d
> +**   addhnt  z0.s, z1.d, z5.d
> +** ...
> +*/
> +void draw_bitmap4(uint32_t* restrict pixel, uint32_t level, int n) {
> +  for (int i = 0; i < (n & -16); i+=1)
> +    pixel[i] += (pixel[i] * (uint64_t)level) / 0xffffffffUL; }
> 
> 
> 
> 
> --

Reply via email to