On Wed, Aug 14, 2024 at 2:21 PM Richard Sandiford
<richard.sandif...@arm.com> wrote:
>
> Andrew Pinski <quic_apin...@quicinc.com> writes:
> > For popcount for bytes, we don't need the reduction addition
> > after the vector cnt instruction as we are only counting one
> > byte's popcount.
> > This changes the popcount extend to cover all ALLI rather than GPI.
> >
> > Changes since v1:
> > * v2 - Use ALLI iterator and combine all into one pattern.
> >        Add new testcases popcnt[6-8].c.
> >
> > Bootstrapped and tested on aarch64-linux-gnu with no regressions.
> >
> >       PR target/113042
> >
> > gcc/ChangeLog:
> >
> >       * config/aarch64/aarch64.md (popcount<mode>2): Update pattern
> >       to support ALLI modes.
> >
> > gcc/testsuite/ChangeLog:
> >
> >       * gcc.target/aarch64/popcnt5.c: New test.
> >       * gcc.target/aarch64/popcnt6.c: New test.
> >
> > Signed-off-by: Andrew Pinski <quic_apin...@quicinc.com>
> > ---
> >  gcc/config/aarch64/aarch64.md              | 52 +++++++++++++++++++---
> >  gcc/testsuite/gcc.target/aarch64/popcnt5.c | 19 ++++++++
> >  gcc/testsuite/gcc.target/aarch64/popcnt6.c | 19 ++++++++
> >  gcc/testsuite/gcc.target/aarch64/popcnt7.c | 18 ++++++++
> >  gcc/testsuite/gcc.target/aarch64/popcnt8.c | 18 ++++++++
> >  5 files changed, 119 insertions(+), 7 deletions(-)
> >  create mode 100644 gcc/testsuite/gcc.target/aarch64/popcnt5.c
> >  create mode 100644 gcc/testsuite/gcc.target/aarch64/popcnt6.c
> >  create mode 100644 gcc/testsuite/gcc.target/aarch64/popcnt7.c
> >  create mode 100644 gcc/testsuite/gcc.target/aarch64/popcnt8.c
>
> Sorry for the slow review.
>
> > diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
> > index 389a1906e23..dd88fd891b5 100644
> > --- a/gcc/config/aarch64/aarch64.md
> > +++ b/gcc/config/aarch64/aarch64.md
> > @@ -5332,28 +5332,66 @@ (define_insn "*aarch64_popcount<mode>2_cssc_insn"
> >  ;; MOV       w0, v2.b[0]
> >
> >  (define_expand "popcount<mode>2"
> > -  [(set (match_operand:GPI 0 "register_operand")
> > -     (popcount:GPI (match_operand:GPI 1 "register_operand")))]
> > +  [(set (match_operand:ALLI 0 "register_operand")
> > +     (popcount:ALLI (match_operand:ALLI 1 "register_operand")))]
> >    "TARGET_CSSC || TARGET_SIMD"
>
> Could we restrict this to:
>
>   TARET_CSSC
>   ? GET_MODE_BITSIZE (<MODE>mode) >= 32
>   : TARGET_SIMD
>
> >  {
> > +  rtx in = operands[1];
> > +  rtx out = operands[0];
> > +  if (TARGET_CSSC
> > +      && (<MODE>mode == HImode
> > +          || <MODE>mode == QImode))
> > +    {
> > +      rtx tmp = gen_reg_rtx (SImode);
> > +      rtx out1 = gen_reg_rtx (SImode);
> > +      if (<MODE>mode == HImode)
> > +        emit_insn (gen_zero_extendhisi2 (tmp, in));
> > +      else
> > +        emit_insn (gen_zero_extendqisi2 (tmp, in));
> > +      emit_insn (gen_popcountsi2 (out1, tmp));
> > +      emit_move_insn (out, gen_lowpart (<MODE>mode, out1));
> > +      DONE;
> > +    }
>
> ...and then skip this part (including the rtx in/out)?  It should be
> what target-independent code would do.
>
> >    if (!TARGET_CSSC)
> >      {
> >        rtx v = gen_reg_rtx (V8QImode);
> >        rtx v1 = gen_reg_rtx (V8QImode);
> >        rtx in = operands[1];
> >        rtx out = operands[0];
> > -      if(<MODE>mode == SImode)
> > +      /* SImode and HImode should be zero extended to DImode. */
> > +      if (<MODE>mode == SImode || <MODE>mode == HImode)
> >       {
> >         rtx tmp;
> >         tmp = gen_reg_rtx (DImode);
> > -       /* If we have SImode, zero extend to DImode, pop count does
> > -          not change if we have extra zeros. */
> > -       emit_insn (gen_zero_extendsidi2 (tmp, in));
> > +       /* If we have SImode, zero extend to DImode,
> > +          pop count does not change if we have extra zeros. */
>
> The doubled comment seems redundant.  How about making the first one:
>
>       /* SImode and HImode should be zero extended to DImode.
>          popcount does not change if we have extra zeros.  */
>
> and deleting the second comment?
>
> > +       if (<MODE>mode == SImode)
> > +         emit_insn (gen_zero_extendsidi2 (tmp, in));
> > +       else
> > +         emit_insn (gen_zero_extendhidi2 (tmp, in));
> >         in = tmp;
>
> I think the if body can be replaced with:
>
>   in = convert_to_mode (DImode, in, true);
>
> >       }
> >        emit_move_insn (v, gen_lowpart (V8QImode, in));
> >        emit_insn (gen_popcountv8qi2 (v1, v));
> > -      emit_insn (gen_aarch64_zero_extend<mode>_reduc_plus_v8qi (out, v1));
> > +      /* QImode, just extract from the v8qi vector.  */
> > +      if (<MODE>mode == QImode)
> > +     {
> > +       emit_move_insn (out, gen_lowpart (QImode, v1));
> > +     }
>
> Nit: redundant braces.
>
> > +      /* HI and SI, reduction is zero extended to SImode. */
> > +      else if (<MODE>mode == SImode || <MODE>mode == HImode)
> > +     {
> > +       rtx out1;
> > +       out1 = gen_reg_rtx (SImode);
>
> Pre-existing, but: no need for two separate statements.

Updated patch posted:
https://gcc.gnu.org/pipermail/gcc-patches/2024-August/660473.html

Thanks,
Andrew

>
> Thanks,
> Richard
>
> > +       emit_insn (gen_aarch64_zero_extendsi_reduc_plus_v8qi (out1, v1));
> > +       emit_move_insn (out, gen_lowpart (<MODE>mode, out1));
> > +     }
> > +      /* DImode, reduction is zero extended to DImode. */
> > +      else
> > +     {
> > +       gcc_assert (<MODE>mode == DImode);
> > +       emit_insn (gen_aarch64_zero_extenddi_reduc_plus_v8qi (out, v1));
> > +     }
> >        DONE;
> >      }
> >  })
> > diff --git a/gcc/testsuite/gcc.target/aarch64/popcnt5.c 
> > b/gcc/testsuite/gcc.target/aarch64/popcnt5.c
> > new file mode 100644
> > index 00000000000..406369d9b29
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.target/aarch64/popcnt5.c
> > @@ -0,0 +1,19 @@
> > +/* { dg-do compile } */
> > +/* { dg-options "-O2" } */
> > +/* { dg-final { check-function-bodies "**" "" } } */
> > +/* PR target/113042 */
> > +
> > +#pragma GCC target "+nocssc"
> > +
> > +/*
> > +** h8:
> > +**   ldr     b[0-9]+, \[x0\]
> > +**   cnt     v[0-9]+.8b, v[0-9]+.8b
> > +**   smov    w0, v[0-9]+.b\[0\]
> > +**   ret
> > +*/
> > +/* We should not need the addv here since we only need a byte popcount. */
> > +
> > +unsigned h8 (const unsigned char *a) {
> > +       return __builtin_popcountg (a[0]);
> > +}
> > diff --git a/gcc/testsuite/gcc.target/aarch64/popcnt6.c 
> > b/gcc/testsuite/gcc.target/aarch64/popcnt6.c
> > new file mode 100644
> > index 00000000000..e882cb24126
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.target/aarch64/popcnt6.c
> > @@ -0,0 +1,19 @@
> > +/* { dg-do compile } */
> > +/* { dg-options "-O2" } */
> > +/* { dg-final { check-function-bodies "**" "" } } */
> > +/* PR target/113042 */
> > +
> > +#pragma GCC target "+nocssc"
> > +
> > +/*
> > +** h8:
> > +**   ldr     h[0-9]+, \[x0\]
> > +**   cnt     v[0-9]+.8b, v[0-9]+.8b
> > +**   addv    b[0-9]+, v[0-9]+.8b
> > +**   fmov    w0, s[0-9]+
> > +**   ret
> > +*/
> > +
> > +unsigned h8 (const unsigned short *a) {
> > +       return __builtin_popcountg (a[0]);
> > +}
> > diff --git a/gcc/testsuite/gcc.target/aarch64/popcnt7.c 
> > b/gcc/testsuite/gcc.target/aarch64/popcnt7.c
> > new file mode 100644
> > index 00000000000..8dfff211ae4
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.target/aarch64/popcnt7.c
> > @@ -0,0 +1,18 @@
> > +/* { dg-do compile } */
> > +/* { dg-options "-O2" } */
> > +/* { dg-final { check-function-bodies "**" "" } } */
> > +/* PR target/113042 */
> > +
> > +#pragma GCC target "+cssc"
> > +
> > +/*
> > +** h8:
> > +**   ldrb    w[0-9]+, \[x0\]
> > +**   cnt     w[0-9]+, w[0-9]+
> > +**   ret
> > +*/
> > +/* We should not produce any extra zero extend for this code */
> > +
> > +unsigned h8 (const unsigned char *a) {
> > +       return __builtin_popcountg (a[0]);
> > +}
> > diff --git a/gcc/testsuite/gcc.target/aarch64/popcnt8.c 
> > b/gcc/testsuite/gcc.target/aarch64/popcnt8.c
> > new file mode 100644
> > index 00000000000..66a88b6a929
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.target/aarch64/popcnt8.c
> > @@ -0,0 +1,18 @@
> > +/* { dg-do compile } */
> > +/* { dg-options "-O2" } */
> > +/* { dg-final { check-function-bodies "**" "" } } */
> > +/* PR target/113042 */
> > +
> > +#pragma GCC target "+cssc"
> > +
> > +/*
> > +** h8:
> > +**   ldrh    w[0-9]+, \[x0\]
> > +**   cnt     w[0-9]+, w[0-9]+
> > +**   ret
> > +*/
> > +/* We should not produce any extra zero extend for this code */
> > +
> > +unsigned h8 (const unsigned short *a) {
> > +       return __builtin_popcountg (a[0]);
> > +}

Reply via email to