On Wed, Aug 14, 2024 at 2:21 PM Richard Sandiford <richard.sandif...@arm.com> wrote: > > Andrew Pinski <quic_apin...@quicinc.com> writes: > > For popcount for bytes, we don't need the reduction addition > > after the vector cnt instruction as we are only counting one > > byte's popcount. > > This changes the popcount extend to cover all ALLI rather than GPI. > > > > Changes since v1: > > * v2 - Use ALLI iterator and combine all into one pattern. > > Add new testcases popcnt[6-8].c. > > > > Bootstrapped and tested on aarch64-linux-gnu with no regressions. > > > > PR target/113042 > > > > gcc/ChangeLog: > > > > * config/aarch64/aarch64.md (popcount<mode>2): Update pattern > > to support ALLI modes. > > > > gcc/testsuite/ChangeLog: > > > > * gcc.target/aarch64/popcnt5.c: New test. > > * gcc.target/aarch64/popcnt6.c: New test. > > > > Signed-off-by: Andrew Pinski <quic_apin...@quicinc.com> > > --- > > gcc/config/aarch64/aarch64.md | 52 +++++++++++++++++++--- > > gcc/testsuite/gcc.target/aarch64/popcnt5.c | 19 ++++++++ > > gcc/testsuite/gcc.target/aarch64/popcnt6.c | 19 ++++++++ > > gcc/testsuite/gcc.target/aarch64/popcnt7.c | 18 ++++++++ > > gcc/testsuite/gcc.target/aarch64/popcnt8.c | 18 ++++++++ > > 5 files changed, 119 insertions(+), 7 deletions(-) > > create mode 100644 gcc/testsuite/gcc.target/aarch64/popcnt5.c > > create mode 100644 gcc/testsuite/gcc.target/aarch64/popcnt6.c > > create mode 100644 gcc/testsuite/gcc.target/aarch64/popcnt7.c > > create mode 100644 gcc/testsuite/gcc.target/aarch64/popcnt8.c > > Sorry for the slow review. > > > diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md > > index 389a1906e23..dd88fd891b5 100644 > > --- a/gcc/config/aarch64/aarch64.md > > +++ b/gcc/config/aarch64/aarch64.md > > @@ -5332,28 +5332,66 @@ (define_insn "*aarch64_popcount<mode>2_cssc_insn" > > ;; MOV w0, v2.b[0] > > > > (define_expand "popcount<mode>2" > > - [(set (match_operand:GPI 0 "register_operand") > > - (popcount:GPI (match_operand:GPI 1 "register_operand")))] > > + [(set (match_operand:ALLI 0 "register_operand") > > + (popcount:ALLI (match_operand:ALLI 1 "register_operand")))] > > "TARGET_CSSC || TARGET_SIMD" > > Could we restrict this to: > > TARET_CSSC > ? GET_MODE_BITSIZE (<MODE>mode) >= 32 > : TARGET_SIMD > > > { > > + rtx in = operands[1]; > > + rtx out = operands[0]; > > + if (TARGET_CSSC > > + && (<MODE>mode == HImode > > + || <MODE>mode == QImode)) > > + { > > + rtx tmp = gen_reg_rtx (SImode); > > + rtx out1 = gen_reg_rtx (SImode); > > + if (<MODE>mode == HImode) > > + emit_insn (gen_zero_extendhisi2 (tmp, in)); > > + else > > + emit_insn (gen_zero_extendqisi2 (tmp, in)); > > + emit_insn (gen_popcountsi2 (out1, tmp)); > > + emit_move_insn (out, gen_lowpart (<MODE>mode, out1)); > > + DONE; > > + } > > ...and then skip this part (including the rtx in/out)? It should be > what target-independent code would do. > > > if (!TARGET_CSSC) > > { > > rtx v = gen_reg_rtx (V8QImode); > > rtx v1 = gen_reg_rtx (V8QImode); > > rtx in = operands[1]; > > rtx out = operands[0]; > > - if(<MODE>mode == SImode) > > + /* SImode and HImode should be zero extended to DImode. */ > > + if (<MODE>mode == SImode || <MODE>mode == HImode) > > { > > rtx tmp; > > tmp = gen_reg_rtx (DImode); > > - /* If we have SImode, zero extend to DImode, pop count does > > - not change if we have extra zeros. */ > > - emit_insn (gen_zero_extendsidi2 (tmp, in)); > > + /* If we have SImode, zero extend to DImode, > > + pop count does not change if we have extra zeros. */ > > The doubled comment seems redundant. How about making the first one: > > /* SImode and HImode should be zero extended to DImode. > popcount does not change if we have extra zeros. */ > > and deleting the second comment? > > > + if (<MODE>mode == SImode) > > + emit_insn (gen_zero_extendsidi2 (tmp, in)); > > + else > > + emit_insn (gen_zero_extendhidi2 (tmp, in)); > > in = tmp; > > I think the if body can be replaced with: > > in = convert_to_mode (DImode, in, true); > > > } > > emit_move_insn (v, gen_lowpart (V8QImode, in)); > > emit_insn (gen_popcountv8qi2 (v1, v)); > > - emit_insn (gen_aarch64_zero_extend<mode>_reduc_plus_v8qi (out, v1)); > > + /* QImode, just extract from the v8qi vector. */ > > + if (<MODE>mode == QImode) > > + { > > + emit_move_insn (out, gen_lowpart (QImode, v1)); > > + } > > Nit: redundant braces. > > > + /* HI and SI, reduction is zero extended to SImode. */ > > + else if (<MODE>mode == SImode || <MODE>mode == HImode) > > + { > > + rtx out1; > > + out1 = gen_reg_rtx (SImode); > > Pre-existing, but: no need for two separate statements.
Updated patch posted: https://gcc.gnu.org/pipermail/gcc-patches/2024-August/660473.html Thanks, Andrew > > Thanks, > Richard > > > + emit_insn (gen_aarch64_zero_extendsi_reduc_plus_v8qi (out1, v1)); > > + emit_move_insn (out, gen_lowpart (<MODE>mode, out1)); > > + } > > + /* DImode, reduction is zero extended to DImode. */ > > + else > > + { > > + gcc_assert (<MODE>mode == DImode); > > + emit_insn (gen_aarch64_zero_extenddi_reduc_plus_v8qi (out, v1)); > > + } > > DONE; > > } > > }) > > diff --git a/gcc/testsuite/gcc.target/aarch64/popcnt5.c > > b/gcc/testsuite/gcc.target/aarch64/popcnt5.c > > new file mode 100644 > > index 00000000000..406369d9b29 > > --- /dev/null > > +++ b/gcc/testsuite/gcc.target/aarch64/popcnt5.c > > @@ -0,0 +1,19 @@ > > +/* { dg-do compile } */ > > +/* { dg-options "-O2" } */ > > +/* { dg-final { check-function-bodies "**" "" } } */ > > +/* PR target/113042 */ > > + > > +#pragma GCC target "+nocssc" > > + > > +/* > > +** h8: > > +** ldr b[0-9]+, \[x0\] > > +** cnt v[0-9]+.8b, v[0-9]+.8b > > +** smov w0, v[0-9]+.b\[0\] > > +** ret > > +*/ > > +/* We should not need the addv here since we only need a byte popcount. */ > > + > > +unsigned h8 (const unsigned char *a) { > > + return __builtin_popcountg (a[0]); > > +} > > diff --git a/gcc/testsuite/gcc.target/aarch64/popcnt6.c > > b/gcc/testsuite/gcc.target/aarch64/popcnt6.c > > new file mode 100644 > > index 00000000000..e882cb24126 > > --- /dev/null > > +++ b/gcc/testsuite/gcc.target/aarch64/popcnt6.c > > @@ -0,0 +1,19 @@ > > +/* { dg-do compile } */ > > +/* { dg-options "-O2" } */ > > +/* { dg-final { check-function-bodies "**" "" } } */ > > +/* PR target/113042 */ > > + > > +#pragma GCC target "+nocssc" > > + > > +/* > > +** h8: > > +** ldr h[0-9]+, \[x0\] > > +** cnt v[0-9]+.8b, v[0-9]+.8b > > +** addv b[0-9]+, v[0-9]+.8b > > +** fmov w0, s[0-9]+ > > +** ret > > +*/ > > + > > +unsigned h8 (const unsigned short *a) { > > + return __builtin_popcountg (a[0]); > > +} > > diff --git a/gcc/testsuite/gcc.target/aarch64/popcnt7.c > > b/gcc/testsuite/gcc.target/aarch64/popcnt7.c > > new file mode 100644 > > index 00000000000..8dfff211ae4 > > --- /dev/null > > +++ b/gcc/testsuite/gcc.target/aarch64/popcnt7.c > > @@ -0,0 +1,18 @@ > > +/* { dg-do compile } */ > > +/* { dg-options "-O2" } */ > > +/* { dg-final { check-function-bodies "**" "" } } */ > > +/* PR target/113042 */ > > + > > +#pragma GCC target "+cssc" > > + > > +/* > > +** h8: > > +** ldrb w[0-9]+, \[x0\] > > +** cnt w[0-9]+, w[0-9]+ > > +** ret > > +*/ > > +/* We should not produce any extra zero extend for this code */ > > + > > +unsigned h8 (const unsigned char *a) { > > + return __builtin_popcountg (a[0]); > > +} > > diff --git a/gcc/testsuite/gcc.target/aarch64/popcnt8.c > > b/gcc/testsuite/gcc.target/aarch64/popcnt8.c > > new file mode 100644 > > index 00000000000..66a88b6a929 > > --- /dev/null > > +++ b/gcc/testsuite/gcc.target/aarch64/popcnt8.c > > @@ -0,0 +1,18 @@ > > +/* { dg-do compile } */ > > +/* { dg-options "-O2" } */ > > +/* { dg-final { check-function-bodies "**" "" } } */ > > +/* PR target/113042 */ > > + > > +#pragma GCC target "+cssc" > > + > > +/* > > +** h8: > > +** ldrh w[0-9]+, \[x0\] > > +** cnt w[0-9]+, w[0-9]+ > > +** ret > > +*/ > > +/* We should not produce any extra zero extend for this code */ > > + > > +unsigned h8 (const unsigned short *a) { > > + return __builtin_popcountg (a[0]); > > +}