> On Thu, Sep 12, 2024 at 2:53 AM Pengxuan Zheng
> <quic_pzh...@quicinc.com> wrote:
> >
> > SVE's INDEX instruction can be used to populate vectors with values
> > starting from "base" and incremented by "step" for each subsequent
> > value. We can take advantage of it to generate vector constants if
> > TARGET_SVE is available and the base and step values are within [-16, 15].
> 
> Are there multiplication by or addition of scalar immediate instructions to
> enhance this with two-instruction sequences?

No, Richard, I can't think of any equivalent two-instruction sequences.

Thanks,
Pengxuan
> 
> > For example, with the following function:
> >
> > typedef int v4si __attribute__ ((vector_size (16)));
> > v4si f_v4si (void) {
> >   return (v4si){ 0, 1, 2, 3 };
> > }
> >
> > GCC currently generates:
> >
> > f_v4si:
> >         adrp    x0, .LC4
> >         ldr     q0, [x0, #:lo12:.LC4]
> >         ret
> >
> > .LC4:
> >         .word   0
> >         .word   1
> >         .word   2
> >         .word   3
> >
> > With this patch, we generate an INDEX instruction instead if
> > TARGET_SVE is available.
> >
> > f_v4si:
> >         index   z0.s, #0, #1
> >         ret
> >
> >         PR target/113328
> >
> > gcc/ChangeLog:
> >
> >         * config/aarch64/aarch64.cc (aarch64_simd_valid_immediate): Improve
> >         handling of some ADVSIMD vectors by using SVE's INDEX if TARGET_SVE is
> >         available.
> >         (aarch64_output_simd_mov_immediate): Likewise.
> >
> > gcc/testsuite/ChangeLog:
> >
> >         * gcc.target/aarch64/sve/acle/general/dupq_1.c: Update test to use
> >         SVE's INDEX instruction.
> >         * gcc.target/aarch64/sve/acle/general/dupq_2.c: Likewise.
> >         * gcc.target/aarch64/sve/acle/general/dupq_3.c: Likewise.
> >         * gcc.target/aarch64/sve/acle/general/dupq_4.c: Likewise.
> >         * gcc.target/aarch64/sve/vec_init_3.c: New test.
> >
> > Signed-off-by: Pengxuan Zheng <quic_pzh...@quicinc.com>
> > ---
> >  gcc/config/aarch64/aarch64.cc                 | 12 ++-
> >  .../aarch64/sve/acle/general/dupq_1.c         |  3 +-
> >  .../aarch64/sve/acle/general/dupq_2.c         |  3 +-
> >  .../aarch64/sve/acle/general/dupq_3.c         |  3 +-
> >  .../aarch64/sve/acle/general/dupq_4.c         |  3 +-
> >  .../gcc.target/aarch64/sve/vec_init_3.c       | 99 +++++++++++++++++++
> >  6 files changed, 114 insertions(+), 9 deletions(-)
> >  create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/vec_init_3.c
> >
> > diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
> > index 27e24ba70ab..6b3ca57d0eb 100644
> > --- a/gcc/config/aarch64/aarch64.cc
> > +++ b/gcc/config/aarch64/aarch64.cc
> > @@ -22991,7 +22991,7 @@ aarch64_simd_valid_immediate (rtx op, simd_immediate_info *info,
> >    if (CONST_VECTOR_P (op)
> >        && CONST_VECTOR_DUPLICATE_P (op))
> >      n_elts = CONST_VECTOR_NPATTERNS (op);
> > -  else if ((vec_flags & VEC_SVE_DATA)
> > +  else if (which == AARCH64_CHECK_MOV && TARGET_SVE
> >            && const_vec_series_p (op, &base, &step))
> >      {
> >        gcc_assert (GET_MODE_CLASS (mode) == MODE_VECTOR_INT);
> > @@ -25249,6 +25249,16 @@ aarch64_output_simd_mov_immediate (rtx const_vector, unsigned width,
> >
> >    if (which == AARCH64_CHECK_MOV)
> >      {
> > +      if (info.insn == simd_immediate_info::INDEX)
> > +       {
> > +         gcc_assert (TARGET_SVE);
> > +         snprintf (templ, sizeof (templ), "index\t%%Z0.%c, #"
> > +                   HOST_WIDE_INT_PRINT_DEC ", #" HOST_WIDE_INT_PRINT_DEC,
> > +                   element_char, INTVAL (info.u.index.base),
> > +                   INTVAL (info.u.index.step));
> > +         return templ;
> > +       }
> > +
> >        mnemonic = info.insn == simd_immediate_info::MVN ? "mvni" : "movi";
> >        shift_op = (info.u.mov.modifier == simd_immediate_info::MSL
> >                   ? "msl" : "lsl");
> > diff --git
> > a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/dupq_1.c
> > b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/dupq_1.c
> > index 216699b0536..0940bedd0dd 100644
> > --- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/dupq_1.c
> > +++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/dupq_1.c
> > @@ -10,7 +10,6 @@ dupq (int x)
> >    return svdupq_s32 (x, 1, 2, 3);
> >  }
> >
> > -/* { dg-final { scan-assembler {\tldr\tq[0-9]+,} } } */
> > +/* { dg-final { scan-assembler {\tindex\tz[0-9]+\.s, #0, #1} } } */
> >  /* { dg-final { scan-assembler {\tins\tv[0-9]+\.s\[0\], w0\n} } } */
> >  /* { dg-final { scan-assembler {\tdup\tz[0-9]+\.q, z[0-9]+\.q\[0\]\n}
> > } } */
> > -/* { dg-final { scan-assembler
> > {\t\.word\t1\n\t\.word\t2\n\t\.word\t3\n} } } */ diff --git
> > a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/dupq_2.c
> > b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/dupq_2.c
> > index d494943a275..218a6601337 100644
> > --- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/dupq_2.c
> > +++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/dupq_2.c
> > @@ -10,7 +10,6 @@ dupq (int x)
> >    return svdupq_s32 (x, 1, 2, 3);
> >  }
> >
> > -/* { dg-final { scan-assembler {\tldr\tq[0-9]+,} } } */
> > +/* { dg-final { scan-assembler {\tindex\tz[0-9]+\.s, #3, #-1} } } */
> >  /* { dg-final { scan-assembler {\tins\tv[0-9]+\.s\[0\], w0\n} } } */
> >  /* { dg-final { scan-assembler {\tdup\tz[0-9]+\.q, z[0-9]+\.q\[0\]\n}
> > } } */
> > -/* { dg-final { scan-assembler
> > {\t\.word\t3\n\t\.word\t2\n\t\.word\t1\n} } } */ diff --git
> > a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/dupq_3.c
> > b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/dupq_3.c
> > index 4bc8259df07..245d43b75b5 100644
> > --- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/dupq_3.c
> > +++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/dupq_3.c
> > @@ -10,7 +10,6 @@ dupq (int x)
> >    return svdupq_s32 (0, 1, x, 3);
> >  }
> >
> > -/* { dg-final { scan-assembler {\tldr\tq[0-9]+,} } } */
> > +/* { dg-final { scan-assembler {\tindex\tz[0-9]+\.s, #0, #1} } } */
> >  /* { dg-final { scan-assembler {\tins\tv[0-9]+\.s\[2\], w0\n} } } */
> >  /* { dg-final { scan-assembler {\tdup\tz[0-9]+\.q, z[0-9]+\.q\[0\]\n}
> > } } */
> > -/* { dg-final { scan-assembler
> > {\t\.word\t0\n\t\.word\t1\n\t\.word\t[^\n]*\n\t\.word\t3\n} } } */
> > diff --git
> > a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/dupq_4.c
> > b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/dupq_4.c
> > index 6f9f9f2f22f..cbee6f27b62 100644
> > --- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/dupq_4.c
> > +++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/dupq_4.c
> > @@ -10,7 +10,6 @@ dupq (int x)
> >    return svdupq_s32 (0, 1, x, 3);
> >  }
> >
> > -/* { dg-final { scan-assembler {\tldr\tq[0-9]+,} } } */
> > +/* { dg-final { scan-assembler {\tindex\tz[0-9]+\.s, #3, #-1} } } */
> >  /* { dg-final { scan-assembler {\tins\tv[0-9]+\.s\[2\], w0\n} } } */
> >  /* { dg-final { scan-assembler {\tdup\tz[0-9]+\.q, z[0-9]+\.q\[0\]\n}
> > } } */
> > -/* { dg-final { scan-assembler
> > {\t\.word\t3\n\t\.word\t[^\n]*\n\t\.word\t1\n\t\.word\t0\n} } } */
> > diff --git a/gcc/testsuite/gcc.target/aarch64/sve/vec_init_3.c
> > b/gcc/testsuite/gcc.target/aarch64/sve/vec_init_3.c
> > new file mode 100644
> > index 00000000000..25910dbfa1f
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.target/aarch64/sve/vec_init_3.c
> > @@ -0,0 +1,99 @@
> > +/* { dg-do compile } */
> > +/* { dg-options "-O2" } */
> > +/* { dg-final { check-function-bodies "**" "" "" } } */
> > +
> > +typedef char v16qi __attribute__ ((vector_size (16)));
> > +typedef char v8qi __attribute__ ((vector_size (8)));
> > +typedef short v8hi __attribute__ ((vector_size (16)));
> > +typedef short v4hi __attribute__ ((vector_size (8)));
> > +typedef int v4si __attribute__ ((vector_size (16)));
> > +typedef int v2si __attribute__ ((vector_size (8)));
> > +typedef long v2di __attribute__ ((vector_size (16)));
> > +
> > +/*
> > +** f_v16qi:
> > +**     index   z0\.b, #0, #1
> > +**     ret
> > +*/
> > +v16qi
> > +f_v16qi (void)
> > +{
> > +  return (v16qi){ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 };
> > +}
> > +
> > +/*
> > +** f_v8qi:
> > +**     index   z0\.b, #0, #1
> > +**     ret
> > +*/
> > +v8qi
> > +f_v8qi (void)
> > +{
> > +  return (v8qi){ 0, 1, 2, 3, 4, 5, 6, 7 };
> > +}
> > +
> > +/*
> > +** f_v8hi:
> > +**     index   z0\.h, #0, #1
> > +**     ret
> > +*/
> > +v8hi
> > +f_v8hi (void)
> > +{
> > +  return (v8hi){ 0, 1, 2, 3, 4, 5, 6, 7 };
> > +}
> > +
> > +/*
> > +** f_v4hi:
> > +**     index   z0\.h, #0, #1
> > +**     ret
> > +*/
> > +v4hi
> > +f_v4hi (void)
> > +{
> > +  return (v4hi){ 0, 1, 2, 3 };
> > +}
> > +
> > +/*
> > +** f_v4si:
> > +**     index   z0\.s, #0, #1
> > +**     ret
> > +*/
> > +v4si
> > +f_v4si (void)
> > +{
> > +  return (v4si){ 0, 1, 2, 3 };
> > +}
> > +
> > +/*
> > +** f_v2si:
> > +**     index   z0\.s, #0, #1
> > +**     ret
> > +*/
> > +v2si
> > +f_v2si (void)
> > +{
> > +  return (v2si){ 0, 1 };
> > +}
> > +
> > +/*
> > +** f_v2di:
> > +**     index   z0\.d, #0, #1
> > +**     ret
> > +*/
> > +v2di
> > +f_v2di (void)
> > +{
> > +  return (v2di){ 0, 1 };
> > +}
> > +
> > +/*
> > +** g_v4si:
> > +**     index   z0\.s, #3, #-4
> > +**     ret
> > +*/
> > +v4si
> > +g_v4si (void)
> > +{
> > +  return (v4si){ 3, -1, -5, -9 };
> > +}
> > --
> > 2.17.1
> >

Reply via email to