> On Thu, Sep 12, 2024 at 2:53 AM Pengxuan Zheng
> <quic_pzh...@quicinc.com> wrote:
> >
> > SVE's INDEX instruction can be used to populate vectors by values
> > starting from "base" and incremented by "step" for each subsequent
> > value. We can take advantage of it to generate vector constants if
> > TARGET_SVE is available and the base and step values are within [-16, 15].
>
> Are there multiplication by or addition of scalar immediate instructions to
> enhance this with two-instruction sequences?

No, Richard, I can't think of any equivalent two-instruction sequences.

Thanks,
Pengxuan

>
> > For example, with the following function:
> >
> > typedef int v4si __attribute__ ((vector_size (16))); v4si f_v4si
> > (void) {
> > return (v4si){ 0, 1, 2, 3 };
> > }
> >
> > GCC currently generates:
> >
> > f_v4si:
> > adrp x0, .LC4
> > ldr q0, [x0, #:lo12:.LC4]
> > ret
> >
> > .LC4:
> > .word 0
> > .word 1
> > .word 2
> > .word 3
> >
> > With this patch, we generate an INDEX instruction instead if
> > TARGET_SVE is available.
> >
> > f_v4si:
> > index z0.s, #0, #1
> > ret
> >
> > PR target/113328
> >
> > gcc/ChangeLog:
> >
> > * config/aarch64/aarch64.cc (aarch64_simd_valid_immediate): Improve
> > handling of some ADVSIMD vectors by using SVE's INDEX if TARGET_SVE is
> > available.
> > (aarch64_output_simd_mov_immediate): Likewise.
> >
> > gcc/testsuite/ChangeLog:
> >
> > * gcc.target/aarch64/sve/acle/general/dupq_1.c: Update test to use
> > SVE's INDEX instruction.
> > * gcc.target/aarch64/sve/acle/general/dupq_2.c: Likewise.
> > * gcc.target/aarch64/sve/acle/general/dupq_3.c: Likewise.
> > * gcc.target/aarch64/sve/acle/general/dupq_4.c: Likewise.
> > * gcc.target/aarch64/sve/vec_init_3.c: New test.
> > > > Signed-off-by: Pengxuan Zheng <quic_pzh...@quicinc.com> > > --- > > gcc/config/aarch64/aarch64.cc | 12 ++- > > .../aarch64/sve/acle/general/dupq_1.c | 3 +- > > .../aarch64/sve/acle/general/dupq_2.c | 3 +- > > .../aarch64/sve/acle/general/dupq_3.c | 3 +- > > .../aarch64/sve/acle/general/dupq_4.c | 3 +- > > .../gcc.target/aarch64/sve/vec_init_3.c | 99 +++++++++++++++++++ > > 6 files changed, 114 insertions(+), 9 deletions(-) create mode > > 100644 gcc/testsuite/gcc.target/aarch64/sve/vec_init_3.c > > > > diff --git a/gcc/config/aarch64/aarch64.cc > > b/gcc/config/aarch64/aarch64.cc index 27e24ba70ab..6b3ca57d0eb 100644 > > --- a/gcc/config/aarch64/aarch64.cc > > +++ b/gcc/config/aarch64/aarch64.cc > > @@ -22991,7 +22991,7 @@ aarch64_simd_valid_immediate (rtx op, > simd_immediate_info *info, > > if (CONST_VECTOR_P (op) > > && CONST_VECTOR_DUPLICATE_P (op)) > > n_elts = CONST_VECTOR_NPATTERNS (op); > > - else if ((vec_flags & VEC_SVE_DATA) > > + else if (which == AARCH64_CHECK_MOV && TARGET_SVE > > && const_vec_series_p (op, &base, &step)) > > { > > gcc_assert (GET_MODE_CLASS (mode) == MODE_VECTOR_INT); @@ > > -25249,6 +25249,16 @@ aarch64_output_simd_mov_immediate (rtx > > const_vector, unsigned width, > > > > if (which == AARCH64_CHECK_MOV) > > { > > + if (info.insn == simd_immediate_info::INDEX) > > + { > > + gcc_assert (TARGET_SVE); > > + snprintf (templ, sizeof (templ), "index\t%%Z0.%c, #" > > + HOST_WIDE_INT_PRINT_DEC ", #" > HOST_WIDE_INT_PRINT_DEC, > > + element_char, INTVAL (info.u.index.base), > > + INTVAL (info.u.index.step)); > > + return templ; > > + } > > + > > mnemonic = info.insn == simd_immediate_info::MVN ? "mvni" : "movi"; > > shift_op = (info.u.mov.modifier == simd_immediate_info::MSL > > ? 
"msl" : "lsl"); > > diff --git > > a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/dupq_1.c > > b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/dupq_1.c > > index 216699b0536..0940bedd0dd 100644 > > --- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/dupq_1.c > > +++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/dupq_1.c > > @@ -10,7 +10,6 @@ dupq (int x) > > return svdupq_s32 (x, 1, 2, 3); > > } > > > > -/* { dg-final { scan-assembler {\tldr\tq[0-9]+,} } } */ > > +/* { dg-final { scan-assembler {\tindex\tz[0-9]+\.s, #0, #1} } } */ > > /* { dg-final { scan-assembler {\tins\tv[0-9]+\.s\[0\], w0\n} } } */ > > /* { dg-final { scan-assembler {\tdup\tz[0-9]+\.q, z[0-9]+\.q\[0\]\n} > > } } */ > > -/* { dg-final { scan-assembler > > {\t\.word\t1\n\t\.word\t2\n\t\.word\t3\n} } } */ diff --git > > a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/dupq_2.c > > b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/dupq_2.c > > index d494943a275..218a6601337 100644 > > --- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/dupq_2.c > > +++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/dupq_2.c > > @@ -10,7 +10,6 @@ dupq (int x) > > return svdupq_s32 (x, 1, 2, 3); > > } > > > > -/* { dg-final { scan-assembler {\tldr\tq[0-9]+,} } } */ > > +/* { dg-final { scan-assembler {\tindex\tz[0-9]+\.s, #3, #-1} } } */ > > /* { dg-final { scan-assembler {\tins\tv[0-9]+\.s\[0\], w0\n} } } */ > > /* { dg-final { scan-assembler {\tdup\tz[0-9]+\.q, z[0-9]+\.q\[0\]\n} > > } } */ > > -/* { dg-final { scan-assembler > > {\t\.word\t3\n\t\.word\t2\n\t\.word\t1\n} } } */ diff --git > > a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/dupq_3.c > > b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/dupq_3.c > > index 4bc8259df07..245d43b75b5 100644 > > --- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/dupq_3.c > > +++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/dupq_3.c > > @@ -10,7 +10,6 @@ dupq (int x) > > return svdupq_s32 (0, 1, x, 3); > > } > > > > -/* { 
dg-final { scan-assembler {\tldr\tq[0-9]+,} } } */ > > +/* { dg-final { scan-assembler {\tindex\tz[0-9]+\.s, #0, #1} } } */ > > /* { dg-final { scan-assembler {\tins\tv[0-9]+\.s\[2\], w0\n} } } */ > > /* { dg-final { scan-assembler {\tdup\tz[0-9]+\.q, z[0-9]+\.q\[0\]\n} > > } } */ > > -/* { dg-final { scan-assembler > > {\t\.word\t0\n\t\.word\t1\n\t\.word\t[^\n]*\n\t\.word\t3\n} } } */ > > diff --git > > a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/dupq_4.c > > b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/dupq_4.c > > index 6f9f9f2f22f..cbee6f27b62 100644 > > --- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/dupq_4.c > > +++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/dupq_4.c > > @@ -10,7 +10,6 @@ dupq (int x) > > return svdupq_s32 (0, 1, x, 3); > > } > > > > -/* { dg-final { scan-assembler {\tldr\tq[0-9]+,} } } */ > > +/* { dg-final { scan-assembler {\tindex\tz[0-9]+\.s, #3, #-1} } } */ > > /* { dg-final { scan-assembler {\tins\tv[0-9]+\.s\[2\], w0\n} } } */ > > /* { dg-final { scan-assembler {\tdup\tz[0-9]+\.q, z[0-9]+\.q\[0\]\n} > > } } */ > > -/* { dg-final { scan-assembler > > {\t\.word\t3\n\t\.word\t[^\n]*\n\t\.word\t1\n\t\.word\t0\n} } } */ > > diff --git a/gcc/testsuite/gcc.target/aarch64/sve/vec_init_3.c > > b/gcc/testsuite/gcc.target/aarch64/sve/vec_init_3.c > > new file mode 100644 > > index 00000000000..25910dbfa1f > > --- /dev/null > > +++ b/gcc/testsuite/gcc.target/aarch64/sve/vec_init_3.c > > @@ -0,0 +1,99 @@ > > +/* { dg-do compile } */ > > +/* { dg-options "-O2" } */ > > +/* { dg-final { check-function-bodies "**" "" "" } } */ > > + > > +typedef char v16qi __attribute__ ((vector_size (16))); typedef char > > +v8qi __attribute__ ((vector_size (8))); typedef short v8hi > > +__attribute__ ((vector_size (16))); typedef short v4hi __attribute__ > > +((vector_size (8))); typedef int v4si __attribute__ ((vector_size > > +(16))); typedef int v2si __attribute__ ((vector_size (8))); typedef > > +long v2di __attribute__ 
((vector_size (16))); > > + > > +/* > > +** f_v16qi: > > +** index z0\.b, #0, #1 > > +** ret > > +*/ > > +v16qi > > +f_v16qi (void) > > +{ > > + return (v16qi){ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, > > +15 }; } > > + > > +/* > > +** f_v8qi: > > +** index z0\.b, #0, #1 > > +** ret > > +*/ > > +v8qi > > +f_v8qi (void) > > +{ > > + return (v8qi){ 0, 1, 2, 3, 4, 5, 6, 7 }; } > > + > > +/* > > +** f_v8hi: > > +** index z0\.h, #0, #1 > > +** ret > > +*/ > > +v8hi > > +f_v8hi (void) > > +{ > > + return (v8hi){ 0, 1, 2, 3, 4, 5, 6, 7 }; } > > + > > +/* > > +** f_v4hi: > > +** index z0\.h, #0, #1 > > +** ret > > +*/ > > +v4hi > > +f_v4hi (void) > > +{ > > + return (v4hi){ 0, 1, 2, 3 }; > > +} > > + > > +/* > > +** f_v4si: > > +** index z0\.s, #0, #1 > > +** ret > > +*/ > > +v4si > > +f_v4si (void) > > +{ > > + return (v4si){ 0, 1, 2, 3 }; > > +} > > + > > +/* > > +** f_v2si: > > +** index z0\.s, #0, #1 > > +** ret > > +*/ > > +v2si > > +f_v2si (void) > > +{ > > + return (v2si){ 0, 1 }; > > +} > > + > > +/* > > +** f_v2di: > > +** index z0\.d, #0, #1 > > +** ret > > +*/ > > +v2di > > +f_v2di (void) > > +{ > > + return (v2di){ 0, 1 }; > > +} > > + > > +/* > > +** g_v4si: > > +** index z0\.s, #3, #-4 > > +** ret > > +*/ > > +v4si > > +g_v4si (void) > > +{ > > + return (v4si){ 3, -1, -5, -9 }; > > +} > > -- > > 2.17.1 > >