SVE's INDEX instruction populates a vector with values that start at "base"
and advance by "step" for each subsequent element. We can take advantage of it
to generate Advanced SIMD vector constants when TARGET_SVE is available and
both the base and step values are within [-16, 15], the range of INDEX's
signed 5-bit immediate operands.

For example, with the following function:

typedef int v4si __attribute__ ((vector_size (16)));
v4si
f_v4si (void)
{
  return (v4si){ 0, 1, 2, 3 };
}

GCC currently generates:

f_v4si:
        adrp    x0, .LC4
        ldr     q0, [x0, #:lo12:.LC4]
        ret

.LC4:
        .word   0
        .word   1
        .word   2
        .word   3

With this patch, we generate an INDEX instruction instead if TARGET_SVE is
available:

f_v4si:
        index   z0.s, #0, #1
        ret

        PR target/113328

gcc/ChangeLog:

        * config/aarch64/aarch64.cc (aarch64_simd_valid_immediate): Improve
        handling of some ADVSIMD vectors by using SVE's INDEX if TARGET_SVE is
        available.
        (aarch64_output_simd_mov_immediate): Likewise.

gcc/testsuite/ChangeLog:

        * gcc.target/aarch64/sve/acle/general/dupq_1.c: Update test to use
        SVE's INDEX instruction.
        * gcc.target/aarch64/sve/acle/general/dupq_2.c: Likewise.
        * gcc.target/aarch64/sve/acle/general/dupq_3.c: Likewise.
        * gcc.target/aarch64/sve/acle/general/dupq_4.c: Likewise.
        * gcc.target/aarch64/sve/vec_init_3.c: New test.

Signed-off-by: Pengxuan Zheng <quic_pzh...@quicinc.com>
---
 gcc/config/aarch64/aarch64.cc                 | 12 ++-
 .../aarch64/sve/acle/general/dupq_1.c         |  3 +-
 .../aarch64/sve/acle/general/dupq_2.c         |  3 +-
 .../aarch64/sve/acle/general/dupq_3.c         |  3 +-
 .../aarch64/sve/acle/general/dupq_4.c         |  3 +-
 .../gcc.target/aarch64/sve/vec_init_3.c       | 99 +++++++++++++++++++
 6 files changed, 114 insertions(+), 9 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/vec_init_3.c

diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index 27e24ba70ab..6b3ca57d0eb 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -22991,7 +22991,7 @@ aarch64_simd_valid_immediate (rtx op, simd_immediate_info *info,
   if (CONST_VECTOR_P (op)
       && CONST_VECTOR_DUPLICATE_P (op))
     n_elts = CONST_VECTOR_NPATTERNS (op);
-  else if ((vec_flags & VEC_SVE_DATA)
+  else if (which == AARCH64_CHECK_MOV && TARGET_SVE
           && const_vec_series_p (op, &base, &step))
     {
       gcc_assert (GET_MODE_CLASS (mode) == MODE_VECTOR_INT);
@@ -25249,6 +25249,16 @@ aarch64_output_simd_mov_immediate (rtx const_vector, unsigned width,
 
   if (which == AARCH64_CHECK_MOV)
     {
+      if (info.insn == simd_immediate_info::INDEX)
+       {
+         gcc_assert (TARGET_SVE);
+         snprintf (templ, sizeof (templ), "index\t%%Z0.%c, #"
+                   HOST_WIDE_INT_PRINT_DEC ", #" HOST_WIDE_INT_PRINT_DEC,
+                   element_char, INTVAL (info.u.index.base),
+                   INTVAL (info.u.index.step));
+         return templ;
+       }
+
       mnemonic = info.insn == simd_immediate_info::MVN ? "mvni" : "movi";
       shift_op = (info.u.mov.modifier == simd_immediate_info::MSL
                  ? "msl" : "lsl");
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/dupq_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/dupq_1.c
index 216699b0536..0940bedd0dd 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/dupq_1.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/dupq_1.c
@@ -10,7 +10,6 @@ dupq (int x)
   return svdupq_s32 (x, 1, 2, 3);
 }
 
-/* { dg-final { scan-assembler {\tldr\tq[0-9]+,} } } */
+/* { dg-final { scan-assembler {\tindex\tz[0-9]+\.s, #0, #1} } } */
 /* { dg-final { scan-assembler {\tins\tv[0-9]+\.s\[0\], w0\n} } } */
 /* { dg-final { scan-assembler {\tdup\tz[0-9]+\.q, z[0-9]+\.q\[0\]\n} } } */
-/* { dg-final { scan-assembler {\t\.word\t1\n\t\.word\t2\n\t\.word\t3\n} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/dupq_2.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/dupq_2.c
index d494943a275..218a6601337 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/dupq_2.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/dupq_2.c
@@ -10,7 +10,6 @@ dupq (int x)
   return svdupq_s32 (x, 1, 2, 3);
 }
 
-/* { dg-final { scan-assembler {\tldr\tq[0-9]+,} } } */
+/* { dg-final { scan-assembler {\tindex\tz[0-9]+\.s, #3, #-1} } } */
 /* { dg-final { scan-assembler {\tins\tv[0-9]+\.s\[0\], w0\n} } } */
 /* { dg-final { scan-assembler {\tdup\tz[0-9]+\.q, z[0-9]+\.q\[0\]\n} } } */
-/* { dg-final { scan-assembler {\t\.word\t3\n\t\.word\t2\n\t\.word\t1\n} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/dupq_3.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/dupq_3.c
index 4bc8259df07..245d43b75b5 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/dupq_3.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/dupq_3.c
@@ -10,7 +10,6 @@ dupq (int x)
   return svdupq_s32 (0, 1, x, 3);
 }
 
-/* { dg-final { scan-assembler {\tldr\tq[0-9]+,} } } */
+/* { dg-final { scan-assembler {\tindex\tz[0-9]+\.s, #0, #1} } } */
 /* { dg-final { scan-assembler {\tins\tv[0-9]+\.s\[2\], w0\n} } } */
 /* { dg-final { scan-assembler {\tdup\tz[0-9]+\.q, z[0-9]+\.q\[0\]\n} } } */
-/* { dg-final { scan-assembler {\t\.word\t0\n\t\.word\t1\n\t\.word\t[^\n]*\n\t\.word\t3\n} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/dupq_4.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/dupq_4.c
index 6f9f9f2f22f..cbee6f27b62 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/dupq_4.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/dupq_4.c
@@ -10,7 +10,6 @@ dupq (int x)
   return svdupq_s32 (0, 1, x, 3);
 }
 
-/* { dg-final { scan-assembler {\tldr\tq[0-9]+,} } } */
+/* { dg-final { scan-assembler {\tindex\tz[0-9]+\.s, #3, #-1} } } */
 /* { dg-final { scan-assembler {\tins\tv[0-9]+\.s\[2\], w0\n} } } */
 /* { dg-final { scan-assembler {\tdup\tz[0-9]+\.q, z[0-9]+\.q\[0\]\n} } } */
-/* { dg-final { scan-assembler {\t\.word\t3\n\t\.word\t[^\n]*\n\t\.word\t1\n\t\.word\t0\n} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/vec_init_3.c b/gcc/testsuite/gcc.target/aarch64/sve/vec_init_3.c
new file mode 100644
index 00000000000..25910dbfa1f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/vec_init_3.c
@@ -0,0 +1,99 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+/* { dg-final { check-function-bodies "**" "" "" } } */
+
+typedef char v16qi __attribute__ ((vector_size (16)));
+typedef char v8qi __attribute__ ((vector_size (8)));
+typedef short v8hi __attribute__ ((vector_size (16)));
+typedef short v4hi __attribute__ ((vector_size (8)));
+typedef int v4si __attribute__ ((vector_size (16)));
+typedef int v2si __attribute__ ((vector_size (8)));
+typedef long v2di __attribute__ ((vector_size (16)));
+
+/*
+** f_v16qi:
+**     index   z0\.b, #0, #1
+**     ret
+*/
+v16qi
+f_v16qi (void)
+{
+  return (v16qi){ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 };
+}
+
+/*
+** f_v8qi:
+**     index   z0\.b, #0, #1
+**     ret
+*/
+v8qi
+f_v8qi (void)
+{
+  return (v8qi){ 0, 1, 2, 3, 4, 5, 6, 7 };
+}
+
+/*
+** f_v8hi:
+**     index   z0\.h, #0, #1
+**     ret
+*/
+v8hi
+f_v8hi (void)
+{
+  return (v8hi){ 0, 1, 2, 3, 4, 5, 6, 7 };
+}
+
+/*
+** f_v4hi:
+**     index   z0\.h, #0, #1
+**     ret
+*/
+v4hi
+f_v4hi (void)
+{
+  return (v4hi){ 0, 1, 2, 3 };
+}
+
+/*
+** f_v4si:
+**     index   z0\.s, #0, #1
+**     ret
+*/
+v4si
+f_v4si (void)
+{
+  return (v4si){ 0, 1, 2, 3 };
+}
+
+/*
+** f_v2si:
+**     index   z0\.s, #0, #1
+**     ret
+*/
+v2si
+f_v2si (void)
+{
+  return (v2si){ 0, 1 };
+}
+
+/*
+** f_v2di:
+**     index   z0\.d, #0, #1
+**     ret
+*/
+v2di
+f_v2di (void)
+{
+  return (v2di){ 0, 1 };
+}
+
+/*
+** g_v4si:
+**     index   z0\.s, #3, #-4
+**     ret
+*/
+v4si
+g_v4si (void)
+{
+  return (v4si){ 3, -1, -5, -9 };
+}
-- 
2.17.1

Reply via email to