On November 12, 2021 6:53:04 PM GMT+01:00, Richard Sandiford via Gcc-patches
<[email protected]> wrote:
>vect_check_gather_scatter had a binary “does this target support
>internal gather/scatter functions” test. This dates from the time when
>we only handled gathers and scatters via direct target support, with
>x86_64 using built-in functions and aarch64 using IFNs. But now that we
>can emulate gathers, we need to check whether the gather for a particular
>mode is going to be emulated or not.
>
>Without this, enabling SVE regresses emulated Advanced SIMD gather
>sequences in cases where SVE isn't used.
>
>Livermore kernel 15 can now be vectorised with Advanced SIMD when
>SVE is enabled.
>
>Regstrapped on aarch64-linux-gnu and x86_64-linux-gnu. OK to install?
Ok.
Richard.
>Richard
>
>
>gcc/
> * genopinit.c (main): Turn supports_vec_gather_load and
> supports_vec_scatter_store into signed char arrays and remove
> supports_vec_gather_load_cached and supports_vec_scatter_store_cached.
> * optabs-query.c (supports_vec_convert_optab_p): Add a mode parameter.
> If the mode is not VOIDmode, test only for that mode.
> (supports_vec_gather_load_p): Likewise.
> (supports_vec_scatter_store_p): Likewise.
> * optabs-query.h (supports_vec_gather_load_p): Likewise.
> (supports_vec_scatter_store_p): Likewise.
> * tree-vect-data-refs.c (vect_check_gather_scatter): Pass the
> vector mode to supports_vec_gather_load_p and
> supports_vec_scatter_store_p.
>
>gcc/testsuite/
> * gfortran.dg/vect/vect-8.f90: Bump number of vectorized loops
> to 25 for SVE.
> * gcc.target/aarch64/sve/gather_load_10.c: New test.
>---
> gcc/genopinit.c | 11 ++--
> gcc/optabs-query.c | 55 +++++++++----------
> gcc/optabs-query.h | 4 +-
> .../gcc.target/aarch64/sve/gather_load_10.c | 18 ++++++
> gcc/testsuite/gfortran.dg/vect/vect-8.f90 | 3 +-
> gcc/tree-vect-data-refs.c | 4 +-
> 6 files changed, 56 insertions(+), 39 deletions(-)
> create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/gather_load_10.c
>
>diff --git a/gcc/genopinit.c b/gcc/genopinit.c
>index 195ddf74fa2..c6be748079d 100644
>--- a/gcc/genopinit.c
>+++ b/gcc/genopinit.c
>@@ -313,12 +313,11 @@ main (int argc, const char **argv)
> " /* Patterns that are used by optabs that are enabled for this target. */\n"
> " bool pat_enable[NUM_OPTAB_PATTERNS];\n"
> "\n"
>- " /* Cache if the target supports vec_gather_load for at least one vector\n"
>- " mode. */\n"
>- " bool supports_vec_gather_load;\n"
>- " bool supports_vec_gather_load_cached;\n"
>- " bool supports_vec_scatter_store;\n"
>- " bool supports_vec_scatter_store_cached;\n"
>+ " /* Index VOIDmode caches if the target supports vec_gather_load for any\n"
>+ " vector mode. Every other index X caches specifically for mode X.\n"
>+ " 1 means yes, -1 means no. */\n"
>+ " signed char supports_vec_gather_load[NUM_MACHINE_MODES];\n"
>+ " signed char supports_vec_scatter_store[NUM_MACHINE_MODES];\n"
> "};\n"
> "extern void init_all_optabs (struct target_optabs *);\n"
> "\n"
>diff --git a/gcc/optabs-query.c b/gcc/optabs-query.c
>index a6dd0fed610..1c0778cba55 100644
>--- a/gcc/optabs-query.c
>+++ b/gcc/optabs-query.c
>@@ -712,13 +712,16 @@ lshift_cheap_p (bool speed_p)
> return cheap[speed_p];
> }
>
>-/* Return true if vector conversion optab OP supports at least one mode,
>- given that the second mode is always an integer vector. */
>+/* If MODE is not VOIDmode, return true if vector conversion optab OP supports
>+ that mode, given that the second mode is always an integer vector.
>+ If MODE is VOIDmode, return true if OP supports any vector mode. */
>
> static bool
>-supports_vec_convert_optab_p (optab op)
>+supports_vec_convert_optab_p (optab op, machine_mode mode)
> {
>- for (int i = 0; i < NUM_MACHINE_MODES; ++i)
>+ int start = mode == VOIDmode ? 0 : mode;
>+ int end = mode == VOIDmode ? MAX_MACHINE_MODE : mode;
>+ for (int i = start; i <= end; ++i)
> if (VECTOR_MODE_P ((machine_mode) i))
> for (int j = MIN_MODE_VECTOR_INT; j < MAX_MODE_VECTOR_INT; ++j)
> if (convert_optab_handler (op, (machine_mode) i,
>@@ -728,39 +731,35 @@ supports_vec_convert_optab_p (optab op)
> return false;
> }
>
>-/* Return true if vec_gather_load is available for at least one vector
>- mode. */
>+/* If MODE is not VOIDmode, return true if vec_gather_load is available for
>+ that mode. If MODE is VOIDmode, return true if gather_load is available
>+ for at least one vector mode. */
>
> bool
>-supports_vec_gather_load_p ()
>+supports_vec_gather_load_p (machine_mode mode)
> {
>- if (this_fn_optabs->supports_vec_gather_load_cached)
>- return this_fn_optabs->supports_vec_gather_load;
>+ if (!this_fn_optabs->supports_vec_gather_load[mode])
>+ this_fn_optabs->supports_vec_gather_load[mode]
>+ = (supports_vec_convert_optab_p (gather_load_optab, mode)
>+ || supports_vec_convert_optab_p (mask_gather_load_optab, mode)
>+ ? 1 : -1);
>
>- this_fn_optabs->supports_vec_gather_load_cached = true;
>-
>- this_fn_optabs->supports_vec_gather_load
>- = (supports_vec_convert_optab_p (gather_load_optab)
>- || supports_vec_convert_optab_p (mask_gather_load_optab));
>-
>- return this_fn_optabs->supports_vec_gather_load;
>+ return this_fn_optabs->supports_vec_gather_load[mode] > 0;
> }
>
>-/* Return true if vec_scatter_store is available for at least one vector
>- mode. */
>+/* If MODE is not VOIDmode, return true if vec_scatter_store is available for
>+ that mode. If MODE is VOIDmode, return true if scatter_store is available
>+ for at least one vector mode. */
>
> bool
>-supports_vec_scatter_store_p ()
>+supports_vec_scatter_store_p (machine_mode mode)
> {
>- if (this_fn_optabs->supports_vec_scatter_store_cached)
>- return this_fn_optabs->supports_vec_scatter_store;
>-
>- this_fn_optabs->supports_vec_scatter_store_cached = true;
>-
>- this_fn_optabs->supports_vec_scatter_store
>- = (supports_vec_convert_optab_p (scatter_store_optab)
>- || supports_vec_convert_optab_p (mask_scatter_store_optab));
>+ if (!this_fn_optabs->supports_vec_scatter_store[mode])
>+ this_fn_optabs->supports_vec_scatter_store[mode]
>+ = (supports_vec_convert_optab_p (scatter_store_optab, mode)
>+ || supports_vec_convert_optab_p (mask_scatter_store_optab, mode)
>+ ? 1 : -1);
>
>- return this_fn_optabs->supports_vec_scatter_store;
>+ return this_fn_optabs->supports_vec_scatter_store[mode] > 0;
> }
>
>diff --git a/gcc/optabs-query.h b/gcc/optabs-query.h
>index 876a3a6f348..6b24eecba46 100644
>--- a/gcc/optabs-query.h
>+++ b/gcc/optabs-query.h
>@@ -193,8 +193,8 @@ bool can_compare_and_swap_p (machine_mode, bool);
> bool can_atomic_exchange_p (machine_mode, bool);
> bool can_atomic_load_p (machine_mode);
> bool lshift_cheap_p (bool);
>-bool supports_vec_gather_load_p ();
>-bool supports_vec_scatter_store_p ();
>+bool supports_vec_gather_load_p (machine_mode = E_VOIDmode);
>+bool supports_vec_scatter_store_p (machine_mode = E_VOIDmode);
>
> /* Version of find_widening_optab_handler_and_mode that operates on
> specific mode types. */
>diff --git a/gcc/testsuite/gcc.target/aarch64/sve/gather_load_10.c b/gcc/testsuite/gcc.target/aarch64/sve/gather_load_10.c
>new file mode 100644
>index 00000000000..2a07c0be866
>--- /dev/null
>+++ b/gcc/testsuite/gcc.target/aarch64/sve/gather_load_10.c
>@@ -0,0 +1,18 @@
>+/* { dg-do compile } */
>+/* { dg-options "-O2 -fopenmp-simd -msve-vector-bits=256 -fno-vect-cost-model" } */
>+
>+#include <stdint.h>
>+
>+void
>+foo (uint64_t *restrict x, uint64_t *restrict y, uint64_t *restrict index)
>+{
>+#pragma omp for simd simdlen(2)
>+ for (int i = 0; i < 128; ++i)
>+ x[i] += y[index[i]];
>+}
>+
>+/* { dg-final { scan-assembler-times {\tldr\td[0-9]+, \[x[0-9]+, x[0-9]+, lsl #?3\]} 2 } } */
>+/* { dg-final { scan-assembler-not {\tshl\tv[0-9]+\.2d,} } } */
>+/* { dg-final { scan-assembler-not {\tumov\t} } } */
>+/* { dg-final { scan-assembler {\tadd\tv[0-9]+\.2d,} } } */
>+/* { dg-final { scan-assembler {\tstr\tq[0-9]+,} } } */
>diff --git a/gcc/testsuite/gfortran.dg/vect/vect-8.f90 b/gcc/testsuite/gfortran.dg/vect/vect-8.f90
>index c8a7d896bac..ca72ddcffca 100644
>--- a/gcc/testsuite/gfortran.dg/vect/vect-8.f90
>+++ b/gcc/testsuite/gfortran.dg/vect/vect-8.f90
>@@ -704,6 +704,7 @@ CALL track('KERNEL ')
> RETURN
> END SUBROUTINE kernel
>
>-! { dg-final { scan-tree-dump-times "vectorized 24 loops" 1 "vect" { target aarch64*-*-* } } }
>+! { dg-final { scan-tree-dump-times "vectorized 25 loops" 1 "vect" { target aarch64_sve } } }
>+! { dg-final { scan-tree-dump-times "vectorized 24 loops" 1 "vect" { target { aarch64*-*-* && { ! aarch64_sve } } } } }
> ! { dg-final { scan-tree-dump-times "vectorized 2\[234\] loops" 1 "vect" { target { vect_intdouble_cvt && { ! aarch64*-*-* } } } } }
> ! { dg-final { scan-tree-dump-times "vectorized 17 loops" 1 "vect" { target { { ! vect_intdouble_cvt } && { ! aarch64*-*-* } } } } }
>diff --git a/gcc/tree-vect-data-refs.c b/gcc/tree-vect-data-refs.c
>index 2ea8e983fe6..f1d7f01a9ce 100644
>--- a/gcc/tree-vect-data-refs.c
>+++ b/gcc/tree-vect-data-refs.c
>@@ -3969,8 +3969,8 @@ vect_check_gather_scatter (stmt_vec_info stmt_info, loop_vec_info loop_vinfo,
> /* True if we should aim to use internal functions rather than
> built-in functions. */
> bool use_ifn_p = (DR_IS_READ (dr)
>- ? supports_vec_gather_load_p ()
>- : supports_vec_scatter_store_p ());
>+ ? supports_vec_gather_load_p (TYPE_MODE (vectype))
>+ : supports_vec_scatter_store_p (TYPE_MODE (vectype)));
>
> base = DR_REF (dr);
> /* For masked loads/stores, DR_REF (dr) is an artificial MEM_REF,