On Thu, Jul 8, 2021 at 2:04 PM Richard Sandiford via Gcc-patches
<gcc-patches@gcc.gnu.org> wrote:
>
> -msve-vector-bits=128 causes the AArch64 port to list 128-bit Advanced
> SIMD as the first-choice mode for vectorisation, with SVE being used for
> things that Advanced SIMD can't handle as easily.  However, ifcvt would
> not then try to use SVE's predicated FP arithmetic, leading to tests
> like TSVC ControlFlow-flt failing to vectorise.
>
> The mask load/store code did try other vector modes, but could also be
> improved to make sure that SVEness sticks when computing derived modes.
>
> (Unlike mode_for_vector, related_vector_mode always returns a vector
> mode, so there's no need to check VECTOR_MODE_P as well.)
>
> Tested on aarch64-linux-gnu and x86_64-linux-gnu.  OK to install?

OK.

Richard.

> Richard
>
>
> gcc/
>         * internal-fn.c (vectorized_internal_fn_supported_p): Handle
>         vector types first.  For scalar types, consider both the preferred
>         vector mode and the alternative vector modes.
>         * optabs-query.c (can_vec_mask_load_store_p): Use the same
>         structure as above, in particular using related_vector_mode
>         for modes provided by autovectorize_vector_modes.
>
> gcc/testsuite/
>         * gcc.target/aarch64/sve/cond_arith_6.c: New test.
> ---
>  gcc/internal-fn.c                             | 28 +++++++++++++++----
>  gcc/optabs-query.c                            | 23 +++++----------
>  .../gcc.target/aarch64/sve/cond_arith_6.c     | 14 ++++++++++
>  3 files changed, 43 insertions(+), 22 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/cond_arith_6.c
>
> diff --git a/gcc/internal-fn.c b/gcc/internal-fn.c
> index fb8b43d1ce2..cd5e63f9acd 100644
> --- a/gcc/internal-fn.c
> +++ b/gcc/internal-fn.c
> @@ -4109,16 +4109,32 @@ expand_internal_call (gcall *stmt)
>  bool
>  vectorized_internal_fn_supported_p (internal_fn ifn, tree type)
>  {
> +  if (VECTOR_MODE_P (TYPE_MODE (type)))
> +    return direct_internal_fn_supported_p (ifn, type, OPTIMIZE_FOR_SPEED);
> +
>    scalar_mode smode;
> -  if (!VECTOR_TYPE_P (type) && is_a <scalar_mode> (TYPE_MODE (type), &smode))
> +  if (!is_a <scalar_mode> (TYPE_MODE (type), &smode))
> +    return false;
> +
> +  machine_mode vmode = targetm.vectorize.preferred_simd_mode (smode);
> +  if (VECTOR_MODE_P (vmode))
>      {
> -      machine_mode vmode = targetm.vectorize.preferred_simd_mode (smode);
> -      if (VECTOR_MODE_P (vmode))
> -       type = build_vector_type_for_mode (type, vmode);
> +      tree vectype = build_vector_type_for_mode (type, vmode);
> +      if (direct_internal_fn_supported_p (ifn, vectype, OPTIMIZE_FOR_SPEED))
> +       return true;
>      }
>
> -  return (VECTOR_MODE_P (TYPE_MODE (type))
> -         && direct_internal_fn_supported_p (ifn, type, OPTIMIZE_FOR_SPEED));
> +  auto_vector_modes vector_modes;
> +  targetm.vectorize.autovectorize_vector_modes (&vector_modes, true);
> +  for (machine_mode base_mode : vector_modes)
> +    if (related_vector_mode (base_mode, smode).exists (&vmode))
> +      {
> +       tree vectype = build_vector_type_for_mode (type, vmode);
> +       if (direct_internal_fn_supported_p (ifn, vectype, OPTIMIZE_FOR_SPEED))
> +         return true;
> +      }
> +
> +  return false;
>  }
>
>  void
> diff --git a/gcc/optabs-query.c b/gcc/optabs-query.c
> index 3248ce2c06e..05ee5f517da 100644
> --- a/gcc/optabs-query.c
> +++ b/gcc/optabs-query.c
> @@ -582,27 +582,18 @@ can_vec_mask_load_store_p (machine_mode mode,
>      return false;
>
>    vmode = targetm.vectorize.preferred_simd_mode (smode);
> -  if (!VECTOR_MODE_P (vmode))
> -    return false;
> -
> -  if (targetm.vectorize.get_mask_mode (vmode).exists (&mask_mode)
> +  if (VECTOR_MODE_P (vmode)
> +      && targetm.vectorize.get_mask_mode (vmode).exists (&mask_mode)
>        && convert_optab_handler (op, vmode, mask_mode) != CODE_FOR_nothing)
>      return true;
>
>    auto_vector_modes vector_modes;
>    targetm.vectorize.autovectorize_vector_modes (&vector_modes, true);
> -  for (unsigned int i = 0; i < vector_modes.length (); ++i)
> -    {
> -      poly_uint64 cur = GET_MODE_SIZE (vector_modes[i]);
> -      poly_uint64 nunits;
> -      if (!multiple_p (cur, GET_MODE_SIZE (smode), &nunits))
> -       continue;
> -      if (mode_for_vector (smode, nunits).exists (&vmode)
> -         && VECTOR_MODE_P (vmode)
> -         && targetm.vectorize.get_mask_mode (vmode).exists (&mask_mode)
> -         && convert_optab_handler (op, vmode, mask_mode) != CODE_FOR_nothing)
> -       return true;
> -    }
> +  for (machine_mode base_mode : vector_modes)
> +    if (related_vector_mode (base_mode, smode).exists (&vmode)
> +       && targetm.vectorize.get_mask_mode (vmode).exists (&mask_mode)
> +       && convert_optab_handler (op, vmode, mask_mode) != CODE_FOR_nothing)
> +      return true;
>    return false;
>  }
>
> diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_arith_6.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_arith_6.c
> new file mode 100644
> index 00000000000..4085ab12444
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_arith_6.c
> @@ -0,0 +1,14 @@
> +/* { dg-options "-O3 -msve-vector-bits=128" } */
> +
> +void
> +f (float *x)
> +{
> +  for (int i = 0; i < 100; ++i)
> +    if (x[i] > 1.0f)
> +      x[i] -= 1.0f;
> +}
> +
> +/* { dg-final { scan-assembler {\tld1w\tz} } } */
> +/* { dg-final { scan-assembler {\tfcmgt\tp} } } */
> +/* { dg-final { scan-assembler {\tfsub\tz} } } */
> +/* { dg-final { scan-assembler {\tst1w\tz} } } */

Reply via email to