On Fri, Nov 8, 2019 at 5:21 PM Richard Sandiford
<richard.sandif...@arm.com> wrote:
>
> This PR was caused by the SLP handling in get_group_load_store_type
> returning VMAT_CONTIGUOUS rather than VMAT_CONTIGUOUS_REVERSE for
> downward groups.
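>
> For example (mirroring the new testcase below), such a downward group
> is a pair of accesses walked with a negative step through y:
>
>   for (int i = 0; i < w; i++)
>     {
>       z[i].r = y[-1 - i].r;  /* group of two loads per iteration,  */
>       z[i].i = y[-1 - i].i;  /* moving downwards through y.        */
>     }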
>
> A more elaborate fix would be to try to combine the reverse permutation
> into SLP_TREE_LOAD_PERMUTATION for loads, but that's really a follow-on
> optimisation and not backport material.  It might also not be a win
> if the target supports (say) reversing and odd/even swaps as
> independent permutes but doesn't recognise the combined form.
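>
> (Concretely: for a four-lane vector, a reversal {3,2,1,0} composed
> with an odd/even swap {1,0,3,2} would be the single permutation
> {2,3,0,1}; a target might handle the two component permutes natively
> but fall back for the combined mask.)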
>
> Tested on aarch64-linux-gnu and x86_64-linux-gnu.  OK for trunk?

OK.

> OK for branches without the assert, after a grace period?

OK as well.

Thanks,
Richard.

> Thanks,
> Richard
>
>
> 2019-11-08  Richard Sandiford  <richard.sandif...@arm.com>
>
> gcc/
>         PR tree-optimization/92420
>         * tree-vect-stmts.c (get_negative_load_store_type): Move further
>         up file.
>         (get_group_load_store_type): Use it for reversed SLP accesses.
>
> gcc/testsuite/
>         * gcc.dg/vect/pr92420.c: New test.
>
> Index: gcc/tree-vect-stmts.c
> ===================================================================
> --- gcc/tree-vect-stmts.c       2019-11-08 09:06:29.482897293 +0000
> +++ gcc/tree-vect-stmts.c       2019-11-08 16:18:10.864080575 +0000
> @@ -2147,6 +2147,56 @@ perm_mask_for_reverse (tree vectype)
>    return vect_gen_perm_mask_checked (vectype, indices);
>  }
>
> +/* A subroutine of get_load_store_type, with a subset of the same
> +   arguments.  Handle the case where STMT_INFO is a load or store that
> +   accesses consecutive elements with a negative step.  */
> +
> +static vect_memory_access_type
> +get_negative_load_store_type (stmt_vec_info stmt_info, tree vectype,
> +                             vec_load_store_type vls_type,
> +                             unsigned int ncopies)
> +{
> +  dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
> +  dr_alignment_support alignment_support_scheme;
> +
> +  if (ncopies > 1)
> +    {
> +      if (dump_enabled_p ())
> +       dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
> +                        "multiple types with negative step.\n");
> +      return VMAT_ELEMENTWISE;
> +    }
> +
> +  alignment_support_scheme = vect_supportable_dr_alignment (dr_info, false);
> +  if (alignment_support_scheme != dr_aligned
> +      && alignment_support_scheme != dr_unaligned_supported)
> +    {
> +      if (dump_enabled_p ())
> +       dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
> +                        "negative step but alignment required.\n");
> +      return VMAT_ELEMENTWISE;
> +    }
> +
> +  if (vls_type == VLS_STORE_INVARIANT)
> +    {
> +      if (dump_enabled_p ())
> +       dump_printf_loc (MSG_NOTE, vect_location,
> +                        "negative step with invariant source;"
> +                        " no permute needed.\n");
> +      return VMAT_CONTIGUOUS_DOWN;
> +    }
> +
> +  if (!perm_mask_for_reverse (vectype))
> +    {
> +      if (dump_enabled_p ())
> +       dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
> +                        "negative step and reversing not supported.\n");
> +      return VMAT_ELEMENTWISE;
> +    }
> +
> +  return VMAT_CONTIGUOUS_REVERSE;
> +}
> +
>  /* STMT_INFO is either a masked or unconditional store.  Return the value
>     being stored.  */
>
> @@ -2273,7 +2323,15 @@ get_group_load_store_type (stmt_vec_info
>                                  "Peeling for outer loop is not supported\n");
>               return false;
>             }
> -         *memory_access_type = VMAT_CONTIGUOUS;
> +         int cmp = compare_step_with_zero (stmt_info);
> +         if (cmp < 0)
> +           *memory_access_type = get_negative_load_store_type
> +             (stmt_info, vectype, vls_type, 1);
> +         else
> +           {
> +             gcc_assert (!loop_vinfo || cmp > 0);
> +             *memory_access_type = VMAT_CONTIGUOUS;
> +           }
>         }
>      }
>    else
> @@ -2375,56 +2433,6 @@ get_group_load_store_type (stmt_vec_info
>    return true;
>  }
>
> -/* A subroutine of get_load_store_type, with a subset of the same
> -   arguments.  Handle the case where STMT_INFO is a load or store that
> -   accesses consecutive elements with a negative step.  */
> -
> -static vect_memory_access_type
> -get_negative_load_store_type (stmt_vec_info stmt_info, tree vectype,
> -                             vec_load_store_type vls_type,
> -                             unsigned int ncopies)
> -{
> -  dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
> -  dr_alignment_support alignment_support_scheme;
> -
> -  if (ncopies > 1)
> -    {
> -      if (dump_enabled_p ())
> -       dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
> -                        "multiple types with negative step.\n");
> -      return VMAT_ELEMENTWISE;
> -    }
> -
> -  alignment_support_scheme = vect_supportable_dr_alignment (dr_info, false);
> -  if (alignment_support_scheme != dr_aligned
> -      && alignment_support_scheme != dr_unaligned_supported)
> -    {
> -      if (dump_enabled_p ())
> -       dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
> -                        "negative step but alignment required.\n");
> -      return VMAT_ELEMENTWISE;
> -    }
> -
> -  if (vls_type == VLS_STORE_INVARIANT)
> -    {
> -      if (dump_enabled_p ())
> -       dump_printf_loc (MSG_NOTE, vect_location,
> -                        "negative step with invariant source;"
> -                        " no permute needed.\n");
> -      return VMAT_CONTIGUOUS_DOWN;
> -    }
> -
> -  if (!perm_mask_for_reverse (vectype))
> -    {
> -      if (dump_enabled_p ())
> -       dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
> -                        "negative step and reversing not supported.\n");
> -      return VMAT_ELEMENTWISE;
> -    }
> -
> -  return VMAT_CONTIGUOUS_REVERSE;
> -}
> -
>  /* Analyze load or store statement STMT_INFO of type VLS_TYPE.  Return true
>     if there is a memory access type that the vectorized form can use,
>     storing it in *MEMORY_ACCESS_TYPE if so.  If we decide to use gathers
> Index: gcc/testsuite/gcc.dg/vect/pr92420.c
> ===================================================================
> --- /dev/null   2019-09-17 11:41:18.176664108 +0100
> +++ gcc/testsuite/gcc.dg/vect/pr92420.c 2019-11-08 16:18:10.864080575 +0000
> @@ -0,0 +1,48 @@
> +/* { dg-additional-options "-mavx2" { target avx_runtime } } */
> +
> +#include "tree-vect.h"
> +
> +#define N 16
> +struct C { int r, i; };
> +struct C a[N], b[N], c[N], d[N], e[N];
> +
> +__attribute__((noipa)) static void
> +foo (struct C *__restrict x, struct C *__restrict y, struct C *__restrict z, int w)
> +{
> +  int i;
> +  for (int i = 0; i < w; i++)
> +    {
> +      z[i].r = x[i].r * y[-1 - i].r - x[i].i * y[-1 - i].i;
> +      z[i].i = x[i].i * y[-1 - i].r + x[i].r * y[-1 - i].i;
> +    }
> +}
> +
> +__attribute__((noipa)) static void
> +bar (struct C *__restrict x, struct C *__restrict y, struct C *__restrict z, int w)
> +{
> +  int i;
> +  for (int i = 0; i < w; i++)
> +    {
> +      z[i].r = x[i].r * y[i].r - x[i].i * y[i].i;
> +      z[i].i = x[i].i * y[i].r + x[i].r * y[i].i;
> +    }
> +}
> +
> +int
> +main ()
> +{
> +  check_vect ();
> +  int i;
> +  for (i = 0; i < N; ++i)
> +    {
> +      a[i].r = N - i; a[i].i = i - N;
> +      b[i].r = i - N; b[i].i = i + N;
> +      c[i].r = -1 - i; c[i].i = 2 * N - 1 - i;
> +    }
> +  foo (a, b + N, d, N);
> +  bar (a, c, e, N);
> +  for (i = 0; i < N; ++i)
> +    if (d[i].r != e[i].r || d[i].i != e[i].i)
> +      __builtin_abort ();
> +  return 0;
> +}
