On Fri, Nov 8, 2019 at 5:21 PM Richard Sandiford <richard.sandif...@arm.com> wrote:
>
> This PR was caused by the SLP handling in get_group_load_store_type
> returning VMAT_CONTIGUOUS rather than VMAT_CONTIGUOUS_REVERSE for
> downward groups.
>
> A more elaborate fix would be to try to combine the reverse permutation
> into SLP_TREE_LOAD_PERMUTATION for loads, but that's really a follow-on
> optimisation and not backport material.  It might also not be a win,
> if the target supports (say) reversing and odd/even swaps as
> independent permutes but doesn't recognise the combined form.
>
> Tested on aarch64-linux-gnu and x86_64-linux-gnu.  OK for trunk?

OK.
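For reference, the pattern being fixed is a grouped access whose step
through memory is negative.  A minimal sketch, distilled from the test
in the patch (the function name is illustrative, and the caller is
expected to pass src pointing one past the end of the source array,
as foo is called with b + N below):

  struct C { int r, i; };

  /* Each iteration loads the two-element group src[-1 - i].{r,i}, so
     successive groups walk downwards through memory: the access is
     contiguous but reversed, and should be classified as
     VMAT_CONTIGUOUS_REVERSE rather than plain VMAT_CONTIGUOUS.  */
  void
  scale (struct C *__restrict dst, struct C *__restrict src, int n)
  {
    for (int i = 0; i < n; i++)
      {
        dst[i].r = 2 * src[-1 - i].r;
        dst[i].i = 2 * src[-1 - i].i;
      }
  }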
> OK for branches without the assert, after a grace period?

OK as well.

Thanks,
Richard.

> Thanks,
> Richard
>
>
> 2019-11-08  Richard Sandiford  <richard.sandif...@arm.com>
>
> gcc/
>       PR tree-optimization/92420
>       * tree-vect-stmts.c (get_negative_load_store_type): Move further
>       up file.
>       (get_group_load_store_type): Use it for reversed SLP accesses.
>
> gcc/testsuite/
>       * gcc.dg/vect/pr92420.c: New test.
>
> Index: gcc/tree-vect-stmts.c
> ===================================================================
> --- gcc/tree-vect-stmts.c       2019-11-08 09:06:29.482897293 +0000
> +++ gcc/tree-vect-stmts.c       2019-11-08 16:18:10.864080575 +0000
> @@ -2147,6 +2147,56 @@ perm_mask_for_reverse (tree vectype)
>    return vect_gen_perm_mask_checked (vectype, indices);
>  }
>
> +/* A subroutine of get_load_store_type, with a subset of the same
> +   arguments.  Handle the case where STMT_INFO is a load or store that
> +   accesses consecutive elements with a negative step.  */
> +
> +static vect_memory_access_type
> +get_negative_load_store_type (stmt_vec_info stmt_info, tree vectype,
> +                              vec_load_store_type vls_type,
> +                              unsigned int ncopies)
> +{
> +  dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
> +  dr_alignment_support alignment_support_scheme;
> +
> +  if (ncopies > 1)
> +    {
> +      if (dump_enabled_p ())
> +        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
> +                         "multiple types with negative step.\n");
> +      return VMAT_ELEMENTWISE;
> +    }
> +
> +  alignment_support_scheme = vect_supportable_dr_alignment (dr_info, false);
> +  if (alignment_support_scheme != dr_aligned
> +      && alignment_support_scheme != dr_unaligned_supported)
> +    {
> +      if (dump_enabled_p ())
> +        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
> +                         "negative step but alignment required.\n");
> +      return VMAT_ELEMENTWISE;
> +    }
> +
> +  if (vls_type == VLS_STORE_INVARIANT)
> +    {
> +      if (dump_enabled_p ())
> +        dump_printf_loc (MSG_NOTE, vect_location,
> +                         "negative step with invariant source;"
> +                         " no permute needed.\n");
> +      return VMAT_CONTIGUOUS_DOWN;
> +    }
> +
> +  if (!perm_mask_for_reverse (vectype))
> +    {
> +      if (dump_enabled_p ())
> +        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
> +                         "negative step and reversing not supported.\n");
> +      return VMAT_ELEMENTWISE;
> +    }
> +
> +  return VMAT_CONTIGUOUS_REVERSE;
> +}
> +
>  /* STMT_INFO is either a masked or unconditional store.  Return the value
>     being stored.  */
>
> @@ -2273,7 +2323,15 @@ get_group_load_store_type (stmt_vec_info
>                               "Peeling for outer loop is not supported\n");
>             return false;
>           }
> -       *memory_access_type = VMAT_CONTIGUOUS;
> +       int cmp = compare_step_with_zero (stmt_info);
> +       if (cmp < 0)
> +         *memory_access_type = get_negative_load_store_type
> +           (stmt_info, vectype, vls_type, 1);
> +       else
> +         {
> +           gcc_assert (!loop_vinfo || cmp > 0);
> +           *memory_access_type = VMAT_CONTIGUOUS;
> +         }
>        }
>      }
>    else
> @@ -2375,56 +2433,6 @@ get_group_load_store_type (stmt_vec_info
>    return true;
>  }
>
> -/* A subroutine of get_load_store_type, with a subset of the same
> -   arguments.  Handle the case where STMT_INFO is a load or store that
> -   accesses consecutive elements with a negative step.  */
> -
> -static vect_memory_access_type
> -get_negative_load_store_type (stmt_vec_info stmt_info, tree vectype,
> -                              vec_load_store_type vls_type,
> -                              unsigned int ncopies)
> -{
> -  dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
> -  dr_alignment_support alignment_support_scheme;
> -
> -  if (ncopies > 1)
> -    {
> -      if (dump_enabled_p ())
> -        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
> -                         "multiple types with negative step.\n");
> -      return VMAT_ELEMENTWISE;
> -    }
> -
> -  alignment_support_scheme = vect_supportable_dr_alignment (dr_info, false);
> -  if (alignment_support_scheme != dr_aligned
> -      && alignment_support_scheme != dr_unaligned_supported)
> -    {
> -      if (dump_enabled_p ())
> -        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
> -                         "negative step but alignment required.\n");
> -      return VMAT_ELEMENTWISE;
> -    }
> -
> -  if (vls_type == VLS_STORE_INVARIANT)
> -    {
> -      if (dump_enabled_p ())
> -        dump_printf_loc (MSG_NOTE, vect_location,
> -                         "negative step with invariant source;"
> -                         " no permute needed.\n");
> -      return VMAT_CONTIGUOUS_DOWN;
> -    }
> -
> -  if (!perm_mask_for_reverse (vectype))
> -    {
> -      if (dump_enabled_p ())
> -        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
> -                         "negative step and reversing not supported.\n");
> -      return VMAT_ELEMENTWISE;
> -    }
> -
> -  return VMAT_CONTIGUOUS_REVERSE;
> -}
> -
>  /* Analyze load or store statement STMT_INFO of type VLS_TYPE.  Return true
>     if there is a memory access type that the vectorized form can use,
>     storing it in *MEMORY_ACCESS_TYPE if so.  If we decide to use gathers
> Index: gcc/testsuite/gcc.dg/vect/pr92420.c
> ===================================================================
> --- /dev/null   2019-09-17 11:41:18.176664108 +0100
> +++ gcc/testsuite/gcc.dg/vect/pr92420.c 2019-11-08 16:18:10.864080575 +0000
> @@ -0,0 +1,48 @@
> +/* { dg-additional-options "-mavx2" { target avx_runtime } } */
> +
> +#include "tree-vect.h"
> +
> +#define N 16
> +struct C { int r, i; };
> +struct C a[N], b[N], c[N], d[N], e[N];
> +
> +__attribute__((noipa)) static void
> +foo (struct C *__restrict x, struct C *__restrict y, struct C *__restrict z,
> +     int w)
> +{
> +  int i;
> +  for (int i = 0; i < w; i++)
> +    {
> +      z[i].r = x[i].r * y[-1 - i].r - x[i].i * y[-1 - i].i;
> +      z[i].i = x[i].i * y[-1 - i].r + x[i].r * y[-1 - i].i;
> +    }
> +}
> +
> +__attribute__((noipa)) static void
> +bar (struct C *__restrict x, struct C *__restrict y, struct C *__restrict z,
> +     int w)
> +{
> +  int i;
> +  for (int i = 0; i < w; i++)
> +    {
> +      z[i].r = x[i].r * y[i].r - x[i].i * y[i].i;
> +      z[i].i = x[i].i * y[i].r + x[i].r * y[i].i;
> +    }
> +}
> +
> +int
> +main ()
> +{
> +  check_vect ();
> +  int i;
> +  for (i = 0; i < N; ++i)
> +    {
> +      a[i].r = N - i; a[i].i = i - N;
> +      b[i].r = i - N; b[i].i = i + N;
> +      c[i].r = -1 - i; c[i].i = 2 * N - 1 - i;
> +    }
> +  foo (a, b + N, d, N);
> +  bar (a, c, e, N);
> +  for (i = 0; i < N; ++i)
> +    if (d[i].r != e[i].r || d[i].i != e[i].i)
> +      __builtin_abort ();
> +  return 0;
> +}
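A quick check that the test's cross-validation is sound, given the
initialisation above: foo is called with y = b + N and reads y[-1 - i],
i.e. b[N - 1 - i], while bar reads c[i] forwards, and c is set up as
the mirror image of b:

  b[N - 1 - i].r = (N - 1 - i) - N = -1 - i         = c[i].r
  b[N - 1 - i].i = (N - 1 - i) + N = 2 * N - 1 - i  = c[i].i

So d and e must match element for element, and a miscompiled reverse
permute in foo's vectorised loop trips the final comparison loop and
calls __builtin_abort ().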