On Mon, May 4, 2015 at 4:15 AM, Richard Biener <rguent...@suse.de> wrote: > > The following fixes PR65935 where the vectorizer is confused after > SLP operands swapping to see the stmts in the IL with unswapped > operands. As we already swap for different def-kinds just swap > for other swaps as well. > > Bootstrap and regtest running on x86_64-unknown-linux-gnu. > > Richard. > > 2015-05-04 Richard Biener <rguent...@suse.de> > > PR tree-optimization/65935 > * tree-vect-slp.c (vect_build_slp_tree): If we swapped operands > then make sure to apply that swapping to the IL. > > * gcc.dg/vect/pr65935.c: New testcase. > > Index: gcc/tree-vect-slp.c > =================================================================== > *** gcc/tree-vect-slp.c (revision 222758) > --- gcc/tree-vect-slp.c (working copy) > *************** vect_build_slp_tree (loop_vec_info loop_ > *** 1081,1093 **** > dump_printf (MSG_NOTE, "%d ", j); > } > dump_printf (MSG_NOTE, "\n"); > ! /* And try again ... */ > if (vect_build_slp_tree (loop_vinfo, bb_vinfo, &child, > group_size, max_nunits, loads, > vectorization_factor, > ! matches, npermutes, &this_tree_size, > max_tree_size)) > { > oprnd_info->def_stmts = vNULL; > SLP_TREE_CHILDREN (*node).quick_push (child); > continue; > --- 1081,1105 ---- > dump_printf (MSG_NOTE, "%d ", j); > } > dump_printf (MSG_NOTE, "\n"); > ! /* And try again with scratch 'matches' ... */ > ! bool *tem = XALLOCAVEC (bool, group_size); > if (vect_build_slp_tree (loop_vinfo, bb_vinfo, &child, > group_size, max_nunits, loads, > vectorization_factor, > ! tem, npermutes, &this_tree_size, > max_tree_size)) > { > + /* ... so if successful we can apply the operand swapping > + to the GIMPLE IL. This is necessary because for example > + vect_get_slp_defs uses operand indexes and thus expects > + canonical operand order. 
*/ > + for (j = 0; j < group_size; ++j) > + if (!matches[j]) > + { > + gimple stmt = SLP_TREE_SCALAR_STMTS (*node)[j]; > + swap_ssa_operands (stmt, gimple_assign_rhs1_ptr (stmt), > + gimple_assign_rhs2_ptr (stmt)); > + } > oprnd_info->def_stmts = vNULL; > SLP_TREE_CHILDREN (*node).quick_push (child); > continue; > Index: gcc/testsuite/gcc.dg/vect/pr65935.c > =================================================================== > *** gcc/testsuite/gcc.dg/vect/pr65935.c (revision 0) > --- gcc/testsuite/gcc.dg/vect/pr65935.c (working copy) > *************** > *** 0 **** > --- 1,63 ---- > + /* { dg-do run } */ > + /* { dg-additional-options "-O3" } */ > + /* { dg-require-effective-target vect_double } */ > + > + #include "tree-vect.h" > + > + extern void abort (void); > + extern void *malloc (__SIZE_TYPE__); > + > + struct site { > + struct { > + struct { > + double real; > + double imag; > + } e[3][3]; > + } link[32]; > + double phase[32]; > + } *lattice; > + int sites_on_node; > + > + void rephase (void) > + { > + int i,j,k,dir; > + struct site *s; > + for(i=0,s=lattice;i<sites_on_node;i++,s++) > + for(dir=0;dir<32;dir++) > + for(j=0;j<3;j++)for(k=0;k<3;k++) > + { > + s->link[dir].e[j][k].real *= s->phase[dir]; > + s->link[dir].e[j][k].imag *= s->phase[dir]; > + } > + } > + > + int main() > + { > + int i,j,k; > + check_vect (); > + sites_on_node = 1; > + lattice = malloc (sizeof (struct site) * sites_on_node); > + for (i = 0; i < 32; ++i) > + { > + lattice->phase[i] = i; > + for (j = 0; j < 3; ++j) > + for (k = 0; k < 3; ++k) > + { > + lattice->link[i].e[j][k].real = 1.0; > + lattice->link[i].e[j][k].imag = 1.0; > + __asm__ volatile ("" : : : "memory"); > + } > + } > + rephase (); > + for (i = 0; i < 32; ++i) > + for (j = 0; j < 3; ++j) > + for (k = 0; k < 3; ++k) > + if (lattice->link[i].e[j][k].real != i > + || lattice->link[i].e[j][k].imag != i) > + abort (); > + return 0; > + } > + > + /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "slp1" } } */ > + /* 
{ dg-final { cleanup-tree-dump "slp1" } } */ > + /* { dg-final { cleanup-tree-dump "vect" } } */
Are these dg-final directives needed when it is a run-time test? -- H.J.