The following fixes PR65935, where the vectorizer gets confused because, after SLP operand swapping, it still sees the stmts in the IL with unswapped operands. As we already swap in the IL for different def-kinds, just do the same for the other swaps as well.
Bootstrap and regtest running on x86_64-unknown-linux-gnu.

Richard.

2015-05-04  Richard Biener  <rguent...@suse.de>

        PR tree-optimization/65935
        * tree-vect-slp.c (vect_build_slp_tree): If we swapped operands
        then make sure to apply that swapping to the IL.

        * gcc.dg/vect/pr65935.c: New testcase.

Index: gcc/tree-vect-slp.c
===================================================================
*** gcc/tree-vect-slp.c (revision 222758)
--- gcc/tree-vect-slp.c (working copy)
*************** vect_build_slp_tree (loop_vec_info loop_
*** 1081,1093 ****
                    dump_printf (MSG_NOTE, "%d ", j);
                  }
                dump_printf (MSG_NOTE, "\n");
!               /* And try again ... */
                if (vect_build_slp_tree (loop_vinfo, bb_vinfo, &child,
                                         group_size, max_nunits, loads,
                                         vectorization_factor,
!                                        matches, npermutes, &this_tree_size,
                                         max_tree_size))
                  {
                    oprnd_info->def_stmts = vNULL;
                    SLP_TREE_CHILDREN (*node).quick_push (child);
                    continue;
--- 1081,1105 ----
                    dump_printf (MSG_NOTE, "%d ", j);
                  }
                dump_printf (MSG_NOTE, "\n");
!               /* And try again with scratch 'matches' ... */
!               bool *tem = XALLOCAVEC (bool, group_size);
                if (vect_build_slp_tree (loop_vinfo, bb_vinfo, &child,
                                         group_size, max_nunits, loads,
                                         vectorization_factor,
!                                        tem, npermutes, &this_tree_size,
                                         max_tree_size))
                  {
+                   /* ... so if successful we can apply the operand swapping
+                      to the GIMPLE IL. This is necessary because for example
+                      vect_get_slp_defs uses operand indexes and thus expects
+                      canonical operand order. */
+                   for (j = 0; j < group_size; ++j)
+                     if (!matches[j])
+                       {
+                         gimple stmt = SLP_TREE_SCALAR_STMTS (*node)[j];
+                         swap_ssa_operands (stmt, gimple_assign_rhs1_ptr (stmt),
+                                            gimple_assign_rhs2_ptr (stmt));
+                       }
                    oprnd_info->def_stmts = vNULL;
                    SLP_TREE_CHILDREN (*node).quick_push (child);
                    continue;
Index: gcc/testsuite/gcc.dg/vect/pr65935.c
===================================================================
*** gcc/testsuite/gcc.dg/vect/pr65935.c (revision 0)
--- gcc/testsuite/gcc.dg/vect/pr65935.c (working copy)
***************
*** 0 ****
--- 1,63 ----
+ /* { dg-do run } */
+ /* { dg-additional-options "-O3" } */
+ /* { dg-require-effective-target vect_double } */
+ 
+ #include "tree-vect.h"
+ 
+ extern void abort (void);
+ extern void *malloc (__SIZE_TYPE__);
+ 
+ struct site {
+     struct {
+         struct {
+             double real;
+             double imag;
+         } e[3][3];
+     } link[32];
+     double phase[32];
+ } *lattice;
+ int sites_on_node;
+ 
+ void rephase (void)
+ {
+   int i,j,k,dir;
+   struct site *s;
+   for(i=0,s=lattice;i<sites_on_node;i++,s++)
+     for(dir=0;dir<32;dir++)
+       for(j=0;j<3;j++)for(k=0;k<3;k++)
+         {
+           s->link[dir].e[j][k].real *= s->phase[dir];
+           s->link[dir].e[j][k].imag *= s->phase[dir];
+         }
+ }
+ 
+ int main()
+ {
+   int i,j,k;
+   check_vect ();
+   sites_on_node = 1;
+   lattice = malloc (sizeof (struct site) * sites_on_node);
+   for (i = 0; i < 32; ++i)
+     {
+       lattice->phase[i] = i;
+       for (j = 0; j < 3; ++j)
+         for (k = 0; k < 3; ++k)
+           {
+             lattice->link[i].e[j][k].real = 1.0;
+             lattice->link[i].e[j][k].imag = 1.0;
+             __asm__ volatile ("" : : : "memory");
+           }
+     }
+   rephase ();
+   for (i = 0; i < 32; ++i)
+     for (j = 0; j < 3; ++j)
+       for (k = 0; k < 3; ++k)
+         if (lattice->link[i].e[j][k].real != i
+             || lattice->link[i].e[j][k].imag != i)
+           abort ();
+   return 0;
+ }
+ 
+ /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "slp1" } } */
+ /* { dg-final { cleanup-tree-dump "slp1" } } */
+ /* { dg-final { cleanup-tree-dump "vect" } } */