The following fixes PR65935, where the vectorizer gets confused because,
after SLP operand swapping, it still sees the stmts in the IL with their
operands unswapped.  As we already apply the swap to the IL for different
def-kinds, just do the same for the other swaps as well.

Bootstrap and regtest running on x86_64-unknown-linux-gnu.

Richard.

2015-05-04  Richard Biener  <rguent...@suse.de>

        PR tree-optimization/65935
        * tree-vect-slp.c (vect_build_slp_tree): If we swapped operands
        then make sure to apply that swapping to the IL.

        * gcc.dg/vect/pr65935.c: New testcase.

Index: gcc/tree-vect-slp.c
===================================================================
*** gcc/tree-vect-slp.c (revision 222758)
--- gcc/tree-vect-slp.c (working copy)
*************** vect_build_slp_tree (loop_vec_info loop_
*** 1081,1093 ****
                dump_printf (MSG_NOTE, "%d ", j);
              }
          dump_printf (MSG_NOTE, "\n");
!         /* And try again ... */
          if (vect_build_slp_tree (loop_vinfo, bb_vinfo, &child,
                                   group_size, max_nunits, loads,
                                   vectorization_factor,
!                                  matches, npermutes, &this_tree_size,
                                   max_tree_size))
            {
              oprnd_info->def_stmts = vNULL;
              SLP_TREE_CHILDREN (*node).quick_push (child);
              continue;
--- 1081,1105 ----
                dump_printf (MSG_NOTE, "%d ", j);
              }
          dump_printf (MSG_NOTE, "\n");
!         /* And try again with scratch 'matches' ... */
!         bool *tem = XALLOCAVEC (bool, group_size);
          if (vect_build_slp_tree (loop_vinfo, bb_vinfo, &child,
                                   group_size, max_nunits, loads,
                                   vectorization_factor,
!                                  tem, npermutes, &this_tree_size,
                                   max_tree_size))
            {
+             /* ... so if successful we can apply the operand swapping
+                to the GIMPLE IL.  This is necessary because for example
+                vect_get_slp_defs uses operand indexes and thus expects
+                canonical operand order.  */
+             for (j = 0; j < group_size; ++j)
+               if (!matches[j])
+                 {
+                   gimple stmt = SLP_TREE_SCALAR_STMTS (*node)[j];
+                   swap_ssa_operands (stmt, gimple_assign_rhs1_ptr (stmt),
+                                      gimple_assign_rhs2_ptr (stmt));
+                 }
              oprnd_info->def_stmts = vNULL;
              SLP_TREE_CHILDREN (*node).quick_push (child);
              continue;
Index: gcc/testsuite/gcc.dg/vect/pr65935.c
===================================================================
*** gcc/testsuite/gcc.dg/vect/pr65935.c (revision 0)
--- gcc/testsuite/gcc.dg/vect/pr65935.c (working copy)
***************
*** 0 ****
--- 1,63 ----
+ /* { dg-do run } */
+ /* { dg-additional-options "-O3" } */
+ /* { dg-require-effective-target vect_double } */
+ 
+ #include "tree-vect.h"
+ 
+ extern void abort (void);
+ extern void *malloc (__SIZE_TYPE__);
+ 
+ struct site {
+     struct {
+       struct {
+           double real;
+           double imag;
+       } e[3][3];
+     } link[32];
+     double phase[32];
+ } *lattice;
+ int sites_on_node;
+ 
+ void rephase (void)
+ {
+   int i,j,k,dir;
+   struct site *s;
+   for(i=0,s=lattice;i<sites_on_node;i++,s++)
+     for(dir=0;dir<32;dir++)
+       for(j=0;j<3;j++)for(k=0;k<3;k++)
+       {
+         s->link[dir].e[j][k].real *= s->phase[dir];
+         s->link[dir].e[j][k].imag *= s->phase[dir];
+       }
+ }
+ 
+ int main()
+ {
+   int i,j,k;
+   check_vect ();
+   sites_on_node = 1;
+   lattice = malloc (sizeof (struct site) * sites_on_node);
+   for (i = 0; i < 32; ++i)
+     {
+       lattice->phase[i] = i;
+       for (j = 0; j < 3; ++j)
+       for (k = 0; k < 3; ++k)
+         {
+           lattice->link[i].e[j][k].real = 1.0;
+           lattice->link[i].e[j][k].imag = 1.0;
+           __asm__ volatile ("" : : : "memory");
+         }
+     }
+   rephase ();
+   for (i = 0; i < 32; ++i)
+     for (j = 0; j < 3; ++j)
+       for (k = 0; k < 3; ++k)
+       if (lattice->link[i].e[j][k].real != i
+           || lattice->link[i].e[j][k].imag != i)
+         abort ();
+   return 0;
+ }
+ 
+ /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "slp1" } } */
+ /* { dg-final { cleanup-tree-dump "slp1" } } */
+ /* { dg-final { cleanup-tree-dump "vect" } } */

Reply via email to