Just noticed this when playing with some testcases.

Bootstrap and regtest running on x86_64-unknown-linux-gnu.

Richard.

2015-06-11  Richard Biener  <rguent...@suse.de>

        * tree-vect-stmts.c (vectorizable_load): Properly start loads
        with the first element if this is a grouped load.

        * gcc.dg/vect/slp-perm-11.c: New testcase.

Index: gcc/tree-vect-stmts.c
===================================================================
--- gcc/tree-vect-stmts.c       (revision 224324)
+++ gcc/tree-vect-stmts.c       (working copy)
@@ -6247,13 +6247,19 @@ vectorizable_load (gimple stmt, gimple_s
 
       gcc_assert (!nested_in_vect_loop);
 
+      if (grouped_load)
+       first_dr = STMT_VINFO_DATA_REF
+           (vinfo_for_stmt (GROUP_FIRST_ELEMENT (stmt_info)));
+      else
+       first_dr = dr;
+
       stride_base
        = fold_build_pointer_plus
-           (unshare_expr (DR_BASE_ADDRESS (dr)),
+           (DR_BASE_ADDRESS (first_dr),
             size_binop (PLUS_EXPR,
-                        convert_to_ptrofftype (unshare_expr (DR_OFFSET (dr))),
-                        convert_to_ptrofftype (DR_INIT (dr))));
-      stride_step = fold_convert (sizetype, unshare_expr (DR_STEP (dr)));
+                        convert_to_ptrofftype (DR_OFFSET (first_dr)),
+                        convert_to_ptrofftype (DR_INIT (first_dr))));
+      stride_step = fold_convert (sizetype, DR_STEP (first_dr));
 
       /* For a load with loop-invariant (but other than power-of-2)
          stride (i.e. not a grouped access) like so:
@@ -6271,25 +6277,25 @@ vectorizable_load (gimple stmt, gimple_s
             vectemp = {tmp1, tmp2, ...}
          */
 
-      ivstep = stride_step;
-      ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (ivstep), ivstep,
+      ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (ivstep), stride_step,
                            build_int_cst (TREE_TYPE (ivstep), vf));
 
       standard_iv_increment_position (loop, &incr_gsi, &insert_after);
 
-      create_iv (stride_base, ivstep, NULL,
+      create_iv (unshare_expr (stride_base), unshare_expr (ivstep), NULL,
                 loop, &incr_gsi, insert_after,
                 &offvar, NULL);
       incr = gsi_stmt (incr_gsi);
       set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo, NULL));
 
-      stride_step = force_gimple_operand (stride_step, &stmts, true, NULL_TREE);
+      stride_step = force_gimple_operand (unshare_expr (stride_step),
+                                         &stmts, true, NULL_TREE);
       if (stmts)
        gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
 
       prev_stmt_info = NULL;
       running_off = offvar;
-      alias_off = build_int_cst (reference_alias_ptr_type (DR_REF (dr)), 0);
+      alias_off = build_int_cst (reference_alias_ptr_type (DR_REF (first_dr)), 0);
       int nloads = nunits;
       tree ltype = TREE_TYPE (vectype);
       auto_vec<tree> dr_chain;
Index: gcc/testsuite/gcc.dg/vect/slp-perm-11.c
===================================================================
--- gcc/testsuite/gcc.dg/vect/slp-perm-11.c     (revision 0)
+++ gcc/testsuite/gcc.dg/vect/slp-perm-11.c     (working copy)
@@ -0,0 +1,35 @@
+/* { dg-require-effective-target vect_int } */
+
+#include "tree-vect.h"
+
+int a[64];
+int b[128];
+
+void __attribute__((noinline, noclone))
+foo (int s)
+{
+  int i;
+  for (i = 0; i < 32; ++i)
+    {
+      a[2*i] = b[i*s+1];
+      a[2*i+1] = b[i*s];
+    }
+}
+
+int main ()
+{
+  int i;
+  check_vect ();
+  for (i = 0; i < 128; ++i)
+    {
+      b[i] = i;
+      __asm__ volatile ("");
+    }
+  foo (4);
+  for (i = 0; i < 64; ++i)
+    if (a[i] != (4*(i/2) + (i & 1) ^ 1))
+      abort ();
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target vect_perm } } } */

Reply via email to