I noticed this while playing with some testcases: strided grouped loads need to start at the first element of the group. Bootstrap and regression testing are in progress on x86_64-unknown-linux-gnu.
Richard. 2015-06-11 Richard Biener <rguent...@suse.de> * tree-vect-stmts.c (vectorizable_load): Properly start loads with the first element if this is grouped loads. * gcc.dg/vect/slp-perm-11.c: New testcase. Index: gcc/tree-vect-stmts.c =================================================================== --- gcc/tree-vect-stmts.c (revision 224324) +++ gcc/tree-vect-stmts.c (working copy) @@ -6247,13 +6247,19 @@ vectorizable_load (gimple stmt, gimple_s gcc_assert (!nested_in_vect_loop); + if (grouped_load) + first_dr = STMT_VINFO_DATA_REF + (vinfo_for_stmt (GROUP_FIRST_ELEMENT (stmt_info))); + else + first_dr = dr; + stride_base = fold_build_pointer_plus - (unshare_expr (DR_BASE_ADDRESS (dr)), + (DR_BASE_ADDRESS (first_dr), size_binop (PLUS_EXPR, - convert_to_ptrofftype (unshare_expr (DR_OFFSET (dr))), - convert_to_ptrofftype (DR_INIT (dr)))); - stride_step = fold_convert (sizetype, unshare_expr (DR_STEP (dr))); + convert_to_ptrofftype (DR_OFFSET (first_dr)), + convert_to_ptrofftype (DR_INIT (first_dr)))); + stride_step = fold_convert (sizetype, DR_STEP (first_dr)); /* For a load with loop-invariant (but other than power-of-2) stride (i.e. 
not a grouped access) like so: @@ -6271,25 +6277,25 @@ vectorizable_load (gimple stmt, gimple_s vectemp = {tmp1, tmp2, ...} */ - ivstep = stride_step; - ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (ivstep), ivstep, + ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (ivstep), stride_step, build_int_cst (TREE_TYPE (ivstep), vf)); standard_iv_increment_position (loop, &incr_gsi, &insert_after); - create_iv (stride_base, ivstep, NULL, + create_iv (unshare_expr (stride_base), unshare_expr (ivstep), NULL, loop, &incr_gsi, insert_after, &offvar, NULL); incr = gsi_stmt (incr_gsi); set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo, NULL)); - stride_step = force_gimple_operand (stride_step, &stmts, true, NULL_TREE); + stride_step = force_gimple_operand (unshare_expr (stride_step), + &stmts, true, NULL_TREE); if (stmts) gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts); prev_stmt_info = NULL; running_off = offvar; - alias_off = build_int_cst (reference_alias_ptr_type (DR_REF (dr)), 0); + alias_off = build_int_cst (reference_alias_ptr_type (DR_REF (first_dr)), 0); int nloads = nunits; tree ltype = TREE_TYPE (vectype); auto_vec<tree> dr_chain; Index: gcc/testsuite/gcc.dg/vect/slp-perm-11.c =================================================================== --- gcc/testsuite/gcc.dg/vect/slp-perm-11.c (revision 0) +++ gcc/testsuite/gcc.dg/vect/slp-perm-11.c (working copy) @@ -0,0 +1,35 @@ +/* { dg-require-effective-target vect_int } */ + +#include "tree-vect.h" + +int a[64]; +int b[128]; + +void __attribute__((noinline, noclone)) +foo (int s) +{ + int i; + for (i = 0; i < 32; ++i) + { + a[2*i] = b[i*s+1]; + a[2*i+1] = b[i*s]; + } +} + +int main () +{ + int i; + check_vect (); + for (i = 0; i < 128; ++i) + { + b[i] = i; + __asm__ volatile (""); + } + foo (4); + for (i = 0; i < 64; ++i) + if (a[i] != (4*(i/2) + (i & 1) ^ 1)) + abort (); + return 0; +} + +/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target vect_perm } } } */