Hi! While hacking on the gomp4 simd clauses stuff, I've noticed that we don't vectorize IVs with non-constant step. The following patch implements it, basically we just can't rely that the step or VF * step is a constant and instead for VF * step need to emit a temporary with scalar VF * step value before adding a vector temporary.
Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk? I wasn't sure about nested loop vectorization, so I've disabled it for that case, perhaps with a testcase that exhibits it could be replaced just with a test that the step, if SSA_NAME, doesn't have definition even in the outer loop, because we still put the vector constants before the outer loop. 2013-06-25 Jakub Jelinek <ja...@redhat.com> PR tree-optimization/57705 * tree-vect-loop.c (vect_is_simple_iv_evolution): Allow SSA_NAME step, provided that it is not defined inside the loop. (vect_analyze_scalar_cycles_1): Disallow SSA_NAME step in nested loop. (get_initial_def_for_induction): Handle SSA_NAME IV step. * gcc.dg/vect/pr57705.c: New test. * gcc.dg/vect/vect-iv-7.c: Add noclone attribute, remove xfail. --- gcc/tree-vect-loop.c.jj 2013-06-06 08:39:24.000000000 +0200 +++ gcc/tree-vect-loop.c 2013-06-25 11:17:02.947602981 +0200 @@ -500,7 +500,7 @@ vect_determine_vectorization_factor (loo /* Function vect_is_simple_iv_evolution. FORNOW: A simple evolution of an induction variables in the loop is - considered a polynomial evolution with constant step. */ + considered a polynomial evolution. */ static bool vect_is_simple_iv_evolution (unsigned loop_nb, tree access_fn, tree * init, @@ -509,6 +509,7 @@ vect_is_simple_iv_evolution (unsigned lo tree init_expr; tree step_expr; tree evolution_part = evolution_part_in_loop_num (access_fn, loop_nb); + basic_block bb; /* When there is no evolution in this loop, the evolution function is not "simple". */ @@ -534,7 +535,10 @@ vect_is_simple_iv_evolution (unsigned lo *init = init_expr; *step = step_expr; - if (TREE_CODE (step_expr) != INTEGER_CST) + if (TREE_CODE (step_expr) != INTEGER_CST + && (TREE_CODE (step_expr) != SSA_NAME + || ((bb = gimple_bb (SSA_NAME_DEF_STMT (step_expr))) + && flow_bb_inside_loop_p (get_loop (cfun, loop_nb), bb)))) { if (dump_enabled_p ()) dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, @@ -556,7 +560,7 @@ static void vect_analyze_scalar_cycles_1 (loop_vec_info loop_vinfo, struct loop *loop) { basic_block bb = loop->header; - tree dumy; + tree init, step; vec<gimple> worklist; worklist.create (64); gimple_stmt_iterator gsi; @@ -605,7 +609,9 @@ vect_analyze_scalar_cycles_1 (loop_vec_i } if (!access_fn - || !vect_is_simple_iv_evolution (loop->num, access_fn, &dumy, &dumy)) + || !vect_is_simple_iv_evolution (loop->num, access_fn, &init, &step) + || (LOOP_VINFO_LOOP (loop_vinfo) != loop + && TREE_CODE (step) != INTEGER_CST)) { worklist.safe_push (phi); continue; @@ -3273,10 +3279,14 @@ get_initial_def_for_induction (gimple iv expr = build_int_cst (TREE_TYPE (step_expr), vf); new_name = fold_build2 (MULT_EXPR, TREE_TYPE (step_expr), expr, step_expr); + if (TREE_CODE (step_expr) == SSA_NAME) + new_name = vect_init_vector (iv_phi, new_name, + TREE_TYPE (step_expr), NULL); } t = unshare_expr (new_name); - gcc_assert (CONSTANT_CLASS_P (new_name)); + gcc_assert (CONSTANT_CLASS_P (new_name) + || TREE_CODE (new_name) == SSA_NAME); stepvectype = get_vectype_for_scalar_type (TREE_TYPE (new_name)); gcc_assert (stepvectype); new_vec = build_vector_from_val (stepvectype, t); @@ -3332,8 +3342,12 @@ get_initial_def_for_induction (gimple iv expr = build_int_cst (TREE_TYPE (step_expr), nunits); new_name = fold_build2 (MULT_EXPR, TREE_TYPE (step_expr), expr, step_expr); + if (TREE_CODE (step_expr) == SSA_NAME) + new_name = vect_init_vector (iv_phi, new_name, + TREE_TYPE (step_expr), NULL); t = unshare_expr (new_name); - gcc_assert (CONSTANT_CLASS_P (new_name)); + gcc_assert (CONSTANT_CLASS_P (new_name) + || TREE_CODE (new_name) == SSA_NAME); new_vec = build_vector_from_val (stepvectype, t); vec_step = vect_init_vector (iv_phi, new_vec, stepvectype, NULL); --- gcc/testsuite/gcc.dg/vect/pr57705.c.jj 2013-06-25 10:15:21.037092153 +0200 +++ gcc/testsuite/gcc.dg/vect/pr57705.c 2013-06-25 10:15:02.239733689 +0200 @@ -0,0 +1,65 @@ +/* { dg-do run } */ + +#include "tree-vect.h" + +int a[1024]; +unsigned char b[1024]; + +extern void abort (void); + +__attribute__((noinline, noclone)) void +foo (int k, int m) +{ + int i, k2 = k; + for (i = 0; i < 1024; i++) + { + a[i] = k2; + k2 += m + 1; + } +} + +__attribute__((noinline, noclone)) void +bar (int k, int m) +{ + int i, k2 = k; + for (i = 0; i < 1024; i++) + { + k2 += m + 1; + a[i] = k2; + } +} + +__attribute__((noinline, noclone)) void +baz (int k, int m) +{ + int i, k2 = k; + for (i = 0; i < 1024; i++) + { + a[i] = k2; + b[i] = i; + k2 += m + 1; + } +} + +int +main () +{ + int i; + check_vect (); + foo (5, 3); + for (i = 0; i < 1024; i++) + if (a[i] != 5 + 4 * i) + abort (); + bar (5, 3); + for (i = 0; i < 1024; i++) + if (a[i] != 9 + 4 * i) + abort (); + baz (5, 3); + for (i = 0; i < 1024; i++) + if (a[i] != 5 + 4 * i || b[i] != (unsigned char) i) + abort (); + return 0; +} + +/* { dg-final { scan-tree-dump-times "vectorized 1 loop" 3 "vect" } } */ +/* { dg-final { cleanup-tree-dump "vect" } } */ --- gcc/testsuite/gcc.dg/vect/vect-iv-7.c.jj 2008-09-05 12:54:35.000000000 +0200 +++ gcc/testsuite/gcc.dg/vect/vect-iv-7.c 2013-06-25 10:37:35.864287369 +0200 @@ -6,7 +6,7 @@ #define N 16 int result[N] = {8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36, 38}; -__attribute__ ((noinline)) int main1 (int X) +__attribute__ ((noinline, noclone)) int main1 (int X) { int arr[N]; int k = 3; @@ -38,5 +38,5 @@ int main (void) return main1 (2); } -/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail *-*-* } } } */ +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */ /* { dg-final { cleanup-tree-dump "vect" } } */ Jakub