Hi!

While hacking on the gomp4 simd clauses stuff, I've noticed that
we don't vectorize IVs with a non-constant step.  The following patch
implements that; basically, we just can't rely on the step or VF * step
being a constant, and instead for VF * step we need to emit a temporary
holding the scalar VF * step value before adding a vector temporary.

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

I wasn't sure about nested loop vectorization, so I've disabled it for that
case.  Perhaps, given a testcase that exhibits it, that check could be
replaced with just a test that the step, if it is an SSA_NAME, doesn't have
its definition even in the outer loop, because we still put the vector
constants before the outer loop.

2013-06-25  Jakub Jelinek  <ja...@redhat.com>

        PR tree-optimization/57705
        * tree-vect-loop.c (vect_is_simple_iv_evolution): Allow
        SSA_NAME step, provided that it is not defined inside the loop.
        (vect_analyze_scalar_cycles_1): Disallow SSA_NAME step in nested
        loop.
        (get_initial_def_for_induction): Handle SSA_NAME IV step.

        * gcc.dg/vect/pr57705.c: New test.
        * gcc.dg/vect/vect-iv-7.c: Add noclone attribute, remove xfail.

--- gcc/tree-vect-loop.c.jj     2013-06-06 08:39:24.000000000 +0200
+++ gcc/tree-vect-loop.c        2013-06-25 11:17:02.947602981 +0200
@@ -500,7 +500,7 @@ vect_determine_vectorization_factor (loo
 /* Function vect_is_simple_iv_evolution.
 
    FORNOW: A simple evolution of an induction variables in the loop is
-   considered a polynomial evolution with constant step.  */
+   considered a polynomial evolution.  */
 
 static bool
 vect_is_simple_iv_evolution (unsigned loop_nb, tree access_fn, tree * init,
@@ -509,6 +509,7 @@ vect_is_simple_iv_evolution (unsigned lo
   tree init_expr;
   tree step_expr;
   tree evolution_part = evolution_part_in_loop_num (access_fn, loop_nb);
+  basic_block bb;
 
   /* When there is no evolution in this loop, the evolution function
      is not "simple".  */
@@ -534,7 +535,10 @@ vect_is_simple_iv_evolution (unsigned lo
   *init = init_expr;
   *step = step_expr;
 
-  if (TREE_CODE (step_expr) != INTEGER_CST)
+  if (TREE_CODE (step_expr) != INTEGER_CST
+      && (TREE_CODE (step_expr) != SSA_NAME
+         || ((bb = gimple_bb (SSA_NAME_DEF_STMT (step_expr)))
+             && flow_bb_inside_loop_p (get_loop (cfun, loop_nb), bb))))
     {
       if (dump_enabled_p ())
         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
@@ -556,7 +560,7 @@ static void
 vect_analyze_scalar_cycles_1 (loop_vec_info loop_vinfo, struct loop *loop)
 {
   basic_block bb = loop->header;
-  tree dumy;
+  tree init, step;
   vec<gimple> worklist;
   worklist.create (64);
   gimple_stmt_iterator gsi;
@@ -605,7 +609,9 @@ vect_analyze_scalar_cycles_1 (loop_vec_i
        }
 
       if (!access_fn
-         || !vect_is_simple_iv_evolution (loop->num, access_fn, &dumy, &dumy))
+         || !vect_is_simple_iv_evolution (loop->num, access_fn, &init, &step)
+         || (LOOP_VINFO_LOOP (loop_vinfo) != loop
+             && TREE_CODE (step) != INTEGER_CST))
        {
          worklist.safe_push (phi);
          continue;
@@ -3273,10 +3279,14 @@ get_initial_def_for_induction (gimple iv
       expr = build_int_cst (TREE_TYPE (step_expr), vf);
       new_name = fold_build2 (MULT_EXPR, TREE_TYPE (step_expr),
                              expr, step_expr);
+      if (TREE_CODE (step_expr) == SSA_NAME)
+       new_name = vect_init_vector (iv_phi, new_name,
+                                    TREE_TYPE (step_expr), NULL);
     }
 
   t = unshare_expr (new_name);
-  gcc_assert (CONSTANT_CLASS_P (new_name));
+  gcc_assert (CONSTANT_CLASS_P (new_name)
+             || TREE_CODE (new_name) == SSA_NAME);
   stepvectype = get_vectype_for_scalar_type (TREE_TYPE (new_name));
   gcc_assert (stepvectype);
   new_vec = build_vector_from_val (stepvectype, t);
@@ -3332,8 +3342,12 @@ get_initial_def_for_induction (gimple iv
       expr = build_int_cst (TREE_TYPE (step_expr), nunits);
       new_name = fold_build2 (MULT_EXPR, TREE_TYPE (step_expr),
                              expr, step_expr);
+      if (TREE_CODE (step_expr) == SSA_NAME)
+       new_name = vect_init_vector (iv_phi, new_name,
+                                    TREE_TYPE (step_expr), NULL);
       t = unshare_expr (new_name);
-      gcc_assert (CONSTANT_CLASS_P (new_name));
+      gcc_assert (CONSTANT_CLASS_P (new_name)
+                 || TREE_CODE (new_name) == SSA_NAME);
       new_vec = build_vector_from_val (stepvectype, t);
       vec_step = vect_init_vector (iv_phi, new_vec, stepvectype, NULL);
 
--- gcc/testsuite/gcc.dg/vect/pr57705.c.jj      2013-06-25 10:15:21.037092153 +0200
+++ gcc/testsuite/gcc.dg/vect/pr57705.c 2013-06-25 10:15:02.239733689 +0200
@@ -0,0 +1,65 @@
+/* { dg-do run } */
+
+#include "tree-vect.h"
+
+int a[1024];
+unsigned char b[1024];
+
+extern void abort (void);
+
+__attribute__((noinline, noclone)) void
+foo (int k, int m)
+{
+  int i, k2 = k;
+  for (i = 0; i < 1024; i++)
+    {
+      a[i] = k2;
+      k2 += m + 1;
+    }
+}
+
+__attribute__((noinline, noclone)) void
+bar (int k, int m)
+{
+  int i, k2 = k;
+  for (i = 0; i < 1024; i++)
+    {
+      k2 += m + 1;
+      a[i] = k2;
+    }
+}
+
+__attribute__((noinline, noclone)) void
+baz (int k, int m)
+{
+  int i, k2 = k;
+  for (i = 0; i < 1024; i++)
+    {
+      a[i] = k2;
+      b[i] = i;
+      k2 += m + 1;
+    }
+}
+
+int
+main ()
+{
+  int i;
+  check_vect ();
+  foo (5, 3);
+  for (i = 0; i < 1024; i++)
+    if (a[i] != 5 + 4 * i)
+      abort ();
+  bar (5, 3);
+  for (i = 0; i < 1024; i++)
+    if (a[i] != 9 + 4 * i)
+      abort ();
+  baz (5, 3);
+  for (i = 0; i < 1024; i++)
+    if (a[i] != 5 + 4 * i || b[i] != (unsigned char) i)
+      abort ();
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loop" 3 "vect" } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */
--- gcc/testsuite/gcc.dg/vect/vect-iv-7.c.jj    2008-09-05 12:54:35.000000000 +0200
+++ gcc/testsuite/gcc.dg/vect/vect-iv-7.c       2013-06-25 10:37:35.864287369 +0200
@@ -6,7 +6,7 @@
 #define N 16
 int result[N] = {8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36, 38};
  
-__attribute__ ((noinline)) int main1 (int X)
+__attribute__ ((noinline, noclone)) int main1 (int X)
 {  
   int arr[N];
   int k = 3;
@@ -38,5 +38,5 @@ int main (void)
   return main1 (2);
 } 
 
-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail *-*-* } } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
 /* { dg-final { cleanup-tree-dump "vect" } } */

        Jakub

Reply via email to