The following patch makes us handle invariant loads during vectorization.
Dependence analysis currently isn't clever enough to disambiguate them
thus we insert versioning-for-alias checks.  For the testcase hoisting
the load is still always possible though, and for a read-after-write
dependence it would be possible for the vectorized loop copy as the
may-aliasing write is varying by the scalar variable size.

The existing code for vectorizing invariant accesses looks very
suspicious - it generates a vector load at the scalar address
to then just extract the first vector element.  Huh.  IMHO this
can be simplified as done, by just re-using the scalar load result.
But maybe this code was supposed to deal with something entirely
different?

This patch gives a 33% speedup to the phoronix himeno testcase
if you bump the maximum alias versioning checks we want to insert.

I'm currently re-bootstrapping & testing this but an earlier version
was ok on x86_64-unknown-linux-gnu.

2011-06-29  Richard Guenther  <rguent...@suse.de>

        PR tree-optimization/46787
        * tree-data-ref.c (dr_address_invariant_p): Remove.
        (find_data_references_in_stmt): Invariant accesses are ok now.
        * tree-vect-stmts.c (vectorizable_load): Handle invariant
        loads.
        * tree-vect-data-refs.c (vect_analyze_data_ref_access): Allow
        invariant loads.

        * gcc.dg/vect/vect-121.c: New testcase.

Index: gcc/tree-data-ref.c
===================================================================
--- gcc/tree-data-ref.c (revision 175531)
+++ gcc/tree-data-ref.c (working copy)
@@ -919,21 +919,6 @@ dr_analyze_alias (struct data_reference
     }
 }
 
-/* Returns true if the address of DR is invariant.  */
-
-static bool
-dr_address_invariant_p (struct data_reference *dr)
-{
-  unsigned i;
-  tree idx;
-
-  FOR_EACH_VEC_ELT (tree, DR_ACCESS_FNS (dr), i, idx)
-    if (tree_contains_chrecs (idx, NULL))
-      return false;
-
-  return true;
-}
-
 /* Frees data reference DR.  */
 
 void
@@ -4228,19 +4213,6 @@ find_data_references_in_stmt (struct loo
       dr = create_data_ref (nest, loop_containing_stmt (stmt),
                            *ref->pos, stmt, ref->is_read);
       gcc_assert (dr != NULL);
-
-      /* FIXME -- data dependence analysis does not work correctly for objects
-         with invariant addresses in loop nests.  Let us fail here until the
-        problem is fixed.  */
-      if (dr_address_invariant_p (dr) && nest)
-       {
-         free_data_ref (dr);
-         if (dump_file && (dump_flags & TDF_DETAILS))
-           fprintf (dump_file, "\tFAILED as dr address is invariant\n");
-         ret = false;
-         break;
-       }
-
       VEC_safe_push (data_reference_p, heap, *datarefs, dr);
     }
   VEC_free (data_ref_loc, heap, references);
Index: gcc/tree-vect-stmts.c
===================================================================
--- gcc/tree-vect-stmts.c       (revision 175531)
+++ gcc/tree-vect-stmts.c       (working copy)
@@ -4076,7 +4076,8 @@ vectorizable_load (gimple stmt, gimple_s
       && code != COMPONENT_REF
       && code != IMAGPART_EXPR
       && code != REALPART_EXPR
-      && code != MEM_REF)
+      && code != MEM_REF
+      && TREE_CODE_CLASS (code) != tcc_declaration)
     return false;
 
   if (!STMT_VINFO_DATA_REF (stmt_info))
@@ -4527,30 +4528,14 @@ vectorizable_load (gimple stmt, gimple_s
              if (inv_p && !bb_vinfo)
                {
                  gcc_assert (!strided_load);
-                 gcc_assert (nested_in_vect_loop_p (loop, stmt));
                  if (j == 0)
                    {
-                     int k;
-                     tree t = NULL_TREE;
-                     tree vec_inv, bitpos, bitsize = TYPE_SIZE (scalar_type);
-
-                     /* CHECKME: bitpos depends on endianess?  */
-                     bitpos = bitsize_zero_node;
-                     vec_inv = build3 (BIT_FIELD_REF, scalar_type, new_temp,
-                                       bitsize, bitpos);
-                     vec_dest = vect_create_destination_var (scalar_dest,
-                                                             NULL_TREE);
-                     new_stmt = gimple_build_assign (vec_dest, vec_inv);
-                     new_temp = make_ssa_name (vec_dest, new_stmt);
-                     gimple_assign_set_lhs (new_stmt, new_temp);
-                     vect_finish_stmt_generation (stmt, new_stmt, gsi);
-
-                     for (k = nunits - 1; k >= 0; --k)
-                       t = tree_cons (NULL_TREE, new_temp, t);
-                     /* FIXME: use build_constructor directly.  */
-                     vec_inv = build_constructor_from_list (vectype, t);
+                     tree vec_inv;
+                     gimple_stmt_iterator gsi2 = *gsi;
+                     gsi_next (&gsi2);
+                     vec_inv = build_vector_from_val (vectype, scalar_dest);
                      new_temp = vect_init_vector (stmt, vec_inv,
-                                                  vectype, gsi);
+                                                  vectype, &gsi2);
                      new_stmt = SSA_NAME_DEF_STMT (new_temp);
                    }
                  else
Index: gcc/tree-vect-data-refs.c
===================================================================
--- gcc/tree-vect-data-refs.c   (revision 175531)
+++ gcc/tree-vect-data-refs.c   (working copy)
@@ -2302,9 +2302,9 @@ vect_analyze_data_ref_access (struct dat
       return false;
     }
 
-  /* Don't allow invariant accesses in loops.  */
+  /* Allow invariant loads in loops.  */
   if (loop_vinfo && dr_step == 0)
-    return false;
+    return DR_IS_READ (dr);
 
   if (loop && nested_in_vect_loop_p (loop, stmt))
     {
Index: gcc/testsuite/gcc.dg/vect/vect-121.c
===================================================================
--- gcc/testsuite/gcc.dg/vect/vect-121.c        (revision 0)
+++ gcc/testsuite/gcc.dg/vect/vect-121.c        (revision 0)
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target vect_float } */
+
+float *x;
+float parm;
+float
+test (int start, int end)
+{
+  int i;
+  for (i = start; i < end; ++i)
+    {
+      float tem = x[i];
+      x[i] = parm * tem;
+    }
+}
+
+/* { dg-final { scan-tree-dump "vectorized 1 loops" "vect" } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */

Reply via email to