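The following patch fixes PR61680 by making vect_analyze_data_ref_dependence
properly handle all read-write dependences that involve group accesses.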

Bootstrap and regression testing are in progress on x86_64-unknown-linux-gnu.

Richard.

2014-07-07  Richard Biener  <rguent...@suse.de>

        PR tree-optimization/61680
        * tree-vect-data-refs.c (vect_analyze_data_ref_dependence):
        Handle properly all read-write dependences with group accesses.

        * gcc.dg/vect/pr61680.c: New testcase.

Index: gcc/tree-vect-data-refs.c
===================================================================
--- gcc/tree-vect-data-refs.c   (revision 212323)
+++ gcc/tree-vect-data-refs.c   (working copy)
@@ -375,11 +397,14 @@ vect_analyze_data_ref_dependence (struct
                .. = a[i+1];
             where we will end up loading { a[i], a[i+1] } once, make
             sure that inserting group loads before the first load and
-            stores after the last store will do the right thing.  */
-         if ((STMT_VINFO_GROUPED_ACCESS (stmtinfo_a)
-              && GROUP_SAME_DR_STMT (stmtinfo_a))
-             || (STMT_VINFO_GROUPED_ACCESS (stmtinfo_b)
-                 && GROUP_SAME_DR_STMT (stmtinfo_b)))
+            stores after the last store will do the right thing.
+            Similar for groups like
+               a[i] = ...;
+               ... = a[i];
+               a[i+1] = ...;
+            where loads from the group interleave with the store.  */
+         if (STMT_VINFO_GROUPED_ACCESS (stmtinfo_a)
+             || STMT_VINFO_GROUPED_ACCESS (stmtinfo_b))
            {
              gimple earlier_stmt;
              earlier_stmt = get_earlier_stmt (DR_STMT (dra), DR_STMT (drb));
Index: gcc/testsuite/gcc.dg/vect/pr61680.c
===================================================================
--- gcc/testsuite/gcc.dg/vect/pr61680.c (revision 0)
+++ gcc/testsuite/gcc.dg/vect/pr61680.c (working copy)
@@ -0,0 +1,51 @@
+/* { dg-do run } */
+
+#include "tree-vect.h"
+
+double v[4096][4];
+
+__attribute__((noinline, noclone)) void
+bar (double p[][4])
+{
+  int i;
+  double d = 172.0;
+  for (i = 0; i < 4096; i++)
+    {
+      if (p[i][0] != 6.0 || p[i][1] != 6.0 || p[i][2] != 10.0)
+       __builtin_abort ();
+      if (__builtin_fabs (p[i][3] - d) > 0.25)
+       __builtin_abort ();
+    }
+}
+
+__attribute__((noinline, noclone)) void
+foo (void)
+{
+  int i;
+  double w[4096][4], t;
+  for (i = 0; i < 4096; i++)
+    {
+      w[i][0] = v[i][0] + 2.0;
+      w[i][1] = v[i][1] + 1.0;
+      w[i][2] = v[i][2] + 4.0;
+      w[i][3] = (w[i][0] * w[i][0] + w[i][1] * w[i][1] + w[i][2] * w[i][2]);
+    }
+  bar (w);
+}
+
+int
+main ()
+{
+  int i;
+
+  check_vect ();
+
+  for (i = 0; i < 4096; i++)
+    {
+      v[i][0] = 4.0;
+      v[i][1] = 5.0;
+      v[i][2] = 6.0;
+    }
+  foo ();
+  return 0;
+}

Reply via email to