The following fixes a miscompilation caused by the recent vector load
optimization.  The check that matches an unused upper half of a vector
was incomplete.

Bootstrap / regtest running on x86_64-unknown-linux-gnu, applied to trunk.

Richard.

2019-05-06  Richard Biener  <rguent...@suse.de>

        PR tree-optimization/90358
        * tree-vect-stmts.c (get_group_load_store_type): Properly
        detect unused upper half of load.
        (vectorizable_load): Likewise.

        * gcc.target/i386/pr90358.c: New testcase.

Index: gcc/tree-vect-stmts.c
===================================================================
--- gcc/tree-vect-stmts.c       (revision 270902)
+++ gcc/tree-vect-stmts.c       (working copy)
@@ -2273,6 +2274,7 @@ get_group_load_store_type (stmt_vec_info
                   == dr_aligned
                  || alignment_support_scheme == dr_unaligned_supported)
              && known_eq (nunits, (group_size - gap) * 2)
+             && known_eq (nunits, group_size)
              && mode_for_vector (elmode, (group_size - gap)).exists (&vmode)
              && VECTOR_MODE_P (vmode)
              && targetm.vector_mode_supported_p (vmode)
@@ -8550,7 +8552,8 @@ vectorizable_load (stmt_vec_info stmt_in
                            && DR_GROUP_GAP (first_stmt_info) != 0
                            && known_eq (nunits,
                                         (group_size
-                                         - DR_GROUP_GAP (first_stmt_info)) * 2))
+                                         - DR_GROUP_GAP (first_stmt_info)) * 2)
+                           && known_eq (nunits, group_size))
                          ltype = build_vector_type (TREE_TYPE (vectype),
                                                     (group_size
                                                      - DR_GROUP_GAP
Index: gcc/testsuite/gcc.target/i386/pr90358.c
===================================================================
--- gcc/testsuite/gcc.target/i386/pr90358.c     (nonexistent)
+++ gcc/testsuite/gcc.target/i386/pr90358.c     (working copy)
@@ -0,0 +1,35 @@
+/* PR tree-optimization/90358 */
+/* { dg-do run { target { sse4_runtime } } } */
+/* { dg-options "-O3 -msse4" } */
+
+struct s { unsigned int a, b, c; };
+
+void __attribute__ ((noipa))
+foo (struct s *restrict s1, struct s *restrict s2, int n)
+{
+  for (int i = 0; i < n; ++i)
+    {
+      s1[i].b = s2[i].b;
+      s1[i].c = s2[i].c;
+      s2[i].c = 0;
+    }
+}
+                            
+#define N 12
+
+int
+main ()
+{
+  struct s s1[N], s2[N];
+  for (unsigned int j = 0; j < N; ++j)
+    {
+      s2[j].a = j * 5;
+      s2[j].b = j * 5 + 2;
+      s2[j].c = j * 5 + 4;
+    }
+  foo (s1, s2, N);
+  for (unsigned int j = 0; j < N; ++j)
+  if (s1[j].b != j * 5 + 2)
+    __builtin_abort ();
+  return 0;
+}

Reply via email to