On May 22, 2015 5:13:16 PM GMT+02:00, Michael Matz <m...@suse.de> wrote:
>Hi,
>
>Between Richi's improvements to grouped accesses and mine to strided
>stores there is an interaction that leads to ICEs and wrong code now
>that both are in, for instance PR66251.  The added testcase reflects
>this situation and uses both narrowing and widening (narrowing would
>still ICE, widening currently produces only wrong code).  The patch
>fixes the testcase(s).
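>
>The problematic shape boils down to a two-element store group (the real
>and imaginary parts of a _Complex value) with a runtime stride plus a
>conversion, roughly like this (schematic only, see the testcase for the
>real thing):
>
>  void foo (_Complex double *a, _Complex int *b, int stride, int n)
>  {
>    for (int i = 0; i < n; i++)
>      a[i*stride] = b[i*stride];   /* widening int -> double store */
>  }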
>
>It's currently regstrapping on x86_64-linux, okay for trunk if that 
>passes?

OK.

Thanks,
Richard.

>
>Ciao,
>Michael.
>
>       PR middle-end/66251
>
>       * tree-vect-stmts.c (vect_model_store_cost): Handle strided group
>       stores.
>       (vect_create_vectorized_demotion_stmts): Always set
>       STMT_VINFO_VEC_STMT, also with SLP.
>       (vectorizable_store): Handle strided group stores.
>
>testsuite/:
>       PR middle-end/66251
>       * gcc.dg/vect/pr66251.c: New test.
>
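>To illustrate what the vectorizable_store change generates for a strided
>group store (a hand-written sketch, not actual compiler output): with
>nunits == 4 and group_size == 2, each vector value is now stored in
>nstores == nunits / group_size == 2 pieces of type ltype == vector(2) T,
>one piece per group instance, each at the next strided address:
>
>  vec = <the V4SI value to store>;
>  MEM[off] = BIT_FIELD_REF <vec, 64, 0>;    /* first vector(2) half */
>  off = off + stride_step;
>  MEM[off] = BIT_FIELD_REF <vec, 64, 64>;   /* second vector(2) half */
>  off = off + stride_step;
>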
>Index: tree-vect-stmts.c
>===================================================================
>--- tree-vect-stmts.c  (revision 223577)
>+++ tree-vect-stmts.c  (working copy)
>@@ -1000,7 +1000,8 @@ vect_model_store_cost (stmt_vec_info stm
>      equivalent to the cost of GROUP_SIZE separate stores.  If a grouped
>      access is instead being provided by a permute-and-store operation,
>      include the cost of the permutes.  */
>-  if (!store_lanes_p && group_size > 1)
>+  if (!store_lanes_p && group_size > 1
>+      && !STMT_VINFO_STRIDED_P (stmt_info))
>     {
>       /* Uses a high and low interleave or shuffle operations for each
>        needed permute.  */
>@@ -1014,21 +1015,24 @@ vect_model_store_cost (stmt_vec_info stm
>                          group_size);
>     }
> 
>+  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
>   /* Costs of the stores.  */
>-  if (STMT_VINFO_STRIDED_P (stmt_info))
>+  if (STMT_VINFO_STRIDED_P (stmt_info)
>+      && !STMT_VINFO_GROUPED_ACCESS (stmt_info))
>     {
>       /* N scalar stores plus extracting the elements.  */
>-      tree vectype = STMT_VINFO_VECTYPE (stmt_info);
>       inside_cost += record_stmt_cost (body_cost_vec,
>                                      ncopies * TYPE_VECTOR_SUBPARTS (vectype),
>                                      scalar_store, stmt_info, 0, vect_body);
>-      inside_cost += record_stmt_cost (body_cost_vec,
>-                                     ncopies * TYPE_VECTOR_SUBPARTS (vectype),
>-                                     vec_to_scalar, stmt_info, 0, vect_body);
>     }
>   else
>     vect_get_store_cost (first_dr, ncopies, &inside_cost, body_cost_vec);
> 
>+  if (STMT_VINFO_STRIDED_P (stmt_info))
>+    inside_cost += record_stmt_cost (body_cost_vec,
>+                                   ncopies * TYPE_VECTOR_SUBPARTS (vectype),
>+                                   vec_to_scalar, stmt_info, 0, vect_body);
>+
>   if (dump_enabled_p ())
>     dump_printf_loc (MSG_NOTE, vect_location,
>                      "vect_model_store_cost: inside_cost = %d, "
>@@ -3377,15 +3381,13 @@ vect_create_vectorized_demotion_stmts (v
>            (or in STMT_VINFO_RELATED_STMT chain).  */
>         if (slp_node)
>           SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
>+
>+        if (!*prev_stmt_info)
>+          STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
>         else
>-          {
>-            if (!*prev_stmt_info)
>-              STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
>-            else
>-              STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt;
>+          STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt;
> 
>-            *prev_stmt_info = vinfo_for_stmt (new_stmt);
>-          }
>+        *prev_stmt_info = vinfo_for_stmt (new_stmt);
>       }
>     }
> 
>@@ -5155,15 +5157,27 @@ vectorizable_store (gimple stmt, gimple_
>     {
>       grouped_store = true;
>       first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
>-      if (!slp && !PURE_SLP_STMT (stmt_info))
>+      group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
>+      if (!slp
>+        && !PURE_SLP_STMT (stmt_info)
>+        && !STMT_VINFO_STRIDED_P (stmt_info))
>       {
>-        group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
>         if (vect_store_lanes_supported (vectype, group_size))
>           store_lanes_p = true;
>         else if (!vect_grouped_store_supported (vectype, group_size))
>           return false;
>       }
> 
>+      if (STMT_VINFO_STRIDED_P (stmt_info)
>+        && (slp || PURE_SLP_STMT (stmt_info))
>+        && (group_size > nunits
>+            || nunits % group_size != 0))
>+      {
>+        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
>+                         "unhandled strided group store\n");
>+        return false;
>+      }
>+
>       if (first_stmt == stmt)
>         {
>           /* STMT is the leader of the group.  Check the operands of all the
>@@ -5286,10 +5300,23 @@ vectorizable_store (gimple stmt, gimple_
>            ...
>          */
> 
>+      unsigned nstores = nunits;
>+      tree ltype = elem_type;
>+      if (slp)
>+      {
>+        nstores = nunits / group_size;
>+        if (group_size < nunits)
>+          ltype = build_vector_type (elem_type, group_size);
>+        else
>+          ltype = vectype;
>+        ltype = build_aligned_type (ltype, TYPE_ALIGN (elem_type));
>+        ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
>+      }
>+
>       ivstep = stride_step;
>       ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (ivstep), ivstep,
>                           build_int_cst (TREE_TYPE (ivstep),
>-                                         ncopies * nunits));
>+                                         ncopies * nstores));
> 
>       standard_iv_increment_position (loop, &incr_gsi, &insert_after);
> 
>@@ -5315,22 +5342,22 @@ vectorizable_store (gimple stmt, gimple_
>         else
>           vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, vec_oprnd);
> 
>-        for (i = 0; i < nunits; i++)
>+        for (i = 0; i < nstores; i++)
>           {
>             tree newref, newoff;
>             gimple incr, assign;
>-            tree size = TYPE_SIZE (elem_type);
>+            tree size = TYPE_SIZE (ltype);
>             /* Extract the i'th component.  */
>             tree pos = fold_build2 (MULT_EXPR, bitsizetype, bitsize_int (i),
>                                     size);
>-            tree elem = fold_build3 (BIT_FIELD_REF, elem_type, vec_oprnd,
>+            tree elem = fold_build3 (BIT_FIELD_REF, ltype, vec_oprnd,
>                                      size, pos);
> 
>             elem = force_gimple_operand_gsi (gsi, elem, true,
>                                              NULL_TREE, true,
>                                              GSI_SAME_STMT);
> 
>-            newref = build2 (MEM_REF, TREE_TYPE (vectype),
>+            newref = build2 (MEM_REF, ltype,
>                              running_off, alias_off);
> 
>             /* And store it to *running_off.  */
>Index: testsuite/gcc.dg/vect/pr66251.c
>===================================================================
>--- testsuite/gcc.dg/vect/pr66251.c    (revision 0)
>+++ testsuite/gcc.dg/vect/pr66251.c    (working copy)
>@@ -0,0 +1,79 @@
>+/* { dg-require-effective-target vect_int } */
>+/* { dg-require-effective-target vect_double } */
>+/* { dg-require-effective-target vect_floatint_cvt } */
>+/* { dg-require-effective-target vect_intfloat_cvt } */
>+/* { dg-require-effective-target vect_pack_trunc } */
>+/* { dg-require-effective-target vect_unpack } */
>+/* { dg-require-effective-target vect_hw_misalign } */
>+
>+#include "tree-vect.h"
>+
>+void __attribute__((noinline,noclone))
>+test1(_Complex double *a, _Complex int *b, int stride, int n)
>+{
>+  int i;
>+  for (i = 0; i < n; i++)
>+    {
>+      a[i*stride] = b[i*stride];
>+    }
>+}
>+
>+void __attribute__((noinline,noclone))
>+test2(_Complex int *a, _Complex double *b, int stride, int n)
>+{
>+  int i;
>+  for (i = 0; i < n; i++)
>+    {
>+      a[i*stride] = b[i*stride];
>+    }
>+}
>+
>+_Complex int ia[256];
>+_Complex double da[256];
>+
>+extern void abort (void);
>+
>+int main ()
>+{
>+  int i;
>+  int stride;
>+
>+  check_vect ();
>+
>+  for (stride = 1; stride < 15; stride++)
>+    {
>+      for (i = 0; i < 256; i++)
>+      {
>+        __real__ ia[i] = (i + stride) % 19;
>+        __imag__ ia[i] = (i + stride) % 23;
>+        __asm__ volatile ("");
>+      }
>+
>+      test1(da, ia, stride, 256/stride);
>+
>+      for (i = 0; i < 256/stride; i++)
>+      {
>+        if (da[i*stride] != ia[i*stride])
>+          abort ();
>+      }
>+
>+      for (i = 0; i < 256; i++)
>+      {
>+        __real__ da[i] = (i + stride + 1) % 29;
>+        __imag__ da[i] = (i + stride + 1) % 31;
>+        __asm__ volatile ("");
>+      }
>+
>+      test2(ia, da, stride, 256/stride);
>+
>+      for (i = 0; i < 256/stride; i++)
>+      {
>+        if (da[i*stride] != ia[i*stride])
>+          abort ();
>+      }
>+    }
>+  return 0;
>+}
>+
>+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" } } */
>+/* { dg-final { cleanup-tree-dump "vect" } } */
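>
>The new test can be run on its own in the usual way, e.g.
>
>  make check-gcc RUNTESTFLAGS="vect.exp=pr66251.c"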