For VMAT_CONTIGUOUS_REVERSE, the transform code in function
vectorizable_store generates a VEC_PERM_EXPR stmt before
storing, but it's never considered in costing.
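
Concretely, for a V4SI store the transform phase emits a pair of
stmts like the following (a hand-written sketch with made-up SSA
names rather than an actual dump; the constant mask reverses the
lane order):

  vect_perm_1 = VEC_PERM_EXPR <vect_x_2, vect_x_2, { 3, 2, 1, 0 }>;
  MEM <vector(4) int> [(int *)dst_ptr] = vect_perm_1;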

This patch makes the costing consider that vec_perm.  It also
adjusts the order of the transform code a bit, to make it easy
to return early when costing_p is set.
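
For reference, a loop of the shape below is classified as
VMAT_CONTIGUOUS_REVERSE: the stores are contiguous but walk
backwards through memory, so each vector store needs the
preceding lane-reversing permute (a minimal sketch; whether a
given target vectorizes it this way depends on its support for
negative-step accesses, and the new powerpc test exercises the
same pattern):

  void
  reverse_copy (int *restrict dst, int *restrict src, int n)
  {
    for (int i = 0; i < n; i++)
      dst[n - 1 - i] = src[i];
  }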

gcc/ChangeLog:

        * tree-vect-stmts.cc (vectorizable_store): Consider generated
        VEC_PERM_EXPR stmt for VMAT_CONTIGUOUS_REVERSE in costing as
        vec_perm.

gcc/testsuite/ChangeLog:

        * gcc.dg/vect/costmodel/ppc/costmodel-vect-store-2.c: New test.
---
 .../costmodel/ppc/costmodel-vect-store-2.c    | 29 +++++++++
 gcc/tree-vect-stmts.cc                        | 63 +++++++++++--------
 2 files changed, 65 insertions(+), 27 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/vect/costmodel/ppc/costmodel-vect-store-2.c

diff --git a/gcc/testsuite/gcc.dg/vect/costmodel/ppc/costmodel-vect-store-2.c b/gcc/testsuite/gcc.dg/vect/costmodel/ppc/costmodel-vect-store-2.c
new file mode 100644
index 00000000000..72b67cf9040
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/costmodel/ppc/costmodel-vect-store-2.c
@@ -0,0 +1,29 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target vect_int } */
+/* { dg-require-effective-target powerpc_vsx_ok } */
+/* { dg-additional-options "-mvsx" } */
+
+/* Verify we do cost the required vec_perm.  */
+
+int
+foo (int *a, int *b, int len)
+{
+  int i;
+  int *a1 = a;
+  int *a0 = a1 - 4;
+  for (i = 0; i < len; i++)
+    {
+      *b = *a0 + *a1;
+      b--;
+      a0++;
+      a1++;
+    }
+  return 0;
+}
+
+/* The reason we don't check for an exact count is that
+   we can get more than one vec_perm when compiling with
+   partial vector capability like Power10 (retrying for
+   the epilogue) or compiling without unaligned vector
+   memory access support (realign).  */
+/* { dg-final { scan-tree-dump {\mvec_perm\M} "vect" } } */
diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
index 3d451c80bca..ce925cc1d53 100644
--- a/gcc/tree-vect-stmts.cc
+++ b/gcc/tree-vect-stmts.cc
@@ -9279,6 +9279,40 @@ vectorizable_store (vec_info *vinfo,
       stmt_vec_info next_stmt_info = first_stmt_info;
       for (i = 0; i < vec_num; i++)
        {
+         if (!costing_p)
+           {
+             if (slp)
+               vec_oprnd = vec_oprnds[i];
+             else if (grouped_store)
+               /* For grouped stores vectorized defs are interleaved in
+                  vect_permute_store_chain().  */
+               vec_oprnd = result_chain[i];
+           }
+
+         if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
+           {
+             if (costing_p)
+               inside_cost += record_stmt_cost (cost_vec, 1, vec_perm,
+                                                stmt_info, 0, vect_body);
+             else
+               {
+                 tree perm_mask = perm_mask_for_reverse (vectype);
+                 tree perm_dest = vect_create_destination_var (
+                   vect_get_store_rhs (stmt_info), vectype);
+                 tree new_temp = make_ssa_name (perm_dest);
+
+                 /* Generate the permute statement.  */
+                 gimple *perm_stmt
+                   = gimple_build_assign (new_temp, VEC_PERM_EXPR, vec_oprnd,
+                                          vec_oprnd, perm_mask);
+                 vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt,
+                                              gsi);
+
+                 perm_stmt = SSA_NAME_DEF_STMT (new_temp);
+                 vec_oprnd = new_temp;
+               }
+           }
+
          if (costing_p)
            {
              vect_get_store_cost (vinfo, stmt_info, 1,
@@ -9294,8 +9328,6 @@ vectorizable_store (vec_info *vinfo,
 
              continue;
            }
-         unsigned misalign;
-         unsigned HOST_WIDE_INT align;
 
          tree final_mask = NULL_TREE;
          tree final_len = NULL_TREE;
@@ -9315,13 +9347,8 @@ vectorizable_store (vec_info *vinfo,
            dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
                                           stmt_info, bump);
 
-         if (slp)
-           vec_oprnd = vec_oprnds[i];
-         else if (grouped_store)
-           /* For grouped stores vectorized defs are interleaved in
-              vect_permute_store_chain().  */
-           vec_oprnd = result_chain[i];
-
+         unsigned misalign;
+         unsigned HOST_WIDE_INT align;
          align = known_alignment (DR_TARGET_ALIGNMENT (first_dr_info));
          if (alignment_support_scheme == dr_aligned)
            misalign = 0;
@@ -9338,24 +9365,6 @@ vectorizable_store (vec_info *vinfo,
                                    misalign);
          align = least_bit_hwi (misalign | align);
 
-         if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
-           {
-             tree perm_mask = perm_mask_for_reverse (vectype);
-             tree perm_dest
-               = vect_create_destination_var (vect_get_store_rhs (stmt_info),
-                                              vectype);
-             tree new_temp = make_ssa_name (perm_dest);
-
-             /* Generate the permute statement.  */
-             gimple *perm_stmt
-               = gimple_build_assign (new_temp, VEC_PERM_EXPR, vec_oprnd,
-                                      vec_oprnd, perm_mask);
-             vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt, gsi);
-
-             perm_stmt = SSA_NAME_DEF_STMT (new_temp);
-             vec_oprnd = new_temp;
-           }
-
          /* Compute IFN when LOOP_LENS or final_mask valid.  */
          machine_mode vmode = TYPE_MODE (vectype);
          machine_mode new_vmode = vmode;
-- 
2.31.1
