For VMAT_CONTIGUOUS_REVERSE, the transform code in function vectorizable_store generates a VEC_PERM_EXPR statement before the store, but that statement is never accounted for in costing.
This patch makes the costing account for that permutation as a vec_perm; it also adjusts the order of the transform code a bit so that the costing_p path can return early. gcc/ChangeLog: * tree-vect-stmts.cc (vectorizable_store): Consider generated VEC_PERM_EXPR stmt for VMAT_CONTIGUOUS_REVERSE in costing as vec_perm. gcc/testsuite/ChangeLog: * gcc.dg/vect/costmodel/ppc/costmodel-vect-store-2.c: New test. --- .../costmodel/ppc/costmodel-vect-store-2.c | 29 +++++++++ gcc/tree-vect-stmts.cc | 63 +++++++++++-------- 2 files changed, 65 insertions(+), 27 deletions(-) create mode 100644 gcc/testsuite/gcc.dg/vect/costmodel/ppc/costmodel-vect-store-2.c diff --git a/gcc/testsuite/gcc.dg/vect/costmodel/ppc/costmodel-vect-store-2.c b/gcc/testsuite/gcc.dg/vect/costmodel/ppc/costmodel-vect-store-2.c new file mode 100644 index 00000000000..72b67cf9040 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/costmodel/ppc/costmodel-vect-store-2.c @@ -0,0 +1,29 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target vect_int } */ +/* { dg-require-effective-target powerpc_vsx_ok } */ +/* { dg-additional-options "-mvsx" } */ + +/* Verify we do cost the required vec_perm. */ + +int +foo (int *a, int *b, int len) +{ + int i; + int *a1 = a; + int *a0 = a1 - 4; + for (i = 0; i < len; i++) + { + *b = *a0 + *a1; + b--; + a0++; + a1++; + } + return 0; +} + +/* The reason why it doesn't check the exact count is that + we can get more than 1 vec_perm when it's compiled with + partial vector capability like Power10 (retrying for + the epilogue) or it's compiled without unaligned vector + memory access support (realign). 
*/ +/* { dg-final { scan-tree-dump {\mvec_perm\M} "vect" } } */ diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc index 3d451c80bca..ce925cc1d53 100644 --- a/gcc/tree-vect-stmts.cc +++ b/gcc/tree-vect-stmts.cc @@ -9279,6 +9279,40 @@ vectorizable_store (vec_info *vinfo, stmt_vec_info next_stmt_info = first_stmt_info; for (i = 0; i < vec_num; i++) { + if (!costing_p) + { + if (slp) + vec_oprnd = vec_oprnds[i]; + else if (grouped_store) + /* For grouped stores vectorized defs are interleaved in + vect_permute_store_chain(). */ + vec_oprnd = result_chain[i]; + } + + if (memory_access_type == VMAT_CONTIGUOUS_REVERSE) + { + if (costing_p) + inside_cost += record_stmt_cost (cost_vec, 1, vec_perm, + stmt_info, 0, vect_body); + else + { + tree perm_mask = perm_mask_for_reverse (vectype); + tree perm_dest = vect_create_destination_var ( + vect_get_store_rhs (stmt_info), vectype); + tree new_temp = make_ssa_name (perm_dest); + + /* Generate the permute statement. */ + gimple *perm_stmt + = gimple_build_assign (new_temp, VEC_PERM_EXPR, vec_oprnd, + vec_oprnd, perm_mask); + vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt, + gsi); + + perm_stmt = SSA_NAME_DEF_STMT (new_temp); + vec_oprnd = new_temp; + } + } + if (costing_p) { vect_get_store_cost (vinfo, stmt_info, 1, @@ -9294,8 +9328,6 @@ vectorizable_store (vec_info *vinfo, continue; } - unsigned misalign; - unsigned HOST_WIDE_INT align; tree final_mask = NULL_TREE; tree final_len = NULL_TREE; @@ -9315,13 +9347,8 @@ vectorizable_store (vec_info *vinfo, dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi, stmt_info, bump); - if (slp) - vec_oprnd = vec_oprnds[i]; - else if (grouped_store) - /* For grouped stores vectorized defs are interleaved in - vect_permute_store_chain(). 
*/ - vec_oprnd = result_chain[i]; - + unsigned misalign; + unsigned HOST_WIDE_INT align; align = known_alignment (DR_TARGET_ALIGNMENT (first_dr_info)); if (alignment_support_scheme == dr_aligned) misalign = 0; @@ -9338,24 +9365,6 @@ vectorizable_store (vec_info *vinfo, misalign); align = least_bit_hwi (misalign | align); - if (memory_access_type == VMAT_CONTIGUOUS_REVERSE) - { - tree perm_mask = perm_mask_for_reverse (vectype); - tree perm_dest - = vect_create_destination_var (vect_get_store_rhs (stmt_info), - vectype); - tree new_temp = make_ssa_name (perm_dest); - - /* Generate the permute statement. */ - gimple *perm_stmt - = gimple_build_assign (new_temp, VEC_PERM_EXPR, vec_oprnd, - vec_oprnd, perm_mask); - vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt, gsi); - - perm_stmt = SSA_NAME_DEF_STMT (new_temp); - vec_oprnd = new_temp; - } - /* Compute IFN when LOOP_LENS or final_mask valid. */ machine_mode vmode = TYPE_MODE (vectype); machine_mode new_vmode = vmode; -- 2.31.1