This is an initial patch that moves costing next to the transform. It still uses vect_model_load_cost for costing, but moves the call down and duplicates it along the transform paths that handle the different vect_memory_access_types, which should make the subsequent patches easier to review. This patch should not introduce any functional changes.
gcc/ChangeLog: * tree-vect-stmts.cc (vectorizable_load): Move and duplicate the call to vect_model_load_cost down to some different transform paths according to the handlings of different vect_memory_access_types. --- gcc/tree-vect-stmts.cc | 86 ++++++++++++++++++++++++++++-------------- 1 file changed, 57 insertions(+), 29 deletions(-) diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc index a7acc032d47..44514658be3 100644 --- a/gcc/tree-vect-stmts.cc +++ b/gcc/tree-vect-stmts.cc @@ -9430,7 +9430,9 @@ vectorizable_load (vec_info *vinfo, } } - if (!vec_stmt) /* transformation not required. */ + bool costing_p = !vec_stmt; + + if (costing_p) /* transformation not required. */ { if (slp_node && mask @@ -9464,17 +9466,13 @@ vectorizable_load (vec_info *vinfo, vinfo->any_known_not_updated_vssa = true; STMT_VINFO_TYPE (stmt_info) = load_vec_info_type; - vect_model_load_cost (vinfo, stmt_info, ncopies, vf, memory_access_type, - alignment_support_scheme, misalignment, - &gs_info, slp_node, cost_vec); - return true; } if (!slp) gcc_assert (memory_access_type == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info)); - if (dump_enabled_p ()) + if (dump_enabled_p () && !costing_p) dump_printf_loc (MSG_NOTE, vect_location, "transform load. 
ncopies = %d\n", ncopies); @@ -9485,13 +9483,26 @@ vectorizable_load (vec_info *vinfo, if (memory_access_type == VMAT_GATHER_SCATTER && gs_info.decl) { - vect_build_gather_load_calls (vinfo, - stmt_info, gsi, vec_stmt, &gs_info, mask); + if (costing_p) + vect_model_load_cost (vinfo, stmt_info, ncopies, vf, memory_access_type, + alignment_support_scheme, misalignment, &gs_info, + slp_node, cost_vec); + else + vect_build_gather_load_calls (vinfo, stmt_info, gsi, vec_stmt, &gs_info, + mask); return true; } if (memory_access_type == VMAT_INVARIANT) { + if (costing_p) + { + vect_model_load_cost (vinfo, stmt_info, ncopies, vf, + memory_access_type, alignment_support_scheme, + misalignment, &gs_info, slp_node, cost_vec); + return true; + } + gcc_assert (!grouped_load && !mask && !bb_vinfo); /* If we have versioned for aliasing or the loop doesn't have any data dependencies that would preclude this, @@ -9548,6 +9559,14 @@ vectorizable_load (vec_info *vinfo, if (memory_access_type == VMAT_ELEMENTWISE || memory_access_type == VMAT_STRIDED_SLP) { + if (costing_p) + { + vect_model_load_cost (vinfo, stmt_info, ncopies, vf, + memory_access_type, alignment_support_scheme, + misalignment, &gs_info, slp_node, cost_vec); + return true; + } + gimple_stmt_iterator incr_gsi; bool insert_after; tree offvar; @@ -9989,17 +10008,20 @@ vectorizable_load (vec_info *vinfo, here, since we can't guarantee first_stmt_info DR has been initialized yet, use first_stmt_info_for_drptr DR by bumping the distance from first_stmt_info DR instead as below. 
*/ - if (!diff_first_stmt_info) - msq = vect_setup_realignment (vinfo, - first_stmt_info, gsi, &realignment_token, - alignment_support_scheme, NULL_TREE, - &at_loop); - if (alignment_support_scheme == dr_explicit_realign_optimized) - { - phi = as_a <gphi *> (SSA_NAME_DEF_STMT (msq)); - offset = size_binop (MINUS_EXPR, TYPE_SIZE_UNIT (vectype), - size_one_node); - gcc_assert (!first_stmt_info_for_drptr); + if (!costing_p) + { + if (!diff_first_stmt_info) + msq = vect_setup_realignment (vinfo, first_stmt_info, gsi, + &realignment_token, + alignment_support_scheme, NULL_TREE, + &at_loop); + if (alignment_support_scheme == dr_explicit_realign_optimized) + { + phi = as_a<gphi *> (SSA_NAME_DEF_STMT (msq)); + offset = size_binop (MINUS_EXPR, TYPE_SIZE_UNIT (vectype), + size_one_node); + gcc_assert (!first_stmt_info_for_drptr); + } } } else @@ -10020,8 +10042,9 @@ vectorizable_load (vec_info *vinfo, else if (memory_access_type == VMAT_GATHER_SCATTER) { aggr_type = elem_type; - vect_get_strided_load_store_ops (stmt_info, loop_vinfo, &gs_info, - &bump, &vec_offset); + if (!costing_p) + vect_get_strided_load_store_ops (stmt_info, loop_vinfo, &gs_info, &bump, + &vec_offset); } else { @@ -10035,7 +10058,7 @@ vectorizable_load (vec_info *vinfo, auto_vec<tree> vec_offsets; auto_vec<tree> vec_masks; - if (mask) + if (mask && !costing_p) { if (slp_node) vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[mask_index], @@ -10049,7 +10072,7 @@ vectorizable_load (vec_info *vinfo, for (j = 0; j < ncopies; j++) { /* 1. Create the vector or array pointer update chain. 
*/ - if (j == 0) + if (j == 0 && !costing_p) { bool simd_lane_access_p = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) != 0; @@ -10108,7 +10131,7 @@ vectorizable_load (vec_info *vinfo, if (mask) vec_mask = vec_masks[0]; } - else + else if (!costing_p) { gcc_assert (!LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo)); if (dataref_offset) @@ -10125,7 +10148,7 @@ vectorizable_load (vec_info *vinfo, dr_chain.create (vec_num); gimple *new_stmt = NULL; - if (memory_access_type == VMAT_LOAD_STORE_LANES) + if (memory_access_type == VMAT_LOAD_STORE_LANES && !costing_p) { tree vec_array; @@ -10177,7 +10200,7 @@ vectorizable_load (vec_info *vinfo, /* Record that VEC_ARRAY is now dead. */ vect_clobber_variable (vinfo, stmt_info, gsi, vec_array); } - else + else if (!costing_p) { for (i = 0; i < vec_num; i++) { @@ -10631,7 +10654,7 @@ vectorizable_load (vec_info *vinfo, if (slp && !slp_perm) continue; - if (slp_perm) + if (slp_perm && !costing_p) { unsigned n_perms; /* For SLP we know we've seen all possible uses of dr_chain so @@ -10643,7 +10666,7 @@ vectorizable_load (vec_info *vinfo, nullptr, true); gcc_assert (ok); } - else + else if (!costing_p) { if (grouped_load) { @@ -10659,9 +10682,14 @@ vectorizable_load (vec_info *vinfo, } dr_chain.release (); } - if (!slp) + if (!slp && !costing_p) *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0]; + if (costing_p) + vect_model_load_cost (vinfo, stmt_info, ncopies, vf, memory_access_type, + alignment_support_scheme, misalignment, &gs_info, + slp_node, cost_vec); + return true; } -- 2.31.1