This patch adjusts the cost handling on VMAT_INVARIANT in function
vectorizable_load.  We no longer call vect_model_load_cost for it.
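
For context, VMAT_INVARIANT covers loads from an address that is invariant in
the loop.  A made-up example (not taken from the testsuite):

  /* The read of *p is loop invariant.  Because p may alias b, earlier
     passes cannot move it out of the loop, but once the loop has been
     versioned for aliasing the vectorizer can hoist the scalar load to
     the preheader and splat it into a vector.  */
  void
  foo (int *b, int *p, int n)
  {
    for (int i = 0; i < n; i++)
      b[i] = *p;
  }

When that hoisting is possible, the scalar_load and scalar_to_vec belong in
the prologue; when it is not, they have to stay in the loop body, which is
the distinction the costing below is made to reflect.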
To make the costing on VMAT_INVARIANT more accurate, this patch queries
hoist_defs_of_uses for the hoisting decision and records the costs in the
appropriate place (prologue vs. body) based on it (see the illustrative
sketch appended after the patch).  Currently, hoist_defs_of_uses always
hoists the defs of all SSA uses; the new argument HOIST_P lets us skip the
actual hoisting during the costing phase.

gcc/ChangeLog:

	* tree-vect-stmts.cc (hoist_defs_of_uses): Add one argument HOIST_P.
	(vectorizable_load): Adjust the handling on VMAT_INVARIANT to respect
	the hoisting decision, and stop calling vect_model_load_cost for it.
	(vect_model_load_cost): Assert it won't get VMAT_INVARIANT any more
	and remove the VMAT_INVARIANT related handling.
---
 gcc/tree-vect-stmts.cc | 61 +++++++++++++++++++++++++++---------------
 1 file changed, 39 insertions(+), 22 deletions(-)

diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
index 744cdf40e26..19c61d703c8 100644
--- a/gcc/tree-vect-stmts.cc
+++ b/gcc/tree-vect-stmts.cc
@@ -1135,7 +1135,8 @@ vect_model_load_cost (vec_info *vinfo,
 		      slp_tree slp_node,
 		      stmt_vector_for_cost *cost_vec)
 {
-  gcc_assert (memory_access_type != VMAT_GATHER_SCATTER || !gs_info->decl);
+  gcc_assert ((memory_access_type != VMAT_GATHER_SCATTER || !gs_info->decl)
+	      && memory_access_type != VMAT_INVARIANT);
 
   unsigned int inside_cost = 0, prologue_cost = 0;
   bool grouped_access_p = STMT_VINFO_GROUPED_ACCESS (stmt_info);
@@ -1238,16 +1239,6 @@ vect_model_load_cost (vec_info *vinfo,
 			 ncopies * assumed_nunits,
 			 scalar_load, stmt_info, 0, vect_body);
     }
-  else if (memory_access_type == VMAT_INVARIANT)
-    {
-      /* Invariant loads will ideally be hoisted and splat to a vector.  */
-      prologue_cost += record_stmt_cost (cost_vec, 1,
-					 scalar_load, stmt_info, 0,
-					 vect_prologue);
-      prologue_cost += record_stmt_cost (cost_vec, 1,
-					 scalar_to_vec, stmt_info, 0,
-					 vect_prologue);
-    }
   else
     vect_get_load_cost (vinfo, stmt_info, ncopies,
 			alignment_support_scheme, misalignment, first_stmt_p,
@@ -9121,10 +9112,11 @@ permute_vec_elements (vec_info *vinfo,
 /* Hoist the definitions of all SSA uses on STMT_INFO out of the loop LOOP,
    inserting them on the loops preheader edge.  Returns true if we were
    successful in doing so (and thus STMT_INFO can be moved then),
-   otherwise returns false.  */
+   otherwise returns false.  HOIST_P indicates whether we actually want to
+   hoist the definitions of all SSA uses; it is false when we are costing.  */
 
 static bool
-hoist_defs_of_uses (stmt_vec_info stmt_info, class loop *loop)
+hoist_defs_of_uses (stmt_vec_info stmt_info, class loop *loop, bool hoist_p)
 {
   ssa_op_iter i;
   tree op;
@@ -9158,6 +9150,9 @@ hoist_defs_of_uses (stmt_vec_info stmt_info, class loop *loop)
   if (!any)
     return true;
 
+  if (!hoist_p)
+    return true;
+
   FOR_EACH_SSA_TREE_OPERAND (op, stmt_info->stmt, i, SSA_OP_USE)
     {
       gimple *def_stmt = SSA_NAME_DEF_STMT (op);
@@ -9510,14 +9505,6 @@ vectorizable_load (vec_info *vinfo,
 
   if (memory_access_type == VMAT_INVARIANT)
     {
-      if (costing_p)
-	{
-	  vect_model_load_cost (vinfo, stmt_info, ncopies, vf,
-				memory_access_type, alignment_support_scheme,
-				misalignment, &gs_info, slp_node, cost_vec);
-	  return true;
-	}
-
       gcc_assert (!grouped_load && !mask && !bb_vinfo);
       /* If we have versioned for aliasing or the loop doesn't
 	 have any data dependencies that would preclude this,
@@ -9525,7 +9512,37 @@ vectorizable_load (vec_info *vinfo,
 	 thus we can insert it on the preheader edge.  */
       bool hoist_p = (LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo)
 		      && !nested_in_vect_loop
-		      && hoist_defs_of_uses (stmt_info, loop));
+		      && hoist_defs_of_uses (stmt_info, loop, !costing_p));
+      if (costing_p)
+	{
+	  if (hoist_p)
+	    {
+	      unsigned int prologue_cost;
+	      prologue_cost = record_stmt_cost (cost_vec, 1, scalar_load,
+						stmt_info, 0, vect_prologue);
+	      prologue_cost += record_stmt_cost (cost_vec, 1, scalar_to_vec,
+						 stmt_info, 0, vect_prologue);
+	      if (dump_enabled_p ())
+		dump_printf_loc (MSG_NOTE, vect_location,
+				 "vect_model_load_cost: inside_cost = 0, "
+				 "prologue_cost = %d .\n",
+				 prologue_cost);
+	    }
+	  else
+	    {
+	      unsigned int inside_cost;
+	      inside_cost = record_stmt_cost (cost_vec, 1, scalar_load,
+					      stmt_info, 0, vect_body);
+	      inside_cost += record_stmt_cost (cost_vec, 1, scalar_to_vec,
+					       stmt_info, 0, vect_body);
+	      if (dump_enabled_p ())
+		dump_printf_loc (MSG_NOTE, vect_location,
+				 "vect_model_load_cost: inside_cost = %d, "
+				 "prologue_cost = 0 .\n",
+				 inside_cost);
+	    }
+	  return true;
+	}
       if (hoist_p)
 	{
 	  gassign *stmt = as_a <gassign *> (stmt_info->stmt);
-- 
2.31.1
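
For illustration only, here is roughly what the hoist-and-splat transform
amounts to at the source level.  This is a hand-written sketch, not compiler
output; the vector width of four ints and the function name are made up, and
it assumes the runtime alias check from loop versioning has already ruled out
any overlap between b and p.

  typedef int v4si __attribute__ ((vector_size (16)));

  void
  foo_vectorized (int *b, int *p, int n)
  {
    int x = *p;               /* hoisted scalar load: costed as scalar_load
                                 in the prologue when hoist_p is true */
    v4si vx = { x, x, x, x }; /* splat: costed as scalar_to_vec */
    int i;
    for (i = 0; i + 4 <= n; i += 4)
      __builtin_memcpy (&b[i], &vx, sizeof vx);  /* vectorized stores */
    for (; i < n; i++)        /* scalar epilogue */
      b[i] = x;
  }

When hoist_defs_of_uses returns false (hoist_p is false), the scalar load and
the splat conceptually stay inside the loop body instead, which is why the
patch records them with vect_body rather than vect_prologue in that case.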