This tries to clean up the API available to vectorizable_* for recording stmt costs. There are several overloads of record_stmt_cost for this; the patch adds one that specifies only the SLP node and restricts the overload taking just a stmt_vec_info to scalar stmt processing.
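For reference, the record_stmt_cost overload set after the patch looks roughly as follows. This is a condensed sketch assembled from the tree-vectorizer.h and tree-vect-stmts.cc hunks below, not a verbatim copy:

  /* Scalar stmt costing only - no vectype; the out-of-line definition
     asserts KIND is scalar_stmt, scalar_load or scalar_store.  */
  extern unsigned record_stmt_cost (stmt_vector_for_cost *, int,
				    enum vect_cost_for_stmt, stmt_vec_info,
				    int, enum vect_cost_model_location);

  /* New SLP node overload; the vectype comes from SLP_TREE_VECTYPE and
     the out-of-line slp_tree overload it forwards to records
     SLP_TREE_REPRESENTATIVE as the stmt_vec_info.  */
  inline unsigned
  record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
		    enum vect_cost_for_stmt kind, slp_tree node,
		    int misalign, enum vect_cost_model_location where)
  {
    return record_stmt_cost (body_cost_vec, count, kind, node,
			     SLP_TREE_VECTYPE (node), misalign, where);
  }

  /* Full-blown overload for the awkward spots that still pass SLP node,
     stmt_vec_info and vectype explicitly.  */
  extern unsigned record_stmt_cost (stmt_vector_for_cost *, int,
				    enum vect_cost_for_stmt,
				    stmt_vec_info, slp_tree, tree,
				    int, enum vect_cost_model_location);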
There are awkward spots left which can use the overload with the full set of parameters: SLP node, stmt_vec_info and vectype. One issue is that BB vectorization SLP instances have root statements that are not represented by an SLP node. The other big offender is dataref alignment peeling analysis, which I plan to move away from the add_stmt API back to the target hook based costing (just to get it out of the way, not necessarily as a final solution).

For backends the main visible change will be that most calls to add_stmt_cost will now have an SLP node passed. To cause less disruption there I still pass a stmt_vec_info in addition to the SLP node. This is not the big vectorizer costing overhaul.

Bootstrapped on x86_64-unknown-linux-gnu; testing revealed some cost-related fallout. I'll eventually try to split this up. For now I want to see whether any of the asserts trip on aarch64/riscv.

Richard.

	* tree-vectorizer.h (record_stmt_cost): Remove inline overload
	with stmt_vec_info argument, make out-of-line version of this
	no longer take a vectype - it is only for scalar stmt costs.
	(record_stmt_cost): Remove stmt_vec_info argument from inline
	overload with SLP node specified.
	* tree-vect-loop.cc (vect_model_reduction_cost): Take SLP node
	as argument and adjust.
	(vectorizable_lane_reducing): Use SLP node overload for
	record_stmt_cost.
	(vectorizable_reduction): Likewise.
	(vectorizable_phi): Likewise.
	(vectorizable_recurr): Likewise.
	(vectorizable_nonlinear_induction): Likewise.
	(vectorizable_induction): Likewise.
	(vectorizable_live_operation): Likewise.
	* tree-vect-slp.cc (vect_prologue_cost_for_slp): Use full-blown
	record_stmt_cost.
	(vectorizable_bb_reduc_epilogue): Likewise.
	(vect_bb_slp_scalar_cost): Adjust.
	* tree-vect-stmts.cc (record_stmt_cost): For stmt_vec_info
	overload assert we are only using it for scalar stmt costing.
	(record_stmt_cost): For SLP node overload record
	SLP_TREE_REPRESENTATIVE as stmt_vec_info.
	(vect_model_simple_cost): Do not get stmt_vec_info argument
	and adjust.
	(vect_model_promotion_demotion_cost): Get SLP node instead of
	stmt_vec_info argument and adjust.
	(vect_get_store_cost): Compute vectype based on whether we got
	SLP node or stmt_vec_info and use the full record_stmt_cost API.
	(vect_get_load_cost): Likewise.
	(vectorizable_bswap): Adjust.
	(vectorizable_call): Likewise.
	(vectorizable_simd_clone_call): Likewise.
	(vectorizable_conversion): Likewise.
	(vectorizable_assignment): Likewise.
	(vectorizable_shift): Likewise.
	(vectorizable_operation): Likewise.
	(vectorizable_store): Likewise.
	(vectorizable_load): Likewise.
	(vectorizable_condition): Likewise.
	(vectorizable_comparison_1): Likewise.
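To illustrate the change backends will see through add_stmt_cost, here is a before/after sketch of a typical call site, modeled on the vectorizable_lane_reducing hunk in the diff below; cost_vec, ncopies_for_cost, stmt_info and slp_node are whatever the respective vectorizable_* routine has in scope:

  /* Before: stmt_vec_info overload, vectype implicitly
     STMT_VINFO_VECTYPE (stmt_info).  */
  record_stmt_cost (cost_vec, (int) ncopies_for_cost, vector_stmt, stmt_info,
		    0, vect_body);

  /* After: SLP node overload, vectype implicitly
     SLP_TREE_VECTYPE (slp_node); backends still get a stmt_vec_info
     because SLP_TREE_REPRESENTATIVE (slp_node) is recorded alongside
     the node.  */
  record_stmt_cost (cost_vec, (int) ncopies_for_cost, vector_stmt, slp_node,
		    0, vect_body);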
--- gcc/tree-vect-loop.cc | 58 +++++++-------- gcc/tree-vect-slp.cc | 11 ++- gcc/tree-vect-stmts.cc | 160 ++++++++++++++++++++--------------------- gcc/tree-vectorizer.h | 23 ++---- 4 files changed, 118 insertions(+), 134 deletions(-) diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc index fe6f3cf188e..5abac27ec62 100644 --- a/gcc/tree-vect-loop.cc +++ b/gcc/tree-vect-loop.cc @@ -5283,7 +5283,7 @@ vect_is_emulated_mixed_dot_prod (stmt_vec_info stmt_info) static void vect_model_reduction_cost (loop_vec_info loop_vinfo, - stmt_vec_info stmt_info, internal_fn reduc_fn, + slp_tree slp_node, internal_fn reduc_fn, vect_reduction_type reduction_type, int ncopies, stmt_vector_for_cost *cost_vec) { @@ -5299,9 +5299,10 @@ vect_model_reduction_cost (loop_vec_info loop_vinfo, if (reduction_type == COND_REDUCTION) ncopies *= 2; - vectype = STMT_VINFO_VECTYPE (stmt_info); + vectype = SLP_TREE_VECTYPE (slp_node); mode = TYPE_MODE (vectype); - stmt_vec_info orig_stmt_info = vect_orig_stmt (stmt_info); + stmt_vec_info orig_stmt_info + = vect_orig_stmt (SLP_TREE_REPRESENTATIVE (slp_node)); gimple_match_op op; if (!gimple_extract_op (orig_stmt_info->stmt, &op)) @@ -5319,16 +5320,16 @@ vect_model_reduction_cost (loop_vec_info loop_vinfo, if (reduc_fn != IFN_LAST) /* Count one reduction-like operation per vector. */ inside_cost = record_stmt_cost (cost_vec, ncopies, vec_to_scalar, - stmt_info, 0, vect_body); + slp_node, 0, vect_body); else { /* Use NELEMENTS extracts and NELEMENTS scalar ops. */ unsigned int nelements = ncopies * vect_nunits_for_cost (vectype); inside_cost = record_stmt_cost (cost_vec, nelements, - vec_to_scalar, stmt_info, 0, + vec_to_scalar, slp_node, 0, vect_body); inside_cost += record_stmt_cost (cost_vec, nelements, - scalar_stmt, stmt_info, 0, + scalar_stmt, orig_stmt_info, 0, vect_body); } } @@ -5345,7 +5346,7 @@ vect_model_reduction_cost (loop_vec_info loop_vinfo, /* We need the initial reduction value. */ prologue_stmts = 1; prologue_cost += record_stmt_cost (cost_vec, prologue_stmts, - scalar_to_vec, stmt_info, 0, + scalar_to_vec, slp_node, 0, vect_prologue); } @@ -5362,24 +5363,24 @@ vect_model_reduction_cost (loop_vec_info loop_vinfo, { /* An EQ stmt and an COND_EXPR stmt. */ epilogue_cost += record_stmt_cost (cost_vec, 2, - vector_stmt, stmt_info, 0, + vector_stmt, slp_node, 0, vect_epilogue); /* Reduction of the max index and a reduction of the found values. */ epilogue_cost += record_stmt_cost (cost_vec, 2, - vec_to_scalar, stmt_info, 0, + vec_to_scalar, slp_node, 0, vect_epilogue); /* A broadcast of the max value. */ epilogue_cost += record_stmt_cost (cost_vec, 1, - scalar_to_vec, stmt_info, 0, + scalar_to_vec, slp_node, 0, vect_epilogue); } else { epilogue_cost += record_stmt_cost (cost_vec, 1, vector_stmt, - stmt_info, 0, vect_epilogue); + slp_node, 0, vect_epilogue); epilogue_cost += record_stmt_cost (cost_vec, 1, - vec_to_scalar, stmt_info, 0, + vec_to_scalar, slp_node, 0, vect_epilogue); } } @@ -5389,12 +5390,12 @@ vect_model_reduction_cost (loop_vec_info loop_vinfo, /* Extraction of scalar elements. */ epilogue_cost += record_stmt_cost (cost_vec, 2 * estimated_nunits, - vec_to_scalar, stmt_info, 0, + vec_to_scalar, slp_node, 0, vect_epilogue); /* Scalar max reductions via COND_EXPR / MAX_EXPR. 
*/ epilogue_cost += record_stmt_cost (cost_vec, 2 * estimated_nunits - 3, - scalar_stmt, stmt_info, 0, + scalar_stmt, orig_stmt_info, 0, vect_epilogue); } else if (reduction_type == EXTRACT_LAST_REDUCTION @@ -5420,10 +5421,10 @@ vect_model_reduction_cost (loop_vec_info loop_vinfo, Also requires scalar extract. */ epilogue_cost += record_stmt_cost (cost_vec, exact_log2 (nelements) * 2, - vector_stmt, stmt_info, 0, + vector_stmt, slp_node, 0, vect_epilogue); epilogue_cost += record_stmt_cost (cost_vec, 1, - vec_to_scalar, stmt_info, 0, + vec_to_scalar, slp_node, 0, vect_epilogue); } else @@ -5431,7 +5432,7 @@ vect_model_reduction_cost (loop_vec_info loop_vinfo, elements, we have N extracts and N-1 reduction ops. */ epilogue_cost += record_stmt_cost (cost_vec, nelements + nelements - 1, - vector_stmt, stmt_info, 0, + vector_stmt, slp_node, 0, vect_epilogue); } } @@ -7421,7 +7422,7 @@ vectorizable_lane_reducing (loop_vec_info loop_vinfo, stmt_vec_info stmt_info, value and one that contains half of its negative. */ int prologue_stmts = 2; unsigned cost = record_stmt_cost (cost_vec, prologue_stmts, - scalar_to_vec, stmt_info, 0, + scalar_to_vec, slp_node, 0, vect_prologue); if (dump_enabled_p ()) dump_printf (MSG_NOTE, "vectorizable_lane_reducing: " @@ -7431,7 +7432,7 @@ vectorizable_lane_reducing (loop_vec_info loop_vinfo, stmt_vec_info stmt_info, ncopies_for_cost *= 4; } - record_stmt_cost (cost_vec, (int) ncopies_for_cost, vector_stmt, stmt_info, + record_stmt_cost (cost_vec, (int) ncopies_for_cost, vector_stmt, slp_node, 0, vect_body); if (LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo)) @@ -8345,13 +8346,14 @@ vectorizable_reduction (loop_vec_info loop_vinfo, return false; } - vect_model_reduction_cost (loop_vinfo, stmt_info, reduc_fn, + vect_model_reduction_cost (loop_vinfo, slp_for_stmt_info, reduc_fn, reduction_type, ncopies, cost_vec); /* Cost the reduction op inside the loop if transformed via vect_transform_reduction for non-lane-reducing operation. Otherwise this is costed by the separate vectorizable_* routines. */ if (single_defuse_cycle) - record_stmt_cost (cost_vec, ncopies, vector_stmt, stmt_info, 0, vect_body); + record_stmt_cost (cost_vec, ncopies, vector_stmt, + slp_for_stmt_info, 0, vect_body); if (dump_enabled_p () && reduction_type == FOLD_LEFT_REDUCTION) @@ -9125,7 +9127,7 @@ vectorizable_phi (vec_info *, favoring the vector path (but may pessimize it in some cases). */ if (gimple_phi_num_args (as_a <gphi *> (stmt_info->stmt)) > 1) record_stmt_cost (cost_vec, SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node), - vector_stmt, stmt_info, vectype, 0, vect_body); + vector_stmt, slp_node, 0, vect_body); STMT_VINFO_TYPE (stmt_info) = phi_info_type; return true; } @@ -9306,7 +9308,7 @@ vectorizable_recurr (loop_vec_info loop_vinfo, stmt_vec_info stmt_info, prologue_cost = record_stmt_cost (cost_vec, 1, scalar_to_vec, stmt_info, 0, vect_prologue); unsigned inside_cost = record_stmt_cost (cost_vec, ncopies, vector_stmt, - stmt_info, 0, vect_body); + slp_node, 0, vect_body); if (dump_enabled_p ()) dump_printf_loc (MSG_NOTE, vect_location, "vectorizable_recurr: inside_cost = %d, " @@ -9837,7 +9839,7 @@ vectorizable_nonlinear_induction (loop_vec_info loop_vinfo, /* loop cost for vec_loop. Neg induction doesn't have any inside_cost. */ inside_cost = record_stmt_cost (cost_vec, ncopies, vector_stmt, - stmt_info, 0, vect_body); + slp_node, 0, vect_body); /* loop cost for vec_loop. Neg induction doesn't have any inside_cost. 
*/ @@ -9846,7 +9848,7 @@ vectorizable_nonlinear_induction (loop_vec_info loop_vinfo, /* prologue cost for vec_init and vec_step. */ prologue_cost = record_stmt_cost (cost_vec, 2, scalar_to_vec, - stmt_info, 0, vect_prologue); + slp_node, 0, vect_prologue); if (dump_enabled_p ()) dump_printf_loc (MSG_NOTE, vect_location, @@ -10172,11 +10174,11 @@ vectorizable_induction (loop_vec_info loop_vinfo, inside_cost = record_stmt_cost (cost_vec, SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node), - vector_stmt, stmt_info, 0, vect_body); + vector_stmt, slp_node, 0, vect_body); /* prologue cost for vec_init (if not nested) and step. */ prologue_cost = record_stmt_cost (cost_vec, 1 + !nested_in_vect_loop, scalar_to_vec, - stmt_info, 0, vect_prologue); + slp_node, 0, vect_prologue); } else /* if (!slp_node) */ { @@ -11157,7 +11159,7 @@ vectorizable_live_operation (vec_info *vinfo, stmt_vec_info stmt_info, } /* ??? Enable for loop costing as well. */ if (!loop_vinfo) - record_stmt_cost (cost_vec, 1, vec_to_scalar, stmt_info, NULL_TREE, + record_stmt_cost (cost_vec, 1, vec_to_scalar, slp_node, NULL_TREE, 0, vect_epilogue); return true; } diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc index f7c51b6cf68..8d0a612577b 100644 --- a/gcc/tree-vect-slp.cc +++ b/gcc/tree-vect-slp.cc @@ -8038,7 +8038,7 @@ vect_prologue_cost_for_slp (slp_tree node, we are costing so avoid passing it down more than once. Pass it to the first vec_construct or scalar_to_vec part since for those the x86 backend tries to account for GPR to XMM register moves. */ - record_stmt_cost (cost_vec, 1, kind, + record_stmt_cost (cost_vec, 1, kind, nullptr, (kind != vector_load && !passed) ? node : nullptr, vectype, 0, vect_prologue); if (kind != vector_load) @@ -8463,11 +8463,11 @@ vectorizable_bb_reduc_epilogue (slp_instance instance, cost log2 vector operations plus shuffles and one extraction. */ unsigned steps = floor_log2 (vect_nunits_for_cost (vectype)); record_stmt_cost (cost_vec, steps, vector_stmt, instance->root_stmts[0], - vectype, 0, vect_body); + NULL, vectype, 0, vect_body); record_stmt_cost (cost_vec, steps, vec_perm, instance->root_stmts[0], - vectype, 0, vect_body); + NULL, vectype, 0, vect_body); record_stmt_cost (cost_vec, 1, vec_to_scalar, instance->root_stmts[0], - vectype, 0, vect_body); + NULL, vectype, 0, vect_body); /* Since we replace all stmts of a possibly longer scalar reduction chain account for the extra scalar stmts for that. 
*/ @@ -8890,8 +8890,7 @@ next_lane: continue; else kind = scalar_stmt; - record_stmt_cost (cost_vec, 1, kind, orig_stmt_info, - SLP_TREE_VECTYPE (node), 0, vect_body); + record_stmt_cost (cost_vec, 1, kind, orig_stmt_info, 0, vect_body); } auto_vec<bool, 20> subtree_life; diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc index ab60f0eb657..8f38d8bcb7c 100644 --- a/gcc/tree-vect-stmts.cc +++ b/gcc/tree-vect-stmts.cc @@ -117,11 +117,13 @@ record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count, unsigned record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count, enum vect_cost_for_stmt kind, stmt_vec_info stmt_info, - tree vectype, int misalign, - enum vect_cost_model_location where) + int misalign, enum vect_cost_model_location where) { + gcc_assert (kind == scalar_stmt + || kind == scalar_load + || kind == scalar_store); return record_stmt_cost (body_cost_vec, count, kind, stmt_info, NULL, - vectype, misalign, where); + NULL_TREE, misalign, where); } unsigned @@ -130,7 +132,8 @@ record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count, tree vectype, int misalign, enum vect_cost_model_location where) { - return record_stmt_cost (body_cost_vec, count, kind, NULL, node, + return record_stmt_cost (body_cost_vec, count, kind, + SLP_TREE_REPRESENTATIVE (node), node, vectype, misalign, where); } @@ -905,11 +908,8 @@ vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo, bool *fatal) be generated for the single vector op. We will handle that shortly. */ static void -vect_model_simple_cost (vec_info *, - stmt_vec_info stmt_info, int ncopies, - enum vect_def_type *dt, - int ndts, - slp_tree node, +vect_model_simple_cost (vec_info *, int ncopies, enum vect_def_type *dt, + int ndts, slp_tree node, stmt_vector_for_cost *cost_vec, vect_cost_for_stmt kind = vector_stmt) { @@ -928,11 +928,11 @@ vect_model_simple_cost (vec_info *, for (int i = 0; i < ndts; i++) if (dt[i] == vect_constant_def || dt[i] == vect_external_def) prologue_cost += record_stmt_cost (cost_vec, 1, scalar_to_vec, - stmt_info, 0, vect_prologue); + node, 0, vect_prologue); /* Pass the inside-of-loop statements to the target-specific cost model. */ inside_cost += record_stmt_cost (cost_vec, ncopies, kind, - stmt_info, 0, vect_body); + node, 0, vect_body); if (dump_enabled_p ()) dump_printf_loc (MSG_NOTE, vect_location, @@ -950,7 +950,7 @@ vect_model_simple_cost (vec_info *, is true the stmt is doing widening arithmetic. */ static void -vect_model_promotion_demotion_cost (stmt_vec_info stmt_info, +vect_model_promotion_demotion_cost (slp_tree node, enum vect_def_type *dt, unsigned int ncopies, int pwr, stmt_vector_for_cost *cost_vec, @@ -964,7 +964,7 @@ vect_model_promotion_demotion_cost (stmt_vec_info stmt_info, inside_cost += record_stmt_cost (cost_vec, ncopies, widen_arith ? vector_stmt : vec_promote_demote, - stmt_info, 0, vect_body); + node, 0, vect_body); ncopies *= 2; } @@ -972,7 +972,7 @@ vect_model_promotion_demotion_cost (stmt_vec_info stmt_info, for (i = 0; i < 2; i++) if (dt[i] == vect_constant_def || dt[i] == vect_external_def) prologue_cost += record_stmt_cost (cost_vec, 1, vector_stmt, - stmt_info, 0, vect_prologue); + node, 0, vect_prologue); if (dump_enabled_p ()) dump_printf_loc (MSG_NOTE, vect_location, @@ -1019,13 +1019,15 @@ vect_get_store_cost (vec_info *, stmt_vec_info stmt_info, slp_tree slp_node, unsigned int *inside_cost, stmt_vector_for_cost *body_cost_vec) { + tree vectype + = slp_node ? 
SLP_TREE_VECTYPE (slp_node) : STMT_VINFO_VECTYPE (stmt_info); switch (alignment_support_scheme) { case dr_aligned: { *inside_cost += record_stmt_cost (body_cost_vec, ncopies, - vector_store, stmt_info, slp_node, 0, - vect_body); + vector_store, stmt_info, slp_node, + vectype, 0, vect_body); if (dump_enabled_p ()) dump_printf_loc (MSG_NOTE, vect_location, @@ -1038,7 +1040,7 @@ vect_get_store_cost (vec_info *, stmt_vec_info stmt_info, slp_tree slp_node, /* Here, we assign an additional cost for the unaligned store. */ *inside_cost += record_stmt_cost (body_cost_vec, ncopies, unaligned_store, stmt_info, slp_node, - misalignment, vect_body); + vectype, misalignment, vect_body); if (dump_enabled_p ()) dump_printf_loc (MSG_NOTE, vect_location, "vect_model_store_cost: unaligned supported by " @@ -1072,12 +1074,16 @@ vect_get_load_cost (vec_info *, stmt_vec_info stmt_info, slp_tree slp_node, stmt_vector_for_cost *body_cost_vec, bool record_prologue_costs) { + tree vectype + = slp_node ? SLP_TREE_VECTYPE (slp_node) : STMT_VINFO_VECTYPE (stmt_info); + switch (alignment_support_scheme) { case dr_aligned: { *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load, - stmt_info, slp_node, 0, vect_body); + stmt_info, slp_node, vectype, + 0, vect_body); if (dump_enabled_p ()) dump_printf_loc (MSG_NOTE, vect_location, @@ -1090,7 +1096,7 @@ vect_get_load_cost (vec_info *, stmt_vec_info stmt_info, slp_tree slp_node, /* Here, we assign an additional cost for the unaligned load. */ *inside_cost += record_stmt_cost (body_cost_vec, ncopies, unaligned_load, stmt_info, slp_node, - misalignment, vect_body); + vectype, misalignment, vect_body); if (dump_enabled_p ()) dump_printf_loc (MSG_NOTE, vect_location, @@ -1102,18 +1108,19 @@ vect_get_load_cost (vec_info *, stmt_vec_info stmt_info, slp_tree slp_node, case dr_explicit_realign: { *inside_cost += record_stmt_cost (body_cost_vec, ncopies * 2, - vector_load, stmt_info, slp_node, 0, - vect_body); + vector_load, stmt_info, slp_node, + vectype, 0, vect_body); *inside_cost += record_stmt_cost (body_cost_vec, ncopies, - vec_perm, stmt_info, slp_node, 0, - vect_body); + vec_perm, stmt_info, slp_node, + vectype, 0, vect_body); /* FIXME: If the misalignment remains fixed across the iterations of the containing loop, the following cost should be added to the prologue costs. 
*/ if (targetm.vectorize.builtin_mask_for_load) *inside_cost += record_stmt_cost (body_cost_vec, 1, vector_stmt, - stmt_info, slp_node, 0, vect_body); + stmt_info, slp_node, vectype, + 0, vect_body); if (dump_enabled_p ()) dump_printf_loc (MSG_NOTE, vect_location, @@ -1139,17 +1146,21 @@ vect_get_load_cost (vec_info *, stmt_vec_info stmt_info, slp_tree slp_node, { *prologue_cost += record_stmt_cost (prologue_cost_vec, 2, vector_stmt, stmt_info, - slp_node, 0, vect_prologue); + slp_node, vectype, + 0, vect_prologue); if (targetm.vectorize.builtin_mask_for_load) *prologue_cost += record_stmt_cost (prologue_cost_vec, 1, vector_stmt, stmt_info, - slp_node, 0, vect_prologue); + slp_node, vectype, + 0, vect_prologue); } *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load, - stmt_info, slp_node, 0, vect_body); + stmt_info, slp_node, vectype, + 0, vect_body); *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_perm, - stmt_info, slp_node, 0, vect_body); + stmt_info, slp_node, vectype, + 0, vect_body); if (dump_enabled_p ()) dump_printf_loc (MSG_NOTE, vect_location, @@ -3406,11 +3417,11 @@ vectorizable_bswap (vec_info *vinfo, STMT_VINFO_TYPE (stmt_info) = call_vec_info_type; DUMP_VECT_SCOPE ("vectorizable_bswap"); record_stmt_cost (cost_vec, - 1, vector_stmt, stmt_info, 0, vect_prologue); + 1, vector_stmt, slp_node, 0, vect_prologue); record_stmt_cost (cost_vec, slp_node ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) : ncopies, - vec_perm, stmt_info, 0, vect_body); + vec_perm, slp_node, 0, vect_body); return true; } @@ -3756,11 +3767,10 @@ vectorizable_call (vec_info *vinfo, } STMT_VINFO_TYPE (stmt_info) = call_vec_info_type; DUMP_VECT_SCOPE ("vectorizable_call"); - vect_model_simple_cost (vinfo, stmt_info, - ncopies, dt, ndts, slp_node, cost_vec); + vect_model_simple_cost (vinfo, ncopies, dt, ndts, slp_node, cost_vec); if (ifn != IFN_LAST && modifier == NARROW && !slp_node) record_stmt_cost (cost_vec, ncopies / 2, - vec_promote_demote, stmt_info, 0, vect_body); + vec_promote_demote, slp_node, 0, vect_body); if (loop_vinfo && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) @@ -4724,8 +4734,7 @@ vectorizable_simd_clone_call (vec_info *vinfo, stmt_vec_info stmt_info, STMT_VINFO_TYPE (stmt_info) = call_simd_clone_vec_info_type; DUMP_VECT_SCOPE ("vectorizable_simd_clone_call"); -/* vect_model_simple_cost (vinfo, stmt_info, ncopies, - dt, slp_node, cost_vec); */ +/* vect_model_simple_cost (vinfo, ncopies, dt, slp_node, cost_vec); */ return true; } @@ -5922,7 +5931,7 @@ vectorizable_conversion (vec_info *vinfo, if (modifier == NONE) { STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type; - vect_model_simple_cost (vinfo, stmt_info, (1 + multi_step_cvt), + vect_model_simple_cost (vinfo, (1 + multi_step_cvt), dt, ndts, slp_node, cost_vec); } else if (modifier == NARROW_SRC || modifier == NARROW_DST) @@ -5930,7 +5939,7 @@ vectorizable_conversion (vec_info *vinfo, STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type; /* The final packing step produces one vector result per copy. */ unsigned int nvectors = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node); - vect_model_promotion_demotion_cost (stmt_info, dt, nvectors, + vect_model_promotion_demotion_cost (slp_node, dt, nvectors, multi_step_cvt, cost_vec, widen_arith); } @@ -5942,7 +5951,7 @@ vectorizable_conversion (vec_info *vinfo, so >> MULTI_STEP_CVT divides by 2^(number of steps - 1). 
*/ unsigned int nvectors = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) >> multi_step_cvt; - vect_model_promotion_demotion_cost (stmt_info, dt, nvectors, + vect_model_promotion_demotion_cost (slp_node, dt, nvectors, multi_step_cvt, cost_vec, widen_arith); } @@ -6291,8 +6300,7 @@ vectorizable_assignment (vec_info *vinfo, STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type; DUMP_VECT_SCOPE ("vectorizable_assignment"); if (!vect_nop_conversion_p (stmt_info)) - vect_model_simple_cost (vinfo, stmt_info, ncopies, dt, ndts, slp_node, - cost_vec); + vect_model_simple_cost (vinfo, ncopies, dt, ndts, slp_node, cost_vec); return true; } @@ -6662,7 +6670,7 @@ vectorizable_shift (vec_info *vinfo, } STMT_VINFO_TYPE (stmt_info) = shift_vec_info_type; DUMP_VECT_SCOPE ("vectorizable_shift"); - vect_model_simple_cost (vinfo, stmt_info, ncopies, dt, + vect_model_simple_cost (vinfo, ncopies, dt, scalar_shift_arg ? 1 : ndts, slp_node, cost_vec); return true; } @@ -7099,8 +7107,7 @@ vectorizable_operation (vec_info *vinfo, STMT_VINFO_TYPE (stmt_info) = op_vec_info_type; DUMP_VECT_SCOPE ("vectorizable_operation"); - vect_model_simple_cost (vinfo, stmt_info, - 1, dt, ndts, slp_node, cost_vec); + vect_model_simple_cost (vinfo, 1, dt, ndts, slp_node, cost_vec); if (using_emulated_vectors_p) { /* The above vect_model_simple_cost call handles constants @@ -8658,7 +8665,7 @@ vectorizable_store (vec_info *vinfo, unsigned int inside_cost = 0, prologue_cost = 0; if (vls_type == VLS_STORE_INVARIANT) prologue_cost += record_stmt_cost (cost_vec, 1, scalar_to_vec, - stmt_info, 0, vect_prologue); + slp_node, 0, vect_prologue); vect_get_store_cost (vinfo, stmt_info, slp_node, ncopies, alignment_support_scheme, misalignment, &inside_cost, cost_vec); @@ -8730,7 +8737,7 @@ vectorizable_store (vec_info *vinfo, } else if (vls_type != VLS_STORE_INVARIANT) return; - *prologue_cost += record_stmt_cost (cost_vec, 1, scalar_to_vec, stmt_info, + *prologue_cost += record_stmt_cost (cost_vec, 1, scalar_to_vec, slp_node, 0, vect_prologue); }; @@ -9039,7 +9046,7 @@ vectorizable_store (vec_info *vinfo, if (nstores > 1) inside_cost += record_stmt_cost (cost_vec, n_adjacent_stores, - vec_to_scalar, stmt_info, slp_node, + vec_to_scalar, slp_node, 0, vect_body); } if (dump_enabled_p ()) @@ -9377,7 +9384,7 @@ vectorizable_store (vec_info *vinfo, { if (costing_p && vls_type == VLS_STORE_INVARIANT) prologue_cost += record_stmt_cost (cost_vec, 1, scalar_to_vec, - stmt_info, slp_node, 0, + slp_node, 0, vect_prologue); else if (!costing_p) { @@ -9452,8 +9459,7 @@ vectorizable_store (vec_info *vinfo, unsigned int cnunits = vect_nunits_for_cost (vectype); inside_cost += record_stmt_cost (cost_vec, cnunits, scalar_store, - stmt_info, slp_node, 0, - vect_body); + slp_node, 0, vect_body); continue; } @@ -9521,7 +9527,7 @@ vectorizable_store (vec_info *vinfo, unsigned int cnunits = vect_nunits_for_cost (vectype); inside_cost += record_stmt_cost (cost_vec, cnunits, scalar_store, - stmt_info, slp_node, 0, vect_body); + slp_node, 0, vect_body); continue; } @@ -9629,14 +9635,14 @@ vectorizable_store (vec_info *vinfo, consumed by the load). */ inside_cost += record_stmt_cost (cost_vec, cnunits, vec_to_scalar, - stmt_info, slp_node, 0, vect_body); + slp_node, 0, vect_body); /* N scalar stores plus extracting the elements. 
*/ inside_cost += record_stmt_cost (cost_vec, cnunits, vec_to_scalar, - stmt_info, slp_node, 0, vect_body); + slp_node, 0, vect_body); inside_cost += record_stmt_cost (cost_vec, cnunits, scalar_store, - stmt_info, slp_node, 0, vect_body); + slp_node, 0, vect_body); continue; } @@ -9830,8 +9836,7 @@ vectorizable_store (vec_info *vinfo, int group_size = DR_GROUP_SIZE (first_stmt_info); int nstmts = ceil_log2 (group_size) * group_size; inside_cost += record_stmt_cost (cost_vec, nstmts, vec_perm, - stmt_info, slp_node, 0, - vect_body); + slp_node, 0, vect_body); if (dump_enabled_p ()) dump_printf_loc (MSG_NOTE, vect_location, "vect_model_store_cost: " @@ -9860,8 +9865,7 @@ vectorizable_store (vec_info *vinfo, { if (costing_p) inside_cost += record_stmt_cost (cost_vec, 1, vec_perm, - stmt_info, slp_node, 0, - vect_body); + slp_node, 0, vect_body); else { tree perm_mask = perm_mask_for_reverse (vectype); @@ -10080,11 +10084,11 @@ vectorizable_store (vec_info *vinfo, /* Spill. */ prologue_cost += record_stmt_cost (cost_vec, ncopies, vector_store, - stmt_info, slp_node, 0, vect_epilogue); + slp_node, 0, vect_epilogue); /* Loads. */ prologue_cost += record_stmt_cost (cost_vec, ncopies * nregs, scalar_load, - stmt_info, slp_node, 0, vect_epilogue); + slp_node, 0, vect_epilogue); } } } @@ -10657,9 +10661,8 @@ vectorizable_load (vec_info *vinfo, enum vect_cost_model_location cost_loc = hoist_p ? vect_prologue : vect_body; unsigned int cost = record_stmt_cost (cost_vec, 1, scalar_load, - stmt_info, slp_node, 0, - cost_loc); - cost += record_stmt_cost (cost_vec, 1, scalar_to_vec, stmt_info, + stmt_info, 0, cost_loc); + cost += record_stmt_cost (cost_vec, 1, scalar_to_vec, slp_node, 0, cost_loc); unsigned int prologue_cost = hoist_p ? cost : 0; unsigned int inside_cost = hoist_p ? 0 : cost; @@ -10925,8 +10928,7 @@ vectorizable_load (vec_info *vinfo, n_adjacent_loads++; else inside_cost += record_stmt_cost (cost_vec, 1, scalar_load, - stmt_info, slp_node, 0, - vect_body); + stmt_info, 0, vect_body); continue; } tree this_off = build_int_cst (TREE_TYPE (alias_off), @@ -10964,8 +10966,7 @@ vectorizable_load (vec_info *vinfo, { if (costing_p) inside_cost += record_stmt_cost (cost_vec, 1, vec_construct, - stmt_info, slp_node, 0, - vect_body); + slp_node, 0, vect_body); else { tree vec_inv = build_constructor (lvectype, v); @@ -11020,8 +11021,7 @@ vectorizable_load (vec_info *vinfo, vect_transform_slp_perm_load (vinfo, slp_node, vNULL, NULL, vf, true, &n_perms, &n_loads); inside_cost += record_stmt_cost (cost_vec, n_perms, vec_perm, - first_stmt_info, slp_node, 0, - vect_body); + slp_node, 0, vect_body); } else vect_transform_slp_perm_load (vinfo, slp_node, dr_chain, gsi, vf, @@ -11591,7 +11591,7 @@ vectorizable_load (vec_info *vinfo, unsigned int cnunits = vect_nunits_for_cost (vectype); inside_cost = record_stmt_cost (cost_vec, cnunits, scalar_load, - stmt_info, slp_node, 0, vect_body); + stmt_info, 0, vect_body); continue; } if (STMT_VINFO_GATHER_SCATTER_P (stmt_info)) @@ -11667,7 +11667,7 @@ vectorizable_load (vec_info *vinfo, unsigned int cnunits = vect_nunits_for_cost (vectype); inside_cost = record_stmt_cost (cost_vec, cnunits, scalar_load, - stmt_info, slp_node, 0, vect_body); + stmt_info, 0, vect_body); continue; } poly_uint64 offset_nunits @@ -11796,16 +11796,16 @@ vectorizable_load (vec_info *vinfo, /* For emulated gathers N offset vector element offset add is consumed by the load). 
*/ inside_cost = record_stmt_cost (cost_vec, const_nunits, - vec_to_scalar, stmt_info, + vec_to_scalar, slp_node, 0, vect_body); /* N scalar loads plus gathering them into a vector. */ inside_cost = record_stmt_cost (cost_vec, const_nunits, scalar_load, - stmt_info, slp_node, 0, vect_body); + stmt_info, 0, vect_body); inside_cost = record_stmt_cost (cost_vec, 1, vec_construct, - stmt_info, slp_node, 0, vect_body); + slp_node, 0, vect_body); continue; } unsigned HOST_WIDE_INT const_offset_nunits @@ -12466,8 +12466,7 @@ vectorizable_load (vec_info *vinfo, { if (costing_p) inside_cost = record_stmt_cost (cost_vec, 1, vec_perm, - stmt_info, slp_node, 0, - vect_body); + slp_node, 0, vect_body); else { tree perm_mask = perm_mask_for_reverse (vectype); @@ -12536,8 +12535,7 @@ vectorizable_load (vec_info *vinfo, vect_transform_slp_perm_load (vinfo, slp_node, vNULL, nullptr, vf, true, &n_perms, nullptr); inside_cost = record_stmt_cost (cost_vec, n_perms, vec_perm, - stmt_info, slp_node, 0, - vect_body); + slp_node, 0, vect_body); } else { @@ -12564,8 +12562,7 @@ vectorizable_load (vec_info *vinfo, int group_size = DR_GROUP_SIZE (first_stmt_info); int nstmts = ceil_log2 (group_size) * group_size; inside_cost += record_stmt_cost (cost_vec, nstmts, vec_perm, - stmt_info, slp_node, 0, - vect_body); + slp_node, 0, vect_body); if (dump_enabled_p ()) dump_printf_loc (MSG_NOTE, vect_location, @@ -12985,7 +12982,7 @@ vectorizable_condition (vec_info *vinfo, } STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type; - vect_model_simple_cost (vinfo, stmt_info, ncopies, dts, ndts, slp_node, + vect_model_simple_cost (vinfo, ncopies, dts, ndts, slp_node, cost_vec, kind); return true; } @@ -13417,8 +13414,7 @@ vectorizable_comparison_1 (vec_info *vinfo, tree vectype, return false; } - vect_model_simple_cost (vinfo, stmt_info, - ncopies * (1 + (bitop2 != NOP_EXPR)), + vect_model_simple_cost (vinfo, ncopies * (1 + (bitop2 != NOP_EXPR)), dts, ndts, slp_node, cost_vec); return true; } diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h index a2f33a5ecd6..990072fca95 100644 --- a/gcc/tree-vectorizer.h +++ b/gcc/tree-vectorizer.h @@ -2418,7 +2418,7 @@ extern int compare_step_with_zero (vec_info *, stmt_vec_info); extern unsigned record_stmt_cost (stmt_vector_for_cost *, int, enum vect_cost_for_stmt, stmt_vec_info, - tree, int, enum vect_cost_model_location); + int, enum vect_cost_model_location); extern unsigned record_stmt_cost (stmt_vector_for_cost *, int, enum vect_cost_for_stmt, slp_tree, tree, int, enum vect_cost_model_location); @@ -2430,28 +2430,15 @@ extern unsigned record_stmt_cost (stmt_vector_for_cost *, int, slp_tree, tree, int, enum vect_cost_model_location); -/* Overload of record_stmt_cost with VECTYPE derived from STMT_INFO. */ - -inline unsigned -record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count, - enum vect_cost_for_stmt kind, stmt_vec_info stmt_info, - int misalign, enum vect_cost_model_location where) -{ - return record_stmt_cost (body_cost_vec, count, kind, stmt_info, - STMT_VINFO_VECTYPE (stmt_info), misalign, where); -} - -/* Overload of record_stmt_cost with VECTYPE derived from STMT_INFO and - SLP node specified. */ +/* Overload of record_stmt_cost with VECTYPE derived from SLP node. 
*/ inline unsigned record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count, - enum vect_cost_for_stmt kind, stmt_vec_info stmt_info, - slp_tree node, + enum vect_cost_for_stmt kind, slp_tree node, int misalign, enum vect_cost_model_location where) { - return record_stmt_cost (body_cost_vec, count, kind, stmt_info, node, - STMT_VINFO_VECTYPE (stmt_info), misalign, where); + return record_stmt_cost (body_cost_vec, count, kind, node, + SLP_TREE_VECTYPE (node), misalign, where); } extern void vect_finish_replace_stmt (vec_info *, stmt_vec_info, gimple *); -- 2.43.0