On Mon, 27 Nov 2023, Tamar Christina wrote:
> Ping
>
> > -----Original Message-----
> > From: Tamar Christina <[email protected]>
> > Sent: Monday, November 6, 2023 7:40 AM
> > To: [email protected]
> > Cc: nd <[email protected]>; [email protected]; [email protected]
> > Subject: [PATCH 9/21]middle-end: implement vectorizable_early_exit for
> > codegen of exit code
> >
> > Hi All,
> >
> > This implements vectorable_early_exit which is used as the codegen part of
> > vectorizing a gcond.
> >
> > For the most part it shares the majority of the code with
> > vectorizable_comparison with addition that it needs to be able to reduce
> > multiple resulting statements into a single one for use in the gcond, and
> > also
> > needs to be able to perform masking on the comparisons.
> >
> > Bootstrapped Regtested on aarch64-none-linux-gnu and no issues.
> >
> > Ok for master?
> >
> > Thanks,
> > Tamar
> >
> > gcc/ChangeLog:
> >
> > * tree-vect-stmts.cc (vectorizable_comparison_1): Support stmts
> > without
> > lhs.
> > (vectorizable_early_exit): New.
> > (vect_analyze_stmt, vect_transform_stmt): Use it.
> > (vect_is_simple_use, vect_get_vector_types_for_stmt): Support
> > gcond.
> >
> > --- inline copy of patch --
> > diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc index
> > 36aeca60a22cfaea8d3b43348000d75de1d525c7..4809b822632279493a84
> > 3d402a833c9267bb315e 100644
> > --- a/gcc/tree-vect-stmts.cc
> > +++ b/gcc/tree-vect-stmts.cc
> > @@ -12475,7 +12475,7 @@ vectorizable_comparison_1 (vec_info *vinfo,
> > tree vectype,
> > vec<tree> vec_oprnds0 = vNULL;
> > vec<tree> vec_oprnds1 = vNULL;
> > tree mask_type;
> > - tree mask;
> > + tree mask = NULL_TREE;
> >
> > if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
> > return false;
> > @@ -12615,8 +12615,9 @@ vectorizable_comparison_1 (vec_info *vinfo,
> > tree vectype,
> > /* Transform. */
> >
> > /* Handle def. */
> > - lhs = gimple_assign_lhs (STMT_VINFO_STMT (stmt_info));
> > - mask = vect_create_destination_var (lhs, mask_type);
> > + lhs = gimple_get_lhs (STMT_VINFO_STMT (stmt_info)); if (lhs)
> > + mask = vect_create_destination_var (lhs, mask_type);
The line break / whitespace is wrecked here: `if (lhs)` belongs on its own line.
> >
> > vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
> > rhs1, &vec_oprnds0, vectype,
> > @@ -12630,7 +12631,10 @@ vectorizable_comparison_1 (vec_info *vinfo,
> > tree vectype,
> > gimple *new_stmt;
> > vec_rhs2 = vec_oprnds1[i];
> >
> > - new_temp = make_ssa_name (mask);
> > + if (lhs)
> > + new_temp = make_ssa_name (mask);
> > + else
> > + new_temp = make_temp_ssa_name (mask_type, NULL, "cmp");
> > if (bitop1 == NOP_EXPR)
> > {
> > new_stmt = gimple_build_assign (new_temp, code, @@ -12709,6
> > +12713,196 @@ vectorizable_comparison (vec_info *vinfo,
> > return true;
> > }
> >
> > +/* Check to see if the current early break given in STMT_INFO is valid for
> > + vectorization. */
> > +
> > +static bool
> > +vectorizable_early_exit (vec_info *vinfo, stmt_vec_info stmt_info,
> > + gimple_stmt_iterator *gsi, gimple **vec_stmt,
> > + slp_tree slp_node, stmt_vector_for_cost *cost_vec) {
The opening `{` goes on the next line.
> > + loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
> > + if (!loop_vinfo
> > + || !is_a <gcond *> (STMT_VINFO_STMT (stmt_info)))
> > + return false;
> > +
> > + if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_early_exit_def)
> > + return false;
> > +
> > + if (!STMT_VINFO_RELEVANT_P (stmt_info))
> > + return false;
> > +
> > + gimple_match_op op;
> > + if (!gimple_extract_op (stmt_info->stmt, &op))
> > + gcc_unreachable ();
> > + gcc_assert (op.code.is_tree_code ()); auto code = tree_code
> > + (op.code);
Missed line break: `auto code = tree_code (op.code);` should start on its own line.
> > +
> > + tree vectype_out = STMT_VINFO_VECTYPE (stmt_info); gcc_assert
> > + (vectype_out);
Missed line break here as well: `gcc_assert (vectype_out);` should start on its own line.
> > + tree var_op = op.ops[0];
> > +
> > + /* When vectorizing things like pointer comparisons we will assume that
> > + the VF of both operands are the same. e.g. a pointer must be compared
> > + to a pointer. We'll leave this up to vectorizable_comparison_1 to
> > + check further. */
> > + tree vectype_op = vectype_out;
> > + if (SSA_VAR_P (var_op))
Use TREE_CODE (var_op) == SSA_NAME here instead of SSA_VAR_P.
> > + {
> > + stmt_vec_info operand0_info
> > + = loop_vinfo->lookup_stmt (SSA_NAME_DEF_STMT (var_op));
Use loop_vinfo->lookup_def (var_op) instead of lookup_stmt (SSA_NAME_DEF_STMT (var_op)).
> > + if (!operand0_info)
> > + return false;
> > +
> > + /* If we're in a pattern get the type of the original statement. */
> > + if (STMT_VINFO_IN_PATTERN_P (operand0_info))
> > + operand0_info = STMT_VINFO_RELATED_STMT (operand0_info);
> > + vectype_op = STMT_VINFO_VECTYPE (operand0_info);
> > + }
I think you want to use vect_is_simple_use on var_op instead; that's
the canonical way of querying operands.
> > +
> > + tree truth_type = truth_type_for (vectype_op); machine_mode mode =
> > + TYPE_MODE (truth_type); int ncopies;
> > +
More line-break issues here (and also below; please check the rest yourself).
Shouldn't STMT_VINFO_VECTYPE already match truth_type here? If not,
it looks like it is set wrongly (or shouldn't be set at all).
> > + if (slp_node)
> > + ncopies = 1;
> > + else
> > + ncopies = vect_get_num_copies (loop_vinfo, truth_type);
> > +
> > + vec_loop_masks *masks = &LOOP_VINFO_MASKS (loop_vinfo); bool
> > + masked_loop_p = LOOP_VINFO_FULLY_MASKED_P (loop_vinfo);
> > +
What about with_len (loops that use lengths rather than masks for partial vectors)?
> > + /* Analyze only. */
> > + if (!vec_stmt)
> > + {
> > + if (direct_optab_handler (cbranch_optab, mode) == CODE_FOR_nothing)
> > + {
> > + if (dump_enabled_p ())
> > + dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
> > + "can't vectorize early exit because the "
> > + "target doesn't support flag setting vector "
> > + "comparisons.\n");
> > + return false;
> > + }
> > +
> > + if (!expand_vec_cmp_expr_p (vectype_op, truth_type, NE_EXPR))
Why NE_EXPR? This looks wrong. Or vectype_op is wrong if you're
emitting

    mask = op0 CMP op1;
    if (mask != 0)

I think you need to check for CMP, not NE_EXPR.
> > + {
> > + if (dump_enabled_p ())
> > + dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
> > + "can't vectorize early exit because the "
> > + "target does not support boolean vector "
> > + "comparisons for type %T.\n", truth_type);
> > + return false;
> > + }
> > +
> > + if (ncopies > 1
> > + && direct_optab_handler (ior_optab, mode) == CODE_FOR_nothing)
> > + {
> > + if (dump_enabled_p ())
> > + dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
> > + "can't vectorize early exit because the "
> > + "target does not support boolean vector OR for "
> > + "type %T.\n", truth_type);
> > + return false;
> > + }
> > +
> > + if (!vectorizable_comparison_1 (vinfo, truth_type, stmt_info, code,
> > gsi,
> > + vec_stmt, slp_node, cost_vec))
> > + return false;
I suppose vectorizable_comparison_1 will check this again, so the above
is redundant?
> > + if (LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
> > + vect_record_loop_mask (loop_vinfo, masks, ncopies, truth_type,
> > NULL);
Handling of LENs is missing here (either record them as well, or disable partial vectors).
> > + return true;
> > + }
> > +
> > + /* Tranform. */
> > +
> > + tree new_temp = NULL_TREE;
> > + gimple *new_stmt = NULL;
> > +
> > + if (dump_enabled_p ())
> > + dump_printf_loc (MSG_NOTE, vect_location, "transform
> > + early-exit.\n");
> > +
> > + if (!vectorizable_comparison_1 (vinfo, truth_type, stmt_info, code, gsi,
> > + vec_stmt, slp_node, cost_vec))
> > + gcc_unreachable ();
> > +
> > + gimple *stmt = STMT_VINFO_STMT (stmt_info); basic_block cond_bb =
> > + gimple_bb (stmt); gimple_stmt_iterator cond_gsi = gsi_last_bb
> > + (cond_bb);
> > +
> > + vec<tree> stmts;
> > +
> > + if (slp_node)
> > + stmts = SLP_TREE_VEC_DEFS (slp_node);
> > + else
> > + {
> > + auto vec_stmts = STMT_VINFO_VEC_STMTS (stmt_info);
> > + stmts.create (vec_stmts.length ());
> > + for (auto stmt : vec_stmts)
> > + stmts.quick_push (gimple_assign_lhs (stmt));
> > + }
>
> > + /* Determine if we need to reduce the final value. */
> > + if (stmts.length () > 1)
> > + {
> > + /* We build the reductions in a way to maintain as much parallelism
> > as
> > + possible. */
> > + auto_vec<tree> workset (stmts.length ());
> > + workset.splice (stmts);
> > + while (workset.length () > 1)
> > + {
> > + new_temp = make_temp_ssa_name (truth_type, NULL,
> > "vexit_reduc");
> > + tree arg0 = workset.pop ();
> > + tree arg1 = workset.pop ();
> > + new_stmt = gimple_build_assign (new_temp, BIT_IOR_EXPR, arg0,
> > arg1);
> > + vect_finish_stmt_generation (loop_vinfo, stmt_info, new_stmt,
> > + &cond_gsi);
> > + if (slp_node)
> > + slp_node->push_vec_def (new_stmt);
> > + else
> > + STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
> > + workset.quick_insert (0, new_temp);
Reduction epilogue handling has similar code to reduce a set of vectors
to a single one with an operation. I think we want to share that code.
> > + }
> > + }
> > + else
> > + new_temp = stmts[0];
> > +
> > + gcc_assert (new_temp);
> > +
> > + tree cond = new_temp;
> > + if (masked_loop_p)
> > + {
> > + tree mask = vect_get_loop_mask (loop_vinfo, gsi, masks, ncopies,
> > truth_type, 0);
> > + cond = prepare_vec_mask (loop_vinfo, TREE_TYPE (mask), mask, cond,
> > + &cond_gsi);
I don't think this is correct when 'stmts' had more than one vector?
> > + }
> > +
> > + /* Now build the new conditional. Pattern gimple_conds get dropped
> > during
> > + codegen so we must replace the original insn. */ if
> > + (is_pattern_stmt_p (stmt_info))
> > + stmt = STMT_VINFO_STMT (STMT_VINFO_RELATED_STMT (stmt_info));
Use vect_orig_stmt (stmt_info) here?
> > +
> > + tree t = fold_build2 (NE_EXPR, boolean_type_node, cond,
> > + build_zero_cst (truth_type));
> > + t = canonicalize_cond_expr_cond (t);
> > + gimple_cond_set_condition_from_tree ((gcond*)stmt, t);
Please use gimple_cond_set_{lhs,rhs,code} instead of going through
GENERIC.
> > + update_stmt (stmt);
> > +
> > + if (slp_node)
> > + slp_node->push_vec_def (stmt);
> > + else
> > + STMT_VINFO_VEC_STMTS (stmt_info).safe_push (stmt);
I don't think we need those, in fact we still have the original defs
from vectorizable_comparison_1 here? I'd just truncate both vectors.
> > +
> > +
> > + if (!slp_node)
> > + *vec_stmt = stmt;
I think you leak 'stmts' in the !slp case (it is allocated with stmts.create () but never released).
Otherwise looks good.
Richard.
> > + return true;
> > +}
> > +
> > /* If SLP_NODE is nonnull, return true if vectorizable_live_operation
> > can handle all live statements in the node. Otherwise return true
> > if STMT_INFO is not live or if vectorizable_live_operation can handle
> > it.
> > @@ -12928,7 +13122,9 @@ vect_analyze_stmt (vec_info *vinfo,
> > || vectorizable_lc_phi (as_a <loop_vec_info> (vinfo),
> > stmt_info, NULL, node)
> > || vectorizable_recurr (as_a <loop_vec_info> (vinfo),
> > - stmt_info, NULL, node, cost_vec));
> > + stmt_info, NULL, node, cost_vec)
> > + || vectorizable_early_exit (vinfo, stmt_info, NULL, NULL, node,
> > + cost_vec));
> > else
> > {
> > if (bb_vinfo)
> > @@ -12951,7 +13147,10 @@ vect_analyze_stmt (vec_info *vinfo,
> > NULL, NULL, node, cost_vec)
> > || vectorizable_comparison (vinfo, stmt_info, NULL, NULL, node,
> > cost_vec)
> > - || vectorizable_phi (vinfo, stmt_info, NULL, node, cost_vec));
> > + || vectorizable_phi (vinfo, stmt_info, NULL, node, cost_vec)
> > + || vectorizable_early_exit (vinfo, stmt_info, NULL, NULL, node,
> > + cost_vec));
> > +
> > }
> >
> > if (node)
> > @@ -13110,6 +13309,12 @@ vect_transform_stmt (vec_info *vinfo,
> > gcc_assert (done);
> > break;
> >
> > + case loop_exit_ctrl_vec_info_type:
> > + done = vectorizable_early_exit (vinfo, stmt_info, gsi, &vec_stmt,
> > + slp_node, NULL);
> > + gcc_assert (done);
> > + break;
> > +
> > default:
> > if (!STMT_VINFO_LIVE_P (stmt_info))
> > {
> > @@ -13511,7 +13716,7 @@ vect_is_simple_use (tree operand, vec_info
> > *vinfo, enum vect_def_type *dt,
> > case vect_first_order_recurrence:
> > dump_printf (MSG_NOTE, "first order recurrence\n");
> > break;
> > - case vect_early_exit_def:
> > + case vect_early_exit_def:
> > dump_printf (MSG_NOTE, "early exit\n");
> > break;
> > case vect_unknown_def_type:
> >
> >
> >
> >
> > --
>
--
Richard Biener <[email protected]>
SUSE Software Solutions Germany GmbH,
Frankenstrasse 146, 90461 Nuernberg, Germany;
GF: Ivo Totev, Andrew McDonald, Werner Knoblich; (HRB 36809, AG Nuernberg)