Tamar Christina <tamar.christ...@arm.com> writes:
> diff --git a/gcc/tree-vect-loop-manip.cc b/gcc/tree-vect-loop-manip.cc
> index 56a4e9a8b63f3cae0bf596bf5d22893887dc80e8..0722679d6e66e5dd5af4ec1ce591f7c38b76d07f 100644
> --- a/gcc/tree-vect-loop-manip.cc
> +++ b/gcc/tree-vect-loop-manip.cc
> @@ -2195,6 +2195,22 @@ vect_can_peel_nonlinear_iv_p (loop_vec_info loop_vinfo,
>        return false;
>      }
>
> +  /* With early break vectorization we don't know whether the accesses will stay
> +     inside the loop or not.  TODO: The early break adjustment code can be
> +     implemented the same way for vectorizable_linear_induction.  However we
> +     can't test this today so reject it.  */
> +  if (niters_skip != NULL_TREE
> +      && vect_use_loop_mask_for_alignment_p (loop_vinfo)
> +      && LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo)
> +      && LOOP_VINFO_EARLY_BREAKS (loop_vinfo))
> +    {
> +      if (dump_enabled_p ())
> +        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
> +                         "Peeling for alignement using masking is not supported"
> +                         " for nonlinear induction when using early breaks.\n");
> +      return false;
> +    }
> +
>    return true;
>  }
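(For context, the kind of loop the new check declines to handle is
roughly the following: a nonlinear induction combined with an early
break.  This is only an illustrative sketch with made-up names; whether
masked peeling for alignment would actually be chosen for it depends on
the target and options.)

  /* Nonlinear (left-shift) induction `x' plus an early break, with a
     data reference that could otherwise be aligned via peeling.  */
  unsigned int
  foo (unsigned int *a, unsigned int n, unsigned int x)
  {
    for (unsigned int i = 0; i < n; i++)
      {
        if (a[i] == 0)  /* Early break out of the loop.  */
          return x;
        a[i] += 1;
        x <<= 1;        /* Nonlinear induction: x doubles every iteration.  */
      }
    return x;
  }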
FTR, I was wondering here whether we should predict this in advance and
instead drop down to peeling for alignment without masks.  It probably
isn't worth the effort though.

> diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc
> index 9413dcef702597ab27165e676546b190e2bd36ba..6dcdee19bb250993d8cc6b0057d2fa46245d04d9 100644
> --- a/gcc/tree-vect-loop.cc
> +++ b/gcc/tree-vect-loop.cc
> @@ -10678,6 +10678,104 @@ vectorizable_induction (loop_vec_info loop_vinfo,
>                                     LOOP_VINFO_MASK_SKIP_NITERS (loop_vinfo));
>        peel_mul = gimple_build_vector_from_val (&init_stmts,
>                                                 step_vectype, peel_mul);
> +
> +      /* If early break then we have to create a new PHI which we can use as
> +         an offset to adjust the induction reduction in early exits.  */
> +      if (LOOP_VINFO_EARLY_BREAKS (loop_vinfo))
> +        {
> +          auto skip_niters = LOOP_VINFO_MASK_SKIP_NITERS (loop_vinfo);
> +          tree ty_skip_niters = TREE_TYPE (skip_niters);
> +          tree break_lhs_phi = NULL_TREE;
> +          break_lhs_phi = vect_get_new_vect_var (ty_skip_niters,
> +                                                 vect_scalar_var,
> +                                                 "pfa_iv_offset");
> +          gphi *nphi = create_phi_node (break_lhs_phi, bb);
> +          add_phi_arg (nphi, skip_niters, pe, UNKNOWN_LOCATION);
> +          add_phi_arg (nphi, build_zero_cst (ty_skip_niters),
> +                       loop_latch_edge (iv_loop), UNKNOWN_LOCATION);
> +
> +          /* Rewrite all the early exit usages.  */
> +          tree phi_lhs = PHI_RESULT (phi);
> +          imm_use_iterator iter;
> +          use_operand_p use_p;
> +          gimple *use_stmt;
> +
> +          FOR_EACH_IMM_USE_FAST (use_p, iter, phi_lhs)
> +            {
> +              use_stmt = USE_STMT (use_p);
> +              if (!flow_bb_inside_loop_p (iv_loop, gimple_bb (use_stmt))
> +                  && is_a <gphi *> (use_stmt))
> +                {
> +                  auto gsi = gsi_last_bb (use_stmt->bb);
> +                  for (auto exit : get_loop_exit_edges (iv_loop))
> +                    if (exit != LOOP_VINFO_IV_EXIT (loop_vinfo)
> +                        && bb == exit->src)
> +                      {
> +                        /* Now create the PHI for the outside loop usage to
> +                           retrieve the value for the offset counter.  */
> +                        tree rphi_lhs = make_ssa_name (ty_skip_niters);
> +                        gphi *rphi
> +                          = create_phi_node (rphi_lhs, use_stmt->bb);
> +                        for (unsigned i = 0; i < gimple_phi_num_args (rphi);
> +                             i++)
> +                          SET_PHI_ARG_DEF (rphi, i, PHI_RESULT (nphi));
> +
> +                        tree tmp = make_ssa_name (TREE_TYPE (phi_lhs));
> +                        tree stmt_lhs = PHI_RESULT (use_stmt);
> +                        imm_use_iterator iter2;
> +                        gimple *use_stmt2;
> +                        use_operand_p use2_p;
> +
> +                        /* Now rewrite all the usages first.  */
> +                        FOR_EACH_IMM_USE_STMT (use_stmt2, iter2, stmt_lhs)
> +                          FOR_EACH_IMM_USE_ON_STMT (use2_p, iter2)
> +                            SET_USE (use2_p, tmp);
> +
> +                        /* And then generate the adjustment to avoid the
> +                           update code from updating this new usage.  The
> +                           multiplicaiton is to get the original IV and the
> +                           downwards counting IV correct.  */

typo: multiplication

But I don't think it's just upcounting vs downcounting.  An upcounting
iv with step 2 would also need the multiplication.  That is, we're
applying PHI_RESULT (rphi) iv updates, and so need to add the iv step
that many times.

So IMO it would be clearer to drop the reference specifically to
downcounting here.

The patch LGTM with the comment nit fixed, but Richi should have the
final say.
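To spell out what I mean above with a plain scalar sketch (made-up
names, not the gimple the patch builds): applying the step `count' more
times is a multiplication whichever way the IV counts, which is what
the MULT_EXPR feeding the (POINTER_)PLUS_EXPR computes.

  long
  apply_iv_updates (long iv, long step, long count)
  {
    /* Adding the step `count' times is step * count, for an upcounting
       step of +2 just as much as for a downcounting step of -1.  */
    return iv + step * count;
  }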
Thanks,
Richard

> +                        gimple_seq iv_stmts = NULL;
> +                        tree rphi_step
> +                          = gimple_convert (&iv_stmts, ty_skip_niters,
> +                                            step_expr);
> +                        tree tmp2
> +                          = gimple_build (&iv_stmts, MULT_EXPR,
> +                                          ty_skip_niters, rphi_step,
> +                                          PHI_RESULT (rphi));
> +
> +                        if (POINTER_TYPE_P (TREE_TYPE (stmt_lhs)))
> +                          tmp2
> +                            = gimple_build (&iv_stmts, POINTER_PLUS_EXPR,
> +                                            TREE_TYPE (stmt_lhs), stmt_lhs,
> +                                            tmp2);
> +                        else
> +                          {
> +                            tmp2
> +                              = gimple_convert (&iv_stmts,
> +                                                TREE_TYPE (stmt_lhs),
> +                                                tmp2);
> +                            tmp2
> +                              = gimple_build (&iv_stmts, PLUS_EXPR,
> +                                              TREE_TYPE (stmt_lhs), stmt_lhs,
> +                                              tmp2);
> +                          }
> +
> +                        gsi_insert_seq_before (&gsi, iv_stmts,
> +                                               GSI_SAME_STMT);
> +                        gimple *cvt_stmt =
> +                          gimple_build_assign (tmp, VIEW_CONVERT_EXPR,
> +                                               build1 (VIEW_CONVERT_EXPR,
> +                                                       TREE_TYPE (phi_lhs),
> +                                                       tmp2));
> +                        gsi_insert_before (&gsi, cvt_stmt, GSI_SAME_STMT);
> +                      }
> +                  /* All early exits point to the same common block, so we
> +                     only have to find the first one.  */
> +                  break;
> +                }
> +            }
> +        }
>      }
>    tree step_mul = NULL_TREE;
>    unsigned ivn;