[Review for patch 1] Joel Hutton <joel.hut...@arm.com> writes: > From e7b3017b7b5879204e9d61760a85cc84beeb4fe0 Mon Sep 17 00:00:00 2001 > From: Joel Hutton <joel.hut...@arm.com> > Date: Wed, 25 Aug 2021 14:31:15 +0100 > Subject: [PATCH 1/3] [vect-patterns] Refactor to allow internal_fn's > > Hi all, > > This refactor allows widening patterns (such as widen_plus/widen_minus) to be > represented as > either internal_fns or tree_codes. > > [vect-patterns] Refactor as internal_fn's > > Refactor vect-patterns to allow patterns to be internal_fns starting > with widening_plus/minus patterns > > gcc/ChangeLog: > > * gimple-match.h (class code_helper): Move code_helper class to more > visible function. > * internal-fn.h (internal_fn_name): Add internal_fn range check. > * tree-vect-patterns.c (vect_recog_widen_op_pattern): Change > function prototype. > * tree-vect-stmts.c (vect_gen_widened_results_half): Refactor to use > code_helper, build internal_fns > (vect_create_vectorized_promotion_stmts): Refactor to use > code_helper. > (vectorizable_conversion): Refactor to use code_helper. > (supportable_widening_operation): Refactor to use code_helper. > (supportable_narrowing_operation): Refactor to use code_helper. > * tree-vectorizer.h (supportable_widening_operation): Refactor to use > code_helper. > (supportable_narrowing_operation): Refactor to use code_helper. > * tree.h (class code_helper): Refactor to use code_helper.
Sorry for two things. First, this review is very late. Second, I'd forgotten about this code_helper stuff. I had a nagging feeling that we'd discussed moving and consolidating code_helper, but I couldn't remember the context :-( This code has therefore changed quite a bit in the meantime. I think moving it to tree.h is still a good thing to do. > diff --git a/gcc/tree-vect-patterns.c b/gcc/tree-vect-patterns.c > index 854cbcff390..4a8ea67e62f 100644 > --- a/gcc/tree-vect-patterns.c > +++ b/gcc/tree-vect-patterns.c > @@ -1245,7 +1245,7 @@ vect_recog_sad_pattern (vec_info *vinfo, > static gimple * > vect_recog_widen_op_pattern (vec_info *vinfo, > stmt_vec_info last_stmt_info, tree *type_out, > - tree_code orig_code, tree_code wide_code, > + tree_code orig_code, code_helper wide_code_or_ifn, I think it'd be better to keep the original “wide_code” name and try to remove as many places as possible in which switching based on tree_code or internal_fn is necessary. The recent gimple-match.h patches should help with that, but more routines might be needed. Same comment for the other name changes. > bool shift_p, const char *name) > { > gimple *last_stmt = last_stmt_info->stmt; > @@ -1288,15 +1288,16 @@ vect_recog_widen_op_pattern (vec_info *vinfo, > vecctype = get_vectype_for_scalar_type (vinfo, ctype); > } > > - enum tree_code dummy_code; > + code_helper dummy_c_or_ifn; > int dummy_int; > auto_vec<tree> dummy_vec; > if (!vectype > || !vecitype > || !vecctype > - || !supportable_widening_operation (vinfo, wide_code, last_stmt_info, > + || !supportable_widening_operation (vinfo, wide_code_or_ifn, > + last_stmt_info, > vecitype, vectype, > - &dummy_code, &dummy_code, > + &dummy_c_or_ifn, &dummy_c_or_ifn, > &dummy_int, &dummy_vec)) > return NULL; > > @@ -1309,8 +1310,16 @@ vect_recog_widen_op_pattern (vec_info *vinfo, > 2, oprnd, half_type, unprom, vectype); > > tree var = vect_recog_temp_ssa_var (itype, NULL); > - gimple *pattern_stmt = gimple_build_assign (var, wide_code, > - oprnd[0], oprnd[1]); > + gimple *pattern_stmt; > + if (wide_code_or_ifn.is_tree_code ()) > + pattern_stmt = gimple_build_assign (var, wide_code_or_ifn, > + oprnd[0], oprnd[1]); > + else > + { > + internal_fn fn = as_internal_fn ((combined_fn) wide_code_or_ifn); > + pattern_stmt = gimple_build_call_internal (fn, 2, oprnd[0], oprnd[1]); > + gimple_call_set_lhs (pattern_stmt, var); > + } For example, I think we should hide this inside a new: gimple_build (var, wide_code, oprnd[0], oprnd[1]); that works directly on code_helper, similarly to the new code_helper gimple_build interfaces. (Unfortunately, I don't think we can use gimple_build directly here.) > diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c > index 2284ad069e4..d5e1619fabc 100644 > --- a/gcc/tree-vect-stmts.c > +++ b/gcc/tree-vect-stmts.c > @@ -4504,7 +4504,7 @@ vectorizable_simd_clone_call (vec_info *vinfo, > stmt_vec_info stmt_info, > STMT_INFO is the original scalar stmt that we are vectorizing. */ > > static gimple * > -vect_gen_widened_results_half (vec_info *vinfo, enum tree_code code, > +vect_gen_widened_results_half (vec_info *vinfo, code_helper ch, > tree vec_oprnd0, tree vec_oprnd1, int op_type, > tree vec_dest, gimple_stmt_iterator *gsi, > stmt_vec_info stmt_info) > @@ -4513,14 +4513,16 @@ vect_gen_widened_results_half (vec_info *vinfo, enum > tree_code code, > tree new_temp; > > /* Generate half of the widened result: */ > - gcc_assert (op_type == TREE_CODE_LENGTH (code)); > if (op_type != binary_op) > vec_oprnd1 = NULL; > - new_stmt = gimple_build_assign (vec_dest, code, vec_oprnd0, vec_oprnd1); > + if (ch.is_tree_code ()) > + new_stmt = gimple_build_assign (vec_dest, ch, vec_oprnd0, vec_oprnd1); > + else > + new_stmt = gimple_build_call_internal (as_internal_fn ((combined_fn) ch), > + 2, vec_oprnd0, vec_oprnd1); Similarly here. I guess the combined_fn/internal_fn path will also need to cope with null trailing operands, for consistency with the tree_code one. > new_temp = make_ssa_name (vec_dest, new_stmt); > - gimple_assign_set_lhs (new_stmt, new_temp); > + gimple_set_lhs (new_stmt, new_temp); > vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi); > - > return new_stmt; > } > > @@ -4597,8 +4599,8 @@ vect_create_vectorized_promotion_stmts (vec_info *vinfo, > vec<tree> *vec_oprnds1, > stmt_vec_info stmt_info, tree vec_dest, > gimple_stmt_iterator *gsi, > - enum tree_code code1, > - enum tree_code code2, int op_type) > + code_helper ch1, > + code_helper ch2, int op_type) > { > int i; > tree vop0, vop1, new_tmp1, new_tmp2; > @@ -4614,10 +4616,10 @@ vect_create_vectorized_promotion_stmts (vec_info > *vinfo, > vop1 = NULL_TREE; > > /* Generate the two halves of promotion operation. */ > - new_stmt1 = vect_gen_widened_results_half (vinfo, code1, vop0, vop1, > + new_stmt1 = vect_gen_widened_results_half (vinfo, ch1, vop0, vop1, > op_type, vec_dest, gsi, > stmt_info); > - new_stmt2 = vect_gen_widened_results_half (vinfo, code2, vop0, vop1, > + new_stmt2 = vect_gen_widened_results_half (vinfo, ch2, vop0, vop1, > op_type, vec_dest, gsi, > stmt_info); > if (is_gimple_call (new_stmt1)) > @@ -4714,8 +4716,9 @@ vectorizable_conversion (vec_info *vinfo, > tree scalar_dest; > tree op0, op1 = NULL_TREE; > loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo); > - enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK; > - enum tree_code codecvt1 = ERROR_MARK, codecvt2 = ERROR_MARK; > + tree_code code1; > + code_helper code_or_ifn, code_or_ifn1, code_or_ifn2; > + code_helper codecvt1 = ERROR_MARK, codecvt2 = ERROR_MARK; > tree new_temp; > enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type}; > int ndts = 2; > @@ -4744,31 +4747,28 @@ vectorizable_conversion (vec_info *vinfo, > && ! vec_stmt) > return false; > > - gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt); > - if (!stmt) > + gimple* stmt = stmt_info->stmt; > + if (!(is_gimple_assign (stmt) || is_gimple_call (stmt))) > return false; > > - if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME) > - return false; > + if (is_gimple_assign (stmt)) > + { > + code_or_ifn = gimple_assign_rhs_code (stmt); > + } > + else > + code_or_ifn = gimple_call_combined_fn (stmt); It might be possible to use gimple_extract_op here (only recently added). This would also provide the number of operands directly, instead of needing “op_type”. It would also provide an array of operands. > - code = gimple_assign_rhs_code (stmt); > - if (!CONVERT_EXPR_CODE_P (code) > - && code != FIX_TRUNC_EXPR > - && code != FLOAT_EXPR > - && code != WIDEN_PLUS_EXPR > - && code != WIDEN_MINUS_EXPR > - && code != WIDEN_MULT_EXPR > - && code != WIDEN_LSHIFT_EXPR) Is it safe to drop this check independently of parts 2 and 3? (Genuine question, haven't checked in detail.) > + if (TREE_CODE (gimple_get_lhs (stmt)) != SSA_NAME) > return false; > > - bool widen_arith = (code == WIDEN_PLUS_EXPR > - || code == WIDEN_MINUS_EXPR > - || code == WIDEN_MULT_EXPR > - || code == WIDEN_LSHIFT_EXPR); > - op_type = TREE_CODE_LENGTH (code); > + bool widen_arith = (code_or_ifn == WIDEN_PLUS_EXPR > + || code_or_ifn == WIDEN_MINUS_EXPR > + || code_or_ifn == WIDEN_MULT_EXPR > + || code_or_ifn == WIDEN_LSHIFT_EXPR); > + op_type = TREE_CODE_LENGTH ((tree_code) code_or_ifn); > > /* Check types of lhs and rhs. */ > - scalar_dest = gimple_assign_lhs (stmt); > + scalar_dest = gimple_get_lhs (stmt); > lhs_type = TREE_TYPE (scalar_dest); > vectype_out = STMT_VINFO_VECTYPE (stmt_info); > > @@ -4784,7 +4784,8 @@ vectorizable_conversion (vec_info *vinfo, > } > > rhs_type = TREE_TYPE (op0); > - if ((code != FIX_TRUNC_EXPR && code != FLOAT_EXPR) > + if ((code_or_ifn.is_tree_code () && code_or_ifn != FIX_TRUNC_EXPR > + && code_or_ifn != FLOAT_EXPR) I don't think we want the is_tree_code condition here. The existing != should work. > && !((INTEGRAL_TYPE_P (lhs_type) > && INTEGRAL_TYPE_P (rhs_type)) > || (SCALAR_FLOAT_TYPE_P (lhs_type) > […] > @@ -11744,7 +11763,7 @@ supportable_widening_operation (vec_info *vinfo, > if (loop_info) > vect_loop = LOOP_VINFO_LOOP (loop_info); > > - switch (code) > + switch (code_or_ifn.as_tree_code ()) > { > case WIDEN_MULT_EXPR: > /* The result of a vectorized widening operation usually requires > @@ -11773,7 +11792,7 @@ supportable_widening_operation (vec_info *vinfo, > vectorization. */ > /* TODO: Another case in which order doesn't *really* matter is when we > widen and then contract again, e.g. (short)((int)x * y >> 8). > - Normally, pack_trunc performs an even/odd permute, whereas the > + Normally, pack_trunc performs an even/odd permute, whereas the > repack from an even/odd expansion would be an interleave, which > would be significantly simpler for e.g. AVX2. */ > /* In any case, in order to avoid duplicating the code below, recurse > @@ -11785,20 +11804,22 @@ supportable_widening_operation (vec_info *vinfo, > && !nested_in_vect_loop_p (vect_loop, stmt_info) > && supportable_widening_operation (vinfo, VEC_WIDEN_MULT_EVEN_EXPR, > stmt_info, vectype_out, > - vectype_in, code1, code2, > - multi_step_cvt, interm_types)) > - { > - /* Elements in a vector with vect_used_by_reduction property cannot > - be reordered if the use chain with this property does not have > the > - same operation. One such an example is s += a * b, where > elements > - in a and b cannot be reordered. Here we check if the vector > defined > - by STMT is only directly used in the reduction statement. */ > + vectype_in, code_or_ifn1, > + code_or_ifn2, multi_step_cvt, > + interm_types)) > + { > + /* Elements in a vector with vect_used_by_reduction property cannot > + be reordered if the use chain with this property does not have > + the same operation. One such an example is s += a * b, where > + elements in a and b cannot be reordered. Here we check if the > + vector defined by STMT is only directly used in the reduction > + statement. */ > tree lhs = gimple_assign_lhs (stmt_info->stmt); > stmt_vec_info use_stmt_info = loop_info->lookup_single_use (lhs); > if (use_stmt_info > && STMT_VINFO_DEF_TYPE (use_stmt_info) == vect_reduction_def) > return true; > - } > + } > c1 = VEC_WIDEN_MULT_LO_EXPR; > c2 = VEC_WIDEN_MULT_HI_EXPR; > break; > @@ -11849,6 +11870,17 @@ supportable_widening_operation (vec_info *vinfo, > c2 = VEC_UNPACK_FIX_TRUNC_HI_EXPR; > break; > > + case MAX_TREE_CODES: > + break; > + > + default: > + gcc_unreachable (); > + } > + > + switch (code_or_ifn.as_fn_code ()) > + { > + case CFN_LAST: > + break; > default: > gcc_unreachable (); > } > @@ -11856,13 +11888,13 @@ supportable_widening_operation (vec_info *vinfo, > if (BYTES_BIG_ENDIAN && c1 != VEC_WIDEN_MULT_EVEN_EXPR) > std::swap (c1, c2); > > - if (code == FIX_TRUNC_EXPR) > + if (code_or_ifn == FIX_TRUNC_EXPR) > { > /* The signedness is determined from output operand. */ > optab1 = optab_for_tree_code (c1, vectype_out, optab_default); > optab2 = optab_for_tree_code (c2, vectype_out, optab_default); > } > - else if (CONVERT_EXPR_CODE_P (code) > + else if (CONVERT_EXPR_CODE_P ((tree_code) code_or_ifn) I think this should be as_tree_code (), so that it's safe for internal functions if (tree_code) ever becomes a checked convrsion in future. Same for other instances. > && VECTOR_BOOLEAN_TYPE_P (wide_vectype) > && VECTOR_BOOLEAN_TYPE_P (vectype) > && TYPE_MODE (wide_vectype) == TYPE_MODE (vectype) > […] > @@ -12000,7 +12031,7 @@ supportable_widening_operation (vec_info *vinfo, > bool > supportable_narrowing_operation (enum tree_code code, > tree vectype_out, tree vectype_in, > - enum tree_code *code1, int *multi_step_cvt, > + void* _code1, int *multi_step_cvt, This might be rehashing an old conversation, sorry, but why does this need to be void? > vec<tree> *interm_types) > { > machine_mode vec_mode; > @@ -12013,6 +12044,7 @@ supportable_narrowing_operation (enum tree_code code, > machine_mode intermediate_mode, prev_mode; > int i; > bool uns; > + tree_code * code1 = (tree_code*) _code1; > > *multi_step_cvt = 0; > switch (code) > diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h > index bd6f334d15f..70c06264c11 100644 > --- a/gcc/tree-vectorizer.h > +++ b/gcc/tree-vectorizer.h > @@ -2030,13 +2030,16 @@ extern bool vect_is_simple_use (vec_info *, > stmt_vec_info, slp_tree, > enum vect_def_type *, > tree *, stmt_vec_info * = NULL); > extern bool vect_maybe_update_slp_op_vectype (slp_tree, tree); > -extern bool supportable_widening_operation (vec_info *, > - enum tree_code, stmt_vec_info, > - tree, tree, enum tree_code *, > - enum tree_code *, int *, > - vec<tree> *); > +extern bool supportable_widening_operation (vec_info *vinfo, > + code_helper code_or_ifn, > + stmt_vec_info stmt_info, > + tree vectype_out, tree vectype_in, > + code_helper *code_or_ifn1, > + code_helper *code_or_ifn2, > + int *multi_step_cvt, > + vec<tree> *interm_types); Normal style is to keep the variable names out of the header. The documentation lives in the .c file, so in practice, anyone who wants to add a new caller will need to look there anyway. Thanks, Richard > extern bool supportable_narrowing_operation (enum tree_code, tree, tree, > - enum tree_code *, int *, > + void *, int *, > vec<tree> *); > > extern unsigned record_stmt_cost (stmt_vector_for_cost *, int, > diff --git a/gcc/tree.h b/gcc/tree.h > index f62c00bc870..346565f84ce 100644 > --- a/gcc/tree.h > +++ b/gcc/tree.h > @@ -6546,5 +6546,31 @@ extern unsigned fndecl_dealloc_argno (tree); > if nonnull, set the second argument to the referenced enclosing > object or pointer. Otherwise return null. */ > extern tree get_attr_nonstring_decl (tree, tree * = NULL); > +/* Helper to transparently allow tree codes and builtin function codes > + exist in one storage entity. */ > +class code_helper > +{ > +public: > + code_helper () {} > + code_helper (tree_code code) : rep ((int) code) {} > + code_helper (combined_fn fn) : rep (-(int) fn) {} > + operator tree_code () const { return is_tree_code () ? > + (tree_code) rep : > + ERROR_MARK; } > + operator combined_fn () const { return is_fn_code () ? > + (combined_fn) -rep: > + CFN_LAST; } > + bool is_tree_code () const { return rep > 0; } > + bool is_fn_code () const { return rep < 0; } > + int get_rep () const { return rep; } > + > + enum tree_code as_tree_code () const { return is_tree_code () ? > + (tree_code)* this : MAX_TREE_CODES; } > + combined_fn as_fn_code () const { return is_fn_code () ? (combined_fn) > *this > + : CFN_LAST;} > + > +private: > + int rep; > +}; > > #endif /* GCC_TREE_H */