On Wed, Nov 10, 2021 at 1:48 PM Richard Sandiford via Gcc-patches <gcc-patches@gcc.gnu.org> wrote: > > This patch extends the reduction code to handle calls. So far > it's a structural change only; a later patch adds support for > specific function reductions. > > Most of the patch consists of using code_helper and gimple_match_op > to describe the reduction operations. The other main change is that > vectorizable_call now needs to handle fully-predicated reductions. > > Tested on aarch64-linux-gnu and x86_64-linux-gnu. OK to install? > > Richard > > > gcc/ > * builtins.h (associated_internal_fn): Declare overload that > takes a (combined_cfn, return type) pair. > * builtins.c (associated_internal_fn): Split new overload out > of original fndecl version. Also provide an overload that takes > a (combined_cfn, return type) pair. > * internal-fn.h (commutative_binary_fn_p): Declare. > (associative_binary_fn_p): Likewise. > * internal-fn.c (commutative_binary_fn_p): New function, > split out from... > (first_commutative_argument): ...here. > (associative_binary_fn_p): New function. > * gimple-match.h (code_helper): Add a constructor that takes > internal functions. > (commutative_binary_op_p): Declare. > (associative_binary_op_p): Likewise. > (canonicalize_code): Likewise. > (directly_supported_p): Likewise. > (get_conditional_internal_fn): Likewise. > (gimple_build): New overload that takes a code_helper. > * gimple-fold.c (gimple_build): Likewise. > * gimple-match-head.c (commutative_binary_op_p): New function. > (associative_binary_op_p): Likewise. > (canonicalize_code): Likewise. > (directly_supported_p): Likewise. > (get_conditional_internal_fn): Likewise. > * tree-vectorizer.h: Include gimple-match.h. > (neutral_op_for_reduction): Take a code_helper instead of a tree_code. > (needs_fold_left_reduction_p): Likewise. > (reduction_fn_for_scalar_code): Likewise. > (vect_can_vectorize_without_simd_p): Declare a new overload that > takes a code_helper. > * tree-vect-loop.c: Include case-cfn-macros.h. > (fold_left_reduction_fn): Take a code_helper instead of a tree_code. > (reduction_fn_for_scalar_code): Likewise. > (neutral_op_for_reduction): Likewise. > (needs_fold_left_reduction_p): Likewise. > (use_mask_by_cond_expr_p): Likewise. > (build_vect_cond_expr): Likewise. > (vect_create_partial_epilog): Likewise. Use gimple_build rather > than gimple_build_assign. > (check_reduction_path): Handle calls and operate on code_helpers > rather than tree_codes. > (vect_is_simple_reduction): Likewise. > (vect_model_reduction_cost): Likewise. > (vect_find_reusable_accumulator): Likewise. > (vect_create_epilog_for_reduction): Likewise. > (vect_transform_cycle_phi): Likewise. > (vectorizable_reduction): Likewise. Make more use of > lane_reduc_code_p. > (vect_transform_reduction): Use gimple_extract_op but expect > a tree_code for now. > (vect_can_vectorize_without_simd_p): New overload that takes > a code_helper. > * tree-vect-stmts.c (vectorizable_call): Handle reductions in > fully-masked loops. > * tree-vect-patterns.c (vect_mark_pattern_stmts): Use > gimple_extract_op when updating STMT_VINFO_REDUC_IDX.
> --- > gcc/builtins.c | 46 ++++- > gcc/builtins.h | 1 + > gcc/gimple-fold.c | 9 + > gcc/gimple-match-head.c | 70 +++++++ > gcc/gimple-match.h | 20 ++ > gcc/internal-fn.c | 46 ++++- > gcc/internal-fn.h | 2 + > gcc/tree-vect-loop.c | 420 +++++++++++++++++++-------------------- > gcc/tree-vect-patterns.c | 23 ++- > gcc/tree-vect-stmts.c | 66 ++++-- > gcc/tree-vectorizer.h | 10 +- > 11 files changed, 455 insertions(+), 258 deletions(-) > > diff --git a/gcc/builtins.c b/gcc/builtins.c > index 384864bfb3a..03829c03a5a 100644 > --- a/gcc/builtins.c > +++ b/gcc/builtins.c > @@ -2139,17 +2139,17 @@ mathfn_built_in_type (combined_fn fn) > #undef SEQ_OF_CASE_MATHFN > } > > -/* If BUILT_IN_NORMAL function FNDECL has an associated internal function, > - return its code, otherwise return IFN_LAST. Note that this function > - only tests whether the function is defined in internals.def, not whether > - it is actually available on the target. */ > +/* Check whether there is an internal function associated with function FN > + and return type RETURN_TYPE. Return the function if so, otherwise return > + IFN_LAST. > > -internal_fn > -associated_internal_fn (tree fndecl) > + Note that this function only tests whether the function is defined in > + internals.def, not whether it is actually available on the target. */ > + > +static internal_fn > +associated_internal_fn (built_in_function fn, tree return_type) > { > - gcc_checking_assert (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL); > - tree return_type = TREE_TYPE (TREE_TYPE (fndecl)); > - switch (DECL_FUNCTION_CODE (fndecl)) > + switch (fn) > { > #define DEF_INTERNAL_FLT_FN(NAME, FLAGS, OPTAB, TYPE) \ > CASE_FLT_FN (BUILT_IN_##NAME): return IFN_##NAME; > @@ -2177,6 +2177,34 @@ associated_internal_fn (tree fndecl) > } > } > > +/* If BUILT_IN_NORMAL function FNDECL has an associated internal function, > + return its code, otherwise return IFN_LAST. Note that this function > + only tests whether the function is defined in internals.def, not whether > + it is actually available on the target. */ > + > +internal_fn > +associated_internal_fn (tree fndecl) > +{ > + gcc_checking_assert (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL); > + return associated_internal_fn (DECL_FUNCTION_CODE (fndecl), > + TREE_TYPE (TREE_TYPE (fndecl))); > +} > + > +/* Check whether there is an internal function associated with function CFN > + and return type RETURN_TYPE. Return the function if so, otherwise return > + IFN_LAST. > + > + Note that this function only tests whether the function is defined in > + internals.def, not whether it is actually available on the target. */ > + > +internal_fn > +associated_internal_fn (combined_fn cfn, tree return_type) > +{ > + if (internal_fn_p (cfn)) > + return as_internal_fn (cfn); > + return associated_internal_fn (as_builtin_fn (cfn), return_type); > +} > + > /* If CALL is a call to a BUILT_IN_NORMAL function that could be replaced > on the current target by a call to an internal function, return the > code of that internal function, otherwise return IFN_LAST. 
The caller > diff --git a/gcc/builtins.h b/gcc/builtins.h > index 5e4d86e9c37..c99670b12f1 100644 > --- a/gcc/builtins.h > +++ b/gcc/builtins.h > @@ -148,6 +148,7 @@ extern char target_percent_s_newline[4]; > extern bool target_char_cst_p (tree t, char *p); > extern rtx get_memory_rtx (tree exp, tree len); > > +extern internal_fn associated_internal_fn (combined_fn, tree); > extern internal_fn associated_internal_fn (tree); > extern internal_fn replacement_internal_fn (gcall *); > > diff --git a/gcc/gimple-fold.c b/gcc/gimple-fold.c > index 9daf2cc590c..a937f130815 100644 > --- a/gcc/gimple-fold.c > +++ b/gcc/gimple-fold.c > @@ -8808,6 +8808,15 @@ gimple_build (gimple_seq *seq, location_t loc, > combined_fn fn, > return res; > }
Toplevel comment missing. You add this for two operands, please also add it for one and three (even if unused). > +tree > +gimple_build (gimple_seq *seq, location_t loc, code_helper code, > + tree type, tree op0, tree op1) > +{ > + if (code.is_tree_code ()) > + return gimple_build (seq, loc, tree_code (code), type, op0, op1); > + return gimple_build (seq, loc, combined_fn (code), type, op0, op1); > +} > + > /* Build the conversion (TYPE) OP with a result of type TYPE > with location LOC if such conversion is neccesary in GIMPLE, > simplifying it first. > diff --git a/gcc/gimple-match-head.c b/gcc/gimple-match-head.c > index d4d7d767075..4558a3db5fc 100644 > --- a/gcc/gimple-match-head.c > +++ b/gcc/gimple-match-head.c > @@ -1304,3 +1304,73 @@ optimize_successive_divisions_p (tree divisor, tree > inner_div) > } > return true; > } > + > +/* If CODE, operating on TYPE, represents a built-in function that has an > + associated internal function, return the associated internal function, > + otherwise return CODE. This function does not check whether the > + internal function is supported, only that it exists. */ Hmm, why not name the function associated_internal_fn then, or have it contain internal_fn? I also wonder why all the functions below are not member functions of code_helper? > +code_helper > +canonicalize_code (code_helper code, tree type) > +{ > + if (code.is_fn_code ()) > + return associated_internal_fn (combined_fn (code), type); > + return code; > +} > + > +/* Return true if CODE is a binary operation that is commutative when > + operating on type TYPE. */ > + > +bool > +commutative_binary_op_p (code_helper code, tree type) > +{ > + if (code.is_tree_code ()) > + return commutative_tree_code (tree_code (code)); > + auto cfn = combined_fn (code); > + return commutative_binary_fn_p (associated_internal_fn (cfn, type)); > +} Do we need commutative_ternary_op_p? Can we do a more generic commutative_p instead? > + > +/* Return true if CODE is a binary operation that is associative when > + operating on type TYPE. */ > + > +bool > +associative_binary_op_p (code_helper code, tree type) We only have associative_tree_code, is _binary relevant here? > +{ > + if (code.is_tree_code ()) > + return associative_tree_code (tree_code (code)); > + auto cfn = combined_fn (code); > + return associative_binary_fn_p (associated_internal_fn (cfn, type)); > +} > + > +/* Return true if the target directly supports operation CODE on type TYPE. > + QUERY_TYPE acts as for optab_for_tree_code. */ > + > +bool > +directly_supported_p (code_helper code, tree type, optab_subtype query_type) > +{ > + if (code.is_tree_code ()) > + { > + direct_optab optab = optab_for_tree_code (tree_code (code), type, > + query_type); > + return (optab != unknown_optab > + && optab_handler (optab, TYPE_MODE (type)) != CODE_FOR_nothing); > + } > + gcc_assert (query_type == optab_default > + || (query_type == optab_vector && VECTOR_TYPE_P (type)) > + || (query_type == optab_scalar && !VECTOR_TYPE_P (type))); > + internal_fn ifn = associated_internal_fn (combined_fn (code), type); > + return (direct_internal_fn_p (ifn) > + && direct_internal_fn_supported_p (ifn, type, OPTIMIZE_FOR_SPEED)); > +} > + > +/* A wrapper around the internal-fn.c versions of get_conditional_internal_fn > + for a code_helper CODE operating on type TYPE. 
*/ > + > +internal_fn > +get_conditional_internal_fn (code_helper code, tree type) > +{ > + if (code.is_tree_code ()) > + return get_conditional_internal_fn (tree_code (code)); > + auto cfn = combined_fn (code); > + return get_conditional_internal_fn (associated_internal_fn (cfn, type)); > +} > diff --git a/gcc/gimple-match.h b/gcc/gimple-match.h > index 1b9dc3851c2..6d24a8a2378 100644 > --- a/gcc/gimple-match.h > +++ b/gcc/gimple-match.h > @@ -31,6 +31,7 @@ public: > code_helper () {} > code_helper (tree_code code) : rep ((int) code) {} > code_helper (combined_fn fn) : rep (-(int) fn) {} > + code_helper (internal_fn fn) : rep (-(int) as_combined_fn (fn)) {} > explicit operator tree_code () const { return (tree_code) rep; } > explicit operator combined_fn () const { return (combined_fn) -rep; } Do we want a explicit operator internal_fn () const { ... } for completeness? > bool is_tree_code () const { return rep > 0; } > @@ -346,4 +347,23 @@ tree maybe_push_res_to_seq (gimple_match_op *, > gimple_seq *, > void maybe_build_generic_op (gimple_match_op *); > > > +bool commutative_binary_op_p (code_helper, tree); > +bool associative_binary_op_p (code_helper, tree); > +code_helper canonicalize_code (code_helper, tree); > + > +#ifdef GCC_OPTABS_TREE_H > +bool directly_supported_p (code_helper, tree, optab_subtype = optab_default); > +#endif > + > +internal_fn get_conditional_internal_fn (code_helper, tree); > + > +extern tree gimple_build (gimple_seq *, location_t, > + code_helper, tree, tree, tree); > +inline tree > +gimple_build (gimple_seq *seq, code_helper code, tree type, tree op0, > + tree op1) > +{ > + return gimple_build (seq, UNKNOWN_LOCATION, code, type, op0, op1); > +} That looks a bit misplaced and should be in gimple-fold.h, no? > + > #endif /* GCC_GIMPLE_MATCH_H */ > diff --git a/gcc/internal-fn.c b/gcc/internal-fn.c > index da7d8355214..7b13db6dfe3 100644 > --- a/gcc/internal-fn.c > +++ b/gcc/internal-fn.c > @@ -3815,6 +3815,43 @@ direct_internal_fn_supported_p (gcall *stmt, > optimization_type opt_type) > return direct_internal_fn_supported_p (fn, types, opt_type); > } > > +/* Return true if FN is a commutative binary operation. */ > + > +bool > +commutative_binary_fn_p (internal_fn fn) > +{ > + switch (fn) > + { > + case IFN_AVG_FLOOR: > + case IFN_AVG_CEIL: > + case IFN_MULH: > + case IFN_MULHS: > + case IFN_MULHRS: > + case IFN_FMIN: > + case IFN_FMAX: > + return true; > + > + default: > + return false; > + } > +} > + > +/* Return true if FN is an associative binary operation. */ > + > +bool > +associative_binary_fn_p (internal_fn fn) See above - without _binary? > +{ > + switch (fn) > + { > + case IFN_FMIN: > + case IFN_FMAX: > + return true; > + > + default: > + return false; > + } > +} > + > /* If FN is commutative in two consecutive arguments, return the > index of the first, otherwise return -1. */ > > @@ -3827,13 +3864,6 @@ first_commutative_argument (internal_fn fn) > case IFN_FMS: > case IFN_FNMA: > case IFN_FNMS: > - case IFN_AVG_FLOOR: > - case IFN_AVG_CEIL: > - case IFN_MULH: > - case IFN_MULHS: > - case IFN_MULHRS: > - case IFN_FMIN: > - case IFN_FMAX: > return 0; > > case IFN_COND_ADD: > @@ -3852,7 +3882,7 @@ first_commutative_argument (internal_fn fn) > return 1; > > default: > - return -1; > + return commutative_binary_fn_p (fn) ? 
0 : -1; > } > } > > diff --git a/gcc/internal-fn.h b/gcc/internal-fn.h > index 19d0f849a5a..82ef4b0d792 100644 > --- a/gcc/internal-fn.h > +++ b/gcc/internal-fn.h > @@ -206,6 +206,8 @@ direct_internal_fn_supported_p (internal_fn fn, tree > type0, tree type1, > opt_type); > } > > +extern bool commutative_binary_fn_p (internal_fn); I'm somewhat missing commutative_ternary_fn_p which would work on FMAs? So that was all API comments, the real changes below look good to me. Thanks, Richard. > +extern bool associative_binary_fn_p (internal_fn); > extern int first_commutative_argument (internal_fn); > > extern bool set_edom_supported_p (void); > diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c > index 1cd5dbcb6f7..cae895a88f2 100644 > --- a/gcc/tree-vect-loop.c > +++ b/gcc/tree-vect-loop.c > @@ -54,6 +54,7 @@ along with GCC; see the file COPYING3. If not see > #include "tree-vector-builder.h" > #include "vec-perm-indices.h" > #include "tree-eh.h" > +#include "case-cfn-macros.h" > > /* Loop Vectorization Pass. > > @@ -3125,17 +3126,14 @@ vect_analyze_loop (class loop *loop, vec_info_shared > *shared) > it in *REDUC_FN if so. */ > > static bool > -fold_left_reduction_fn (tree_code code, internal_fn *reduc_fn) > +fold_left_reduction_fn (code_helper code, internal_fn *reduc_fn) > { > - switch (code) > + if (code == PLUS_EXPR) > { > - case PLUS_EXPR: > *reduc_fn = IFN_FOLD_LEFT_PLUS; > return true; > - > - default: > - return false; > } > + return false; > } > > /* Function reduction_fn_for_scalar_code > @@ -3152,21 +3150,22 @@ fold_left_reduction_fn (tree_code code, internal_fn > *reduc_fn) > Return FALSE if CODE currently cannot be vectorized as reduction. */ > > bool > -reduction_fn_for_scalar_code (enum tree_code code, internal_fn *reduc_fn) > +reduction_fn_for_scalar_code (code_helper code, internal_fn *reduc_fn) > { > - switch (code) > - { > + if (code.is_tree_code ()) > + switch (tree_code (code)) > + { > case MAX_EXPR: > - *reduc_fn = IFN_REDUC_MAX; > - return true; > + *reduc_fn = IFN_REDUC_MAX; > + return true; > > case MIN_EXPR: > - *reduc_fn = IFN_REDUC_MIN; > - return true; > + *reduc_fn = IFN_REDUC_MIN; > + return true; > > case PLUS_EXPR: > - *reduc_fn = IFN_REDUC_PLUS; > - return true; > + *reduc_fn = IFN_REDUC_PLUS; > + return true; > > case BIT_AND_EXPR: > *reduc_fn = IFN_REDUC_AND; > @@ -3182,12 +3181,13 @@ reduction_fn_for_scalar_code (enum tree_code code, > internal_fn *reduc_fn) > > case MULT_EXPR: > case MINUS_EXPR: > - *reduc_fn = IFN_LAST; > - return true; > + *reduc_fn = IFN_LAST; > + return true; > > default: > - return false; > + break; > } > + return false; > } > > /* If there is a neutral value X such that a reduction would not be affected > @@ -3197,32 +3197,35 @@ reduction_fn_for_scalar_code (enum tree_code code, > internal_fn *reduc_fn) > then INITIAL_VALUE is that value, otherwise it is null. 
*/ > > tree > -neutral_op_for_reduction (tree scalar_type, tree_code code, tree > initial_value) > +neutral_op_for_reduction (tree scalar_type, code_helper code, > + tree initial_value) > { > - switch (code) > - { > - case WIDEN_SUM_EXPR: > - case DOT_PROD_EXPR: > - case SAD_EXPR: > - case PLUS_EXPR: > - case MINUS_EXPR: > - case BIT_IOR_EXPR: > - case BIT_XOR_EXPR: > - return build_zero_cst (scalar_type); > + if (code.is_tree_code ()) > + switch (tree_code (code)) > + { > + case WIDEN_SUM_EXPR: > + case DOT_PROD_EXPR: > + case SAD_EXPR: > + case PLUS_EXPR: > + case MINUS_EXPR: > + case BIT_IOR_EXPR: > + case BIT_XOR_EXPR: > + return build_zero_cst (scalar_type); > > - case MULT_EXPR: > - return build_one_cst (scalar_type); > + case MULT_EXPR: > + return build_one_cst (scalar_type); > > - case BIT_AND_EXPR: > - return build_all_ones_cst (scalar_type); > + case BIT_AND_EXPR: > + return build_all_ones_cst (scalar_type); > > - case MAX_EXPR: > - case MIN_EXPR: > - return initial_value; > + case MAX_EXPR: > + case MIN_EXPR: > + return initial_value; > > - default: > - return NULL_TREE; > - } > + default: > + break; > + } > + return NULL_TREE; > } > > /* Error reporting helper for vect_is_simple_reduction below. GIMPLE > statement > @@ -3239,26 +3242,27 @@ report_vect_op (dump_flags_t msg_type, gimple *stmt, > const char *msg) > overflow must wrap. */ > > bool > -needs_fold_left_reduction_p (tree type, tree_code code) > +needs_fold_left_reduction_p (tree type, code_helper code) > { > /* CHECKME: check for !flag_finite_math_only too? */ > if (SCALAR_FLOAT_TYPE_P (type)) > - switch (code) > - { > - case MIN_EXPR: > - case MAX_EXPR: > - return false; > + { > + if (code.is_tree_code ()) > + switch (tree_code (code)) > + { > + case MIN_EXPR: > + case MAX_EXPR: > + return false; > > - default: > - return !flag_associative_math; > - } > + default: > + break; > + } > + return !flag_associative_math; > + } > > if (INTEGRAL_TYPE_P (type)) > - { > - if (!operation_no_trapping_overflow (type, code)) > - return true; > - return false; > - } > + return (!code.is_tree_code () > + || !operation_no_trapping_overflow (type, tree_code (code))); > > if (SAT_FIXED_POINT_TYPE_P (type)) > return true; > @@ -3272,7 +3276,7 @@ needs_fold_left_reduction_p (tree type, tree_code code) > > static bool > check_reduction_path (dump_user_location_t loc, loop_p loop, gphi *phi, > - tree loop_arg, enum tree_code *code, > + tree loop_arg, code_helper *code, > vec<std::pair<ssa_op_iter, use_operand_p> > &path) > { > auto_bitmap visited; > @@ -3347,45 +3351,57 @@ pop: > for (unsigned i = 1; i < path.length (); ++i) > { > gimple *use_stmt = USE_STMT (path[i].second); > - tree op = USE_FROM_PTR (path[i].second); > - if (! is_gimple_assign (use_stmt) > + gimple_match_op op; > + if (!gimple_extract_op (use_stmt, &op)) > + { > + fail = true; > + break; > + } > + unsigned int opi = op.num_ops; > + if (gassign *assign = dyn_cast<gassign *> (use_stmt)) > + { > /* The following make sure we can compute the operand index > easily plus it mostly disallows chaining via COND_EXPR condition > operands. 
*/ > - || (gimple_assign_rhs1_ptr (use_stmt) != path[i].second->use > - && (gimple_num_ops (use_stmt) <= 2 > - || gimple_assign_rhs2_ptr (use_stmt) != path[i].second->use) > - && (gimple_num_ops (use_stmt) <= 3 > - || gimple_assign_rhs3_ptr (use_stmt) != > path[i].second->use))) > + for (opi = 0; opi < op.num_ops; ++opi) > + if (gimple_assign_rhs1_ptr (assign) + opi == path[i].second->use) > + break; > + } > + else if (gcall *call = dyn_cast<gcall *> (use_stmt)) > + { > + for (opi = 0; opi < op.num_ops; ++opi) > + if (gimple_call_arg_ptr (call, opi) == path[i].second->use) > + break; > + } > + if (opi == op.num_ops) > { > fail = true; > break; > } > - tree_code use_code = gimple_assign_rhs_code (use_stmt); > - if (use_code == MINUS_EXPR) > + op.code = canonicalize_code (op.code, op.type); > + if (op.code == MINUS_EXPR) > { > - use_code = PLUS_EXPR; > + op.code = PLUS_EXPR; > /* Track whether we negate the reduction value each iteration. */ > - if (gimple_assign_rhs2 (use_stmt) == op) > + if (op.ops[1] == op.ops[opi]) > neg = ! neg; > } > - if (CONVERT_EXPR_CODE_P (use_code) > - && tree_nop_conversion_p (TREE_TYPE (gimple_assign_lhs (use_stmt)), > - TREE_TYPE (gimple_assign_rhs1 > (use_stmt)))) > + if (CONVERT_EXPR_CODE_P (op.code) > + && tree_nop_conversion_p (op.type, TREE_TYPE (op.ops[0]))) > ; > else if (*code == ERROR_MARK) > { > - *code = use_code; > - sign = TYPE_SIGN (TREE_TYPE (gimple_assign_lhs (use_stmt))); > + *code = op.code; > + sign = TYPE_SIGN (op.type); > } > - else if (use_code != *code) > + else if (op.code != *code) > { > fail = true; > break; > } > - else if ((use_code == MIN_EXPR > - || use_code == MAX_EXPR) > - && sign != TYPE_SIGN (TREE_TYPE (gimple_assign_lhs > (use_stmt)))) > + else if ((op.code == MIN_EXPR > + || op.code == MAX_EXPR) > + && sign != TYPE_SIGN (op.type)) > { > fail = true; > break; > @@ -3397,7 +3413,7 @@ pop: > imm_use_iterator imm_iter; > gimple *op_use_stmt; > unsigned cnt = 0; > - FOR_EACH_IMM_USE_STMT (op_use_stmt, imm_iter, op) > + FOR_EACH_IMM_USE_STMT (op_use_stmt, imm_iter, op.ops[opi]) > if (!is_gimple_debug (op_use_stmt) > && (*code != ERROR_MARK > || flow_bb_inside_loop_p (loop, gimple_bb (op_use_stmt)))) > @@ -3427,7 +3443,7 @@ check_reduction_path (dump_user_location_t loc, loop_p > loop, gphi *phi, > tree loop_arg, enum tree_code code) > { > auto_vec<std::pair<ssa_op_iter, use_operand_p> > path; > - enum tree_code code_; > + code_helper code_; > return (check_reduction_path (loc, loop, phi, loop_arg, &code_, path) > && code_ == code); > } > @@ -3596,9 +3612,9 @@ vect_is_simple_reduction (loop_vec_info loop_info, > stmt_vec_info phi_info, > gimple *def1 = SSA_NAME_DEF_STMT (op1); > if (gimple_bb (def1) > && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)) > - && loop->inner > - && flow_bb_inside_loop_p (loop->inner, gimple_bb (def1)) > - && is_gimple_assign (def1) > + && loop->inner > + && flow_bb_inside_loop_p (loop->inner, gimple_bb (def1)) > + && (is_gimple_assign (def1) || is_gimple_call (def1)) > && is_a <gphi *> (phi_use_stmt) > && flow_bb_inside_loop_p (loop->inner, gimple_bb (phi_use_stmt))) > { > @@ -3615,7 +3631,7 @@ vect_is_simple_reduction (loop_vec_info loop_info, > stmt_vec_info phi_info, > > /* Look for the expression computing latch_def from then loop PHI result. 
> */ > auto_vec<std::pair<ssa_op_iter, use_operand_p> > path; > - enum tree_code code; > + code_helper code; > if (check_reduction_path (vect_location, loop, phi, latch_def, &code, > path)) > { > @@ -3633,15 +3649,24 @@ vect_is_simple_reduction (loop_vec_info loop_info, > stmt_vec_info phi_info, > { > gimple *stmt = USE_STMT (path[i].second); > stmt_vec_info stmt_info = loop_info->lookup_stmt (stmt); > - STMT_VINFO_REDUC_IDX (stmt_info) > - = path[i].second->use - gimple_assign_rhs1_ptr (stmt); > - enum tree_code stmt_code = gimple_assign_rhs_code (stmt); > - bool leading_conversion = (CONVERT_EXPR_CODE_P (stmt_code) > + gimple_match_op op; > + if (!gimple_extract_op (stmt, &op)) > + gcc_unreachable (); > + if (gassign *assign = dyn_cast<gassign *> (stmt)) > + STMT_VINFO_REDUC_IDX (stmt_info) > + = path[i].second->use - gimple_assign_rhs1_ptr (assign); > + else > + { > + gcall *call = as_a<gcall *> (stmt); > + STMT_VINFO_REDUC_IDX (stmt_info) > + = path[i].second->use - gimple_call_arg_ptr (call, 0); > + } > + bool leading_conversion = (CONVERT_EXPR_CODE_P (op.code) > && (i == 1 || i == path.length () - 1)); > - if ((stmt_code != code && !leading_conversion) > + if ((op.code != code && !leading_conversion) > /* We can only handle the final value in epilogue > generation for reduction chains. */ > - || (i != 1 && !has_single_use (gimple_assign_lhs (stmt)))) > + || (i != 1 && !has_single_use (gimple_get_lhs (stmt)))) > is_slp_reduc = false; > /* For reduction chains we support a trailing/leading > conversions. We do not store those in the actual chain. */ > @@ -4390,8 +4415,6 @@ vect_model_reduction_cost (loop_vec_info loop_vinfo, > int ncopies, stmt_vector_for_cost *cost_vec) > { > int prologue_cost = 0, epilogue_cost = 0, inside_cost = 0; > - enum tree_code code; > - optab optab; > tree vectype; > machine_mode mode; > class loop *loop = NULL; > @@ -4407,7 +4430,9 @@ vect_model_reduction_cost (loop_vec_info loop_vinfo, > mode = TYPE_MODE (vectype); > stmt_vec_info orig_stmt_info = vect_orig_stmt (stmt_info); > > - code = gimple_assign_rhs_code (orig_stmt_info->stmt); > + gimple_match_op op; > + if (!gimple_extract_op (orig_stmt_info->stmt, &op)) > + gcc_unreachable (); > > if (reduction_type == EXTRACT_LAST_REDUCTION) > /* No extra instructions are needed in the prologue. The loop body > @@ -4501,20 +4526,16 @@ vect_model_reduction_cost (loop_vec_info loop_vinfo, > else > { > int vec_size_in_bits = tree_to_uhwi (TYPE_SIZE (vectype)); > - tree bitsize = > - TYPE_SIZE (TREE_TYPE (gimple_assign_lhs (orig_stmt_info->stmt))); > + tree bitsize = TYPE_SIZE (op.type); > int element_bitsize = tree_to_uhwi (bitsize); > int nelements = vec_size_in_bits / element_bitsize; > > - if (code == COND_EXPR) > - code = MAX_EXPR; > - > - optab = optab_for_tree_code (code, vectype, optab_default); > + if (op.code == COND_EXPR) > + op.code = MAX_EXPR; > > /* We have a whole vector shift available. */ > - if (optab != unknown_optab > - && VECTOR_MODE_P (mode) > - && optab_handler (optab, mode) != CODE_FOR_nothing > + if (VECTOR_MODE_P (mode) > + && directly_supported_p (op.code, vectype) > && have_whole_vector_shift (mode)) > { > /* Final reduction via vector shifts and the reduction operator. > @@ -4855,7 +4876,7 @@ vect_find_reusable_accumulator (loop_vec_info > loop_vinfo, > initialize the accumulator with a neutral value instead. 
*/ > if (!operand_equal_p (initial_value, main_adjustment)) > return false; > - tree_code code = STMT_VINFO_REDUC_CODE (reduc_info); > + code_helper code = STMT_VINFO_REDUC_CODE (reduc_info); > initial_values[0] = neutral_op_for_reduction (TREE_TYPE > (initial_value), > code, initial_value); > } > @@ -4870,7 +4891,7 @@ vect_find_reusable_accumulator (loop_vec_info > loop_vinfo, > CODE emitting stmts before GSI. Returns a vector def of VECTYPE. */ > > static tree > -vect_create_partial_epilog (tree vec_def, tree vectype, enum tree_code code, > +vect_create_partial_epilog (tree vec_def, tree vectype, code_helper code, > gimple_seq *seq) > { > unsigned nunits = TYPE_VECTOR_SUBPARTS (TREE_TYPE (vec_def)).to_constant > (); > @@ -4953,9 +4974,7 @@ vect_create_partial_epilog (tree vec_def, tree vectype, > enum tree_code code, > gimple_seq_add_stmt_without_update (seq, epilog_stmt); > } > > - new_temp = make_ssa_name (vectype1); > - epilog_stmt = gimple_build_assign (new_temp, code, dst1, dst2); > - gimple_seq_add_stmt_without_update (seq, epilog_stmt); > + new_temp = gimple_build (seq, code, vectype1, dst1, dst2); > } > > return new_temp; > @@ -5032,7 +5051,7 @@ vect_create_epilog_for_reduction (loop_vec_info > loop_vinfo, > } > gphi *reduc_def_stmt > = as_a <gphi *> (STMT_VINFO_REDUC_DEF (vect_orig_stmt > (stmt_info))->stmt); > - enum tree_code code = STMT_VINFO_REDUC_CODE (reduc_info); > + code_helper code = STMT_VINFO_REDUC_CODE (reduc_info); > internal_fn reduc_fn = STMT_VINFO_REDUC_FN (reduc_info); > tree vectype; > machine_mode mode; > @@ -5699,14 +5718,9 @@ vect_create_epilog_for_reduction (loop_vec_info > loop_vinfo, > tree vectype1 = get_related_vectype_for_scalar_type (TYPE_MODE > (vectype), > stype, nunits1); > reduce_with_shift = have_whole_vector_shift (mode1); > - if (!VECTOR_MODE_P (mode1)) > + if (!VECTOR_MODE_P (mode1) > + || !directly_supported_p (code, vectype1)) > reduce_with_shift = false; > - else > - { > - optab optab = optab_for_tree_code (code, vectype1, optab_default); > - if (optab_handler (optab, mode1) == CODE_FOR_nothing) > - reduce_with_shift = false; > - } > > /* First reduce the vector to the desired vector size we should > do shift reduction on by combining upper and lower halves. */ > @@ -5944,7 +5958,7 @@ vect_create_epilog_for_reduction (loop_vec_info > loop_vinfo, > for (k = 0; k < live_out_stmts.size (); k++) > { > stmt_vec_info scalar_stmt_info = vect_orig_stmt (live_out_stmts[k]); > - scalar_dest = gimple_assign_lhs (scalar_stmt_info->stmt); > + scalar_dest = gimple_get_lhs (scalar_stmt_info->stmt); > > phis.create (3); > /* Find the loop-closed-use at the loop exit of the original scalar > @@ -6277,7 +6291,7 @@ is_nonwrapping_integer_induction (stmt_vec_info > stmt_vinfo, class loop *loop) > CODE is the code for the operation. COND_FN is the conditional internal > function, if it exists. VECTYPE_IN is the type of the vector input. 
*/ > static bool > -use_mask_by_cond_expr_p (enum tree_code code, internal_fn cond_fn, > +use_mask_by_cond_expr_p (code_helper code, internal_fn cond_fn, > tree vectype_in) > { > if (cond_fn != IFN_LAST > @@ -6285,15 +6299,17 @@ use_mask_by_cond_expr_p (enum tree_code code, > internal_fn cond_fn, > OPTIMIZE_FOR_SPEED)) > return false; > > - switch (code) > - { > - case DOT_PROD_EXPR: > - case SAD_EXPR: > - return true; > + if (code.is_tree_code ()) > + switch (tree_code (code)) > + { > + case DOT_PROD_EXPR: > + case SAD_EXPR: > + return true; > > - default: > - return false; > - } > + default: > + break; > + } > + return false; > } > > /* Insert a conditional expression to enable masked vectorization. CODE is > the > @@ -6301,10 +6317,10 @@ use_mask_by_cond_expr_p (enum tree_code code, > internal_fn cond_fn, > mask. GSI is a statement iterator used to place the new conditional > expression. */ > static void > -build_vect_cond_expr (enum tree_code code, tree vop[3], tree mask, > +build_vect_cond_expr (code_helper code, tree vop[3], tree mask, > gimple_stmt_iterator *gsi) > { > - switch (code) > + switch (tree_code (code)) > { > case DOT_PROD_EXPR: > { > @@ -6390,12 +6406,10 @@ vectorizable_reduction (loop_vec_info loop_vinfo, > slp_instance slp_node_instance, > stmt_vector_for_cost *cost_vec) > { > - tree scalar_dest; > tree vectype_in = NULL_TREE; > class loop *loop = LOOP_VINFO_LOOP (loop_vinfo); > enum vect_def_type cond_reduc_dt = vect_unknown_def_type; > stmt_vec_info cond_stmt_vinfo = NULL; > - tree scalar_type; > int i; > int ncopies; > bool single_defuse_cycle = false; > @@ -6508,18 +6522,18 @@ vectorizable_reduction (loop_vec_info loop_vinfo, > info_for_reduction to work. */ > if (STMT_VINFO_LIVE_P (vdef)) > STMT_VINFO_REDUC_DEF (def) = phi_info; > - gassign *assign = dyn_cast <gassign *> (vdef->stmt); > - if (!assign) > + gimple_match_op op; > + if (!gimple_extract_op (vdef->stmt, &op)) > { > if (dump_enabled_p ()) > dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, > - "reduction chain includes calls.\n"); > + "reduction chain includes unsupported" > + " statement type.\n"); > return false; > } > - if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (assign))) > + if (CONVERT_EXPR_CODE_P (op.code)) > { > - if (!tree_nop_conversion_p (TREE_TYPE (gimple_assign_lhs (assign)), > - TREE_TYPE (gimple_assign_rhs1 > (assign)))) > + if (!tree_nop_conversion_p (op.type, TREE_TYPE (op.ops[0]))) > { > if (dump_enabled_p ()) > dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, > @@ -6530,7 +6544,7 @@ vectorizable_reduction (loop_vec_info loop_vinfo, > else if (!stmt_info) > /* First non-conversion stmt. 
*/ > stmt_info = vdef; > - reduc_def = gimple_op (vdef->stmt, 1 + STMT_VINFO_REDUC_IDX (vdef)); > + reduc_def = op.ops[STMT_VINFO_REDUC_IDX (vdef)]; > reduc_chain_length++; > if (!stmt_info && slp_node) > slp_for_stmt_info = SLP_TREE_CHILDREN (slp_for_stmt_info)[0]; > @@ -6588,26 +6602,24 @@ vectorizable_reduction (loop_vec_info loop_vinfo, > > tree vectype_out = STMT_VINFO_VECTYPE (stmt_info); > STMT_VINFO_REDUC_VECTYPE (reduc_info) = vectype_out; > - gassign *stmt = as_a <gassign *> (stmt_info->stmt); > - enum tree_code code = gimple_assign_rhs_code (stmt); > - bool lane_reduc_code_p > - = (code == DOT_PROD_EXPR || code == WIDEN_SUM_EXPR || code == SAD_EXPR); > - int op_type = TREE_CODE_LENGTH (code); > + gimple_match_op op; > + if (!gimple_extract_op (stmt_info->stmt, &op)) > + gcc_unreachable (); > + bool lane_reduc_code_p = (op.code == DOT_PROD_EXPR > + || op.code == WIDEN_SUM_EXPR > + || op.code == SAD_EXPR); > enum optab_subtype optab_query_kind = optab_vector; > - if (code == DOT_PROD_EXPR > - && TYPE_SIGN (TREE_TYPE (gimple_assign_rhs1 (stmt))) > - != TYPE_SIGN (TREE_TYPE (gimple_assign_rhs2 (stmt)))) > + if (op.code == DOT_PROD_EXPR > + && (TYPE_SIGN (TREE_TYPE (op.ops[0])) > + != TYPE_SIGN (TREE_TYPE (op.ops[1])))) > optab_query_kind = optab_vector_mixed_sign; > > - > - scalar_dest = gimple_assign_lhs (stmt); > - scalar_type = TREE_TYPE (scalar_dest); > - if (!POINTER_TYPE_P (scalar_type) && !INTEGRAL_TYPE_P (scalar_type) > - && !SCALAR_FLOAT_TYPE_P (scalar_type)) > + if (!POINTER_TYPE_P (op.type) && !INTEGRAL_TYPE_P (op.type) > + && !SCALAR_FLOAT_TYPE_P (op.type)) > return false; > > /* Do not try to vectorize bit-precision reductions. */ > - if (!type_has_mode_precision_p (scalar_type)) > + if (!type_has_mode_precision_p (op.type)) > return false; > > /* For lane-reducing ops we're reducing the number of reduction PHIs > @@ -6626,25 +6638,23 @@ vectorizable_reduction (loop_vec_info loop_vinfo, > The last use is the reduction variable. In case of nested cycle this > assumption is not true: we use reduc_index to record the index of the > reduction variable. */ > - slp_tree *slp_op = XALLOCAVEC (slp_tree, op_type); > + slp_tree *slp_op = XALLOCAVEC (slp_tree, op.num_ops); > /* We need to skip an extra operand for COND_EXPRs with embedded > comparison. */ > unsigned opno_adjust = 0; > - if (code == COND_EXPR > - && COMPARISON_CLASS_P (gimple_assign_rhs1 (stmt))) > + if (op.code == COND_EXPR && COMPARISON_CLASS_P (op.ops[0])) > opno_adjust = 1; > - for (i = 0; i < op_type; i++) > + for (i = 0; i < (int) op.num_ops; i++) > { > /* The condition of COND_EXPR is checked in vectorizable_condition(). > */ > - if (i == 0 && code == COND_EXPR) > + if (i == 0 && op.code == COND_EXPR) > continue; > > stmt_vec_info def_stmt_info; > enum vect_def_type dt; > - tree op; > if (!vect_is_simple_use (loop_vinfo, stmt_info, slp_for_stmt_info, > - i + opno_adjust, &op, &slp_op[i], &dt, &tem, > - &def_stmt_info)) > + i + opno_adjust, &op.ops[i], &slp_op[i], &dt, > + &tem, &def_stmt_info)) > { > if (dump_enabled_p ()) > dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, > @@ -6669,13 +6679,13 @@ vectorizable_reduction (loop_vec_info loop_vinfo, > < GET_MODE_SIZE (SCALAR_TYPE_MODE (TREE_TYPE (tem)))))) > vectype_in = tem; > > - if (code == COND_EXPR) > + if (op.code == COND_EXPR) > { > /* Record how the non-reduction-def value of COND_EXPR is defined. 
> */ > if (dt == vect_constant_def) > { > cond_reduc_dt = dt; > - cond_reduc_val = op; > + cond_reduc_val = op.ops[i]; > } > if (dt == vect_induction_def > && def_stmt_info > @@ -6845,7 +6855,7 @@ vectorizable_reduction (loop_vec_info loop_vinfo, > (and also the same tree-code) when generating the epilog code and > when generating the code inside the loop. */ > > - enum tree_code orig_code = STMT_VINFO_REDUC_CODE (phi_info); > + code_helper orig_code = STMT_VINFO_REDUC_CODE (phi_info); > STMT_VINFO_REDUC_CODE (reduc_info) = orig_code; > > vect_reduction_type reduction_type = STMT_VINFO_REDUC_TYPE (reduc_info); > @@ -6864,7 +6874,7 @@ vectorizable_reduction (loop_vec_info loop_vinfo, > && !REDUC_GROUP_FIRST_ELEMENT (stmt_info) > && known_eq (LOOP_VINFO_VECT_FACTOR (loop_vinfo), 1u)) > ; > - else if (needs_fold_left_reduction_p (scalar_type, orig_code)) > + else if (needs_fold_left_reduction_p (op.type, orig_code)) > { > /* When vectorizing a reduction chain w/o SLP the reduction PHI > is not directy used in stmt. */ > @@ -6879,8 +6889,8 @@ vectorizable_reduction (loop_vec_info loop_vinfo, > STMT_VINFO_REDUC_TYPE (reduc_info) > = reduction_type = FOLD_LEFT_REDUCTION; > } > - else if (!commutative_tree_code (orig_code) > - || !associative_tree_code (orig_code)) > + else if (!commutative_binary_op_p (orig_code, op.type) > + || !associative_binary_op_p (orig_code, op.type)) > { > if (dump_enabled_p ()) > dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, > @@ -6935,7 +6945,7 @@ vectorizable_reduction (loop_vec_info loop_vinfo, > else if (reduction_type == COND_REDUCTION) > { > int scalar_precision > - = GET_MODE_PRECISION (SCALAR_TYPE_MODE (scalar_type)); > + = GET_MODE_PRECISION (SCALAR_TYPE_MODE (op.type)); > cr_index_scalar_type = make_unsigned_type (scalar_precision); > cr_index_vector_type = get_same_sized_vectype (cr_index_scalar_type, > vectype_out); > @@ -7121,28 +7131,19 @@ vectorizable_reduction (loop_vec_info loop_vinfo, > > if (single_defuse_cycle || lane_reduc_code_p) > { > - gcc_assert (code != COND_EXPR); > + gcc_assert (op.code != COND_EXPR); > > /* 4. Supportable by target? */ > bool ok = true; > > /* 4.1. 
check support for the operation in the loop */ > - optab optab = optab_for_tree_code (code, vectype_in, optab_query_kind); > - if (!optab) > - { > - if (dump_enabled_p ()) > - dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, > - "no optab.\n"); > - ok = false; > - } > - > machine_mode vec_mode = TYPE_MODE (vectype_in); > - if (ok && optab_handler (optab, vec_mode) == CODE_FOR_nothing) > + if (!directly_supported_p (op.code, vectype_in, optab_query_kind)) > { > if (dump_enabled_p ()) > dump_printf (MSG_NOTE, "op not supported by target.\n"); > if (maybe_ne (GET_MODE_SIZE (vec_mode), UNITS_PER_WORD) > - || !vect_can_vectorize_without_simd_p (code)) > + || !vect_can_vectorize_without_simd_p (op.code)) > ok = false; > else > if (dump_enabled_p ()) > @@ -7150,7 +7151,7 @@ vectorizable_reduction (loop_vec_info loop_vinfo, > } > > if (vect_emulated_vector_p (vectype_in) > - && !vect_can_vectorize_without_simd_p (code)) > + && !vect_can_vectorize_without_simd_p (op.code)) > { > if (dump_enabled_p ()) > dump_printf (MSG_NOTE, "using word mode not possible.\n"); > @@ -7183,11 +7184,9 @@ vectorizable_reduction (loop_vec_info loop_vinfo, > > if (slp_node > && !(!single_defuse_cycle > - && code != DOT_PROD_EXPR > - && code != WIDEN_SUM_EXPR > - && code != SAD_EXPR > + && !lane_reduc_code_p > && reduction_type != FOLD_LEFT_REDUCTION)) > - for (i = 0; i < op_type; i++) > + for (i = 0; i < (int) op.num_ops; i++) > if (!vect_maybe_update_slp_op_vectype (slp_op[i], vectype_in)) > { > if (dump_enabled_p ()) > @@ -7206,10 +7205,7 @@ vectorizable_reduction (loop_vec_info loop_vinfo, > /* Cost the reduction op inside the loop if transformed via > vect_transform_reduction. Otherwise this is costed by the > separate vectorizable_* routines. */ > - if (single_defuse_cycle > - || code == DOT_PROD_EXPR > - || code == WIDEN_SUM_EXPR > - || code == SAD_EXPR) > + if (single_defuse_cycle || lane_reduc_code_p) > record_stmt_cost (cost_vec, ncopies, vector_stmt, stmt_info, 0, > vect_body); > > if (dump_enabled_p () > @@ -7220,9 +7216,7 @@ vectorizable_reduction (loop_vec_info loop_vinfo, > /* All but single defuse-cycle optimized, lane-reducing and fold-left > reductions go through their own vectorizable_* routines. */ > if (!single_defuse_cycle > - && code != DOT_PROD_EXPR > - && code != WIDEN_SUM_EXPR > - && code != SAD_EXPR > + && !lane_reduc_code_p > && reduction_type != FOLD_LEFT_REDUCTION) > { > stmt_vec_info tem > @@ -7238,10 +7232,10 @@ vectorizable_reduction (loop_vec_info loop_vinfo, > else if (loop_vinfo && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo)) > { > vec_loop_masks *masks = &LOOP_VINFO_MASKS (loop_vinfo); > - internal_fn cond_fn = get_conditional_internal_fn (code); > + internal_fn cond_fn = get_conditional_internal_fn (op.code, op.type); > > if (reduction_type != FOLD_LEFT_REDUCTION > - && !use_mask_by_cond_expr_p (code, cond_fn, vectype_in) > + && !use_mask_by_cond_expr_p (op.code, cond_fn, vectype_in) > && (cond_fn == IFN_LAST > || !direct_internal_fn_supported_p (cond_fn, vectype_in, > OPTIMIZE_FOR_SPEED))) > @@ -7294,24 +7288,11 @@ vect_transform_reduction (loop_vec_info loop_vinfo, > gcc_assert (STMT_VINFO_DEF_TYPE (reduc_info) == > vect_double_reduction_def); > } > > - gassign *stmt = as_a <gassign *> (stmt_info->stmt); > - enum tree_code code = gimple_assign_rhs_code (stmt); > - int op_type = TREE_CODE_LENGTH (code); > - > - /* Flatten RHS. 
*/ > - tree ops[3]; > - switch (get_gimple_rhs_class (code)) > - { > - case GIMPLE_TERNARY_RHS: > - ops[2] = gimple_assign_rhs3 (stmt); > - /* Fall thru. */ > - case GIMPLE_BINARY_RHS: > - ops[0] = gimple_assign_rhs1 (stmt); > - ops[1] = gimple_assign_rhs2 (stmt); > - break; > - default: > - gcc_unreachable (); > - } > + gimple_match_op op; > + if (!gimple_extract_op (stmt_info->stmt, &op)) > + gcc_unreachable (); > + gcc_assert (op.code.is_tree_code ()); > + auto code = tree_code (op.code); > > /* All uses but the last are expected to be defined in the loop. > The last use is the reduction variable. In case of nested cycle this > @@ -7359,7 +7340,7 @@ vect_transform_reduction (loop_vec_info loop_vinfo, > internal_fn reduc_fn = STMT_VINFO_REDUC_FN (reduc_info); > return vectorize_fold_left_reduction > (loop_vinfo, stmt_info, gsi, vec_stmt, slp_node, reduc_def_phi, > code, > - reduc_fn, ops, vectype_in, reduc_index, masks); > + reduc_fn, op.ops, vectype_in, reduc_index, masks); > } > > bool single_defuse_cycle = STMT_VINFO_FORCE_SINGLE_CYCLE (reduc_info); > @@ -7369,22 +7350,22 @@ vect_transform_reduction (loop_vec_info loop_vinfo, > || code == SAD_EXPR); > > /* Create the destination vector */ > - tree scalar_dest = gimple_assign_lhs (stmt); > + tree scalar_dest = gimple_assign_lhs (stmt_info->stmt); > tree vec_dest = vect_create_destination_var (scalar_dest, vectype_out); > > vect_get_vec_defs (loop_vinfo, stmt_info, slp_node, ncopies, > single_defuse_cycle && reduc_index == 0 > - ? NULL_TREE : ops[0], &vec_oprnds0, > + ? NULL_TREE : op.ops[0], &vec_oprnds0, > single_defuse_cycle && reduc_index == 1 > - ? NULL_TREE : ops[1], &vec_oprnds1, > - op_type == ternary_op > + ? NULL_TREE : op.ops[1], &vec_oprnds1, > + op.num_ops == 3 > && !(single_defuse_cycle && reduc_index == 2) > - ? ops[2] : NULL_TREE, &vec_oprnds2); > + ? op.ops[2] : NULL_TREE, &vec_oprnds2); > if (single_defuse_cycle) > { > gcc_assert (!slp_node); > vect_get_vec_defs_for_operand (loop_vinfo, stmt_info, 1, > - ops[reduc_index], > + op.ops[reduc_index], > reduc_index == 0 ? &vec_oprnds0 > : (reduc_index == 1 ? &vec_oprnds1 > : &vec_oprnds2)); > @@ -7414,7 +7395,7 @@ vect_transform_reduction (loop_vec_info loop_vinfo, > } > else > { > - if (op_type == ternary_op) > + if (op.num_ops == 3) > vop[2] = vec_oprnds2[i]; > > if (masked_loop_p && mask_by_cond_expr) > @@ -7546,7 +7527,7 @@ vect_transform_cycle_phi (loop_vec_info loop_vinfo, > { > tree initial_value > = (num_phis == 1 ? initial_values[0] : NULL_TREE); > - tree_code code = STMT_VINFO_REDUC_CODE (reduc_info); > + code_helper code = STMT_VINFO_REDUC_CODE (reduc_info); > tree neutral_op > = neutral_op_for_reduction (TREE_TYPE (vectype_out), > code, initial_value); > @@ -7603,7 +7584,7 @@ vect_transform_cycle_phi (loop_vec_info loop_vinfo, > if (!reduc_info->reduc_initial_values.is_empty ()) > { > initial_def = reduc_info->reduc_initial_values[0]; > - enum tree_code code = STMT_VINFO_REDUC_CODE (reduc_info); > + code_helper code = STMT_VINFO_REDUC_CODE (reduc_info); > tree neutral_op > = neutral_op_for_reduction (TREE_TYPE (initial_def), > code, initial_def); > @@ -7901,6 +7882,15 @@ vect_can_vectorize_without_simd_p (tree_code code) > } > } > > +/* Likewise, but taking a code_helper. 
*/ > + > +bool > +vect_can_vectorize_without_simd_p (code_helper code) > +{ > + return (code.is_tree_code () > + && vect_can_vectorize_without_simd_p (tree_code (code))); > +} > + > /* Function vectorizable_induction > > Check if STMT_INFO performs an induction computation that can be > vectorized. > diff --git a/gcc/tree-vect-patterns.c b/gcc/tree-vect-patterns.c > index 854cbcff390..26421ee5511 100644 > --- a/gcc/tree-vect-patterns.c > +++ b/gcc/tree-vect-patterns.c > @@ -5594,8 +5594,10 @@ vect_mark_pattern_stmts (vec_info *vinfo, > /* Transfer reduction path info to the pattern. */ > if (STMT_VINFO_REDUC_IDX (orig_stmt_info_saved) != -1) > { > - tree lookfor = gimple_op (orig_stmt_info_saved->stmt, > - 1 + STMT_VINFO_REDUC_IDX (orig_stmt_info)); > + gimple_match_op op; > + if (!gimple_extract_op (orig_stmt_info_saved->stmt, &op)) > + gcc_unreachable (); > + tree lookfor = op.ops[STMT_VINFO_REDUC_IDX (orig_stmt_info)]; > /* Search the pattern def sequence and the main pattern stmt. Note > we may have inserted all into a containing pattern def sequence > so the following is a bit awkward. */ > @@ -5615,14 +5617,15 @@ vect_mark_pattern_stmts (vec_info *vinfo, > do > { > bool found = false; > - for (unsigned i = 1; i < gimple_num_ops (s); ++i) > - if (gimple_op (s, i) == lookfor) > - { > - STMT_VINFO_REDUC_IDX (vinfo->lookup_stmt (s)) = i - 1; > - lookfor = gimple_get_lhs (s); > - found = true; > - break; > - } > + if (gimple_extract_op (s, &op)) > + for (unsigned i = 0; i < op.num_ops; ++i) > + if (op.ops[i] == lookfor) > + { > + STMT_VINFO_REDUC_IDX (vinfo->lookup_stmt (s)) = i; > + lookfor = gimple_get_lhs (s); > + found = true; > + break; > + } > if (s == pattern_stmt) > { > if (!found && dump_enabled_p ()) > diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c > index 03cc7267cf8..1e197023b98 100644 > --- a/gcc/tree-vect-stmts.c > +++ b/gcc/tree-vect-stmts.c > @@ -3202,7 +3202,6 @@ vectorizable_call (vec_info *vinfo, > int ndts = ARRAY_SIZE (dt); > int ncopies, j; > auto_vec<tree, 8> vargs; > - auto_vec<tree, 8> orig_vargs; > enum { NARROW, NONE, WIDEN } modifier; > size_t i, nargs; > tree lhs; > @@ -3426,6 +3425,8 @@ vectorizable_call (vec_info *vinfo, > needs to be generated. */ > gcc_assert (ncopies >= 1); > > + int reduc_idx = STMT_VINFO_REDUC_IDX (stmt_info); > + internal_fn cond_fn = get_conditional_internal_fn (ifn); > vec_loop_masks *masks = (loop_vinfo ? &LOOP_VINFO_MASKS (loop_vinfo) : > NULL); > if (!vec_stmt) /* transformation not required. */ > { > @@ -3446,14 +3447,33 @@ vectorizable_call (vec_info *vinfo, > record_stmt_cost (cost_vec, ncopies / 2, > vec_promote_demote, stmt_info, 0, vect_body); > > - if (loop_vinfo && mask_opno >= 0) > + if (loop_vinfo > + && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) > + && (reduc_idx >= 0 || mask_opno >= 0)) > { > - unsigned int nvectors = (slp_node > - ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) > - : ncopies); > - tree scalar_mask = gimple_call_arg (stmt_info->stmt, mask_opno); > - vect_record_loop_mask (loop_vinfo, masks, nvectors, > - vectype_out, scalar_mask); > + if (reduc_idx >= 0 > + && (cond_fn == IFN_LAST > + || !direct_internal_fn_supported_p (cond_fn, vectype_out, > + OPTIMIZE_FOR_SPEED))) > + { > + if (dump_enabled_p ()) > + dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, > + "can't use a fully-masked loop because no" > + " conditional operation is available.\n"); > + LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false; > + } > + else > + { > + unsigned int nvectors > + = (slp_node > + ? 
SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) > + : ncopies); > + tree scalar_mask = NULL_TREE; > + if (mask_opno >= 0) > + scalar_mask = gimple_call_arg (stmt_info->stmt, mask_opno); > + vect_record_loop_mask (loop_vinfo, masks, nvectors, > + vectype_out, scalar_mask); > + } > } > return true; > } > @@ -3468,12 +3488,17 @@ vectorizable_call (vec_info *vinfo, > vec_dest = vect_create_destination_var (scalar_dest, vectype_out); > > bool masked_loop_p = loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo); > + unsigned int vect_nargs = nargs; > + if (masked_loop_p && reduc_idx >= 0) > + { > + ifn = cond_fn; > + vect_nargs += 2; > + } > > if (modifier == NONE || ifn != IFN_LAST) > { > tree prev_res = NULL_TREE; > - vargs.safe_grow (nargs, true); > - orig_vargs.safe_grow (nargs, true); > + vargs.safe_grow (vect_nargs, true); > auto_vec<vec<tree> > vec_defs (nargs); > for (j = 0; j < ncopies; ++j) > { > @@ -3488,12 +3513,23 @@ vectorizable_call (vec_info *vinfo, > /* Arguments are ready. Create the new vector stmt. */ > FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_oprnd0) > { > + int varg = 0; > + if (masked_loop_p && reduc_idx >= 0) > + { > + unsigned int vec_num = vec_oprnds0.length (); > + /* Always true for SLP. */ > + gcc_assert (ncopies == 1); > + vargs[varg++] = vect_get_loop_mask (gsi, masks, vec_num, > + vectype_out, i); > + } > size_t k; > for (k = 0; k < nargs; k++) > { > vec<tree> vec_oprndsk = vec_defs[k]; > - vargs[k] = vec_oprndsk[i]; > + vargs[varg++] = vec_oprndsk[i]; > } > + if (masked_loop_p && reduc_idx >= 0) > + vargs[varg++] = vargs[reduc_idx + 1]; > gimple *new_stmt; > if (modifier == NARROW) > { > @@ -3546,6 +3582,10 @@ vectorizable_call (vec_info *vinfo, > continue; > } > > + int varg = 0; > + if (masked_loop_p && reduc_idx >= 0) > + vargs[varg++] = vect_get_loop_mask (gsi, masks, ncopies, > + vectype_out, j); > for (i = 0; i < nargs; i++) > { > op = gimple_call_arg (stmt, i); > @@ -3556,8 +3596,10 @@ vectorizable_call (vec_info *vinfo, > op, &vec_defs[i], > vectypes[i]); > } > - orig_vargs[i] = vargs[i] = vec_defs[i][j]; > + vargs[varg++] = vec_defs[i][j]; > } > + if (masked_loop_p && reduc_idx >= 0) > + vargs[varg++] = vargs[reduc_idx + 1]; > > if (mask_opno >= 0 && masked_loop_p) > { > diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h > index f8f30641512..8330cd897b8 100644 > --- a/gcc/tree-vectorizer.h > +++ b/gcc/tree-vectorizer.h > @@ -28,6 +28,7 @@ typedef class _stmt_vec_info *stmt_vec_info; > #include "target.h" > #include "internal-fn.h" > #include "tree-ssa-operands.h" > +#include "gimple-match.h" > > /* Used for naming of new temporaries. */ > enum vect_var_kind { > @@ -1192,7 +1193,7 @@ public: > enum vect_reduction_type reduc_type; > > /* The original reduction code, to be used in the epilogue. */ > - enum tree_code reduc_code; > + code_helper reduc_code; > /* An internal function we should use in the epilogue. */ > internal_fn reduc_fn; > > @@ -2151,7 +2152,7 @@ extern tree vect_create_addr_base_for_vector_ref > (vec_info *, > tree); > > /* In tree-vect-loop.c. */ > -extern tree neutral_op_for_reduction (tree, tree_code, tree); > +extern tree neutral_op_for_reduction (tree, code_helper, tree); > extern widest_int vect_iv_limit_for_partial_vectors (loop_vec_info > loop_vinfo); > bool vect_rgroup_iv_might_wrap_p (loop_vec_info, rgroup_controls *); > /* Used in tree-vect-loop-manip.c */ > @@ -2160,7 +2161,7 @@ extern opt_result > vect_determine_partial_vectors_and_peeling (loop_vec_info, > /* Used in gimple-loop-interchange.c and tree-parloops.c. 
*/ > extern bool check_reduction_path (dump_user_location_t, loop_p, gphi *, tree, > enum tree_code); > -extern bool needs_fold_left_reduction_p (tree, tree_code); > +extern bool needs_fold_left_reduction_p (tree, code_helper); > /* Drive for loop analysis stage. */ > extern opt_loop_vec_info vect_analyze_loop (class loop *, vec_info_shared *); > extern tree vect_build_loop_niters (loop_vec_info, bool * = NULL); > @@ -2178,7 +2179,7 @@ extern tree vect_get_loop_len (loop_vec_info, > vec_loop_lens *, unsigned int, > unsigned int); > extern gimple_seq vect_gen_len (tree, tree, tree, tree); > extern stmt_vec_info info_for_reduction (vec_info *, stmt_vec_info); > -extern bool reduction_fn_for_scalar_code (enum tree_code, internal_fn *); > +extern bool reduction_fn_for_scalar_code (code_helper, internal_fn *); > > /* Drive for loop transformation stage. */ > extern class loop *vect_transform_loop (loop_vec_info, gimple *); > @@ -2216,6 +2217,7 @@ extern bool vectorizable_phi (vec_info *, > stmt_vec_info, gimple **, slp_tree, > stmt_vector_for_cost *); > extern bool vect_emulated_vector_p (tree); > extern bool vect_can_vectorize_without_simd_p (tree_code); > +extern bool vect_can_vectorize_without_simd_p (code_helper); > extern int vect_get_known_peeling_cost (loop_vec_info, int, int *, > stmt_vector_for_cost *, > stmt_vector_for_cost *, > -- > 2.25.1 >
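
To make two of the API comments above concrete, here is an untested sketch of what I had in mind; the body just mirrors the two-operand code_helper gimple_build in the patch (the exact toplevel comment wording and the one-operand signature are my guesses, not taken from the patch):

/* Build the result of applying CODE to operand OP0 of type TYPE,
   appending any new statements to SEQ.  */

tree
gimple_build (gimple_seq *seq, location_t loc, code_helper code,
	      tree type, tree op0)
{
  if (code.is_tree_code ())
    return gimple_build (seq, loc, tree_code (code), type, op0);
  return gimple_build (seq, loc, combined_fn (code), type, op0);
}

with a three-operand variant dispatching on op0/op1/op2 the same way.  And for code_helper itself, something along the lines of

  explicit operator internal_fn () const
  { return as_internal_fn (combined_fn (*this)); }

to match the existing combined_fn conversion.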