On Fri, 18 Oct 2024, Robin Dapp wrote: > This patch adds an else operand to vectorized masked load calls. > The current implementation adds else-value arguments to the respective > target-querying functions that is used to supply the vectorizer with the > proper else value. > > Right now, the only spot where a zero else value is actually enforced is > tree-ifcvt. Loop masking and other instances of masked loads in the > vectorizer itself do not use vec_cond_exprs. > > gcc/ChangeLog: > > * optabs-query.cc (supports_vec_convert_optab_p): Return icode. > (get_supported_else_val): Return supported else value for > optab's operand at index. > (supports_vec_gather_load_p): Add else argument. > (supports_vec_scatter_store_p): Ditto. > * optabs-query.h (supports_vec_gather_load_p): Ditto. > (get_supported_else_val): Ditto. > * optabs-tree.cc (target_supports_mask_load_store_p): Ditto. > (can_vec_mask_load_store_p): Ditto. > (target_supports_len_load_store_p): Ditto. > (get_len_load_store_mode): Ditto. > * optabs-tree.h (target_supports_mask_load_store_p): Ditto. > (can_vec_mask_load_store_p): Ditto. > * tree-vect-data-refs.cc (vect_lanes_optab_supported_p): Ditto. > (vect_gather_scatter_fn_p): Ditto. > (vect_check_gather_scatter): Ditto. > (vect_load_lanes_supported): Ditto. > * tree-vect-patterns.cc (vect_recog_gather_scatter_pattern): > Ditto. > * tree-vect-slp.cc (vect_get_operand_map): Adjust indices for > else operand. > (vect_slp_analyze_node_operations): Skip undefined else operand. > * tree-vect-stmts.cc (exist_non_indexing_operands_for_use_p): > Add else operand handling. > (vect_get_vec_defs_for_operand): Handle undefined else operand. > (check_load_store_for_partial_vectors): Add else argument. > (vect_truncate_gather_scatter_offset): Ditto. > (vect_use_strided_gather_scatters_p): Ditto. > (get_group_load_store_type): Ditto. > (get_load_store_type): Ditto. > (vect_get_mask_load_else): Ditto. > (vect_get_else_val_from_tree): Ditto. 
> (vect_build_one_gather_load_call): Add zero else operand. > (vectorizable_load): Use else operand. > * tree-vectorizer.h (vect_gather_scatter_fn_p): Add else > argument. > (vect_load_lanes_supported): Ditto. > (vect_get_mask_load_else): Ditto. > (vect_get_else_val_from_tree): Ditto. > --- > gcc/optabs-query.cc | 59 ++++++--- > gcc/optabs-query.h | 3 +- > gcc/optabs-tree.cc | 62 ++++++--- > gcc/optabs-tree.h | 8 +- > gcc/tree-vect-data-refs.cc | 77 +++++++---- > gcc/tree-vect-patterns.cc | 18 ++- > gcc/tree-vect-slp.cc | 22 +++- > gcc/tree-vect-stmts.cc | 257 +++++++++++++++++++++++++++++-------- > gcc/tree-vectorizer.h | 11 +- > 9 files changed, 394 insertions(+), 123 deletions(-) > > diff --git a/gcc/optabs-query.cc b/gcc/optabs-query.cc > index cc52bc0f5ea..347a1322479 100644 > --- a/gcc/optabs-query.cc > +++ b/gcc/optabs-query.cc > @@ -29,6 +29,9 @@ along with GCC; see the file COPYING3. If not see > #include "rtl.h" > #include "recog.h" > #include "vec-perm-indices.h" > +#include "internal-fn.h" > +#include "memmodel.h" > +#include "optabs.h" > > struct target_optabs default_target_optabs; > struct target_optabs *this_fn_optabs = &default_target_optabs; > @@ -672,34 +675,48 @@ lshift_cheap_p (bool speed_p) > that mode, given that the second mode is always an integer vector. > If MODE is VOIDmode, return true if OP supports any vector mode. */ > > -static bool > +static enum insn_code > supports_vec_convert_optab_p (optab op, machine_mode mode)
The name is bad now. supported_vec_convert_optab (...) maybe? > { > int start = mode == VOIDmode ? 0 : mode; > int end = mode == VOIDmode ? MAX_MACHINE_MODE - 1 : mode; > + enum insn_code icode = CODE_FOR_nothing; > for (int i = start; i <= end; ++i) > if (VECTOR_MODE_P ((machine_mode) i)) > for (int j = MIN_MODE_VECTOR_INT; j < MAX_MODE_VECTOR_INT; ++j) > - if (convert_optab_handler (op, (machine_mode) i, > - (machine_mode) j) != CODE_FOR_nothing) > - return true; > + { > + if ((icode > + = convert_optab_handler (op, (machine_mode) i, > + (machine_mode) j)) != CODE_FOR_nothing) > + return icode; > + } > > - return false; > + return icode; > } > > /* If MODE is not VOIDmode, return true if vec_gather_load is available for > that mode. If MODE is VOIDmode, return true if gather_load is available > - for at least one vector mode. */ > + for at least one vector mode. > + In that case, and if ELSVALS is nonzero, store the supported else values > + into the vector it points to. */ > > bool > -supports_vec_gather_load_p (machine_mode mode) > +supports_vec_gather_load_p (machine_mode mode, auto_vec<int> *elsvals) > { > - if (!this_fn_optabs->supports_vec_gather_load[mode]) > - this_fn_optabs->supports_vec_gather_load[mode] > - = (supports_vec_convert_optab_p (gather_load_optab, mode) > - || supports_vec_convert_optab_p (mask_gather_load_optab, mode) > - || supports_vec_convert_optab_p (mask_len_gather_load_optab, mode) > - ? 1 : -1); > + enum insn_code icode = CODE_FOR_nothing; > + if (!this_fn_optabs->supports_vec_gather_load[mode] || elsvals) > + { > + icode = supports_vec_convert_optab_p (gather_load_optab, mode); > + if (icode == CODE_FOR_nothing) > + icode = supports_vec_convert_optab_p (mask_gather_load_optab, mode); > + if (icode == CODE_FOR_nothing) > + icode = supports_vec_convert_optab_p (mask_len_gather_load_optab, mode); > + this_fn_optabs->supports_vec_gather_load[mode] > + = (icode != CODE_FOR_nothing) ? 
1 : -1; > + } > + > + if (elsvals && icode != CODE_FOR_nothing) > + get_supported_else_vals (icode, MASK_LOAD_GATHER_ELSE_IDX, *elsvals); > > return this_fn_optabs->supports_vec_gather_load[mode] > 0; > } > @@ -711,12 +728,18 @@ supports_vec_gather_load_p (machine_mode mode) > bool > supports_vec_scatter_store_p (machine_mode mode) > { > + enum insn_code icode; > if (!this_fn_optabs->supports_vec_scatter_store[mode]) > - this_fn_optabs->supports_vec_scatter_store[mode] > - = (supports_vec_convert_optab_p (scatter_store_optab, mode) > - || supports_vec_convert_optab_p (mask_scatter_store_optab, mode) > - || supports_vec_convert_optab_p (mask_len_scatter_store_optab, mode) > - ? 1 : -1); > + { > + icode = supports_vec_convert_optab_p (scatter_store_optab, mode); > + if (icode == CODE_FOR_nothing) > + icode = supports_vec_convert_optab_p (mask_scatter_store_optab, mode); > + if (icode == CODE_FOR_nothing) > + icode = supports_vec_convert_optab_p (mask_len_scatter_store_optab, > + mode); > + this_fn_optabs->supports_vec_scatter_store[mode] > + = (icode != CODE_FOR_nothing) ? 
1 : -1; > + } > > return this_fn_optabs->supports_vec_scatter_store[mode] > 0; > } > diff --git a/gcc/optabs-query.h b/gcc/optabs-query.h > index 0cb2c21ba85..5e0f59ee4b9 100644 > --- a/gcc/optabs-query.h > +++ b/gcc/optabs-query.h > @@ -191,7 +191,8 @@ bool can_compare_and_swap_p (machine_mode, bool); > bool can_atomic_exchange_p (machine_mode, bool); > bool can_atomic_load_p (machine_mode); > bool lshift_cheap_p (bool); > -bool supports_vec_gather_load_p (machine_mode = E_VOIDmode); > +bool supports_vec_gather_load_p (machine_mode = E_VOIDmode, > + auto_vec<int> * = nullptr); > bool supports_vec_scatter_store_p (machine_mode = E_VOIDmode); > bool can_vec_extract (machine_mode, machine_mode); > > diff --git a/gcc/optabs-tree.cc b/gcc/optabs-tree.cc > index b69a5bc3676..ebdb6051c14 100644 > --- a/gcc/optabs-tree.cc > +++ b/gcc/optabs-tree.cc > @@ -29,6 +29,7 @@ along with GCC; see the file COPYING3. If not see > #include "optabs.h" > #include "optabs-tree.h" > #include "stor-layout.h" > +#include "internal-fn.h" > > /* Return the optab used for computing the operation given by the tree code, > CODE and the tree EXP. This function is not always usable (for example, > it > @@ -552,24 +553,38 @@ target_supports_op_p (tree type, enum tree_code code, > or mask_len_{load,store}. > This helper function checks whether target supports masked > load/store and return corresponding IFN in the last argument > - (IFN_MASK_{LOAD,STORE} or IFN_MASK_LEN_{LOAD,STORE}). */ > + (IFN_MASK_{LOAD,STORE} or IFN_MASK_LEN_{LOAD,STORE}). > + If there is support and ELSVALS is nonzero add the possible else values > + to the vector it points to. */ > > -static bool > +bool > target_supports_mask_load_store_p (machine_mode mode, machine_mode mask_mode, > - bool is_load, internal_fn *ifn) > + bool is_load, internal_fn *ifn, > + auto_vec<int> *elsvals) > { > optab op = is_load ? maskload_optab : maskstore_optab; > optab len_op = is_load ? 
mask_len_load_optab : mask_len_store_optab; > - if (convert_optab_handler (op, mode, mask_mode) != CODE_FOR_nothing) > + enum insn_code icode; > + if ((icode = convert_optab_handler (op, mode, mask_mode)) > + != CODE_FOR_nothing) > { > if (ifn) > *ifn = is_load ? IFN_MASK_LOAD : IFN_MASK_STORE; > + if (elsvals) > + get_supported_else_vals (icode, > + internal_fn_else_index (IFN_MASK_LOAD), > + *elsvals); > return true; > } > - else if (convert_optab_handler (len_op, mode, mask_mode) != > CODE_FOR_nothing) > + else if ((icode = convert_optab_handler (len_op, mode, mask_mode)) > + != CODE_FOR_nothing) > { > if (ifn) > *ifn = is_load ? IFN_MASK_LEN_LOAD : IFN_MASK_LEN_STORE; > + if (elsvals) > + get_supported_else_vals (icode, > + internal_fn_else_index (IFN_MASK_LEN_LOAD), > + *elsvals); > return true; > } > return false; > @@ -584,13 +599,15 @@ bool > can_vec_mask_load_store_p (machine_mode mode, > machine_mode mask_mode, > bool is_load, > - internal_fn *ifn) > + internal_fn *ifn, > + auto_vec<int> *elsvals) > { > machine_mode vmode; > > /* If mode is vector mode, check it directly. */ > if (VECTOR_MODE_P (mode)) > - return target_supports_mask_load_store_p (mode, mask_mode, is_load, ifn); > + return target_supports_mask_load_store_p (mode, mask_mode, is_load, ifn, > + elsvals); > > /* Otherwise, return true if there is some vector mode with > the mask load/store supported. 
*/ > @@ -604,7 +621,8 @@ can_vec_mask_load_store_p (machine_mode mode, > vmode = targetm.vectorize.preferred_simd_mode (smode); > if (VECTOR_MODE_P (vmode) > && targetm.vectorize.get_mask_mode (vmode).exists (&mask_mode) > - && target_supports_mask_load_store_p (vmode, mask_mode, is_load, ifn)) > + && target_supports_mask_load_store_p (vmode, mask_mode, is_load, ifn, > + elsvals)) > return true; > > auto_vector_modes vector_modes; > @@ -612,7 +630,8 @@ can_vec_mask_load_store_p (machine_mode mode, > for (machine_mode base_mode : vector_modes) > if (related_vector_mode (base_mode, smode).exists (&vmode) > && targetm.vectorize.get_mask_mode (vmode).exists (&mask_mode) > - && target_supports_mask_load_store_p (vmode, mask_mode, is_load, ifn)) > + && target_supports_mask_load_store_p (vmode, mask_mode, is_load, ifn, > + elsvals)) > return true; > return false; > } > @@ -622,11 +641,13 @@ can_vec_mask_load_store_p (machine_mode mode, > or mask_len_{load,store}. > This helper function checks whether target supports len > load/store and return corresponding IFN in the last argument > - (IFN_LEN_{LOAD,STORE} or IFN_MASK_LEN_{LOAD,STORE}). */ > + (IFN_LEN_{LOAD,STORE} or IFN_MASK_LEN_{LOAD,STORE}). > + If there is support and ELSVALS is nonzero add the possible else values > + to the vector it points to. */ > > static bool > target_supports_len_load_store_p (machine_mode mode, bool is_load, > - internal_fn *ifn) > + internal_fn *ifn, auto_vec<int> *elsvals) > { > optab op = is_load ? len_load_optab : len_store_optab; > optab masked_op = is_load ? 
mask_len_load_optab : mask_len_store_optab; > @@ -638,11 +659,17 @@ target_supports_len_load_store_p (machine_mode mode, > bool is_load, > return true; > } > machine_mode mask_mode; > + enum insn_code icode; > if (targetm.vectorize.get_mask_mode (mode).exists (&mask_mode) > - && convert_optab_handler (masked_op, mode, mask_mode) != > CODE_FOR_nothing) > + && ((icode = convert_optab_handler (masked_op, mode, mask_mode)) > + != CODE_FOR_nothing)) > { > if (ifn) > *ifn = is_load ? IFN_MASK_LEN_LOAD : IFN_MASK_LEN_STORE; > + if (elsvals) > + get_supported_else_vals (icode, > + internal_fn_else_index (IFN_MASK_LEN_LOAD), > + *elsvals); > return true; > } > return false; > @@ -656,22 +683,25 @@ target_supports_len_load_store_p (machine_mode mode, > bool is_load, > VnQI to wrap the other supportable same size vector modes. > An additional output in the last argument which is the IFN pointer. > We set IFN as LEN_{LOAD,STORE} or MASK_LEN_{LOAD,STORE} according > - which optab is supported in the target. */ > + which optab is supported in the target. > + If there is support and ELSVALS is nonzero add the possible else values > + to the vector it points to. */ > > opt_machine_mode > -get_len_load_store_mode (machine_mode mode, bool is_load, internal_fn *ifn) > +get_len_load_store_mode (machine_mode mode, bool is_load, internal_fn *ifn, > + auto_vec<int> *elsvals) > { > gcc_assert (VECTOR_MODE_P (mode)); > > /* Check if length in lanes supported for this mode directly. */ > - if (target_supports_len_load_store_p (mode, is_load, ifn)) > + if (target_supports_len_load_store_p (mode, is_load, ifn, elsvals)) > return mode; > > /* Check if length in bytes supported for same vector size VnQI. 
*/ > machine_mode vmode; > poly_uint64 nunits = GET_MODE_SIZE (mode); > if (related_vector_mode (mode, QImode, nunits).exists (&vmode) > - && target_supports_len_load_store_p (vmode, is_load, ifn)) > + && target_supports_len_load_store_p (vmode, is_load, ifn, elsvals)) > return vmode; > > return opt_machine_mode (); > diff --git a/gcc/optabs-tree.h b/gcc/optabs-tree.h > index f2b49991462..390954bf998 100644 > --- a/gcc/optabs-tree.h > +++ b/gcc/optabs-tree.h > @@ -47,9 +47,13 @@ bool expand_vec_cond_expr_p (tree, tree, enum tree_code); > void init_tree_optimization_optabs (tree); > bool target_supports_op_p (tree, enum tree_code, > enum optab_subtype = optab_default); > +bool target_supports_mask_load_store_p (machine_mode, machine_mode, > + bool, internal_fn *, auto_vec<int> *); > bool can_vec_mask_load_store_p (machine_mode, machine_mode, bool, > - internal_fn * = nullptr); > + internal_fn * = nullptr, > + auto_vec<int> * = nullptr); > opt_machine_mode get_len_load_store_mode (machine_mode, bool, > - internal_fn * = nullptr); > + internal_fn * = nullptr, > + auto_vec<int> * = nullptr); > > #endif > diff --git a/gcc/tree-vect-data-refs.cc b/gcc/tree-vect-data-refs.cc > index 202af7a8952..d9f608dd2c0 100644 > --- a/gcc/tree-vect-data-refs.cc > +++ b/gcc/tree-vect-data-refs.cc > @@ -55,13 +55,18 @@ along with GCC; see the file COPYING3. If not see > #include "vec-perm-indices.h" > #include "internal-fn.h" > #include "gimple-fold.h" > +#include "optabs-query.h" > > /* Return true if load- or store-lanes optab OPTAB is implemented for > - COUNT vectors of type VECTYPE. NAME is the name of OPTAB. */ > + COUNT vectors of type VECTYPE. NAME is the name of OPTAB. > + > + If it is implemented and ELSVALS is nonzero add the possible else values > + to the vector it points to. 
*/ > > static bool > vect_lanes_optab_supported_p (const char *name, convert_optab optab, > - tree vectype, unsigned HOST_WIDE_INT count) > + tree vectype, unsigned HOST_WIDE_INT count, > + auto_vec<int> *elsvals = nullptr) > { > machine_mode mode, array_mode; > bool limit_p; > @@ -81,7 +86,9 @@ vect_lanes_optab_supported_p (const char *name, > convert_optab optab, > } > } > > - if (convert_optab_handler (optab, array_mode, mode) == CODE_FOR_nothing) > + enum insn_code icode; > + if ((icode = convert_optab_handler (optab, array_mode, mode)) > + == CODE_FOR_nothing) > { > if (dump_enabled_p ()) > dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, > @@ -92,8 +99,13 @@ vect_lanes_optab_supported_p (const char *name, > convert_optab optab, > > if (dump_enabled_p ()) > dump_printf_loc (MSG_NOTE, vect_location, > - "can use %s<%s><%s>\n", name, GET_MODE_NAME > (array_mode), > - GET_MODE_NAME (mode)); > + "can use %s<%s><%s>\n", name, GET_MODE_NAME (array_mode), > + GET_MODE_NAME (mode)); > + > + if (elsvals) > + get_supported_else_vals (icode, > + internal_fn_else_index (IFN_MASK_LEN_LOAD_LANES), > + *elsvals); > > return true; > } > @@ -4177,13 +4189,15 @@ vect_prune_runtime_alias_test_list (loop_vec_info > loop_vinfo) > be multiplied *after* it has been converted to address width. > > Return true if the function is supported, storing the function id in > - *IFN_OUT and the vector type for the offset in *OFFSET_VECTYPE_OUT. */ > + *IFN_OUT and the vector type for the offset in *OFFSET_VECTYPE_OUT. > + > + If we can use gather and add the possible else values to ELSVALS. */ > > bool > vect_gather_scatter_fn_p (vec_info *vinfo, bool read_p, bool masked_p, > tree vectype, tree memory_type, tree offset_type, > int scale, internal_fn *ifn_out, > - tree *offset_vectype_out) > + tree *offset_vectype_out, auto_vec<int> *elsvals) Do not use auto_vec<int> * either, vec<int> * in this case (and elsewhere). 
> { > unsigned int memory_bits = tree_to_uhwi (TYPE_SIZE (memory_type)); > unsigned int element_bits = vector_element_bits (vectype); > @@ -4221,7 +4235,8 @@ vect_gather_scatter_fn_p (vec_info *vinfo, bool read_p, > bool masked_p, > > /* Test whether the target supports this combination. */ > if (internal_gather_scatter_fn_supported_p (ifn, vectype, memory_type, > - offset_vectype, scale)) > + offset_vectype, scale, > + elsvals)) > { > *ifn_out = ifn; > *offset_vectype_out = offset_vectype; > @@ -4231,7 +4246,7 @@ vect_gather_scatter_fn_p (vec_info *vinfo, bool read_p, > bool masked_p, > && internal_gather_scatter_fn_supported_p (alt_ifn, vectype, > memory_type, > offset_vectype, > - scale)) > + scale, elsvals)) > { > *ifn_out = alt_ifn; > *offset_vectype_out = offset_vectype; > @@ -4239,7 +4254,8 @@ vect_gather_scatter_fn_p (vec_info *vinfo, bool read_p, > bool masked_p, > } > else if (internal_gather_scatter_fn_supported_p (alt_ifn2, vectype, > memory_type, > - offset_vectype, scale)) > + offset_vectype, scale, > + elsvals)) > { > *ifn_out = alt_ifn2; > *offset_vectype_out = offset_vectype; > @@ -4278,11 +4294,13 @@ vect_describe_gather_scatter_call (stmt_vec_info > stmt_info, > } > > /* Return true if a non-affine read or write in STMT_INFO is suitable for a > - gather load or scatter store. Describe the operation in *INFO if so. */ > + gather load or scatter store. Describe the operation in *INFO if so. > + If it is suitable and ELSVALS is nonzero add the supported else values > + to the vector it points to. 
*/ > > bool > vect_check_gather_scatter (stmt_vec_info stmt_info, loop_vec_info loop_vinfo, > - gather_scatter_info *info) > + gather_scatter_info *info, auto_vec<int> *elsvals) > { > HOST_WIDE_INT scale = 1; > poly_int64 pbitpos, pbitsize; > @@ -4306,6 +4324,16 @@ vect_check_gather_scatter (stmt_vec_info stmt_info, > loop_vec_info loop_vinfo, > ifn = gimple_call_internal_fn (call); > if (internal_gather_scatter_fn_p (ifn)) > { > + /* Extract the else value from a masked-load call. This is > + necessary when we created a gather_scatter pattern from a > + maskload. It is a bit cumbersome to basically create the > + same else value three times but it's probably acceptable until > + tree-ifcvt goes away. */ > + if (internal_fn_mask_index (ifn) >= 0 && elsvals) > + { > + tree els = gimple_call_arg (call, internal_fn_else_index (ifn)); > + elsvals->safe_push (vect_get_else_val_from_tree (els)); > + } > vect_describe_gather_scatter_call (stmt_info, info); > return true; > } > @@ -4315,7 +4343,8 @@ vect_check_gather_scatter (stmt_vec_info stmt_info, > loop_vec_info loop_vinfo, > /* True if we should aim to use internal functions rather than > built-in functions. */ > bool use_ifn_p = (DR_IS_READ (dr) > - ? supports_vec_gather_load_p (TYPE_MODE (vectype)) > + ? 
supports_vec_gather_load_p (TYPE_MODE (vectype), > + elsvals) > : supports_vec_scatter_store_p (TYPE_MODE (vectype))); > > base = DR_REF (dr); > @@ -4472,12 +4501,14 @@ vect_check_gather_scatter (stmt_vec_info stmt_info, > loop_vec_info loop_vinfo, > masked_p, vectype, memory_type, > signed_char_type_node, > new_scale, &ifn, > - &offset_vectype) > + &offset_vectype, > + elsvals) > && !vect_gather_scatter_fn_p (loop_vinfo, DR_IS_READ (dr), > masked_p, vectype, memory_type, > unsigned_char_type_node, > new_scale, &ifn, > - &offset_vectype)) > + &offset_vectype, > + elsvals)) > break; > scale = new_scale; > off = op0; > @@ -4500,7 +4531,7 @@ vect_check_gather_scatter (stmt_vec_info stmt_info, > loop_vec_info loop_vinfo, > && vect_gather_scatter_fn_p (loop_vinfo, DR_IS_READ (dr), > masked_p, vectype, memory_type, > TREE_TYPE (off), scale, &ifn, > - &offset_vectype)) > + &offset_vectype, elsvals)) > break; > > if (TYPE_PRECISION (TREE_TYPE (op0)) > @@ -4554,7 +4585,7 @@ vect_check_gather_scatter (stmt_vec_info stmt_info, > loop_vec_info loop_vinfo, > { > if (!vect_gather_scatter_fn_p (loop_vinfo, DR_IS_READ (dr), masked_p, > vectype, memory_type, offtype, scale, > - &ifn, &offset_vectype)) > + &ifn, &offset_vectype, elsvals)) > ifn = IFN_LAST; > decl = NULL_TREE; > } > @@ -6391,27 +6422,29 @@ vect_grouped_load_supported (tree vectype, bool > single_element_p, > } > > /* Return FN if vec_{masked_,mask_len_}load_lanes is available for COUNT > vectors > - of type VECTYPE. MASKED_P says whether the masked form is needed. */ > + of type VECTYPE. MASKED_P says whether the masked form is needed. > + If it is available and ELSVALS is nonzero add the possible else values > + to the vector it points to. 
*/ > > internal_fn > vect_load_lanes_supported (tree vectype, unsigned HOST_WIDE_INT count, > - bool masked_p) > + bool masked_p, auto_vec<int> *elsvals) > { > if (vect_lanes_optab_supported_p ("vec_mask_len_load_lanes", > vec_mask_len_load_lanes_optab, vectype, > - count)) > + count, elsvals)) > return IFN_MASK_LEN_LOAD_LANES; > else if (masked_p) > { > if (vect_lanes_optab_supported_p ("vec_mask_load_lanes", > vec_mask_load_lanes_optab, vectype, > - count)) > + count, elsvals)) > return IFN_MASK_LOAD_LANES; > } > else > { > if (vect_lanes_optab_supported_p ("vec_load_lanes", > vec_load_lanes_optab, > - vectype, count)) > + vectype, count, elsvals)) > return IFN_LOAD_LANES; > } > return IFN_LAST; > diff --git a/gcc/tree-vect-patterns.cc b/gcc/tree-vect-patterns.cc > index 746f100a084..184d150f96d 100644 > --- a/gcc/tree-vect-patterns.cc > +++ b/gcc/tree-vect-patterns.cc > @@ -6630,7 +6630,8 @@ vect_recog_gather_scatter_pattern (vec_info *vinfo, > /* Make sure that the target supports an appropriate internal > function for the gather/scatter operation. */ > gather_scatter_info gs_info; > - if (!vect_check_gather_scatter (stmt_info, loop_vinfo, &gs_info) > + auto_vec<int> elsvals; > + if (!vect_check_gather_scatter (stmt_info, loop_vinfo, &gs_info, &elsvals) > || gs_info.ifn == IFN_LAST) > return NULL; > > @@ -6653,20 +6654,27 @@ vect_recog_gather_scatter_pattern (vec_info *vinfo, > tree offset = vect_add_conversion_to_pattern (vinfo, offset_type, > gs_info.offset, stmt_info); > > + tree vec_els = NULL_TREE; > /* Build the new pattern statement. 
*/ > tree scale = size_int (gs_info.scale); > gcall *pattern_stmt; > + tree load_lhs; > if (DR_IS_READ (dr)) > { > tree zero = build_zero_cst (gs_info.element_type); > if (mask != NULL) > - pattern_stmt = gimple_build_call_internal (gs_info.ifn, 5, base, > - offset, scale, zero, mask); > + { > + int elsval = *elsvals.begin (); > + vec_els = vect_get_mask_load_else (elsval, TREE_TYPE (gs_vectype)); > + pattern_stmt = gimple_build_call_internal (gs_info.ifn, 6, base, > + offset, scale, zero, mask, > + vec_els); > + } > else > pattern_stmt = gimple_build_call_internal (gs_info.ifn, 4, base, > offset, scale, zero); > - tree load_lhs = vect_recog_temp_ssa_var (gs_info.element_type, NULL); > - gimple_call_set_lhs (pattern_stmt, load_lhs); > + load_lhs = vect_recog_temp_ssa_var (gs_info.element_type, NULL); > + gimple_set_lhs (pattern_stmt, load_lhs); > } > else > { > diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc > index 8727246c27a..d161f28d62c 100644 > --- a/gcc/tree-vect-slp.cc > +++ b/gcc/tree-vect-slp.cc > @@ -511,13 +511,13 @@ static const int cond_expr_maps[3][5] = { > static const int no_arg_map[] = { 0 }; > static const int arg0_map[] = { 1, 0 }; > static const int arg1_map[] = { 1, 1 }; > -static const int arg2_map[] = { 1, 2 }; > -static const int arg1_arg4_map[] = { 2, 1, 4 }; > +static const int arg2_arg3_map[] = { 2, 2, 3 }; > +static const int arg1_arg4_arg5_map[] = { 3, 1, 4, 5 }; > static const int arg3_arg2_map[] = { 2, 3, 2 }; > static const int op1_op0_map[] = { 2, 1, 0 }; > static const int off_map[] = { 1, -3 }; > static const int off_op0_map[] = { 2, -3, 0 }; > -static const int off_arg2_map[] = { 2, -3, 2 }; > +static const int off_arg2_arg3_map[] = { 3, -3, 2, 3 }; > static const int off_arg3_arg2_map[] = { 3, -3, 3, 2 }; > static const int mask_call_maps[6][7] = { > { 1, 1, }, > @@ -564,14 +564,14 @@ vect_get_operand_map (const gimple *stmt, bool > gather_scatter_p = false, > switch (gimple_call_internal_fn (call)) > { > case 
IFN_MASK_LOAD: > - return gather_scatter_p ? off_arg2_map : arg2_map; > + return gather_scatter_p ? off_arg2_arg3_map : arg2_arg3_map; > > case IFN_GATHER_LOAD: > return arg1_map; > > case IFN_MASK_GATHER_LOAD: > case IFN_MASK_LEN_GATHER_LOAD: > - return arg1_arg4_map; > + return arg1_arg4_arg5_map; > > case IFN_MASK_STORE: > return gather_scatter_p ? off_arg3_arg2_map : arg3_arg2_map; > @@ -7775,6 +7775,18 @@ vect_slp_analyze_node_operations (vec_info *vinfo, > slp_tree node, > tree vector_type = SLP_TREE_VECTYPE (child); > if (!vector_type) > { > + /* Masked loads can have an undefined (default SSA definition) > + else operand. We do not need to cost it. */ > + vec<tree> ops = SLP_TREE_SCALAR_OPS (child); > + if ((STMT_VINFO_TYPE (SLP_TREE_REPRESENTATIVE (node)) > + == load_vec_info_type) > + && ((ops.length () && > + TREE_CODE (ops[0]) == SSA_NAME > + && SSA_NAME_IS_DEFAULT_DEF (ops[0]) > + && VAR_P (SSA_NAME_VAR (ops[0]))) > + || SLP_TREE_DEF_TYPE (child) == vect_constant_def)) > + continue; > + > /* For shifts with a scalar argument we don't need > to cost or code-generate anything. > ??? Represent this more explicitely. */ > diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc > index 9b14b96cb5a..74a437735a5 100644 > --- a/gcc/tree-vect-stmts.cc > +++ b/gcc/tree-vect-stmts.cc > @@ -58,6 +58,7 @@ along with GCC; see the file COPYING3. If not see > #include "regs.h" > #include "attribs.h" > #include "optabs-libfuncs.h" > +#include "tree-dfa.h" > > /* For lang_hooks.types.type_for_mode. 
*/ > #include "langhooks.h" > @@ -469,6 +470,10 @@ exist_non_indexing_operands_for_use_p (tree use, > stmt_vec_info stmt_info) > if (mask_index >= 0 > && use == gimple_call_arg (call, mask_index)) > return true; > + int els_index = internal_fn_else_index (ifn); > + if (els_index >= 0 > + && use == gimple_call_arg (call, els_index)) > + return true; > int stored_value_index = internal_fn_stored_value_index (ifn); > if (stored_value_index >= 0 > && use == gimple_call_arg (call, stored_value_index)) > @@ -1280,7 +1285,17 @@ vect_get_vec_defs_for_operand (vec_info *vinfo, > stmt_vec_info stmt_vinfo, > vector_type = get_vectype_for_scalar_type (loop_vinfo, TREE_TYPE (op)); > > gcc_assert (vector_type); > - tree vop = vect_init_vector (vinfo, stmt_vinfo, op, vector_type, NULL); > + /* A masked load can have a default SSA definition as else operand. > + We should "vectorize" this instead of creating a duplicate from the > + scalar default. */ > + tree vop; > + if (TREE_CODE (op) == SSA_NAME > + && SSA_NAME_IS_DEFAULT_DEF (op) > + && VAR_P (SSA_NAME_VAR (op))) > + vop = get_or_create_ssa_default_def (cfun, > + create_tmp_var (vector_type)); > + else > + vop = vect_init_vector (vinfo, stmt_vinfo, op, vector_type, NULL); > while (ncopies--) > vec_oprnds->quick_push (vop); > } > @@ -1492,7 +1507,10 @@ static tree permute_vec_elements (vec_info *, tree, > tree, tree, stmt_vec_info, > > Clear LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P if a loop using partial > vectors is not supported, otherwise record the required rgroup control > - types. */ > + types. > + > + If partial vectors can be used and ELSVALS is nonzero the supported > + else values will be added to the vector ELSVALS points to. 
*/ > > static void > check_load_store_for_partial_vectors (loop_vec_info loop_vinfo, tree vectype, > @@ -1502,7 +1520,8 @@ check_load_store_for_partial_vectors (loop_vec_info > loop_vinfo, tree vectype, > vect_memory_access_type > memory_access_type, > gather_scatter_info *gs_info, > - tree scalar_mask) > + tree scalar_mask, > + auto_vec<int> *elsvals = nullptr) > { > /* Invariant loads need no special support. */ > if (memory_access_type == VMAT_INVARIANT) > @@ -1518,7 +1537,8 @@ check_load_store_for_partial_vectors (loop_vec_info > loop_vinfo, tree vectype, > if (slp_node) > nvectors /= group_size; > internal_fn ifn > - = (is_load ? vect_load_lanes_supported (vectype, group_size, true) > + = (is_load ? vect_load_lanes_supported (vectype, group_size, true, > + elsvals) > : vect_store_lanes_supported (vectype, group_size, true)); > if (ifn == IFN_MASK_LEN_LOAD_LANES || ifn == IFN_MASK_LEN_STORE_LANES) > vect_record_loop_len (loop_vinfo, lens, nvectors, vectype, 1); > @@ -1548,12 +1568,14 @@ check_load_store_for_partial_vectors (loop_vec_info > loop_vinfo, tree vectype, > if (internal_gather_scatter_fn_supported_p (len_ifn, vectype, > gs_info->memory_type, > gs_info->offset_vectype, > - gs_info->scale)) > + gs_info->scale, > + elsvals)) > vect_record_loop_len (loop_vinfo, lens, nvectors, vectype, 1); > else if (internal_gather_scatter_fn_supported_p (ifn, vectype, > gs_info->memory_type, > gs_info->offset_vectype, > - gs_info->scale)) > + gs_info->scale, > + elsvals)) > vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype, > scalar_mask); > else > @@ -1607,7 +1629,8 @@ check_load_store_for_partial_vectors (loop_vec_info > loop_vinfo, tree vectype, > machine_mode mask_mode; > machine_mode vmode; > bool using_partial_vectors_p = false; > - if (get_len_load_store_mode (vecmode, is_load).exists (&vmode)) > + if (get_len_load_store_mode > + (vecmode, is_load, nullptr, elsvals).exists (&vmode)) > { > nvectors = group_memory_nvectors (group_size * vf, nunits); > 
unsigned factor = (vecmode == vmode) ? 1 : GET_MODE_UNIT_SIZE > (vecmode); > @@ -1615,7 +1638,8 @@ check_load_store_for_partial_vectors (loop_vec_info > loop_vinfo, tree vectype, > using_partial_vectors_p = true; > } > else if (targetm.vectorize.get_mask_mode (vecmode).exists (&mask_mode) > - && can_vec_mask_load_store_p (vecmode, mask_mode, is_load)) > + && can_vec_mask_load_store_p (vecmode, mask_mode, is_load, NULL, > + elsvals)) > { > nvectors = group_memory_nvectors (group_size * vf, nunits); > vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype, > scalar_mask); > @@ -1672,12 +1696,16 @@ prepare_vec_mask (loop_vec_info loop_vinfo, tree > mask_type, tree loop_mask, > without loss of precision, where X is STMT_INFO's DR_STEP. > > Return true if this is possible, describing the gather load or scatter > - store in GS_INFO. MASKED_P is true if the load or store is conditional. > */ > + store in GS_INFO. MASKED_P is true if the load or store is conditional. > + > + If we can use gather/scatter and ELSVALS is nonzero the supported > + else values will be added to the vector ELSVALS points to. */ > > static bool > vect_truncate_gather_scatter_offset (stmt_vec_info stmt_info, > loop_vec_info loop_vinfo, bool masked_p, > - gather_scatter_info *gs_info) > + gather_scatter_info *gs_info, > + auto_vec<int> *elsvals) > { > dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info); > data_reference *dr = dr_info->dr; > @@ -1734,7 +1762,8 @@ vect_truncate_gather_scatter_offset (stmt_vec_info > stmt_info, > tree memory_type = TREE_TYPE (DR_REF (dr)); > if (!vect_gather_scatter_fn_p (loop_vinfo, DR_IS_READ (dr), masked_p, > vectype, memory_type, offset_type, scale, > - &gs_info->ifn, &gs_info->offset_vectype) > + &gs_info->ifn, &gs_info->offset_vectype, > + elsvals) > || gs_info->ifn == IFN_LAST) > continue; > > @@ -1762,17 +1791,21 @@ vect_truncate_gather_scatter_offset (stmt_vec_info > stmt_info, > vectorize STMT_INFO, which is a grouped or strided load or store. 
> MASKED_P is true if load or store is conditional. When returning > true, fill in GS_INFO with the information required to perform the > - operation. */ > + operation. > + > + If we can use gather/scatter and ELSVALS is nonzero the supported > + else values will be added to the vector ELSVALS points to. */ > > static bool > vect_use_strided_gather_scatters_p (stmt_vec_info stmt_info, > loop_vec_info loop_vinfo, bool masked_p, > - gather_scatter_info *gs_info) > + gather_scatter_info *gs_info, > + auto_vec<int> *elsvals) > { > - if (!vect_check_gather_scatter (stmt_info, loop_vinfo, gs_info) > + if (!vect_check_gather_scatter (stmt_info, loop_vinfo, gs_info, elsvals) > || gs_info->ifn == IFN_LAST) > return vect_truncate_gather_scatter_offset (stmt_info, loop_vinfo, > - masked_p, gs_info); > + masked_p, gs_info, elsvals); > > tree old_offset_type = TREE_TYPE (gs_info->offset); > tree new_offset_type = TREE_TYPE (gs_info->offset_vectype); > @@ -1985,7 +2018,8 @@ get_group_load_store_type (vec_info *vinfo, > stmt_vec_info stmt_info, > dr_alignment_support *alignment_support_scheme, > int *misalignment, > gather_scatter_info *gs_info, > - internal_fn *lanes_ifn) > + internal_fn *lanes_ifn, > + auto_vec<int> *elsvals) > { > loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo); > class loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL; > @@ -2074,7 +2108,8 @@ get_group_load_store_type (vec_info *vinfo, > stmt_vec_info stmt_info, > else if (slp_node->ldst_lanes > && (*lanes_ifn > = (vls_type == VLS_LOAD > - ? vect_load_lanes_supported (vectype, group_size, > masked_p) > + ? vect_load_lanes_supported (vectype, group_size, > + masked_p, elsvals) > : vect_store_lanes_supported (vectype, group_size, > masked_p))) != > IFN_LAST) > *memory_access_type = VMAT_LOAD_STORE_LANES; > @@ -2242,7 +2277,8 @@ get_group_load_store_type (vec_info *vinfo, > stmt_vec_info stmt_info, > /* Otherwise try using LOAD/STORE_LANES. */ > *lanes_ifn > = vls_type == VLS_LOAD > - ? 
vect_load_lanes_supported (vectype, group_size, masked_p) > + ? vect_load_lanes_supported (vectype, group_size, masked_p, > + elsvals) > : vect_store_lanes_supported (vectype, group_size, > masked_p); > if (*lanes_ifn != IFN_LAST) > @@ -2276,7 +2312,7 @@ get_group_load_store_type (vec_info *vinfo, > stmt_vec_info stmt_info, > && single_element_p > && loop_vinfo > && vect_use_strided_gather_scatters_p (stmt_info, loop_vinfo, > - masked_p, gs_info)) > + masked_p, gs_info, elsvals)) > *memory_access_type = VMAT_GATHER_SCATTER; > > if (*memory_access_type == VMAT_GATHER_SCATTER > @@ -2338,7 +2374,10 @@ get_group_load_store_type (vec_info *vinfo, > stmt_vec_info stmt_info, > SLP says whether we're performing SLP rather than loop vectorization. > MASKED_P is true if the statement is conditional on a vectorized mask. > VECTYPE is the vector type that the vectorized statements will use. > - NCOPIES is the number of vector statements that will be needed. */ > + NCOPIES is the number of vector statements that will be needed. > + > + If ELSVALS is nonzero the supported else values will be added to the > + vector ELSVALS points to. 
*/ > > static bool > get_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info, > @@ -2350,7 +2389,8 @@ get_load_store_type (vec_info *vinfo, stmt_vec_info > stmt_info, > dr_alignment_support *alignment_support_scheme, > int *misalignment, > gather_scatter_info *gs_info, > - internal_fn *lanes_ifn) > + internal_fn *lanes_ifn, > + auto_vec<int> *elsvals = nullptr) > { > loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo); > poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype); > @@ -2359,7 +2399,8 @@ get_load_store_type (vec_info *vinfo, stmt_vec_info > stmt_info, > if (STMT_VINFO_GATHER_SCATTER_P (stmt_info)) > { > *memory_access_type = VMAT_GATHER_SCATTER; > - if (!vect_check_gather_scatter (stmt_info, loop_vinfo, gs_info)) > + if (!vect_check_gather_scatter (stmt_info, loop_vinfo, gs_info, > + elsvals)) > gcc_unreachable (); > /* When using internal functions, we rely on pattern recognition > to convert the type of the offset to the type that the target > @@ -2413,7 +2454,8 @@ get_load_store_type (vec_info *vinfo, stmt_vec_info > stmt_info, > masked_p, > vls_type, memory_access_type, poffset, > alignment_support_scheme, > - misalignment, gs_info, lanes_ifn)) > + misalignment, gs_info, lanes_ifn, > + elsvals)) > return false; > } > else if (STMT_VINFO_STRIDED_P (stmt_info)) > @@ -2421,7 +2463,7 @@ get_load_store_type (vec_info *vinfo, stmt_vec_info > stmt_info, > gcc_assert (!slp_node); > if (loop_vinfo > && vect_use_strided_gather_scatters_p (stmt_info, loop_vinfo, > - masked_p, gs_info)) > + masked_p, gs_info, elsvals)) > *memory_access_type = VMAT_GATHER_SCATTER; > else > *memory_access_type = VMAT_ELEMENTWISE; > @@ -2689,6 +2731,53 @@ vect_build_zero_merge_argument (vec_info *vinfo, > return vect_init_vector (vinfo, stmt_info, merge, vectype, NULL); > } > > +/* Return the supported else value for a masked load internal function IFN. > + The vector type is given in VECTYPE and the mask type in VECTYPE2. 
> + TYPE specifies the type of the returned else value. */ > + > +tree > +vect_get_mask_load_else (int elsval, tree type) > +{ > + tree els; > + if (elsval == MASK_LOAD_ELSE_UNDEFINED) > + { > + tree tmp = create_tmp_var (type); > + /* No need to warn about anything. */ > + TREE_NO_WARNING (tmp) = 1; > + els = get_or_create_ssa_default_def (cfun, tmp); > + } > + else if (elsval == MASK_LOAD_ELSE_M1) > + els = build_minus_one_cst (type); > + else if (elsval == MASK_LOAD_ELSE_ZERO) > + els = build_zero_cst (type); > + else > + __builtin_unreachable (); > + > + return els; > +} > + > +/* Return the integer that the tree else operand ELS represents. > + This performs the inverse of vect_get_mask_load_else. Refer to > + vect_check_gather_scatter for its usage rationale. */ > + > +int > +vect_get_else_val_from_tree (tree els) > +{ > + if (TREE_CODE (els) == SSA_NAME > + && SSA_NAME_IS_DEFAULT_DEF (els) > + && TREE_CODE (SSA_NAME_VAR (els)) == VAR_DECL) > + return MASK_LOAD_ELSE_UNDEFINED; > + else > + { > + if (zerop (els)) > + return MASK_LOAD_ELSE_ZERO; > + else if (integer_minus_onep (els)) > + return MASK_LOAD_ELSE_M1; > + else > + __builtin_unreachable (); > + } > +} > + > /* Build a gather load call while vectorizing STMT_INFO. Insert new > instructions before GSI and add them to VEC_STMT. GS_INFO describes > the gather load operation. 
If the load is conditional, MASK is the > @@ -2770,8 +2859,14 @@ vect_build_one_gather_load_call (vec_info *vinfo, > stmt_vec_info stmt_info, > } > > tree scale = build_int_cst (scaletype, gs_info->scale); > - gimple *new_stmt = gimple_build_call (gs_info->decl, 5, src_op, ptr, op, > - mask_op, scale); > + gimple *new_stmt; > + > + if (!mask) > + new_stmt = gimple_build_call (gs_info->decl, 5, src_op, ptr, op, > + mask_op, scale); > + else > + new_stmt = gimple_build_call (gs_info->decl, 5, src_op, ptr, op, > + mask_op, scale); > > if (!useless_type_conversion_p (vectype, rettype)) > { > @@ -9967,6 +10062,7 @@ vectorizable_load (vec_info *vinfo, > gather_scatter_info gs_info; > tree ref_type; > enum vect_def_type mask_dt = vect_unknown_def_type; > + enum vect_def_type els_dt = vect_unknown_def_type; > > if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo) > return false; > @@ -9979,8 +10075,12 @@ vectorizable_load (vec_info *vinfo, > return false; > > tree mask = NULL_TREE, mask_vectype = NULL_TREE; > + tree els = NULL_TREE; tree els_vectype = NULL_TREE; > + > int mask_index = -1; > + int els_index = -1; > slp_tree slp_op = NULL; > + slp_tree els_op = NULL; > if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt)) > { > scalar_dest = gimple_assign_lhs (assign); > @@ -10020,6 +10120,15 @@ vectorizable_load (vec_info *vinfo, > && !vect_check_scalar_mask (vinfo, stmt_info, slp_node, mask_index, > &mask, &slp_op, &mask_dt, &mask_vectype)) > return false; > + > + els_index = internal_fn_else_index (ifn); > + if (els_index >= 0 && slp_node) > + els_index = vect_slp_child_index_for_operand > + (call, els_index, STMT_VINFO_GATHER_SCATTER_P (stmt_info)); > + if (els_index >= 0 > + && !vect_is_simple_use (vinfo, stmt_info, slp_node, els_index, > + &els, &els_op, &els_dt, &els_vectype)) > + return false; > } > > tree vectype = STMT_VINFO_VECTYPE (stmt_info); > @@ -10122,10 +10231,11 @@ vectorizable_load (vec_info *vinfo, > int misalignment; > poly_int64 poffset; > 
internal_fn lanes_ifn; > + auto_vec<int> elsvals; > if (!get_load_store_type (vinfo, stmt_info, vectype, slp_node, mask, > VLS_LOAD, > ncopies, &memory_access_type, &poffset, > &alignment_support_scheme, &misalignment, &gs_info, > - &lanes_ifn)) > + &lanes_ifn, &elsvals)) > return false; > > /* ??? The following checks should really be part of > @@ -10191,7 +10301,8 @@ vectorizable_load (vec_info *vinfo, > machine_mode vec_mode = TYPE_MODE (vectype); > if (!VECTOR_MODE_P (vec_mode) > || !can_vec_mask_load_store_p (vec_mode, > - TYPE_MODE (mask_vectype), true)) > + TYPE_MODE (mask_vectype), > + true, NULL, &elsvals)) > return false; > } > else if (memory_access_type != VMAT_LOAD_STORE_LANES > @@ -10260,6 +10371,16 @@ vectorizable_load (vec_info *vinfo, > > STMT_VINFO_TYPE (stmt_info) = load_vec_info_type; > } > + else > + { > + /* Here just get the else values. */ > + if (loop_vinfo > + && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo)) > + check_load_store_for_partial_vectors (loop_vinfo, vectype, slp_node, > + VLS_LOAD, group_size, > + memory_access_type, &gs_info, > + mask, &elsvals); > + } > > if (!slp) > gcc_assert (memory_access_type > @@ -10930,6 +11051,7 @@ vectorizable_load (vec_info *vinfo, > } > > tree vec_mask = NULL_TREE; > + tree vec_els = NULL_TREE; > if (memory_access_type == VMAT_LOAD_STORE_LANES) > { > gcc_assert (alignment_support_scheme == dr_aligned > @@ -11020,6 +11142,11 @@ vectorizable_load (vec_info *vinfo, > } > } > > + if (final_mask) > + vec_els = vect_get_mask_load_else > + (elsvals.contains (MASK_LOAD_ELSE_ZERO) > + ? MASK_LOAD_ELSE_ZERO : *elsvals.begin (), vectype); > + > gcall *call; > if (final_len && final_mask) > { > @@ -11028,9 +11155,10 @@ vectorizable_load (vec_info *vinfo, > VEC_MASK, LEN, BIAS). 
*/ > unsigned int align = TYPE_ALIGN (TREE_TYPE (vectype)); > tree alias_ptr = build_int_cst (ref_type, align); > - call = gimple_build_call_internal (IFN_MASK_LEN_LOAD_LANES, 5, > + call = gimple_build_call_internal (IFN_MASK_LEN_LOAD_LANES, 6, > dataref_ptr, alias_ptr, > - final_mask, final_len, bias); > + final_mask, vec_els, > + final_len, bias); > } > else if (final_mask) > { > @@ -11039,9 +11167,9 @@ vectorizable_load (vec_info *vinfo, > VEC_MASK). */ > unsigned int align = TYPE_ALIGN (TREE_TYPE (vectype)); > tree alias_ptr = build_int_cst (ref_type, align); > - call = gimple_build_call_internal (IFN_MASK_LOAD_LANES, 3, > + call = gimple_build_call_internal (IFN_MASK_LOAD_LANES, 4, > dataref_ptr, alias_ptr, > - final_mask); > + final_mask, vec_els); > } > else > { > @@ -11190,17 +11318,29 @@ vectorizable_load (vec_info *vinfo, > } > } > > + if (final_mask) > + vec_els = vect_get_mask_load_else > + (elsvals.contains (MASK_LOAD_ELSE_ZERO) > + ? MASK_LOAD_ELSE_ZERO : *elsvals.begin (), vectype); > + > gcall *call; > if (final_len && final_mask) > - call > - = gimple_build_call_internal (IFN_MASK_LEN_GATHER_LOAD, 7, > - dataref_ptr, vec_offset, > - scale, zero, final_mask, > - final_len, bias); > + { > + call > + = gimple_build_call_internal (IFN_MASK_LEN_GATHER_LOAD, > + 8, dataref_ptr, > + vec_offset, scale, zero, > + final_mask, vec_els, > + final_len, bias); > + } > else if (final_mask) > - call = gimple_build_call_internal (IFN_MASK_GATHER_LOAD, 5, > - dataref_ptr, vec_offset, > - scale, zero, final_mask); > + { > + call = gimple_build_call_internal (IFN_MASK_GATHER_LOAD, > + 6, dataref_ptr, > + vec_offset, scale, > + zero, final_mask, > + vec_els); > + } why add these unneeded braces? 
> else > call = gimple_build_call_internal (IFN_GATHER_LOAD, 4, > dataref_ptr, vec_offset, > @@ -11514,6 +11654,7 @@ vectorizable_load (vec_info *vinfo, > tree final_mask = NULL_TREE; > tree final_len = NULL_TREE; > tree bias = NULL_TREE; > + > if (!costing_p) > { > if (mask) > @@ -11566,7 +11707,8 @@ vectorizable_load (vec_info *vinfo, > if (loop_lens) > { > opt_machine_mode new_ovmode > - = get_len_load_store_mode (vmode, true, &partial_ifn); > + = get_len_load_store_mode (vmode, true, &partial_ifn, > + &elsvals); > new_vmode = new_ovmode.require (); > unsigned factor > = (new_ovmode == vmode) ? 1 : GET_MODE_UNIT_SIZE (vmode); > @@ -11578,7 +11720,7 @@ vectorizable_load (vec_info *vinfo, > { > if (!can_vec_mask_load_store_p ( > vmode, TYPE_MODE (TREE_TYPE (final_mask)), true, > - &partial_ifn)) > + &partial_ifn, &elsvals)) > gcc_unreachable (); > } > > @@ -11606,19 +11748,28 @@ vectorizable_load (vec_info *vinfo, > bias = build_int_cst (intQI_type_node, biasval); > } > > + tree vec_els; > + > if (final_len) > { > tree ptr = build_int_cst (ref_type, align * BITS_PER_UNIT); > gcall *call; > if (partial_ifn == IFN_MASK_LEN_LOAD) > - call = gimple_build_call_internal (IFN_MASK_LEN_LOAD, 5, > - dataref_ptr, ptr, > - final_mask, final_len, > - bias); > + { > + vec_els = vect_get_mask_load_else > + (elsvals.contains (MASK_LOAD_ELSE_ZERO) > + ? MASK_LOAD_ELSE_ZERO : *elsvals.begin (), vectype); > + call = gimple_build_call_internal (IFN_MASK_LEN_LOAD, > + 6, dataref_ptr, ptr, > + final_mask, vec_els, > + final_len, bias); > + } > else > - call = gimple_build_call_internal (IFN_LEN_LOAD, 4, > - dataref_ptr, ptr, > - final_len, bias); > + { > + call = gimple_build_call_internal (IFN_LEN_LOAD, 4, > + dataref_ptr, ptr, > + final_len, bias); > + } Likewise. Otherwise looks OK to me. Richard. 
> gimple_call_set_nothrow (call, true); > new_stmt = call; > data_ref = NULL_TREE; > @@ -11641,9 +11792,13 @@ vectorizable_load (vec_info *vinfo, > else if (final_mask) > { > tree ptr = build_int_cst (ref_type, align * BITS_PER_UNIT); > - gcall *call = gimple_build_call_internal (IFN_MASK_LOAD, 3, > + vec_els = vect_get_mask_load_else > + (elsvals.contains (MASK_LOAD_ELSE_ZERO) > + ? MASK_LOAD_ELSE_ZERO : *elsvals.begin (), vectype); > + gcall *call = gimple_build_call_internal (IFN_MASK_LOAD, 4, > dataref_ptr, ptr, > - final_mask); > + final_mask, > + vec_els); > gimple_call_set_nothrow (call, true); > new_stmt = call; > data_ref = NULL_TREE; > diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h > index b7f2708fec0..0b20c36a7fe 100644 > --- a/gcc/tree-vectorizer.h > +++ b/gcc/tree-vectorizer.h > @@ -2439,9 +2439,11 @@ extern bool vect_slp_analyze_instance_alignment > (vec_info *, slp_instance); > extern opt_result vect_analyze_data_ref_accesses (vec_info *, vec<int> *); > extern opt_result vect_prune_runtime_alias_test_list (loop_vec_info); > extern bool vect_gather_scatter_fn_p (vec_info *, bool, bool, tree, tree, > - tree, int, internal_fn *, tree *); > + tree, int, internal_fn *, tree *, > + auto_vec<int> * = nullptr); > extern bool vect_check_gather_scatter (stmt_vec_info, loop_vec_info, > - gather_scatter_info *); > + gather_scatter_info *, > + auto_vec<int> * = nullptr); > extern opt_result vect_find_stmt_data_reference (loop_p, gimple *, > vec<data_reference_p> *, > vec<int> *, int); > @@ -2459,7 +2461,8 @@ extern tree vect_create_destination_var (tree, tree); > extern bool vect_grouped_store_supported (tree, unsigned HOST_WIDE_INT); > extern internal_fn vect_store_lanes_supported (tree, unsigned HOST_WIDE_INT, > bool); > extern bool vect_grouped_load_supported (tree, bool, unsigned HOST_WIDE_INT); > -extern internal_fn vect_load_lanes_supported (tree, unsigned HOST_WIDE_INT, > bool); > +extern internal_fn vect_load_lanes_supported (tree, unsigned 
HOST_WIDE_INT, > + bool, auto_vec<int> * = nullptr); > extern void vect_permute_store_chain (vec_info *, vec<tree> &, > unsigned int, stmt_vec_info, > gimple_stmt_iterator *, vec<tree> *); > @@ -2605,6 +2608,8 @@ extern int vect_slp_child_index_for_operand (const > gimple *, int op, bool); > > extern tree prepare_vec_mask (loop_vec_info, tree, tree, tree, > gimple_stmt_iterator *); > +extern tree vect_get_mask_load_else (int, tree); > +extern int vect_get_else_val_from_tree (tree els); > > /* In tree-vect-patterns.cc. */ > extern void > -- Richard Biener <rguent...@suse.de> SUSE Software Solutions Germany GmbH, Frankenstrasse 146, 90461 Nuernberg, Germany; GF: Ivo Totev, Andrew McDonald, Werner Knoblich; (HRB 36809, AG Nuernberg)