On Thu, 7 Nov 2024, Robin Dapp wrote: > From: Robin Dapp <rd...@ventanamicro.com> > > This patch adds an else operand to vectorized masked load calls. > The current implementation adds else-value arguments to the respective > target-querying functions that are used to supply the vectorizer with the > proper else value. > > We query the target for its supported else operand and use that for the > maskload call. If necessary, i.e. if the mode has padding bits and if > the else operand is nonzero, a VEC_COND enforcing a zero else value is > emitted.
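For concreteness, the per-element behavior described above can be sketched in scalar C (a rough model only, not code from the patch; the function and parameter names are placeholders):

  /* Rough scalar model of a masked load with an explicit else operand,
     i.e. .MASK_LOAD (ptr, align, mask, els) after this patch:
     inactive elements receive the else value ELS.  */
  void
  mask_load_model (int *res, const int *src, const unsigned char *mask,
                   int els, int n)
  {
    for (int i = 0; i < n; i++)
      res[i] = mask[i] ? src[i] : els;

    /* If the element mode has padding bits and ELS is nonzero, the
       vectorizer additionally emits the equivalent of
         res = VEC_COND_EXPR <mask, res, { 0, ... }>;
       so that inactive elements end up zero.  */
    for (int i = 0; i < n; i++)
      if (!mask[i])
        res[i] = 0;
  }

Since get_supported_else_vals tries MASK_LOAD_ELSE_ZERO first, the extra zeroing step is only needed when the target cannot provide a zero else value.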
LGTM. Richard. > gcc/ChangeLog: > > * optabs-query.cc (supports_vec_convert_optab_p): Return icode. > (get_supported_else_val): Return supported else value for > optab's operand at index. > (supports_vec_gather_load_p): Add else argument. > (supports_vec_scatter_store_p): Ditto. > * optabs-query.h (supports_vec_gather_load_p): Ditto. > (get_supported_else_val): Ditto. > * optabs-tree.cc (target_supports_mask_load_store_p): Ditto. > (can_vec_mask_load_store_p): Ditto. > (target_supports_len_load_store_p): Ditto. > (get_len_load_store_mode): Ditto. > * optabs-tree.h (target_supports_mask_load_store_p): Ditto. > (can_vec_mask_load_store_p): Ditto. > * tree-vect-data-refs.cc (vect_lanes_optab_supported_p): Ditto. > (vect_gather_scatter_fn_p): Ditto. > (vect_check_gather_scatter): Ditto. > (vect_load_lanes_supported): Ditto. > * tree-vect-patterns.cc (vect_recog_gather_scatter_pattern): > Ditto. > * tree-vect-slp.cc (vect_get_operand_map): Adjust indices for > else operand. > (vect_slp_analyze_node_operations): Skip undefined else operand. > * tree-vect-stmts.cc (exist_non_indexing_operands_for_use_p): > Add else operand handling. > (vect_get_vec_defs_for_operand): Handle undefined else operand. > (check_load_store_for_partial_vectors): Add else argument. > (vect_truncate_gather_scatter_offset): Ditto. > (vect_use_strided_gather_scatters_p): Ditto. > (get_group_load_store_type): Ditto. > (get_load_store_type): Ditto. > (vect_get_mask_load_else): Ditto. > (vect_get_else_val_from_tree): Ditto. > (vect_build_one_gather_load_call): Add zero else operand. > (vectorizable_load): Use else operand. > * tree-vectorizer.h (vect_gather_scatter_fn_p): Add else > argument. > (vect_load_lanes_supported): Ditto. > (vect_get_mask_load_else): Ditto. > (vect_get_else_val_from_tree): Ditto. > > vect > --- > gcc/optabs-query.cc | 70 +++++--- > gcc/optabs-query.h | 3 +- > gcc/optabs-tree.cc | 66 ++++++-- > gcc/optabs-tree.h | 8 +- > gcc/tree-vect-data-refs.cc | 74 ++++++--- > gcc/tree-vect-patterns.cc | 12 +- > gcc/tree-vect-slp.cc | 25 ++- > gcc/tree-vect-stmts.cc | 326 +++++++++++++++++++++++++++++++------ > gcc/tree-vectorizer.h | 10 +- > 9 files changed, 468 insertions(+), 126 deletions(-) > > diff --git a/gcc/optabs-query.cc b/gcc/optabs-query.cc > index cc52bc0f5ea..c1f3558af92 100644 > --- a/gcc/optabs-query.cc > +++ b/gcc/optabs-query.cc > @@ -29,6 +29,9 @@ along with GCC; see the file COPYING3. If not see > #include "rtl.h" > #include "recog.h" > #include "vec-perm-indices.h" > +#include "internal-fn.h" > +#include "memmodel.h" > +#include "optabs.h" > > struct target_optabs default_target_optabs; > struct target_optabs *this_fn_optabs = &default_target_optabs; > @@ -672,34 +675,57 @@ lshift_cheap_p (bool speed_p) > that mode, given that the second mode is always an integer vector. > If MODE is VOIDmode, return true if OP supports any vector mode. */ > > -static bool > -supports_vec_convert_optab_p (optab op, machine_mode mode) > +static enum insn_code > +supported_vec_convert_optab (optab op, machine_mode mode) > { > int start = mode == VOIDmode ? 0 : mode; > int end = mode == VOIDmode ? 
MAX_MACHINE_MODE - 1 : mode; > + enum insn_code icode = CODE_FOR_nothing; > for (int i = start; i <= end; ++i) > if (VECTOR_MODE_P ((machine_mode) i)) > for (int j = MIN_MODE_VECTOR_INT; j < MAX_MODE_VECTOR_INT; ++j) > - if (convert_optab_handler (op, (machine_mode) i, > - (machine_mode) j) != CODE_FOR_nothing) > - return true; > + { > + if ((icode > + = convert_optab_handler (op, (machine_mode) i, > + (machine_mode) j)) != CODE_FOR_nothing) > + return icode; > + } > > - return false; > + return icode; > } > > /* If MODE is not VOIDmode, return true if vec_gather_load is available for > that mode. If MODE is VOIDmode, return true if gather_load is available > - for at least one vector mode. */ > + for at least one vector mode. > + In that case, and if ELSVALS is nonzero, store the supported else values > + into the vector it points to. */ > > bool > -supports_vec_gather_load_p (machine_mode mode) > +supports_vec_gather_load_p (machine_mode mode, vec<int> *elsvals) > { > - if (!this_fn_optabs->supports_vec_gather_load[mode]) > - this_fn_optabs->supports_vec_gather_load[mode] > - = (supports_vec_convert_optab_p (gather_load_optab, mode) > - || supports_vec_convert_optab_p (mask_gather_load_optab, mode) > - || supports_vec_convert_optab_p (mask_len_gather_load_optab, mode) > - ? 1 : -1); > + enum insn_code icode = CODE_FOR_nothing; > + if (!this_fn_optabs->supports_vec_gather_load[mode] || elsvals) > + { > + /* Try the masked variants first. In case we later decide that we > + need a mask after all (thus requiring an else operand) we need > + to query it below and we cannot do that when using the > + non-masked optab. */ > + icode = supported_vec_convert_optab (mask_gather_load_optab, mode); > + if (icode == CODE_FOR_nothing) > + icode = supported_vec_convert_optab (mask_len_gather_load_optab, mode); > + if (icode == CODE_FOR_nothing) > + icode = supported_vec_convert_optab (gather_load_optab, mode); > + this_fn_optabs->supports_vec_gather_load[mode] > + = (icode != CODE_FOR_nothing) ? 1 : -1; > + } > + > + /* For gather the optab's operand indices do not match the IFN's because > + the latter does not have the extension operand (operand 3). It is > + implicitly added during expansion so we use the IFN's else index + 1. > + */ > + if (elsvals && icode != CODE_FOR_nothing) > + get_supported_else_vals > + (icode, internal_fn_else_index (IFN_MASK_GATHER_LOAD) + 1, *elsvals); > > return this_fn_optabs->supports_vec_gather_load[mode] > 0; > } > @@ -711,12 +737,18 @@ supports_vec_gather_load_p (machine_mode mode) > bool > supports_vec_scatter_store_p (machine_mode mode) > { > + enum insn_code icode; > if (!this_fn_optabs->supports_vec_scatter_store[mode]) > - this_fn_optabs->supports_vec_scatter_store[mode] > - = (supports_vec_convert_optab_p (scatter_store_optab, mode) > - || supports_vec_convert_optab_p (mask_scatter_store_optab, mode) > - || supports_vec_convert_optab_p (mask_len_scatter_store_optab, mode) > - ? 1 : -1); > + { > + icode = supported_vec_convert_optab (scatter_store_optab, mode); > + if (icode == CODE_FOR_nothing) > + icode = supported_vec_convert_optab (mask_scatter_store_optab, mode); > + if (icode == CODE_FOR_nothing) > + icode = supported_vec_convert_optab (mask_len_scatter_store_optab, > + mode); > + this_fn_optabs->supports_vec_scatter_store[mode] > + = (icode != CODE_FOR_nothing) ? 
1 : -1; > + } > > return this_fn_optabs->supports_vec_scatter_store[mode] > 0; > } > diff --git a/gcc/optabs-query.h b/gcc/optabs-query.h > index 0cb2c21ba85..f38b1e5d5bb 100644 > --- a/gcc/optabs-query.h > +++ b/gcc/optabs-query.h > @@ -191,7 +191,8 @@ bool can_compare_and_swap_p (machine_mode, bool); > bool can_atomic_exchange_p (machine_mode, bool); > bool can_atomic_load_p (machine_mode); > bool lshift_cheap_p (bool); > -bool supports_vec_gather_load_p (machine_mode = E_VOIDmode); > +bool supports_vec_gather_load_p (machine_mode = E_VOIDmode, > + vec<int> * = nullptr); > bool supports_vec_scatter_store_p (machine_mode = E_VOIDmode); > bool can_vec_extract (machine_mode, machine_mode); > > diff --git a/gcc/optabs-tree.cc b/gcc/optabs-tree.cc > index b69a5bc3676..3d2d782ea32 100644 > --- a/gcc/optabs-tree.cc > +++ b/gcc/optabs-tree.cc > @@ -29,6 +29,7 @@ along with GCC; see the file COPYING3. If not see > #include "optabs.h" > #include "optabs-tree.h" > #include "stor-layout.h" > +#include "internal-fn.h" > > /* Return the optab used for computing the operation given by the tree code, > CODE and the tree EXP. This function is not always usable (for example, > it > @@ -552,24 +553,38 @@ target_supports_op_p (tree type, enum tree_code code, > or mask_len_{load,store}. > This helper function checks whether target supports masked > load/store and return corresponding IFN in the last argument > - (IFN_MASK_{LOAD,STORE} or IFN_MASK_LEN_{LOAD,STORE}). */ > + (IFN_MASK_{LOAD,STORE} or IFN_MASK_LEN_{LOAD,STORE}). > + If there is support and ELSVALS is nonzero store the possible else values > + in the vector it points to. */ > > -static bool > +bool > target_supports_mask_load_store_p (machine_mode mode, machine_mode mask_mode, > - bool is_load, internal_fn *ifn) > + bool is_load, internal_fn *ifn, > + vec<int> *elsvals) > { > optab op = is_load ? maskload_optab : maskstore_optab; > optab len_op = is_load ? mask_len_load_optab : mask_len_store_optab; > - if (convert_optab_handler (op, mode, mask_mode) != CODE_FOR_nothing) > + enum insn_code icode; > + if ((icode = convert_optab_handler (op, mode, mask_mode)) > + != CODE_FOR_nothing) > { > if (ifn) > *ifn = is_load ? IFN_MASK_LOAD : IFN_MASK_STORE; > + if (elsvals && is_load) > + get_supported_else_vals (icode, > + internal_fn_else_index (IFN_MASK_LOAD), > + *elsvals); > return true; > } > - else if (convert_optab_handler (len_op, mode, mask_mode) != > CODE_FOR_nothing) > + else if ((icode = convert_optab_handler (len_op, mode, mask_mode)) > + != CODE_FOR_nothing) > { > if (ifn) > *ifn = is_load ? IFN_MASK_LEN_LOAD : IFN_MASK_LEN_STORE; > + if (elsvals && is_load) > + get_supported_else_vals (icode, > + internal_fn_else_index (IFN_MASK_LEN_LOAD), > + *elsvals); > return true; > } > return false; > @@ -578,19 +593,23 @@ target_supports_mask_load_store_p (machine_mode mode, > machine_mode mask_mode, > /* Return true if target supports vector masked load/store for mode. > An additional output in the last argument which is the IFN pointer. > We set IFN as MASK_{LOAD,STORE} or MASK_LEN_{LOAD,STORE} according > - which optab is supported in the target. */ > + which optab is supported in the target. > + If there is support and ELSVALS is nonzero store the possible else values > + in the vector it points to. */ > > bool > can_vec_mask_load_store_p (machine_mode mode, > machine_mode mask_mode, > bool is_load, > - internal_fn *ifn) > + internal_fn *ifn, > + vec<int> *elsvals) > { > machine_mode vmode; > > /* If mode is vector mode, check it directly. 
*/ > if (VECTOR_MODE_P (mode)) > - return target_supports_mask_load_store_p (mode, mask_mode, is_load, ifn); > + return target_supports_mask_load_store_p (mode, mask_mode, is_load, ifn, > + elsvals); > > /* Otherwise, return true if there is some vector mode with > the mask load/store supported. */ > @@ -604,7 +623,8 @@ can_vec_mask_load_store_p (machine_mode mode, > vmode = targetm.vectorize.preferred_simd_mode (smode); > if (VECTOR_MODE_P (vmode) > && targetm.vectorize.get_mask_mode (vmode).exists (&mask_mode) > - && target_supports_mask_load_store_p (vmode, mask_mode, is_load, ifn)) > + && target_supports_mask_load_store_p (vmode, mask_mode, is_load, ifn, > + elsvals)) > return true; > > auto_vector_modes vector_modes; > @@ -612,7 +632,8 @@ can_vec_mask_load_store_p (machine_mode mode, > for (machine_mode base_mode : vector_modes) > if (related_vector_mode (base_mode, smode).exists (&vmode) > && targetm.vectorize.get_mask_mode (vmode).exists (&mask_mode) > - && target_supports_mask_load_store_p (vmode, mask_mode, is_load, ifn)) > + && target_supports_mask_load_store_p (vmode, mask_mode, is_load, ifn, > + elsvals)) > return true; > return false; > } > @@ -622,11 +643,13 @@ can_vec_mask_load_store_p (machine_mode mode, > or mask_len_{load,store}. > This helper function checks whether target supports len > load/store and return corresponding IFN in the last argument > - (IFN_LEN_{LOAD,STORE} or IFN_MASK_LEN_{LOAD,STORE}). */ > + (IFN_LEN_{LOAD,STORE} or IFN_MASK_LEN_{LOAD,STORE}). > + If there is support and ELSVALS is nonzero store the possible > + else values in the vector it points to. */ > > static bool > target_supports_len_load_store_p (machine_mode mode, bool is_load, > - internal_fn *ifn) > + internal_fn *ifn, vec<int> *elsvals) > { > optab op = is_load ? len_load_optab : len_store_optab; > optab masked_op = is_load ? mask_len_load_optab : mask_len_store_optab; > @@ -638,11 +661,17 @@ target_supports_len_load_store_p (machine_mode mode, > bool is_load, > return true; > } > machine_mode mask_mode; > + enum insn_code icode; > if (targetm.vectorize.get_mask_mode (mode).exists (&mask_mode) > - && convert_optab_handler (masked_op, mode, mask_mode) != > CODE_FOR_nothing) > + && ((icode = convert_optab_handler (masked_op, mode, mask_mode)) > + != CODE_FOR_nothing)) > { > if (ifn) > *ifn = is_load ? IFN_MASK_LEN_LOAD : IFN_MASK_LEN_STORE; > + if (elsvals && is_load) > + get_supported_else_vals (icode, > + internal_fn_else_index (IFN_MASK_LEN_LOAD), > + *elsvals); > return true; > } > return false; > @@ -656,22 +685,25 @@ target_supports_len_load_store_p (machine_mode mode, > bool is_load, > VnQI to wrap the other supportable same size vector modes. > An additional output in the last argument which is the IFN pointer. > We set IFN as LEN_{LOAD,STORE} or MASK_LEN_{LOAD,STORE} according > - which optab is supported in the target. */ > + which optab is supported in the target. > + If there is support and ELSVALS is nonzero store the possible else values > + in the vector it points to. */ > > opt_machine_mode > -get_len_load_store_mode (machine_mode mode, bool is_load, internal_fn *ifn) > +get_len_load_store_mode (machine_mode mode, bool is_load, internal_fn *ifn, > + vec<int> *elsvals) > { > gcc_assert (VECTOR_MODE_P (mode)); > > /* Check if length in lanes supported for this mode directly.
*/ > - if (target_supports_len_load_store_p (mode, is_load, ifn)) > + if (target_supports_len_load_store_p (mode, is_load, ifn, elsvals)) > return mode; > > /* Check if length in bytes supported for same vector size VnQI. */ > machine_mode vmode; > poly_uint64 nunits = GET_MODE_SIZE (mode); > if (related_vector_mode (mode, QImode, nunits).exists (&vmode) > - && target_supports_len_load_store_p (vmode, is_load, ifn)) > + && target_supports_len_load_store_p (vmode, is_load, ifn, elsvals)) > return vmode; > > return opt_machine_mode (); > diff --git a/gcc/optabs-tree.h b/gcc/optabs-tree.h > index 85805fd8296..37102c94f0c 100644 > --- a/gcc/optabs-tree.h > +++ b/gcc/optabs-tree.h > @@ -47,9 +47,13 @@ bool expand_vec_cond_expr_p (tree, tree, enum tree_code = > ERROR_MARK); > void init_tree_optimization_optabs (tree); > bool target_supports_op_p (tree, enum tree_code, > enum optab_subtype = optab_default); > +bool target_supports_mask_load_store_p (machine_mode, machine_mode, > + bool, internal_fn *, vec<int> *); > bool can_vec_mask_load_store_p (machine_mode, machine_mode, bool, > - internal_fn * = nullptr); > + internal_fn * = nullptr, > + vec<int> * = nullptr); > opt_machine_mode get_len_load_store_mode (machine_mode, bool, > - internal_fn * = nullptr); > + internal_fn * = nullptr, > + vec<int> * = nullptr); > > #endif > diff --git a/gcc/tree-vect-data-refs.cc b/gcc/tree-vect-data-refs.cc > index 54ad5c8f3dc..f57c6750166 100644 > --- a/gcc/tree-vect-data-refs.cc > +++ b/gcc/tree-vect-data-refs.cc > @@ -55,13 +55,18 @@ along with GCC; see the file COPYING3. If not see > #include "vec-perm-indices.h" > #include "internal-fn.h" > #include "gimple-fold.h" > +#include "optabs-query.h" > > /* Return true if load- or store-lanes optab OPTAB is implemented for > - COUNT vectors of type VECTYPE. NAME is the name of OPTAB. */ > + COUNT vectors of type VECTYPE. NAME is the name of OPTAB. > + > + If it is implemented and ELSVALS is nonzero store the possible else > + values in the vector it points to. */ > > static bool > vect_lanes_optab_supported_p (const char *name, convert_optab optab, > - tree vectype, unsigned HOST_WIDE_INT count) > + tree vectype, unsigned HOST_WIDE_INT count, > + vec<int> *elsvals = nullptr) > { > machine_mode mode, array_mode; > bool limit_p; > @@ -81,7 +86,9 @@ vect_lanes_optab_supported_p (const char *name, > convert_optab optab, > } > } > > - if (convert_optab_handler (optab, array_mode, mode) == CODE_FOR_nothing) > + enum insn_code icode; > + if ((icode = convert_optab_handler (optab, array_mode, mode)) > + == CODE_FOR_nothing) > { > if (dump_enabled_p ()) > dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, > @@ -92,8 +99,13 @@ vect_lanes_optab_supported_p (const char *name, > convert_optab optab, > > if (dump_enabled_p ()) > dump_printf_loc (MSG_NOTE, vect_location, > - "can use %s<%s><%s>\n", name, GET_MODE_NAME > (array_mode), > - GET_MODE_NAME (mode)); > + "can use %s<%s><%s>\n", name, GET_MODE_NAME (array_mode), > + GET_MODE_NAME (mode)); > + > + if (elsvals) > + get_supported_else_vals (icode, > + internal_fn_else_index (IFN_MASK_LEN_LOAD_LANES), > + *elsvals); > > return true; > } > @@ -4184,13 +4196,15 @@ vect_prune_runtime_alias_test_list (loop_vec_info > loop_vinfo) > be multiplied *after* it has been converted to address width. > > Return true if the function is supported, storing the function id in > - *IFN_OUT and the vector type for the offset in *OFFSET_VECTYPE_OUT. */ > + *IFN_OUT and the vector type for the offset in *OFFSET_VECTYPE_OUT. 
> + > + If we can use gather, store the possible else values in ELSVALS. */ > > bool > vect_gather_scatter_fn_p (vec_info *vinfo, bool read_p, bool masked_p, > tree vectype, tree memory_type, tree offset_type, > int scale, internal_fn *ifn_out, > - tree *offset_vectype_out) > + tree *offset_vectype_out, vec<int> *elsvals) > { > unsigned int memory_bits = tree_to_uhwi (TYPE_SIZE (memory_type)); > unsigned int element_bits = vector_element_bits (vectype); > @@ -4228,7 +4242,8 @@ vect_gather_scatter_fn_p (vec_info *vinfo, bool read_p, > bool masked_p, > > /* Test whether the target supports this combination. */ > if (internal_gather_scatter_fn_supported_p (ifn, vectype, memory_type, > - offset_vectype, scale)) > + offset_vectype, scale, > + elsvals)) > { > *ifn_out = ifn; > *offset_vectype_out = offset_vectype; > @@ -4238,7 +4253,7 @@ vect_gather_scatter_fn_p (vec_info *vinfo, bool read_p, > bool masked_p, > && internal_gather_scatter_fn_supported_p (alt_ifn, vectype, > memory_type, > offset_vectype, > - scale)) > + scale, elsvals)) > { > *ifn_out = alt_ifn; > *offset_vectype_out = offset_vectype; > @@ -4246,7 +4261,8 @@ vect_gather_scatter_fn_p (vec_info *vinfo, bool read_p, > bool masked_p, > } > else if (internal_gather_scatter_fn_supported_p (alt_ifn2, vectype, > memory_type, > - offset_vectype, scale)) > + offset_vectype, scale, > + elsvals)) > { > *ifn_out = alt_ifn2; > *offset_vectype_out = offset_vectype; > @@ -4285,11 +4301,13 @@ vect_describe_gather_scatter_call (stmt_vec_info > stmt_info, > } > > /* Return true if a non-affine read or write in STMT_INFO is suitable for a > - gather load or scatter store. Describe the operation in *INFO if so. */ > + gather load or scatter store. Describe the operation in *INFO if so. > + If it is suitable and ELSVALS is nonzero store the supported else values > + in the vector it points to. */ > > bool > vect_check_gather_scatter (stmt_vec_info stmt_info, loop_vec_info loop_vinfo, > - gather_scatter_info *info) > + gather_scatter_info *info, vec<int> *elsvals) > { > HOST_WIDE_INT scale = 1; > poly_int64 pbitpos, pbitsize; > @@ -4314,6 +4332,13 @@ vect_check_gather_scatter (stmt_vec_info stmt_info, > loop_vec_info loop_vinfo, > if (internal_gather_scatter_fn_p (ifn)) > { > vect_describe_gather_scatter_call (stmt_info, info); > + > + /* In pattern recog we simply used a ZERO else value that > + we need to correct here. To that end just re-use the > + (already successful) check if we support a gather IFN > + and have it populate the else values. */ > + if (DR_IS_READ (dr) && internal_fn_mask_index (ifn) >= 0 && elsvals) > + supports_vec_gather_load_p (TYPE_MODE (vectype), elsvals); > return true; > } > masked_p = (ifn == IFN_MASK_LOAD || ifn == IFN_MASK_STORE); > @@ -4322,7 +4347,8 @@ vect_check_gather_scatter (stmt_vec_info stmt_info, > loop_vec_info loop_vinfo, > /* True if we should aim to use internal functions rather than > built-in functions. */ > bool use_ifn_p = (DR_IS_READ (dr) > - ? supports_vec_gather_load_p (TYPE_MODE (vectype)) > + ?
supports_vec_gather_load_p (TYPE_MODE (vectype), > + elsvals) > : supports_vec_scatter_store_p (TYPE_MODE (vectype))); > > base = DR_REF (dr); > @@ -4479,12 +4505,14 @@ vect_check_gather_scatter (stmt_vec_info stmt_info, > loop_vec_info loop_vinfo, > masked_p, vectype, memory_type, > signed_char_type_node, > new_scale, &ifn, > - &offset_vectype) > + &offset_vectype, > + elsvals) > && !vect_gather_scatter_fn_p (loop_vinfo, DR_IS_READ (dr), > masked_p, vectype, memory_type, > unsigned_char_type_node, > new_scale, &ifn, > - &offset_vectype)) > + &offset_vectype, > + elsvals)) > break; > scale = new_scale; > off = op0; > @@ -4507,7 +4535,7 @@ vect_check_gather_scatter (stmt_vec_info stmt_info, > loop_vec_info loop_vinfo, > && vect_gather_scatter_fn_p (loop_vinfo, DR_IS_READ (dr), > masked_p, vectype, memory_type, > TREE_TYPE (off), scale, &ifn, > - &offset_vectype)) > + &offset_vectype, elsvals)) > break; > > if (TYPE_PRECISION (TREE_TYPE (op0)) > @@ -4561,7 +4589,7 @@ vect_check_gather_scatter (stmt_vec_info stmt_info, > loop_vec_info loop_vinfo, > { > if (!vect_gather_scatter_fn_p (loop_vinfo, DR_IS_READ (dr), masked_p, > vectype, memory_type, offtype, scale, > - &ifn, &offset_vectype)) > + &ifn, &offset_vectype, elsvals)) > ifn = IFN_LAST; > decl = NULL_TREE; > } > @@ -6398,27 +6426,29 @@ vect_grouped_load_supported (tree vectype, bool > single_element_p, > } > > /* Return FN if vec_{masked_,mask_len_}load_lanes is available for COUNT > vectors > - of type VECTYPE. MASKED_P says whether the masked form is needed. */ > + of type VECTYPE. MASKED_P says whether the masked form is needed. > + If it is available and ELSVALS is nonzero store the possible else values > + in the vector it points to. */ > > internal_fn > vect_load_lanes_supported (tree vectype, unsigned HOST_WIDE_INT count, > - bool masked_p) > + bool masked_p, vec<int> *elsvals) > { > if (vect_lanes_optab_supported_p ("vec_mask_len_load_lanes", > vec_mask_len_load_lanes_optab, vectype, > - count)) > + count, elsvals)) > return IFN_MASK_LEN_LOAD_LANES; > else if (masked_p) > { > if (vect_lanes_optab_supported_p ("vec_mask_load_lanes", > vec_mask_load_lanes_optab, vectype, > - count)) > + count, elsvals)) > return IFN_MASK_LOAD_LANES; > } > else > { > if (vect_lanes_optab_supported_p ("vec_load_lanes", > vec_load_lanes_optab, > - vectype, count)) > + vectype, count, elsvals)) > return IFN_LOAD_LANES; > } > return IFN_LAST; > diff --git a/gcc/tree-vect-patterns.cc b/gcc/tree-vect-patterns.cc > index a708234304f..eb0e5808f7f 100644 > --- a/gcc/tree-vect-patterns.cc > +++ b/gcc/tree-vect-patterns.cc > @@ -6021,12 +6021,20 @@ vect_recog_gather_scatter_pattern (vec_info *vinfo, > /* Build the new pattern statement. 
*/ > tree scale = size_int (gs_info.scale); > gcall *pattern_stmt; > + > if (DR_IS_READ (dr)) > { > tree zero = build_zero_cst (gs_info.element_type); > if (mask != NULL) > - pattern_stmt = gimple_build_call_internal (gs_info.ifn, 5, base, > - offset, scale, zero, mask); > + { > + int elsval = MASK_LOAD_ELSE_ZERO; > + > + tree vec_els > + = vect_get_mask_load_else (elsval, TREE_TYPE (gs_vectype)); > + pattern_stmt = gimple_build_call_internal (gs_info.ifn, 6, base, > + offset, scale, zero, mask, > + vec_els); > + } > else > pattern_stmt = gimple_build_call_internal (gs_info.ifn, 4, base, > offset, scale, zero); > diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc > index 97c362d24f8..2986cc3fc4c 100644 > --- a/gcc/tree-vect-slp.cc > +++ b/gcc/tree-vect-slp.cc > @@ -511,13 +511,13 @@ static const int cond_expr_maps[3][5] = { > static const int no_arg_map[] = { 0 }; > static const int arg0_map[] = { 1, 0 }; > static const int arg1_map[] = { 1, 1 }; > -static const int arg2_map[] = { 1, 2 }; > -static const int arg1_arg4_map[] = { 2, 1, 4 }; > +static const int arg2_arg3_map[] = { 2, 2, 3 }; > +static const int arg1_arg4_arg5_map[] = { 3, 1, 4, 5 }; > static const int arg3_arg2_map[] = { 2, 3, 2 }; > static const int op1_op0_map[] = { 2, 1, 0 }; > static const int off_map[] = { 1, -3 }; > static const int off_op0_map[] = { 2, -3, 0 }; > -static const int off_arg2_map[] = { 2, -3, 2 }; > +static const int off_arg2_arg3_map[] = { 3, -3, 2, 3 }; > static const int off_arg3_arg2_map[] = { 3, -3, 3, 2 }; > static const int mask_call_maps[6][7] = { > { 1, 1, }, > @@ -564,14 +564,14 @@ vect_get_operand_map (const gimple *stmt, bool > gather_scatter_p = false, > switch (gimple_call_internal_fn (call)) > { > case IFN_MASK_LOAD: > - return gather_scatter_p ? off_arg2_map : arg2_map; > + return gather_scatter_p ? off_arg2_arg3_map : arg2_arg3_map; > > case IFN_GATHER_LOAD: > return arg1_map; > > case IFN_MASK_GATHER_LOAD: > case IFN_MASK_LEN_GATHER_LOAD: > - return arg1_arg4_map; > + return arg1_arg4_arg5_map; > > case IFN_MASK_STORE: > return gather_scatter_p ? off_arg3_arg2_map : arg3_arg2_map; > @@ -2675,7 +2675,8 @@ out: > tree op0; > tree uniform_val = op0 = oprnd_info->ops[0]; > for (j = 1; j < oprnd_info->ops.length (); ++j) > - if (!operand_equal_p (uniform_val, oprnd_info->ops[j])) > + if (!oprnd_info->ops[j] > + || !operand_equal_p (uniform_val, oprnd_info->ops[j])) > { > uniform_val = NULL_TREE; > break; > @@ -7928,6 +7929,18 @@ vect_slp_analyze_node_operations (vec_info *vinfo, > slp_tree node, > tree vector_type = SLP_TREE_VECTYPE (child); > if (!vector_type) > { > + /* Masked loads can have an undefined (default SSA definition) > + else operand. We do not need to cost it. */ > + vec<tree> ops = SLP_TREE_SCALAR_OPS (child); > + if ((STMT_VINFO_TYPE (SLP_TREE_REPRESENTATIVE (node)) > + == load_vec_info_type) > + && ((ops.length () > + && TREE_CODE (ops[0]) == SSA_NAME > + && SSA_NAME_IS_DEFAULT_DEF (ops[0]) > + && VAR_P (SSA_NAME_VAR (ops[0]))) > + || SLP_TREE_DEF_TYPE (child) == vect_constant_def)) > + continue; > + > /* For shifts with a scalar argument we don't need > to cost or code-generate anything. > ??? Represent this more explicitely. */ > diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc > index 9a2c2ea753e..a90885eabe3 100644 > --- a/gcc/tree-vect-stmts.cc > +++ b/gcc/tree-vect-stmts.cc > @@ -58,6 +58,7 @@ along with GCC; see the file COPYING3. 
If not see > #include "regs.h" > #include "attribs.h" > #include "optabs-libfuncs.h" > +#include "tree-dfa.h" > > /* For lang_hooks.types.type_for_mode. */ > #include "langhooks.h" > @@ -157,28 +158,45 @@ create_vector_array (tree elem_type, unsigned > HOST_WIDE_INT nelems) > /* ARRAY is an array of vectors created by create_vector_array. > Return an SSA_NAME for the vector in index N. The reference > is part of the vectorization of STMT_INFO and the vector is associated > - with scalar destination SCALAR_DEST. */ > + with scalar destination SCALAR_DEST. > + If we need to ensure that inactive elements are set to zero, > + NEED_ZEROING is true, MASK contains the loop mask to be used. */ > > static tree > read_vector_array (vec_info *vinfo, > stmt_vec_info stmt_info, gimple_stmt_iterator *gsi, > - tree scalar_dest, tree array, unsigned HOST_WIDE_INT n) > + tree scalar_dest, tree array, unsigned HOST_WIDE_INT n, > + bool need_zeroing, tree mask) > { > - tree vect_type, vect, vect_name, array_ref; > + tree vect_type, vect, vect_name, tmp, tmp_name, array_ref; > gimple *new_stmt; > > gcc_assert (TREE_CODE (TREE_TYPE (array)) == ARRAY_TYPE); > vect_type = TREE_TYPE (TREE_TYPE (array)); > + tmp = vect_create_destination_var (scalar_dest, vect_type); > vect = vect_create_destination_var (scalar_dest, vect_type); > array_ref = build4 (ARRAY_REF, vect_type, array, > build_int_cst (size_type_node, n), > NULL_TREE, NULL_TREE); > > - new_stmt = gimple_build_assign (vect, array_ref); > - vect_name = make_ssa_name (vect, new_stmt); > - gimple_assign_set_lhs (new_stmt, vect_name); > + new_stmt = gimple_build_assign (tmp, array_ref); > + tmp_name = make_ssa_name (vect, new_stmt); > + gimple_assign_set_lhs (new_stmt, tmp_name); > vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi); > > + if (need_zeroing) > + { > + tree vec_els = vect_get_mask_load_else (MASK_LOAD_ELSE_ZERO, > + vect_type); > + vect_name = make_ssa_name (vect, new_stmt); > + new_stmt > + = gimple_build_assign (vect_name, VEC_COND_EXPR, > + mask, tmp_name, vec_els); > + vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi); > + } > + else > + vect_name = tmp_name; > + > return vect_name; > } > > @@ -469,6 +487,10 @@ exist_non_indexing_operands_for_use_p (tree use, > stmt_vec_info stmt_info) > if (mask_index >= 0 > && use == gimple_call_arg (call, mask_index)) > return true; > + int els_index = internal_fn_else_index (ifn); > + if (els_index >= 0 > + && use == gimple_call_arg (call, els_index)) > + return true; > int stored_value_index = internal_fn_stored_value_index (ifn); > if (stored_value_index >= 0 > && use == gimple_call_arg (call, stored_value_index)) > @@ -1280,7 +1302,17 @@ vect_get_vec_defs_for_operand (vec_info *vinfo, > stmt_vec_info stmt_vinfo, > vector_type = get_vectype_for_scalar_type (loop_vinfo, TREE_TYPE (op)); > > gcc_assert (vector_type); > - tree vop = vect_init_vector (vinfo, stmt_vinfo, op, vector_type, NULL); > + /* A masked load can have a default SSA definition as else operand. > + We should "vectorize" this instead of creating a duplicate from the > + scalar default. 
*/ > + tree vop; > + if (TREE_CODE (op) == SSA_NAME > + && SSA_NAME_IS_DEFAULT_DEF (op) > + && VAR_P (SSA_NAME_VAR (op))) > + vop = get_or_create_ssa_default_def (cfun, > + create_tmp_var (vector_type)); > + else > + vop = vect_init_vector (vinfo, stmt_vinfo, op, vector_type, NULL); > while (ncopies--) > vec_oprnds->quick_push (vop); > } > @@ -1492,7 +1524,10 @@ static tree permute_vec_elements (vec_info *, tree, > tree, tree, stmt_vec_info, > > Clear LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P if a loop using partial > vectors is not supported, otherwise record the required rgroup control > - types. */ > + types. > + > + If partial vectors can be used and ELSVALS is nonzero the supported > + else values will be added to the vector ELSVALS points to. */ > > static void > check_load_store_for_partial_vectors (loop_vec_info loop_vinfo, tree vectype, > @@ -1502,7 +1537,8 @@ check_load_store_for_partial_vectors (loop_vec_info > loop_vinfo, tree vectype, > vect_memory_access_type > memory_access_type, > gather_scatter_info *gs_info, > - tree scalar_mask) > + tree scalar_mask, > + vec<int> *elsvals = nullptr) > { > /* Invariant loads need no special support. */ > if (memory_access_type == VMAT_INVARIANT) > @@ -1518,7 +1554,8 @@ check_load_store_for_partial_vectors (loop_vec_info > loop_vinfo, tree vectype, > if (slp_node) > nvectors /= group_size; > internal_fn ifn > - = (is_load ? vect_load_lanes_supported (vectype, group_size, true) > + = (is_load ? vect_load_lanes_supported (vectype, group_size, true, > + elsvals) > : vect_store_lanes_supported (vectype, group_size, true)); > if (ifn == IFN_MASK_LEN_LOAD_LANES || ifn == IFN_MASK_LEN_STORE_LANES) > vect_record_loop_len (loop_vinfo, lens, nvectors, vectype, 1); > @@ -1548,12 +1585,14 @@ check_load_store_for_partial_vectors (loop_vec_info > loop_vinfo, tree vectype, > if (internal_gather_scatter_fn_supported_p (len_ifn, vectype, > gs_info->memory_type, > gs_info->offset_vectype, > - gs_info->scale)) > + gs_info->scale, > + elsvals)) > vect_record_loop_len (loop_vinfo, lens, nvectors, vectype, 1); > else if (internal_gather_scatter_fn_supported_p (ifn, vectype, > gs_info->memory_type, > gs_info->offset_vectype, > - gs_info->scale)) > + gs_info->scale, > + elsvals)) > vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype, > scalar_mask); > else > @@ -1607,7 +1646,8 @@ check_load_store_for_partial_vectors (loop_vec_info > loop_vinfo, tree vectype, > machine_mode mask_mode; > machine_mode vmode; > bool using_partial_vectors_p = false; > - if (get_len_load_store_mode (vecmode, is_load).exists (&vmode)) > + if (get_len_load_store_mode > + (vecmode, is_load, nullptr, elsvals).exists (&vmode)) > { > nvectors = group_memory_nvectors (group_size * vf, nunits); > unsigned factor = (vecmode == vmode) ? 1 : GET_MODE_UNIT_SIZE > (vecmode); > @@ -1615,7 +1655,8 @@ check_load_store_for_partial_vectors (loop_vec_info > loop_vinfo, tree vectype, > using_partial_vectors_p = true; > } > else if (targetm.vectorize.get_mask_mode (vecmode).exists (&mask_mode) > - && can_vec_mask_load_store_p (vecmode, mask_mode, is_load)) > + && can_vec_mask_load_store_p (vecmode, mask_mode, is_load, NULL, > + elsvals)) > { > nvectors = group_memory_nvectors (group_size * vf, nunits); > vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype, > scalar_mask); > @@ -1672,12 +1713,16 @@ prepare_vec_mask (loop_vec_info loop_vinfo, tree > mask_type, tree loop_mask, > without loss of precision, where X is STMT_INFO's DR_STEP. 
> > Return true if this is possible, describing the gather load or scatter > - store in GS_INFO. MASKED_P is true if the load or store is conditional. > */ > + store in GS_INFO. MASKED_P is true if the load or store is conditional. > + > + If we can use gather/scatter and ELSVALS is nonzero the supported > + else values will be stored in the vector ELSVALS points to. */ > > static bool > vect_truncate_gather_scatter_offset (stmt_vec_info stmt_info, > loop_vec_info loop_vinfo, bool masked_p, > - gather_scatter_info *gs_info) > + gather_scatter_info *gs_info, > + vec<int> *elsvals) > { > dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info); > data_reference *dr = dr_info->dr; > @@ -1734,7 +1779,8 @@ vect_truncate_gather_scatter_offset (stmt_vec_info > stmt_info, > tree memory_type = TREE_TYPE (DR_REF (dr)); > if (!vect_gather_scatter_fn_p (loop_vinfo, DR_IS_READ (dr), masked_p, > vectype, memory_type, offset_type, scale, > - &gs_info->ifn, &gs_info->offset_vectype) > + &gs_info->ifn, &gs_info->offset_vectype, > + elsvals) > || gs_info->ifn == IFN_LAST) > continue; > > @@ -1762,17 +1808,21 @@ vect_truncate_gather_scatter_offset (stmt_vec_info > stmt_info, > vectorize STMT_INFO, which is a grouped or strided load or store. > MASKED_P is true if load or store is conditional. When returning > true, fill in GS_INFO with the information required to perform the > - operation. */ > + operation. > + > + If we can use gather/scatter and ELSVALS is nonzero the supported > + else values will be stored in the vector ELSVALS points to. */ > > static bool > vect_use_strided_gather_scatters_p (stmt_vec_info stmt_info, > loop_vec_info loop_vinfo, bool masked_p, > - gather_scatter_info *gs_info) > + gather_scatter_info *gs_info, > + vec<int> *elsvals) > { > - if (!vect_check_gather_scatter (stmt_info, loop_vinfo, gs_info) > + if (!vect_check_gather_scatter (stmt_info, loop_vinfo, gs_info, elsvals) > || gs_info->ifn == IFN_LAST) > return vect_truncate_gather_scatter_offset (stmt_info, loop_vinfo, > - masked_p, gs_info); > + masked_p, gs_info, elsvals); > > tree old_offset_type = TREE_TYPE (gs_info->offset); > tree new_offset_type = TREE_TYPE (gs_info->offset_vectype); > @@ -1974,7 +2024,11 @@ vector_vector_composition_type (tree vtype, > poly_uint64 nelts, tree *ptype) > For stores, the statements in the group are all consecutive > and there is no gap at the end. For loads, the statements in the > group might not be consecutive; there can be gaps between statements > - as well as at the end. */ > + as well as at the end. > + > + If we can use gather/scatter and ELSVALS is nonzero the supported > + else values will be stored in the vector ELSVALS points to. > +*/ > > static bool > get_group_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info, > @@ -1985,7 +2039,8 @@ get_group_load_store_type (vec_info *vinfo, > stmt_vec_info stmt_info, > dr_alignment_support *alignment_support_scheme, > int *misalignment, > gather_scatter_info *gs_info, > - internal_fn *lanes_ifn) > + internal_fn *lanes_ifn, > + vec<int> *elsvals) > { > loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo); > class loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL; > @@ -2074,7 +2129,8 @@ get_group_load_store_type (vec_info *vinfo, > stmt_vec_info stmt_info, > else if (slp_node->ldst_lanes > && (*lanes_ifn > = (vls_type == VLS_LOAD > - ? vect_load_lanes_supported (vectype, group_size, > masked_p) > + ? 
vect_load_lanes_supported (vectype, group_size, > + masked_p, elsvals) > : vect_store_lanes_supported (vectype, group_size, > masked_p))) != > IFN_LAST) > *memory_access_type = VMAT_LOAD_STORE_LANES; > @@ -2244,7 +2300,8 @@ get_group_load_store_type (vec_info *vinfo, > stmt_vec_info stmt_info, > /* Otherwise try using LOAD/STORE_LANES. */ > *lanes_ifn > = vls_type == VLS_LOAD > - ? vect_load_lanes_supported (vectype, group_size, masked_p) > + ? vect_load_lanes_supported (vectype, group_size, masked_p, > + elsvals) > : vect_store_lanes_supported (vectype, group_size, > masked_p); > if (*lanes_ifn != IFN_LAST) > @@ -2278,7 +2335,7 @@ get_group_load_store_type (vec_info *vinfo, > stmt_vec_info stmt_info, > && single_element_p > && loop_vinfo > && vect_use_strided_gather_scatters_p (stmt_info, loop_vinfo, > - masked_p, gs_info)) > + masked_p, gs_info, elsvals)) > *memory_access_type = VMAT_GATHER_SCATTER; > > if (*memory_access_type == VMAT_GATHER_SCATTER > @@ -2340,7 +2397,10 @@ get_group_load_store_type (vec_info *vinfo, > stmt_vec_info stmt_info, > SLP says whether we're performing SLP rather than loop vectorization. > MASKED_P is true if the statement is conditional on a vectorized mask. > VECTYPE is the vector type that the vectorized statements will use. > - NCOPIES is the number of vector statements that will be needed. */ > + NCOPIES is the number of vector statements that will be needed. > + > + If ELSVALS is nonzero the supported else values will be stored in the > + vector ELSVALS points to. */ > > static bool > get_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info, > @@ -2352,7 +2412,8 @@ get_load_store_type (vec_info *vinfo, stmt_vec_info > stmt_info, > dr_alignment_support *alignment_support_scheme, > int *misalignment, > gather_scatter_info *gs_info, > - internal_fn *lanes_ifn) > + internal_fn *lanes_ifn, > + vec<int> *elsvals = nullptr) > { > loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo); > poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype); > @@ -2361,7 +2422,8 @@ get_load_store_type (vec_info *vinfo, stmt_vec_info > stmt_info, > if (STMT_VINFO_GATHER_SCATTER_P (stmt_info)) > { > *memory_access_type = VMAT_GATHER_SCATTER; > - if (!vect_check_gather_scatter (stmt_info, loop_vinfo, gs_info)) > + if (!vect_check_gather_scatter (stmt_info, loop_vinfo, gs_info, > + elsvals)) > gcc_unreachable (); > /* When using internal functions, we rely on pattern recognition > to convert the type of the offset to the type that the target > @@ -2415,7 +2477,8 @@ get_load_store_type (vec_info *vinfo, stmt_vec_info > stmt_info, > masked_p, > vls_type, memory_access_type, poffset, > alignment_support_scheme, > - misalignment, gs_info, lanes_ifn)) > + misalignment, gs_info, lanes_ifn, > + elsvals)) > return false; > } > else if (STMT_VINFO_STRIDED_P (stmt_info)) > @@ -2423,7 +2486,7 @@ get_load_store_type (vec_info *vinfo, stmt_vec_info > stmt_info, > gcc_assert (!slp_node); > if (loop_vinfo > && vect_use_strided_gather_scatters_p (stmt_info, loop_vinfo, > - masked_p, gs_info)) > + masked_p, gs_info, elsvals)) > *memory_access_type = VMAT_GATHER_SCATTER; > else > *memory_access_type = VMAT_ELEMENTWISE; > @@ -2692,6 +2755,30 @@ vect_build_zero_merge_argument (vec_info *vinfo, > return vect_init_vector (vinfo, stmt_info, merge, vectype, NULL); > } > > +/* Return the corresponding else value for an else value constant > + ELSVAL with type TYPE. 
*/ > + > +tree > +vect_get_mask_load_else (int elsval, tree type) > +{ > + tree els; > + if (elsval == MASK_LOAD_ELSE_UNDEFINED) > + { > + tree tmp = create_tmp_var (type); > + /* No need to warn about anything. */ > + TREE_NO_WARNING (tmp) = 1; > + els = get_or_create_ssa_default_def (cfun, tmp); > + } > + else if (elsval == MASK_LOAD_ELSE_M1) > + els = build_minus_one_cst (type); > + else if (elsval == MASK_LOAD_ELSE_ZERO) > + els = build_zero_cst (type); > + else > + gcc_unreachable (); > + > + return els; > +} > + > /* Build a gather load call while vectorizing STMT_INFO. Insert new > instructions before GSI and add them to VEC_STMT. GS_INFO describes > the gather load operation. If the load is conditional, MASK is the > @@ -9989,6 +10076,7 @@ vectorizable_load (vec_info *vinfo, > gather_scatter_info gs_info; > tree ref_type; > enum vect_def_type mask_dt = vect_unknown_def_type; > + enum vect_def_type els_dt = vect_unknown_def_type; > > if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo) > return false; > @@ -10001,8 +10089,12 @@ vectorizable_load (vec_info *vinfo, > return false; > > tree mask = NULL_TREE, mask_vectype = NULL_TREE; > + tree els = NULL_TREE; tree els_vectype = NULL_TREE; > + > int mask_index = -1; > + int els_index = -1; > slp_tree slp_op = NULL; > + slp_tree els_op = NULL; > if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt)) > { > scalar_dest = gimple_assign_lhs (assign); > @@ -10042,6 +10134,15 @@ vectorizable_load (vec_info *vinfo, > && !vect_check_scalar_mask (vinfo, stmt_info, slp_node, mask_index, > &mask, &slp_op, &mask_dt, &mask_vectype)) > return false; > + > + els_index = internal_fn_else_index (ifn); > + if (els_index >= 0 && slp_node) > + els_index = vect_slp_child_index_for_operand > + (call, els_index, STMT_VINFO_GATHER_SCATTER_P (stmt_info)); > + if (els_index >= 0 > + && !vect_is_simple_use (vinfo, stmt_info, slp_node, els_index, > + &els, &els_op, &els_dt, &els_vectype)) > + return false; > } > > tree vectype = STMT_VINFO_VECTYPE (stmt_info); > @@ -10144,12 +10245,23 @@ vectorizable_load (vec_info *vinfo, > int misalignment; > poly_int64 poffset; > internal_fn lanes_ifn; > + auto_vec<int> elsvals; > + int maskload_elsval = 0; > + bool need_zeroing = false; > if (!get_load_store_type (vinfo, stmt_info, vectype, slp_node, mask, > VLS_LOAD, > ncopies, &memory_access_type, &poffset, > &alignment_support_scheme, &misalignment, &gs_info, > - &lanes_ifn)) > + &lanes_ifn, &elsvals)) > return false; > > + > + /* We might need to explicitly zero inactive elements if there are > + padding bits in the type that might leak otherwise. > + Refer to PR115336. */ > + tree scalar_type = TREE_TYPE (scalar_dest); > + bool type_mode_padding_p > + = TYPE_PRECISION (scalar_type) < GET_MODE_PRECISION (GET_MODE_INNER > (mode)); > + > /* ??? The following checks should really be part of > get_group_load_store_type. 
*/ > if (slp > @@ -10213,7 +10325,8 @@ vectorizable_load (vec_info *vinfo, > machine_mode vec_mode = TYPE_MODE (vectype); > if (!VECTOR_MODE_P (vec_mode) > || !can_vec_mask_load_store_p (vec_mode, > - TYPE_MODE (mask_vectype), true)) > + TYPE_MODE (mask_vectype), > + true, NULL, &elsvals)) > return false; > } > else if (memory_access_type != VMAT_LOAD_STORE_LANES > @@ -10268,7 +10381,7 @@ vectorizable_load (vec_info *vinfo, > check_load_store_for_partial_vectors (loop_vinfo, vectype, slp_node, > VLS_LOAD, group_size, > memory_access_type, &gs_info, > - mask); > + mask, &elsvals); > > if (dump_enabled_p () > && memory_access_type != VMAT_ELEMENTWISE > @@ -10282,6 +10395,36 @@ vectorizable_load (vec_info *vinfo, > > STMT_VINFO_TYPE (stmt_info) = load_vec_info_type; > } > + else > + { > + /* Here just get the else values. */ > + if (loop_vinfo > + && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo)) > + check_load_store_for_partial_vectors (loop_vinfo, vectype, slp_node, > + VLS_LOAD, group_size, > + memory_access_type, &gs_info, > + mask, &elsvals); > + } > + > + /* If the type needs padding we must zero inactive elements. > + Check if we can do that with a VEC_COND_EXPR and store the > + elsval we choose in MASKLOAD_ELSVAL. */ > + if (elsvals.length () > + && type_mode_padding_p > + && !elsvals.contains (MASK_LOAD_ELSE_ZERO) > + && !expand_vec_cond_expr_p (vectype, truth_type_for (vectype))) > + { > + if (dump_enabled_p ()) > + dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, > + "cannot zero inactive elements.\n"); > + return false; > + } > + > + /* For now just use the first available else value. > + get_supported_else_vals tries MASK_LOAD_ELSE_ZERO first so we will > + select it here if it is supported. */ > + if (elsvals.length ()) > + maskload_elsval = *elsvals.begin (); > > if (!slp) > gcc_assert (memory_access_type > @@ -10952,6 +11095,7 @@ vectorizable_load (vec_info *vinfo, > } > > tree vec_mask = NULL_TREE; > + tree vec_els = NULL_TREE; > if (memory_access_type == VMAT_LOAD_STORE_LANES) > { > gcc_assert (alignment_support_scheme == dr_aligned > @@ -11042,6 +11186,14 @@ vectorizable_load (vec_info *vinfo, > } > } > > + if (final_mask) > + { > + vec_els = vect_get_mask_load_else (maskload_elsval, vectype); > + if (type_mode_padding_p > + && maskload_elsval != MASK_LOAD_ELSE_ZERO) > + need_zeroing = true; > + } > + > gcall *call; > if (final_len && final_mask) > { > @@ -11050,9 +11202,10 @@ vectorizable_load (vec_info *vinfo, > VEC_MASK, LEN, BIAS). */ > unsigned int align = TYPE_ALIGN (TREE_TYPE (vectype)); > tree alias_ptr = build_int_cst (ref_type, align); > - call = gimple_build_call_internal (IFN_MASK_LEN_LOAD_LANES, 5, > + call = gimple_build_call_internal (IFN_MASK_LEN_LOAD_LANES, 6, > dataref_ptr, alias_ptr, > - final_mask, final_len, bias); > + final_mask, vec_els, > + final_len, bias); > } > else if (final_mask) > { > @@ -11061,9 +11214,9 @@ vectorizable_load (vec_info *vinfo, > VEC_MASK). 
*/ > unsigned int align = TYPE_ALIGN (TREE_TYPE (vectype)); > tree alias_ptr = build_int_cst (ref_type, align); > - call = gimple_build_call_internal (IFN_MASK_LOAD_LANES, 3, > + call = gimple_build_call_internal (IFN_MASK_LOAD_LANES, 4, > dataref_ptr, alias_ptr, > - final_mask); > + final_mask, vec_els); > } > else > { > @@ -11082,7 +11235,8 @@ vectorizable_load (vec_info *vinfo, > for (unsigned i = 0; i < group_size; i++) > { > new_temp = read_vector_array (vinfo, stmt_info, gsi, scalar_dest, > - vec_array, i); > + vec_array, i, need_zeroing, > + final_mask); > if (slp) > slp_node->push_vec_def (new_temp); > else > @@ -11212,25 +11366,36 @@ vectorizable_load (vec_info *vinfo, > } > } > > + if (final_mask) > + { > + vec_els = vect_get_mask_load_else > + (maskload_elsval, vectype); > + if (type_mode_padding_p > + && maskload_elsval != MASK_LOAD_ELSE_ZERO) > + need_zeroing = true; > + } > + > gcall *call; > if (final_len && final_mask) > { > if (VECTOR_TYPE_P (TREE_TYPE (vec_offset))) > call = gimple_build_call_internal ( > - IFN_MASK_LEN_GATHER_LOAD, 7, dataref_ptr, vec_offset, > - scale, zero, final_mask, final_len, bias); > + IFN_MASK_LEN_GATHER_LOAD, 8, dataref_ptr, vec_offset, > + scale, zero, final_mask, vec_els, final_len, bias); > else > /* Non-vector offset indicates that prefer to take > MASK_LEN_STRIDED_LOAD instead of the > MASK_LEN_GATHER_LOAD with direct stride arg. */ > call = gimple_build_call_internal ( > - IFN_MASK_LEN_STRIDED_LOAD, 6, dataref_ptr, vec_offset, > - zero, final_mask, final_len, bias); > + IFN_MASK_LEN_STRIDED_LOAD, 7, dataref_ptr, vec_offset, > + zero, final_mask, vec_els, final_len, bias); > } > else if (final_mask) > - call = gimple_build_call_internal (IFN_MASK_GATHER_LOAD, 5, > - dataref_ptr, vec_offset, > - scale, zero, final_mask); > + call = gimple_build_call_internal (IFN_MASK_GATHER_LOAD, > + 6, dataref_ptr, > + vec_offset, scale, > + zero, final_mask, > + vec_els); > else > call = gimple_build_call_internal (IFN_GATHER_LOAD, 4, > dataref_ptr, vec_offset, > @@ -11441,10 +11606,28 @@ vectorizable_load (vec_info *vinfo, > vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr)); > new_stmt = gimple_build_assign (vec_dest, data_ref); > } > - new_temp = make_ssa_name (vec_dest, new_stmt); > + new_temp = need_zeroing > + ? make_ssa_name (vectype) > + : make_ssa_name (vec_dest, new_stmt); > gimple_set_lhs (new_stmt, new_temp); > vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi); > > + /* If we need to explicitly zero inactive elements emit a > + VEC_COND_EXPR that does so. */ > + if (need_zeroing) > + { > + vec_els = vect_get_mask_load_else (MASK_LOAD_ELSE_ZERO, > + vectype); > + > + tree new_temp2 = make_ssa_name (vec_dest, new_stmt); > + new_stmt > + = gimple_build_assign (new_temp2, VEC_COND_EXPR, > + final_mask, new_temp, vec_els); > + vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, > + gsi); > + new_temp = new_temp2; > + } > + > /* Store vector loads in the corresponding SLP_NODE. 
*/ > if (slp) > slp_node->push_vec_def (new_stmt); > @@ -11544,6 +11727,7 @@ vectorizable_load (vec_info *vinfo, > tree final_mask = NULL_TREE; > tree final_len = NULL_TREE; > tree bias = NULL_TREE; > + > if (!costing_p) > { > if (mask) > @@ -11636,15 +11820,24 @@ vectorizable_load (vec_info *vinfo, > bias = build_int_cst (intQI_type_node, biasval); > } > > + tree vec_els; > + > if (final_len) > { > tree ptr = build_int_cst (ref_type, align * BITS_PER_UNIT); > gcall *call; > if (partial_ifn == IFN_MASK_LEN_LOAD) > - call = gimple_build_call_internal (IFN_MASK_LEN_LOAD, 5, > - dataref_ptr, ptr, > - final_mask, final_len, > - bias); > + { > + vec_els = vect_get_mask_load_else > + (maskload_elsval, vectype); > + if (type_mode_padding_p > + && maskload_elsval != MASK_LOAD_ELSE_ZERO) > + need_zeroing = true; > + call = gimple_build_call_internal (IFN_MASK_LEN_LOAD, > + 6, dataref_ptr, ptr, > + final_mask, vec_els, > + final_len, bias); > + } > else > call = gimple_build_call_internal (IFN_LEN_LOAD, 4, > dataref_ptr, ptr, > @@ -11671,9 +11864,15 @@ vectorizable_load (vec_info *vinfo, > else if (final_mask) > { > tree ptr = build_int_cst (ref_type, align * BITS_PER_UNIT); > - gcall *call = gimple_build_call_internal (IFN_MASK_LOAD, 3, > + vec_els = vect_get_mask_load_else > + (maskload_elsval, vectype); > + if (type_mode_padding_p > + && maskload_elsval != MASK_LOAD_ELSE_ZERO) > + need_zeroing = true; > + gcall *call = gimple_build_call_internal (IFN_MASK_LOAD, 4, > dataref_ptr, ptr, > - final_mask); > + final_mask, > + vec_els); > gimple_call_set_nothrow (call, true); > new_stmt = call; > data_ref = NULL_TREE; > @@ -11954,9 +12153,28 @@ vectorizable_load (vec_info *vinfo, > vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr)); > new_stmt = gimple_build_assign (vec_dest, data_ref); > } > - new_temp = make_ssa_name (vec_dest, new_stmt); > + > + new_temp = need_zeroing > + ? make_ssa_name (vectype) > + : make_ssa_name (vec_dest, new_stmt); > gimple_set_lhs (new_stmt, new_temp); > vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi); > + > + /* If we need to explicitly zero inactive elements emit a > + VEC_COND_EXPR that does so. */ > + if (need_zeroing) > + { > + vec_els = vect_get_mask_load_else (MASK_LOAD_ELSE_ZERO, > + vectype); > + > + tree new_temp2 = make_ssa_name (vec_dest, new_stmt); > + new_stmt > + = gimple_build_assign (new_temp2, VEC_COND_EXPR, > + final_mask, new_temp, vec_els); > + vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, > + gsi); > + new_temp = new_temp2; > + } > } > > /* 3. Handle explicit realignment if necessary/supported. 
> diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h > index 24227a69d4a..0bd759a92ea 100644 > --- a/gcc/tree-vectorizer.h > +++ b/gcc/tree-vectorizer.h > @@ -2418,9 +2418,11 @@ extern bool vect_slp_analyze_instance_alignment > (vec_info *, slp_instance); > extern opt_result vect_analyze_data_ref_accesses (vec_info *, vec<int> *); > extern opt_result vect_prune_runtime_alias_test_list (loop_vec_info); > extern bool vect_gather_scatter_fn_p (vec_info *, bool, bool, tree, tree, > - tree, int, internal_fn *, tree *); > + tree, int, internal_fn *, tree *, > + vec<int> * = nullptr); > extern bool vect_check_gather_scatter (stmt_vec_info, loop_vec_info, > - gather_scatter_info *); > + gather_scatter_info *, > + vec<int> * = nullptr); > extern opt_result vect_find_stmt_data_reference (loop_p, gimple *, > vec<data_reference_p> *, > vec<int> *, int); > @@ -2438,7 +2440,8 @@ extern tree vect_create_destination_var (tree, tree); > extern bool vect_grouped_store_supported (tree, unsigned HOST_WIDE_INT); > extern internal_fn vect_store_lanes_supported (tree, unsigned HOST_WIDE_INT, > bool); > extern bool vect_grouped_load_supported (tree, bool, unsigned HOST_WIDE_INT); > -extern internal_fn vect_load_lanes_supported (tree, unsigned HOST_WIDE_INT, > bool); > +extern internal_fn vect_load_lanes_supported (tree, unsigned HOST_WIDE_INT, > + bool, vec<int> * = nullptr); > extern void vect_permute_store_chain (vec_info *, vec<tree> &, > unsigned int, stmt_vec_info, > gimple_stmt_iterator *, vec<tree> *); > @@ -2584,6 +2587,7 @@ extern int vect_slp_child_index_for_operand (const > gimple *, int op, bool); > > extern tree prepare_vec_mask (loop_vec_info, tree, tree, tree, > gimple_stmt_iterator *); > +extern tree vect_get_mask_load_else (int, tree); > > /* In tree-vect-patterns.cc. */ > extern void > -- Richard Biener <rguent...@suse.de> SUSE Software Solutions Germany GmbH, Frankenstrasse 146, 90461 Nuernberg, Germany; GF: Ivo Totev, Andrew McDonald, Werner Knoblich; (HRB 36809, AG Nuernberg)