On Thu, 7 Nov 2024, Robin Dapp wrote:

> From: Robin Dapp <rd...@ventanamicro.com>
> 
> This patch adds an else operand to vectorized masked load calls.
> The implementation adds else-value arguments to the respective
> target-querying functions, which are used to supply the vectorizer
> with the proper else value.
> 
> We query the target for its supported else operand and use that for
> the maskload call.  If necessary, i.e. if the mode has padding bits
> and the else operand is nonzero, a VEC_COND enforcing a zero else
> value is emitted.
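> 
> As a gimple-level sketch (names purely illustrative), on a target
> whose maskload pattern only provides an undefined else value, a
> masked load of a type with padding bits would become
> 
>   vect_1 = .MASK_LOAD (ptr, align, mask, els_2(D));
>   vect_3 = VEC_COND_EXPR <mask, vect_1, { 0, ... }>;
> 
> while a target that supports a zero else value directly just gets
> 
>   vect_1 = .MASK_LOAD (ptr, align, mask, { 0, ... });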

LGTM.

Richard.

> gcc/ChangeLog:
> 
>       * optabs-query.cc (supports_vec_convert_optab_p): Rename to
>       supported_vec_convert_optab and return icode.
>       (get_supported_else_vals): Return supported else values for
>       optab's operand at index.
>       (supports_vec_gather_load_p): Add else argument.
>       (supports_vec_scatter_store_p): Ditto.
>       * optabs-query.h (supports_vec_gather_load_p): Ditto.
>       (get_supported_else_vals): Ditto.
>       * optabs-tree.cc (target_supports_mask_load_store_p): Ditto.
>       (can_vec_mask_load_store_p): Ditto.
>       (target_supports_len_load_store_p): Ditto.
>       (get_len_load_store_mode): Ditto.
>       * optabs-tree.h (target_supports_mask_load_store_p): Ditto.
>       (can_vec_mask_load_store_p): Ditto.
>       * tree-vect-data-refs.cc (vect_lanes_optab_supported_p): Ditto.
>       (vect_gather_scatter_fn_p): Ditto.
>       (vect_check_gather_scatter): Ditto.
>       (vect_load_lanes_supported): Ditto.
>       * tree-vect-patterns.cc (vect_recog_gather_scatter_pattern):
>       Ditto.
>       * tree-vect-slp.cc (vect_get_operand_map): Adjust indices for
>       else operand.
>       (vect_slp_analyze_node_operations): Skip undefined else operand.
>       * tree-vect-stmts.cc (exist_non_indexing_operands_for_use_p):
>       Add else operand handling.
>       (vect_get_vec_defs_for_operand): Handle undefined else operand.
>       (check_load_store_for_partial_vectors): Add else argument.
>       (vect_truncate_gather_scatter_offset): Ditto.
>       (vect_use_strided_gather_scatters_p): Ditto.
>       (get_group_load_store_type): Ditto.
>       (get_load_store_type): Ditto.
>       (vect_get_mask_load_else): New function.
>       (vect_get_else_val_from_tree): New function.
>       (vect_build_one_gather_load_call): Add zero else operand.
>       (vectorizable_load): Use else operand.
>       * tree-vectorizer.h (vect_gather_scatter_fn_p): Add else
>       argument.
>       (vect_load_lanes_supported): Ditto.
>       (vect_get_mask_load_else): Ditto.
>       (vect_get_else_val_from_tree): Ditto.
> 
> ---
>  gcc/optabs-query.cc        |  70 +++++---
>  gcc/optabs-query.h         |   3 +-
>  gcc/optabs-tree.cc         |  66 ++++++--
>  gcc/optabs-tree.h          |   8 +-
>  gcc/tree-vect-data-refs.cc |  74 ++++++---
>  gcc/tree-vect-patterns.cc  |  12 +-
>  gcc/tree-vect-slp.cc       |  25 ++-
>  gcc/tree-vect-stmts.cc     | 326 +++++++++++++++++++++++++++++++------
>  gcc/tree-vectorizer.h      |  10 +-
>  9 files changed, 468 insertions(+), 126 deletions(-)
> 
> diff --git a/gcc/optabs-query.cc b/gcc/optabs-query.cc
> index cc52bc0f5ea..c1f3558af92 100644
> --- a/gcc/optabs-query.cc
> +++ b/gcc/optabs-query.cc
> @@ -29,6 +29,9 @@ along with GCC; see the file COPYING3.  If not see
>  #include "rtl.h"
>  #include "recog.h"
>  #include "vec-perm-indices.h"
> +#include "internal-fn.h"
> +#include "memmodel.h"
> +#include "optabs.h"
>  
>  struct target_optabs default_target_optabs;
>  struct target_optabs *this_fn_optabs = &default_target_optabs;
> @@ -672,34 +675,57 @@ lshift_cheap_p (bool speed_p)
>     that mode, given that the second mode is always an integer vector.
>     If MODE is VOIDmode, return true if OP supports any vector mode.  */
>  
> -static bool
> -supports_vec_convert_optab_p (optab op, machine_mode mode)
> +static enum insn_code
> +supported_vec_convert_optab (optab op, machine_mode mode)
>  {
>    int start = mode == VOIDmode ? 0 : mode;
>    int end = mode == VOIDmode ? MAX_MACHINE_MODE - 1 : mode;
> +  enum insn_code icode = CODE_FOR_nothing;
>    for (int i = start; i <= end; ++i)
>      if (VECTOR_MODE_P ((machine_mode) i))
>        for (int j = MIN_MODE_VECTOR_INT; j < MAX_MODE_VECTOR_INT; ++j)
> -     if (convert_optab_handler (op, (machine_mode) i,
> -                                (machine_mode) j) != CODE_FOR_nothing)
> -       return true;
> +     {
> +       if ((icode
> +            = convert_optab_handler (op, (machine_mode) i,
> +                                     (machine_mode) j)) != CODE_FOR_nothing)
> +         return icode;
> +     }
>  
> -  return false;
> +  return icode;
>  }
>  
>  /* If MODE is not VOIDmode, return true if vec_gather_load is available for
>     that mode.  If MODE is VOIDmode, return true if gather_load is available
> -   for at least one vector mode.  */
> +   for at least one vector mode.
> +   In that case, and if ELSVALS is nonzero, store the supported else values
> +   into the vector it points to.  */
>  
>  bool
> -supports_vec_gather_load_p (machine_mode mode)
> +supports_vec_gather_load_p (machine_mode mode, vec<int> *elsvals)
>  {
> -  if (!this_fn_optabs->supports_vec_gather_load[mode])
> -    this_fn_optabs->supports_vec_gather_load[mode]
> -      = (supports_vec_convert_optab_p (gather_load_optab, mode)
> -      || supports_vec_convert_optab_p (mask_gather_load_optab, mode)
> -      || supports_vec_convert_optab_p (mask_len_gather_load_optab, mode)
> -      ? 1 : -1);
> +  enum insn_code icode = CODE_FOR_nothing;
> +  if (!this_fn_optabs->supports_vec_gather_load[mode] || elsvals)
> +    {
> +      /* Try the masked variants first.  In case we later decide that we
> +      need a mask after all (thus requiring an else operand) we need
> +      to query it below and we cannot do that when using the
> +      non-masked optab.  */
> +      icode = supported_vec_convert_optab (mask_gather_load_optab, mode);
> +      if (icode == CODE_FOR_nothing)
> +     icode = supported_vec_convert_optab (mask_len_gather_load_optab, mode);
> +      if (icode == CODE_FOR_nothing)
> +     icode = supported_vec_convert_optab (gather_load_optab, mode);
> +      this_fn_optabs->supports_vec_gather_load[mode]
> +     = (icode != CODE_FOR_nothing) ? 1 : -1;
> +    }
> +
> +  /* For gather the optab's operand indices do not match the IFN's because
> +     the latter does not have the extension operand (operand 3).  It is
> +     implicitly added during expansion so we use the IFN's else index + 1.
> +     */
> +  if (elsvals && icode != CODE_FOR_nothing)
> +    get_supported_else_vals
> +      (icode, internal_fn_else_index (IFN_MASK_GATHER_LOAD) + 1, *elsvals);
>  
>    return this_fn_optabs->supports_vec_gather_load[mode] > 0;
>  }
> @@ -711,12 +737,18 @@ supports_vec_gather_load_p (machine_mode mode)
>  bool
>  supports_vec_scatter_store_p (machine_mode mode)
>  {
> +  enum insn_code icode;
>    if (!this_fn_optabs->supports_vec_scatter_store[mode])
> -    this_fn_optabs->supports_vec_scatter_store[mode]
> -      = (supports_vec_convert_optab_p (scatter_store_optab, mode)
> -      || supports_vec_convert_optab_p (mask_scatter_store_optab, mode)
> -      || supports_vec_convert_optab_p (mask_len_scatter_store_optab, mode)
> -      ? 1 : -1);
> +    {
> +      icode = supported_vec_convert_optab (scatter_store_optab, mode);
> +      if (icode == CODE_FOR_nothing)
> +     icode = supported_vec_convert_optab (mask_scatter_store_optab, mode);
> +      if (icode == CODE_FOR_nothing)
> +     icode = supported_vec_convert_optab (mask_len_scatter_store_optab,
> +                                           mode);
> +      this_fn_optabs->supports_vec_scatter_store[mode]
> +     = (icode != CODE_FOR_nothing) ? 1 : -1;
> +    }
>  
>    return this_fn_optabs->supports_vec_scatter_store[mode] > 0;
>  }
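
For readers following along, a caller-side sketch (variable names
illustrative, based on the signatures above): a consumer interested in
the else values passes a vector and inspects it afterwards.

  auto_vec<int> elsvals;
  /* On success ELSVALS holds the MASK_LOAD_ELSE_* constants the
     target's (masked) gather patterns accept.  */
  if (supports_vec_gather_load_p (TYPE_MODE (vectype), &elsvals)
      && !elsvals.is_empty ())
    ...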
> diff --git a/gcc/optabs-query.h b/gcc/optabs-query.h
> index 0cb2c21ba85..f38b1e5d5bb 100644
> --- a/gcc/optabs-query.h
> +++ b/gcc/optabs-query.h
> @@ -191,7 +191,8 @@ bool can_compare_and_swap_p (machine_mode, bool);
>  bool can_atomic_exchange_p (machine_mode, bool);
>  bool can_atomic_load_p (machine_mode);
>  bool lshift_cheap_p (bool);
> -bool supports_vec_gather_load_p (machine_mode = E_VOIDmode);
> +bool supports_vec_gather_load_p (machine_mode = E_VOIDmode,
> +                              vec<int> * = nullptr);
>  bool supports_vec_scatter_store_p (machine_mode = E_VOIDmode);
>  bool can_vec_extract (machine_mode, machine_mode);
>  
> diff --git a/gcc/optabs-tree.cc b/gcc/optabs-tree.cc
> index b69a5bc3676..3d2d782ea32 100644
> --- a/gcc/optabs-tree.cc
> +++ b/gcc/optabs-tree.cc
> @@ -29,6 +29,7 @@ along with GCC; see the file COPYING3.  If not see
>  #include "optabs.h"
>  #include "optabs-tree.h"
>  #include "stor-layout.h"
> +#include "internal-fn.h"
>  
>  /* Return the optab used for computing the operation given by the tree code,
>     CODE and the tree EXP.  This function is not always usable (for example, it
> @@ -552,24 +553,38 @@ target_supports_op_p (tree type, enum tree_code code,
>     or mask_len_{load,store}.
>     This helper function checks whether target supports masked
>     load/store and return corresponding IFN in the last argument
> -   (IFN_MASK_{LOAD,STORE} or IFN_MASK_LEN_{LOAD,STORE}).  */
> +   (IFN_MASK_{LOAD,STORE} or IFN_MASK_LEN_{LOAD,STORE}).
> +   If there is support and ELSVALS is nonzero store the possible else values
> +   in the vector it points to.  */
>  
> -static bool
> +bool
>  target_supports_mask_load_store_p (machine_mode mode, machine_mode mask_mode,
> -                                bool is_load, internal_fn *ifn)
> +                                bool is_load, internal_fn *ifn,
> +                                vec<int> *elsvals)
>  {
>    optab op = is_load ? maskload_optab : maskstore_optab;
>    optab len_op = is_load ? mask_len_load_optab : mask_len_store_optab;
> -  if (convert_optab_handler (op, mode, mask_mode) != CODE_FOR_nothing)
> +  enum insn_code icode;
> +  if ((icode = convert_optab_handler (op, mode, mask_mode))
> +      != CODE_FOR_nothing)
>      {
>        if (ifn)
>       *ifn = is_load ? IFN_MASK_LOAD : IFN_MASK_STORE;
> +      if (elsvals && is_load)
> +     get_supported_else_vals (icode,
> +                              internal_fn_else_index (IFN_MASK_LOAD),
> +                              *elsvals);
>        return true;
>      }
> -  else if (convert_optab_handler (len_op, mode, mask_mode) != CODE_FOR_nothing)
> +  else if ((icode = convert_optab_handler (len_op, mode, mask_mode))
> +        != CODE_FOR_nothing)
>      {
>        if (ifn)
>       *ifn = is_load ? IFN_MASK_LEN_LOAD : IFN_MASK_LEN_STORE;
> +      if (elsvals && is_load)
> +     get_supported_else_vals (icode,
> +                              internal_fn_else_index (IFN_MASK_LEN_LOAD),
> +                              *elsvals);
>        return true;
>      }
>    return false;
> @@ -578,19 +593,23 @@ target_supports_mask_load_store_p (machine_mode mode, machine_mode mask_mode,
>  /* Return true if target supports vector masked load/store for mode.
>     An additional output in the last argument which is the IFN pointer.
>     We set IFN as MASK_{LOAD,STORE} or MASK_LEN_{LOAD,STORE} according
> -   which optab is supported in the target.  */
> +   which optab is supported in the target.
> +   If there is support and ELSVALS is nonzero store the possible else values
> +   in the vector it points to.  */
>  
>  bool
>  can_vec_mask_load_store_p (machine_mode mode,
>                          machine_mode mask_mode,
>                          bool is_load,
> -                        internal_fn *ifn)
> +                        internal_fn *ifn,
> +                        vec<int> *elsvals)
>  {
>    machine_mode vmode;
>  
>    /* If mode is vector mode, check it directly.  */
>    if (VECTOR_MODE_P (mode))
> -    return target_supports_mask_load_store_p (mode, mask_mode, is_load, ifn);
> +    return target_supports_mask_load_store_p (mode, mask_mode, is_load, ifn,
> +                                           elsvals);
>  
>    /* Otherwise, return true if there is some vector mode with
>       the mask load/store supported.  */
> @@ -604,7 +623,8 @@ can_vec_mask_load_store_p (machine_mode mode,
>    vmode = targetm.vectorize.preferred_simd_mode (smode);
>    if (VECTOR_MODE_P (vmode)
>        && targetm.vectorize.get_mask_mode (vmode).exists (&mask_mode)
> -      && target_supports_mask_load_store_p (vmode, mask_mode, is_load, ifn))
> +      && target_supports_mask_load_store_p (vmode, mask_mode, is_load, ifn,
> +                                         elsvals))
>      return true;
>  
>    auto_vector_modes vector_modes;
> @@ -612,7 +632,8 @@ can_vec_mask_load_store_p (machine_mode mode,
>    for (machine_mode base_mode : vector_modes)
>      if (related_vector_mode (base_mode, smode).exists (&vmode)
>       && targetm.vectorize.get_mask_mode (vmode).exists (&mask_mode)
> -     && target_supports_mask_load_store_p (vmode, mask_mode, is_load, ifn))
> +     && target_supports_mask_load_store_p (vmode, mask_mode, is_load, ifn,
> +                                           elsvals))
>        return true;
>    return false;
>  }
> @@ -622,11 +643,13 @@ can_vec_mask_load_store_p (machine_mode mode,
>     or mask_len_{load,store}.
>     This helper function checks whether target supports len
>     load/store and return corresponding IFN in the last argument
> -   (IFN_LEN_{LOAD,STORE} or IFN_MASK_LEN_{LOAD,STORE}).  */
> +   (IFN_LEN_{LOAD,STORE} or IFN_MASK_LEN_{LOAD,STORE}).
> +   If there is support and ELSVALS is nonzero store the possible
> +   else values in the vector it points to.  */
>  
>  static bool
>  target_supports_len_load_store_p (machine_mode mode, bool is_load,
> -                               internal_fn *ifn)
> +                               internal_fn *ifn, vec<int> *elsvals)
>  {
>    optab op = is_load ? len_load_optab : len_store_optab;
>    optab masked_op = is_load ? mask_len_load_optab : mask_len_store_optab;
> @@ -638,11 +661,17 @@ target_supports_len_load_store_p (machine_mode mode, bool is_load,
>        return true;
>      }
>    machine_mode mask_mode;
> +  enum insn_code icode;
>    if (targetm.vectorize.get_mask_mode (mode).exists (&mask_mode)
> -      && convert_optab_handler (masked_op, mode, mask_mode) != CODE_FOR_nothing)
> +      && ((icode = convert_optab_handler (masked_op, mode, mask_mode))
> +       != CODE_FOR_nothing))
>      {
>        if (ifn)
>       *ifn = is_load ? IFN_MASK_LEN_LOAD : IFN_MASK_LEN_STORE;
> +      if (elsvals && is_load)
> +     get_supported_else_vals (icode,
> +                              internal_fn_else_index (IFN_MASK_LEN_LOAD),
> +                              *elsvals);
>        return true;
>      }
>    return false;
> @@ -656,22 +685,25 @@ target_supports_len_load_store_p (machine_mode mode, bool is_load,
>     VnQI to wrap the other supportable same size vector modes.
>     An additional output in the last argument which is the IFN pointer.
>     We set IFN as LEN_{LOAD,STORE} or MASK_LEN_{LOAD,STORE} according
> -   which optab is supported in the target.  */
> +   which optab is supported in the target.
> +   If there is support and ELSVALS is nonzero store the possible else values
> +   in the vector it points to.  */
>  
>  opt_machine_mode
> -get_len_load_store_mode (machine_mode mode, bool is_load, internal_fn *ifn)
> +get_len_load_store_mode (machine_mode mode, bool is_load, internal_fn *ifn,
> +                      vec<int> *elsvals)
>  {
>    gcc_assert (VECTOR_MODE_P (mode));
>  
>    /* Check if length in lanes supported for this mode directly.  */
> -  if (target_supports_len_load_store_p (mode, is_load, ifn))
> +  if (target_supports_len_load_store_p (mode, is_load, ifn, elsvals))
>      return mode;
>  
>    /* Check if length in bytes supported for same vector size VnQI.  */
>    machine_mode vmode;
>    poly_uint64 nunits = GET_MODE_SIZE (mode);
>    if (related_vector_mode (mode, QImode, nunits).exists (&vmode)
> -      && target_supports_len_load_store_p (vmode, is_load, ifn))
> +      && target_supports_len_load_store_p (vmode, is_load, ifn, elsvals))
>      return vmode;
>  
>    return opt_machine_mode ();
> diff --git a/gcc/optabs-tree.h b/gcc/optabs-tree.h
> index 85805fd8296..37102c94f0c 100644
> --- a/gcc/optabs-tree.h
> +++ b/gcc/optabs-tree.h
> @@ -47,9 +47,13 @@ bool expand_vec_cond_expr_p (tree, tree, enum tree_code = ERROR_MARK);
>  void init_tree_optimization_optabs (tree);
>  bool target_supports_op_p (tree, enum tree_code,
>                          enum optab_subtype = optab_default);
> +bool target_supports_mask_load_store_p (machine_mode, machine_mode,
> +                                bool, internal_fn *, vec<int> *);
>  bool can_vec_mask_load_store_p (machine_mode, machine_mode, bool,
> -                             internal_fn * = nullptr);
> +                             internal_fn * = nullptr,
> +                             vec<int> * = nullptr);
>  opt_machine_mode get_len_load_store_mode (machine_mode, bool,
> -                                       internal_fn * = nullptr);
> +                                       internal_fn * = nullptr,
> +                                       vec<int> * = nullptr);
>  
>  #endif
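
Note that the new parameters default to nullptr, so existing callers
compile unchanged; only callers that care about else values opt in,
e.g. (sketch, names illustrative):

  internal_fn ifn;
  auto_vec<int> elsvals;
  if (can_vec_mask_load_store_p (vmode, mask_mode, /*is_load=*/true,
                                 &ifn, &elsvals))
    /* elsvals now lists the else values the chosen optab supports.  */
    ...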
> diff --git a/gcc/tree-vect-data-refs.cc b/gcc/tree-vect-data-refs.cc
> index 54ad5c8f3dc..f57c6750166 100644
> --- a/gcc/tree-vect-data-refs.cc
> +++ b/gcc/tree-vect-data-refs.cc
> @@ -55,13 +55,18 @@ along with GCC; see the file COPYING3.  If not see
>  #include "vec-perm-indices.h"
>  #include "internal-fn.h"
>  #include "gimple-fold.h"
> +#include "optabs-query.h"
>  
>  /* Return true if load- or store-lanes optab OPTAB is implemented for
> -   COUNT vectors of type VECTYPE.  NAME is the name of OPTAB.  */
> +   COUNT vectors of type VECTYPE.  NAME is the name of OPTAB.
> +
> +   If it is implemented and ELSVALS is nonzero store the possible else
> +   values in the vector it points to.  */
>  
>  static bool
>  vect_lanes_optab_supported_p (const char *name, convert_optab optab,
> -                           tree vectype, unsigned HOST_WIDE_INT count)
> +                           tree vectype, unsigned HOST_WIDE_INT count,
> +                           vec<int> *elsvals = nullptr)
>  {
>    machine_mode mode, array_mode;
>    bool limit_p;
> @@ -81,7 +86,9 @@ vect_lanes_optab_supported_p (const char *name, convert_optab optab,
>       }
>      }
>  
> -  if (convert_optab_handler (optab, array_mode, mode) == CODE_FOR_nothing)
> +  enum insn_code icode;
> +  if ((icode = convert_optab_handler (optab, array_mode, mode))
> +      == CODE_FOR_nothing)
>      {
>        if (dump_enabled_p ())
>       dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
> @@ -92,8 +99,13 @@ vect_lanes_optab_supported_p (const char *name, convert_optab optab,
>  
>    if (dump_enabled_p ())
>      dump_printf_loc (MSG_NOTE, vect_location,
> -                     "can use %s<%s><%s>\n", name, GET_MODE_NAME (array_mode),
> -                     GET_MODE_NAME (mode));
> +                  "can use %s<%s><%s>\n", name, GET_MODE_NAME (array_mode),
> +                  GET_MODE_NAME (mode));
> +
> +  if (elsvals)
> +    get_supported_else_vals (icode,
> +                          internal_fn_else_index (IFN_MASK_LEN_LOAD_LANES),
> +                          *elsvals);
>  
>    return true;
>  }
> @@ -4184,13 +4196,15 @@ vect_prune_runtime_alias_test_list (loop_vec_info loop_vinfo)
>     be multiplied *after* it has been converted to address width.
>  
>     Return true if the function is supported, storing the function id in
> -   *IFN_OUT and the vector type for the offset in *OFFSET_VECTYPE_OUT.  */
> +   *IFN_OUT and the vector type for the offset in *OFFSET_VECTYPE_OUT.
> +
> +   If we can use gather and store the possible else values in ELSVALS.  */
>  
>  bool
>  vect_gather_scatter_fn_p (vec_info *vinfo, bool read_p, bool masked_p,
>                         tree vectype, tree memory_type, tree offset_type,
>                         int scale, internal_fn *ifn_out,
> -                       tree *offset_vectype_out)
> +                       tree *offset_vectype_out, vec<int> *elsvals)
>  {
>    unsigned int memory_bits = tree_to_uhwi (TYPE_SIZE (memory_type));
>    unsigned int element_bits = vector_element_bits (vectype);
> @@ -4228,7 +4242,8 @@ vect_gather_scatter_fn_p (vec_info *vinfo, bool read_p, bool masked_p,
>  
>        /* Test whether the target supports this combination.  */
>        if (internal_gather_scatter_fn_supported_p (ifn, vectype, memory_type,
> -                                               offset_vectype, scale))
> +                                               offset_vectype, scale,
> +                                               elsvals))
>       {
>         *ifn_out = ifn;
>         *offset_vectype_out = offset_vectype;
> @@ -4238,7 +4253,7 @@ vect_gather_scatter_fn_p (vec_info *vinfo, bool read_p, bool masked_p,
>              && internal_gather_scatter_fn_supported_p (alt_ifn, vectype,
>                                                         memory_type,
>                                                         offset_vectype,
> -                                                       scale))
> +                                                       scale, elsvals))
>       {
>         *ifn_out = alt_ifn;
>         *offset_vectype_out = offset_vectype;
> @@ -4246,7 +4261,8 @@ vect_gather_scatter_fn_p (vec_info *vinfo, bool read_p, bool masked_p,
>       }
>        else if (internal_gather_scatter_fn_supported_p (alt_ifn2, vectype,
>                                                      memory_type,
> -                                                    offset_vectype, scale))
> +                                                    offset_vectype, scale,
> +                                                    elsvals))
>       {
>         *ifn_out = alt_ifn2;
>         *offset_vectype_out = offset_vectype;
> @@ -4285,11 +4301,13 @@ vect_describe_gather_scatter_call (stmt_vec_info stmt_info,
>  }
>  
>  /* Return true if a non-affine read or write in STMT_INFO is suitable for a
> -   gather load or scatter store.  Describe the operation in *INFO if so.  */
> +   gather load or scatter store.  Describe the operation in *INFO if so.
> +   If it is suitable and ELSVALS is nonzero store the supported else values
> +   in the vector it points to.  */
>  
>  bool
>  vect_check_gather_scatter (stmt_vec_info stmt_info, loop_vec_info loop_vinfo,
> -                        gather_scatter_info *info)
> +                        gather_scatter_info *info, vec<int> *elsvals)
>  {
>    HOST_WIDE_INT scale = 1;
>    poly_int64 pbitpos, pbitsize;
> @@ -4314,6 +4332,13 @@ vect_check_gather_scatter (stmt_vec_info stmt_info, loop_vec_info loop_vinfo,
>        if (internal_gather_scatter_fn_p (ifn))
>       {
>         vect_describe_gather_scatter_call (stmt_info, info);
> +
> +       /* In pattern recog we simply used a ZERO else value that
> +          we need to correct here.  To that end just re-use the
> +          (already successful) check if we support a gather IFN
> +          and have it populate the else values.  */
> +       if (DR_IS_READ (dr) && internal_fn_mask_index (ifn) >= 0 && elsvals)
> +         supports_vec_gather_load_p (TYPE_MODE (vectype), elsvals);
>         return true;
>       }
>        masked_p = (ifn == IFN_MASK_LOAD || ifn == IFN_MASK_STORE);
> @@ -4322,7 +4347,8 @@ vect_check_gather_scatter (stmt_vec_info stmt_info, loop_vec_info loop_vinfo,
>    /* True if we should aim to use internal functions rather than
>       built-in functions.  */
>    bool use_ifn_p = (DR_IS_READ (dr)
> -                 ? supports_vec_gather_load_p (TYPE_MODE (vectype))
> +                 ? supports_vec_gather_load_p (TYPE_MODE (vectype),
> +                                               elsvals)
>                   : supports_vec_scatter_store_p (TYPE_MODE (vectype)));
>  
>    base = DR_REF (dr);
> @@ -4479,12 +4505,14 @@ vect_check_gather_scatter (stmt_vec_info stmt_info, loop_vec_info loop_vinfo,
>                                               masked_p, vectype, memory_type,
>                                               signed_char_type_node,
>                                               new_scale, &ifn,
> -                                             &offset_vectype)
> +                                             &offset_vectype,
> +                                             elsvals)
>                 && !vect_gather_scatter_fn_p (loop_vinfo, DR_IS_READ (dr),
>                                               masked_p, vectype, memory_type,
>                                               unsigned_char_type_node,
>                                               new_scale, &ifn,
> -                                             &offset_vectype))
> +                                             &offset_vectype,
> +                                             elsvals))
>               break;
>             scale = new_scale;
>             off = op0;
> @@ -4507,7 +4535,7 @@ vect_check_gather_scatter (stmt_vec_info stmt_info, loop_vec_info loop_vinfo,
>             && vect_gather_scatter_fn_p (loop_vinfo, DR_IS_READ (dr),
>                                          masked_p, vectype, memory_type,
>                                          TREE_TYPE (off), scale, &ifn,
> -                                        &offset_vectype))
> +                                        &offset_vectype, elsvals))
>           break;
>  
>         if (TYPE_PRECISION (TREE_TYPE (op0))
> @@ -4561,7 +4589,7 @@ vect_check_gather_scatter (stmt_vec_info stmt_info, loop_vec_info loop_vinfo,
>      {
>        if (!vect_gather_scatter_fn_p (loop_vinfo, DR_IS_READ (dr), masked_p,
>                                    vectype, memory_type, offtype, scale,
> -                                  &ifn, &offset_vectype))
> +                                  &ifn, &offset_vectype, elsvals))
>       ifn = IFN_LAST;
>        decl = NULL_TREE;
>      }
> @@ -6398,27 +6426,29 @@ vect_grouped_load_supported (tree vectype, bool single_element_p,
>  }
>  
>  /* Return FN if vec_{masked_,mask_len_}load_lanes is available for COUNT vectors
> -   of type VECTYPE.  MASKED_P says whether the masked form is needed.  */
> +   of type VECTYPE.  MASKED_P says whether the masked form is needed.
> +   If it is available and ELSVALS is nonzero store the possible else values
> +   in the vector it points to.  */
>  
>  internal_fn
>  vect_load_lanes_supported (tree vectype, unsigned HOST_WIDE_INT count,
> -                        bool masked_p)
> +                        bool masked_p, vec<int> *elsvals)
>  {
>    if (vect_lanes_optab_supported_p ("vec_mask_len_load_lanes",
>                                   vec_mask_len_load_lanes_optab, vectype,
> -                                 count))
> +                                 count, elsvals))
>      return IFN_MASK_LEN_LOAD_LANES;
>    else if (masked_p)
>      {
>        if (vect_lanes_optab_supported_p ("vec_mask_load_lanes",
>                                       vec_mask_load_lanes_optab, vectype,
> -                                     count))
> +                                     count, elsvals))
>       return IFN_MASK_LOAD_LANES;
>      }
>    else
>      {
>        if (vect_lanes_optab_supported_p ("vec_load_lanes", vec_load_lanes_optab,
> -                                     vectype, count))
> +                                     vectype, count, elsvals))
>       return IFN_LOAD_LANES;
>      }
>    return IFN_LAST;
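
A caller that also wants the else values stays a one-liner (sketch):

  auto_vec<int> elsvals;
  internal_fn lanes_ifn
    = vect_load_lanes_supported (vectype, group_size, masked_p, &elsvals);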
> diff --git a/gcc/tree-vect-patterns.cc b/gcc/tree-vect-patterns.cc
> index a708234304f..eb0e5808f7f 100644
> --- a/gcc/tree-vect-patterns.cc
> +++ b/gcc/tree-vect-patterns.cc
> @@ -6021,12 +6021,20 @@ vect_recog_gather_scatter_pattern (vec_info *vinfo,
>    /* Build the new pattern statement.  */
>    tree scale = size_int (gs_info.scale);
>    gcall *pattern_stmt;
> +
>    if (DR_IS_READ (dr))
>      {
>        tree zero = build_zero_cst (gs_info.element_type);
>        if (mask != NULL)
> -     pattern_stmt = gimple_build_call_internal (gs_info.ifn, 5, base,
> -                                                offset, scale, zero, mask);
> +     {
> +       int elsval = MASK_LOAD_ELSE_ZERO;
> +
> +       tree vec_els
> +         = vect_get_mask_load_else (elsval, TREE_TYPE (gs_vectype));
> +       pattern_stmt = gimple_build_call_internal (gs_info.ifn, 6, base,
> +                                                  offset, scale, zero, mask,
> +                                                  vec_els);
> +     }
>        else
>       pattern_stmt = gimple_build_call_internal (gs_info.ifn, 4, base,
>                                                  offset, scale, zero);
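
Schematically, the pattern statement for a masked gather now carries
the else value as a sixth argument:

  lhs = .MASK_GATHER_LOAD (base, offset, scale, 0, mask, { 0, ... });

Pattern recog always uses zero here; vect_check_gather_scatter later
re-queries the target and the else value is corrected to a supported
one if necessary.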
> diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc
> index 97c362d24f8..2986cc3fc4c 100644
> --- a/gcc/tree-vect-slp.cc
> +++ b/gcc/tree-vect-slp.cc
> @@ -511,13 +511,13 @@ static const int cond_expr_maps[3][5] = {
>  static const int no_arg_map[] = { 0 };
>  static const int arg0_map[] = { 1, 0 };
>  static const int arg1_map[] = { 1, 1 };
> -static const int arg2_map[] = { 1, 2 };
> -static const int arg1_arg4_map[] = { 2, 1, 4 };
> +static const int arg2_arg3_map[] = { 2, 2, 3 };
> +static const int arg1_arg4_arg5_map[] = { 3, 1, 4, 5 };
>  static const int arg3_arg2_map[] = { 2, 3, 2 };
>  static const int op1_op0_map[] = { 2, 1, 0 };
>  static const int off_map[] = { 1, -3 };
>  static const int off_op0_map[] = { 2, -3, 0 };
> -static const int off_arg2_map[] = { 2, -3, 2 };
> +static const int off_arg2_arg3_map[] = { 3, -3, 2, 3 };
>  static const int off_arg3_arg2_map[] = { 3, -3, 3, 2 };
>  static const int mask_call_maps[6][7] = {
>    { 1, 1, },
> @@ -564,14 +564,14 @@ vect_get_operand_map (const gimple *stmt, bool gather_scatter_p = false,
>       switch (gimple_call_internal_fn (call))
>         {
>         case IFN_MASK_LOAD:
> -         return gather_scatter_p ? off_arg2_map : arg2_map;
> +         return gather_scatter_p ? off_arg2_arg3_map : arg2_arg3_map;
>  
>         case IFN_GATHER_LOAD:
>           return arg1_map;
>  
>         case IFN_MASK_GATHER_LOAD:
>         case IFN_MASK_LEN_GATHER_LOAD:
> -         return arg1_arg4_map;
> +         return arg1_arg4_arg5_map;
>  
>         case IFN_MASK_STORE:
>           return gather_scatter_p ? off_arg3_arg2_map : arg3_arg2_map;
> @@ -2675,7 +2675,8 @@ out:
>             tree op0;
>             tree uniform_val = op0 = oprnd_info->ops[0];
>             for (j = 1; j < oprnd_info->ops.length (); ++j)
> -             if (!operand_equal_p (uniform_val, oprnd_info->ops[j]))
> +             if (!oprnd_info->ops[j]
> +                 || !operand_equal_p (uniform_val, oprnd_info->ops[j]))
>                 {
>                   uniform_val = NULL_TREE;
>                   break;
> @@ -7928,6 +7929,18 @@ vect_slp_analyze_node_operations (vec_info *vinfo, slp_tree node,
>         tree vector_type = SLP_TREE_VECTYPE (child);
>         if (!vector_type)
>           {
> +           /* Masked loads can have an undefined (default SSA definition)
> +              else operand.  We do not need to cost it.  */
> +           vec<tree> ops = SLP_TREE_SCALAR_OPS (child);
> +           if ((STMT_VINFO_TYPE (SLP_TREE_REPRESENTATIVE (node))
> +                == load_vec_info_type)
> +               && ((ops.length ()
> +                    && TREE_CODE (ops[0]) == SSA_NAME
> +                    && SSA_NAME_IS_DEFAULT_DEF (ops[0])
> +                    && VAR_P (SSA_NAME_VAR (ops[0])))
> +                   || SLP_TREE_DEF_TYPE (child) == vect_constant_def))
> +             continue;
> +
>             /* For shifts with a scalar argument we don't need
>                to cost or code-generate anything.
>                ???  Represent this more explicitely.  */
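
For reference, the operand-map encoding: the first entry is the number
of SLP operands, the remaining entries are call-argument indices, so
for IFN_MASK_LOAD (sketch):

  /* arg2_arg3_map = { 2, 2, 3 }: two SLP operands, taken from call
     arguments 2 (the mask) and 3 (the new else value).  */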
> diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
> index 9a2c2ea753e..a90885eabe3 100644
> --- a/gcc/tree-vect-stmts.cc
> +++ b/gcc/tree-vect-stmts.cc
> @@ -58,6 +58,7 @@ along with GCC; see the file COPYING3.  If not see
>  #include "regs.h"
>  #include "attribs.h"
>  #include "optabs-libfuncs.h"
> +#include "tree-dfa.h"
>  
>  /* For lang_hooks.types.type_for_mode.  */
>  #include "langhooks.h"
> @@ -157,28 +158,45 @@ create_vector_array (tree elem_type, unsigned HOST_WIDE_INT nelems)
>  /* ARRAY is an array of vectors created by create_vector_array.
>     Return an SSA_NAME for the vector in index N.  The reference
>     is part of the vectorization of STMT_INFO and the vector is associated
> -   with scalar destination SCALAR_DEST.  */
> +   with scalar destination SCALAR_DEST.
> +   If we need to ensure that inactive elements are set to zero,
> +   NEED_ZEROING is true, MASK contains the loop mask to be used.  */
>  
>  static tree
>  read_vector_array (vec_info *vinfo,
>                  stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
> -                tree scalar_dest, tree array, unsigned HOST_WIDE_INT n)
> +                tree scalar_dest, tree array, unsigned HOST_WIDE_INT n,
> +                bool need_zeroing, tree mask)
>  {
> -  tree vect_type, vect, vect_name, array_ref;
> +  tree vect_type, vect, vect_name, tmp, tmp_name, array_ref;
>    gimple *new_stmt;
>  
>    gcc_assert (TREE_CODE (TREE_TYPE (array)) == ARRAY_TYPE);
>    vect_type = TREE_TYPE (TREE_TYPE (array));
> +  tmp = vect_create_destination_var (scalar_dest, vect_type);
>    vect = vect_create_destination_var (scalar_dest, vect_type);
>    array_ref = build4 (ARRAY_REF, vect_type, array,
>                     build_int_cst (size_type_node, n),
>                     NULL_TREE, NULL_TREE);
>  
> -  new_stmt = gimple_build_assign (vect, array_ref);
> -  vect_name = make_ssa_name (vect, new_stmt);
> -  gimple_assign_set_lhs (new_stmt, vect_name);
> +  new_stmt = gimple_build_assign (tmp, array_ref);
> +  tmp_name = make_ssa_name (vect, new_stmt);
> +  gimple_assign_set_lhs (new_stmt, tmp_name);
>    vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
>  
> +  if (need_zeroing)
> +    {
> +      tree vec_els = vect_get_mask_load_else (MASK_LOAD_ELSE_ZERO,
> +                                           vect_type);
> +      vect_name = make_ssa_name (vect, new_stmt);
> +      new_stmt
> +     = gimple_build_assign (vect_name, VEC_COND_EXPR,
> +                            mask, tmp_name, vec_els);
> +      vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
> +    }
> +  else
> +    vect_name = tmp_name;
> +
>    return vect_name;
>  }
>  
> @@ -469,6 +487,10 @@ exist_non_indexing_operands_for_use_p (tree use, stmt_vec_info stmt_info)
>         if (mask_index >= 0
>             && use == gimple_call_arg (call, mask_index))
>           return true;
> +       int els_index = internal_fn_else_index (ifn);
> +       if (els_index >= 0
> +           && use == gimple_call_arg (call, els_index))
> +         return true;
>         int stored_value_index = internal_fn_stored_value_index (ifn);
>         if (stored_value_index >= 0
>             && use == gimple_call_arg (call, stored_value_index))
> @@ -1280,7 +1302,17 @@ vect_get_vec_defs_for_operand (vec_info *vinfo, stmt_vec_info stmt_vinfo,
>       vector_type = get_vectype_for_scalar_type (loop_vinfo, TREE_TYPE (op));
>  
>        gcc_assert (vector_type);
> -      tree vop = vect_init_vector (vinfo, stmt_vinfo, op, vector_type, NULL);
> +      /* A masked load can have a default SSA definition as else operand.
> +      We should "vectorize" this instead of creating a duplicate from the
> +      scalar default.  */
> +      tree vop;
> +      if (TREE_CODE (op) == SSA_NAME
> +       && SSA_NAME_IS_DEFAULT_DEF (op)
> +       && VAR_P (SSA_NAME_VAR (op)))
> +     vop = get_or_create_ssa_default_def (cfun,
> +                                          create_tmp_var (vector_type));
> +      else
> +     vop = vect_init_vector (vinfo, stmt_vinfo, op, vector_type, NULL);
>        while (ncopies--)
>       vec_oprnds->quick_push (vop);
>      }
> @@ -1492,7 +1524,10 @@ static tree permute_vec_elements (vec_info *, tree, tree, tree, stmt_vec_info,
>  
>     Clear LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P if a loop using partial
>     vectors is not supported, otherwise record the required rgroup control
> -   types.  */
> +   types.
> +
> +   If partial vectors can be used and ELSVALS is nonzero the supported
> +   else values will be added to the vector ELSVALS points to.  */
>  
>  static void
>  check_load_store_for_partial_vectors (loop_vec_info loop_vinfo, tree vectype,
> @@ -1502,7 +1537,8 @@ check_load_store_for_partial_vectors (loop_vec_info loop_vinfo, tree vectype,
>                                     vect_memory_access_type
>                                     memory_access_type,
>                                     gather_scatter_info *gs_info,
> -                                   tree scalar_mask)
> +                                   tree scalar_mask,
> +                                   vec<int> *elsvals = nullptr)
>  {
>    /* Invariant loads need no special support.  */
>    if (memory_access_type == VMAT_INVARIANT)
> @@ -1518,7 +1554,8 @@ check_load_store_for_partial_vectors (loop_vec_info loop_vinfo, tree vectype,
>        if (slp_node)
>       nvectors /= group_size;
>        internal_fn ifn
> -     = (is_load ? vect_load_lanes_supported (vectype, group_size, true)
> +     = (is_load ? vect_load_lanes_supported (vectype, group_size, true,
> +                                             elsvals)
>                  : vect_store_lanes_supported (vectype, group_size, true));
>        if (ifn == IFN_MASK_LEN_LOAD_LANES || ifn == IFN_MASK_LEN_STORE_LANES)
>       vect_record_loop_len (loop_vinfo, lens, nvectors, vectype, 1);
> @@ -1548,12 +1585,14 @@ check_load_store_for_partial_vectors (loop_vec_info loop_vinfo, tree vectype,
>        if (internal_gather_scatter_fn_supported_p (len_ifn, vectype,
>                                                 gs_info->memory_type,
>                                                 gs_info->offset_vectype,
> -                                               gs_info->scale))
> +                                               gs_info->scale,
> +                                               elsvals))
>       vect_record_loop_len (loop_vinfo, lens, nvectors, vectype, 1);
>        else if (internal_gather_scatter_fn_supported_p (ifn, vectype,
>                                                      gs_info->memory_type,
>                                                      gs_info->offset_vectype,
> -                                                    gs_info->scale))
> +                                                    gs_info->scale,
> +                                                    elsvals))
>       vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype,
>                              scalar_mask);
>        else
> @@ -1607,7 +1646,8 @@ check_load_store_for_partial_vectors (loop_vec_info loop_vinfo, tree vectype,
>    machine_mode mask_mode;
>    machine_mode vmode;
>    bool using_partial_vectors_p = false;
> -  if (get_len_load_store_mode (vecmode, is_load).exists (&vmode))
> +  if (get_len_load_store_mode
> +      (vecmode, is_load, nullptr, elsvals).exists (&vmode))
>      {
>        nvectors = group_memory_nvectors (group_size * vf, nunits);
>        unsigned factor = (vecmode == vmode) ? 1 : GET_MODE_UNIT_SIZE (vecmode);
> @@ -1615,7 +1655,8 @@ check_load_store_for_partial_vectors (loop_vec_info loop_vinfo, tree vectype,
>        using_partial_vectors_p = true;
>      }
>    else if (targetm.vectorize.get_mask_mode (vecmode).exists (&mask_mode)
> -        && can_vec_mask_load_store_p (vecmode, mask_mode, is_load))
> +        && can_vec_mask_load_store_p (vecmode, mask_mode, is_load, NULL,
> +                                      elsvals))
>      {
>        nvectors = group_memory_nvectors (group_size * vf, nunits);
>        vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype, scalar_mask);
> @@ -1672,12 +1713,16 @@ prepare_vec_mask (loop_vec_info loop_vinfo, tree mask_type, tree loop_mask,
>     without loss of precision, where X is STMT_INFO's DR_STEP.
>  
>     Return true if this is possible, describing the gather load or scatter
> -   store in GS_INFO.  MASKED_P is true if the load or store is conditional.  */
> +   store in GS_INFO.  MASKED_P is true if the load or store is conditional.
> +
> +   If we can use gather/scatter and ELSVALS is nonzero the supported
> +   else values will be stored in the vector ELSVALS points to.  */
>  
>  static bool
>  vect_truncate_gather_scatter_offset (stmt_vec_info stmt_info,
>                                    loop_vec_info loop_vinfo, bool masked_p,
> -                                  gather_scatter_info *gs_info)
> +                                  gather_scatter_info *gs_info,
> +                                  vec<int> *elsvals)
>  {
>    dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
>    data_reference *dr = dr_info->dr;
> @@ -1734,7 +1779,8 @@ vect_truncate_gather_scatter_offset (stmt_vec_info stmt_info,
>        tree memory_type = TREE_TYPE (DR_REF (dr));
>        if (!vect_gather_scatter_fn_p (loop_vinfo, DR_IS_READ (dr), masked_p,
>                                    vectype, memory_type, offset_type, scale,
> -                                  &gs_info->ifn, &gs_info->offset_vectype)
> +                                  &gs_info->ifn, &gs_info->offset_vectype,
> +                                  elsvals)
>         || gs_info->ifn == IFN_LAST)
>       continue;
>  
> @@ -1762,17 +1808,21 @@ vect_truncate_gather_scatter_offset (stmt_vec_info stmt_info,
>     vectorize STMT_INFO, which is a grouped or strided load or store.
>     MASKED_P is true if load or store is conditional.  When returning
>     true, fill in GS_INFO with the information required to perform the
> -   operation.  */
> +   operation.
> +
> +   If we can use gather/scatter and ELSVALS is nonzero the supported
> +   else values will be stored in the vector ELSVALS points to.  */
>  
>  static bool
>  vect_use_strided_gather_scatters_p (stmt_vec_info stmt_info,
>                                   loop_vec_info loop_vinfo, bool masked_p,
> -                                 gather_scatter_info *gs_info)
> +                                 gather_scatter_info *gs_info,
> +                                 vec<int> *elsvals)
>  {
> -  if (!vect_check_gather_scatter (stmt_info, loop_vinfo, gs_info)
> +  if (!vect_check_gather_scatter (stmt_info, loop_vinfo, gs_info, elsvals)
>        || gs_info->ifn == IFN_LAST)
>      return vect_truncate_gather_scatter_offset (stmt_info, loop_vinfo,
> -                                             masked_p, gs_info);
> +                                             masked_p, gs_info, elsvals);
>  
>    tree old_offset_type = TREE_TYPE (gs_info->offset);
>    tree new_offset_type = TREE_TYPE (gs_info->offset_vectype);
> @@ -1974,7 +2024,11 @@ vector_vector_composition_type (tree vtype, poly_uint64 nelts, tree *ptype)
>     For stores, the statements in the group are all consecutive
>     and there is no gap at the end.  For loads, the statements in the
>     group might not be consecutive; there can be gaps between statements
> -   as well as at the end.  */
> +   as well as at the end.
> +
> +   If we can use gather/scatter and ELSVALS is nonzero the supported
> +   else values will be stored in the vector ELSVALS points to.
> +*/
>  
>  static bool
>  get_group_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info,
> @@ -1985,7 +2039,8 @@ get_group_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info,
>                          dr_alignment_support *alignment_support_scheme,
>                          int *misalignment,
>                          gather_scatter_info *gs_info,
> -                        internal_fn *lanes_ifn)
> +                        internal_fn *lanes_ifn,
> +                        vec<int> *elsvals)
>  {
>    loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
>    class loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
> @@ -2074,7 +2129,8 @@ get_group_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info,
>         else if (slp_node->ldst_lanes
>                  && (*lanes_ifn
>                        = (vls_type == VLS_LOAD
> -                         ? vect_load_lanes_supported (vectype, group_size, masked_p)
> +                         ? vect_load_lanes_supported (vectype, group_size,
> +                                                      masked_p, elsvals)
>                           : vect_store_lanes_supported (vectype, group_size,
>                                                         masked_p))) != IFN_LAST)
>           *memory_access_type = VMAT_LOAD_STORE_LANES;
> @@ -2244,7 +2300,8 @@ get_group_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info,
>             /* Otherwise try using LOAD/STORE_LANES.  */
>             *lanes_ifn
>               = vls_type == VLS_LOAD
> -                 ? vect_load_lanes_supported (vectype, group_size, masked_p)
> +                 ? vect_load_lanes_supported (vectype, group_size, masked_p,
> +                                              elsvals)
>                   : vect_store_lanes_supported (vectype, group_size,
>                                                 masked_p);
>             if (*lanes_ifn != IFN_LAST)
> @@ -2278,7 +2335,7 @@ get_group_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info,
>        && single_element_p
>        && loop_vinfo
>        && vect_use_strided_gather_scatters_p (stmt_info, loop_vinfo,
> -                                          masked_p, gs_info))
> +                                          masked_p, gs_info, elsvals))
>      *memory_access_type = VMAT_GATHER_SCATTER;
>  
>    if (*memory_access_type == VMAT_GATHER_SCATTER
> @@ -2340,7 +2397,10 @@ get_group_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info,
>     SLP says whether we're performing SLP rather than loop vectorization.
>     MASKED_P is true if the statement is conditional on a vectorized mask.
>     VECTYPE is the vector type that the vectorized statements will use.
> -   NCOPIES is the number of vector statements that will be needed.  */
> +   NCOPIES is the number of vector statements that will be needed.
> +
> +   If ELSVALS is nonzero the supported else values will be stored in the
> +   vector ELSVALS points to.  */
>  
>  static bool
>  get_load_store_type (vec_info  *vinfo, stmt_vec_info stmt_info,
> @@ -2352,7 +2412,8 @@ get_load_store_type (vec_info  *vinfo, stmt_vec_info stmt_info,
>                    dr_alignment_support *alignment_support_scheme,
>                    int *misalignment,
>                    gather_scatter_info *gs_info,
> -                  internal_fn *lanes_ifn)
> +                  internal_fn *lanes_ifn,
> +                  vec<int> *elsvals = nullptr)
>  {
>    loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
>    poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
> @@ -2361,7 +2422,8 @@ get_load_store_type (vec_info  *vinfo, stmt_vec_info stmt_info,
>    if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
>      {
>        *memory_access_type = VMAT_GATHER_SCATTER;
> -      if (!vect_check_gather_scatter (stmt_info, loop_vinfo, gs_info))
> +      if (!vect_check_gather_scatter (stmt_info, loop_vinfo, gs_info,
> +                                   elsvals))
>       gcc_unreachable ();
>        /* When using internal functions, we rely on pattern recognition
>        to convert the type of the offset to the type that the target
> @@ -2415,7 +2477,8 @@ get_load_store_type (vec_info  *vinfo, stmt_vec_info stmt_info,
>                                     masked_p,
>                                     vls_type, memory_access_type, poffset,
>                                     alignment_support_scheme,
> -                                   misalignment, gs_info, lanes_ifn))
> +                                   misalignment, gs_info, lanes_ifn,
> +                                   elsvals))
>       return false;
>      }
>    else if (STMT_VINFO_STRIDED_P (stmt_info))
> @@ -2423,7 +2486,7 @@ get_load_store_type (vec_info  *vinfo, stmt_vec_info stmt_info,
>        gcc_assert (!slp_node);
>        if (loop_vinfo
>         && vect_use_strided_gather_scatters_p (stmt_info, loop_vinfo,
> -                                              masked_p, gs_info))
> +                                              masked_p, gs_info, elsvals))
>       *memory_access_type = VMAT_GATHER_SCATTER;
>        else
>       *memory_access_type = VMAT_ELEMENTWISE;
> @@ -2692,6 +2755,30 @@ vect_build_zero_merge_argument (vec_info *vinfo,
>    return vect_init_vector (vinfo, stmt_info, merge, vectype, NULL);
>  }
>  
> +/* Return the corresponding else value for an else value constant
> +   ELSVAL with type TYPE.  */
> +
> +tree
> +vect_get_mask_load_else (int elsval, tree type)
> +{
> +  tree els;
> +  if (elsval == MASK_LOAD_ELSE_UNDEFINED)
> +    {
> +      tree tmp = create_tmp_var (type);
> +      /* No need to warn about anything.  */
> +      TREE_NO_WARNING (tmp) = 1;
> +      els = get_or_create_ssa_default_def (cfun, tmp);
> +    }
> +  else if (elsval == MASK_LOAD_ELSE_M1)
> +    els = build_minus_one_cst (type);
> +  else if (elsval == MASK_LOAD_ELSE_ZERO)
> +    els = build_zero_cst (type);
> +  else
> +    gcc_unreachable ();
> +
> +  return els;
> +}
> +
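
For instance (sketch):

  vect_get_mask_load_else (MASK_LOAD_ELSE_ZERO, vectype);
    /* -> { 0, ... }  */
  vect_get_mask_load_else (MASK_LOAD_ELSE_M1, vectype);
    /* -> { -1, ... }  */
  vect_get_mask_load_else (MASK_LOAD_ELSE_UNDEFINED, vectype);
    /* -> a fresh default-def SSA name, i.e. undefined contents.  */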
>  /* Build a gather load call while vectorizing STMT_INFO.  Insert new
>     instructions before GSI and add them to VEC_STMT.  GS_INFO describes
>     the gather load operation.  If the load is conditional, MASK is the
> @@ -9989,6 +10076,7 @@ vectorizable_load (vec_info *vinfo,
>    gather_scatter_info gs_info;
>    tree ref_type;
>    enum vect_def_type mask_dt = vect_unknown_def_type;
> +  enum vect_def_type els_dt = vect_unknown_def_type;
>  
>    if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
>      return false;
> @@ -10001,8 +10089,12 @@ vectorizable_load (vec_info *vinfo,
>      return false;
>  
>    tree mask = NULL_TREE, mask_vectype = NULL_TREE;
> +  tree els = NULL_TREE; tree els_vectype = NULL_TREE;
> +
>    int mask_index = -1;
> +  int els_index = -1;
>    slp_tree slp_op = NULL;
> +  slp_tree els_op = NULL;
>    if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
>      {
>        scalar_dest = gimple_assign_lhs (assign);
> @@ -10042,6 +10134,15 @@ vectorizable_load (vec_info *vinfo,
>         && !vect_check_scalar_mask (vinfo, stmt_info, slp_node, mask_index,
>                                     &mask, &slp_op, &mask_dt, &mask_vectype))
>       return false;
> +
> +      els_index = internal_fn_else_index (ifn);
> +      if (els_index >= 0 && slp_node)
> +     els_index = vect_slp_child_index_for_operand
> +       (call, els_index, STMT_VINFO_GATHER_SCATTER_P (stmt_info));
> +      if (els_index >= 0
> +       && !vect_is_simple_use (vinfo, stmt_info, slp_node, els_index,
> +                               &els, &els_op, &els_dt, &els_vectype))
> +     return false;
>      }
>  
>    tree vectype = STMT_VINFO_VECTYPE (stmt_info);
> @@ -10144,12 +10245,23 @@ vectorizable_load (vec_info *vinfo,
>    int misalignment;
>    poly_int64 poffset;
>    internal_fn lanes_ifn;
> +  auto_vec<int> elsvals;
> +  int maskload_elsval = 0;
> +  bool need_zeroing = false;
>    if (!get_load_store_type (vinfo, stmt_info, vectype, slp_node, mask, VLS_LOAD,
>                           ncopies, &memory_access_type, &poffset,
>                           &alignment_support_scheme, &misalignment, &gs_info,
> -                         &lanes_ifn))
> +                         &lanes_ifn, &elsvals))
>      return false;
>  
> +
> +  /* We might need to explicitly zero inactive elements if there are
> +     padding bits in the type that might leak otherwise.
> +     Refer to PR115336.  */
> +  tree scalar_type = TREE_TYPE (scalar_dest);
> +  bool type_mode_padding_p
> +    = TYPE_PRECISION (scalar_type) < GET_MODE_PRECISION (GET_MODE_INNER (mode));
> +
>    /* ???  The following checks should really be part of
>       get_group_load_store_type.  */
>    if (slp
> @@ -10213,7 +10325,8 @@ vectorizable_load (vec_info *vinfo,
>         machine_mode vec_mode = TYPE_MODE (vectype);
>         if (!VECTOR_MODE_P (vec_mode)
>             || !can_vec_mask_load_store_p (vec_mode,
> -                                          TYPE_MODE (mask_vectype), true))
> +                                          TYPE_MODE (mask_vectype),
> +                                          true, NULL, &elsvals))
>           return false;
>       }
>        else if (memory_access_type != VMAT_LOAD_STORE_LANES
> @@ -10268,7 +10381,7 @@ vectorizable_load (vec_info *vinfo,
>       check_load_store_for_partial_vectors (loop_vinfo, vectype, slp_node,
>                                             VLS_LOAD, group_size,
>                                             memory_access_type, &gs_info,
> -                                           mask);
> +                                           mask, &elsvals);
>  
>        if (dump_enabled_p ()
>         && memory_access_type != VMAT_ELEMENTWISE
> @@ -10282,6 +10395,36 @@ vectorizable_load (vec_info *vinfo,
>  
>        STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
>      }
> +  else
> +    {
> +      /* Here just get the else values.  */
> +      if (loop_vinfo
> +       && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
> +     check_load_store_for_partial_vectors (loop_vinfo, vectype, slp_node,
> +                                           VLS_LOAD, group_size,
> +                                           memory_access_type, &gs_info,
> +                                           mask, &elsvals);
> +    }
> +
> +  /* If the type needs padding we must zero inactive elements.
> +     Check if we can do that with a VEC_COND_EXPR and store the
> +     elsval we choose in MASKLOAD_ELSVAL.  */
> +  if (elsvals.length ()
> +      && type_mode_padding_p
> +      && !elsvals.contains (MASK_LOAD_ELSE_ZERO)
> +      && !expand_vec_cond_expr_p (vectype, truth_type_for (vectype)))
> +    {
> +      if (dump_enabled_p ())
> +     dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
> +                      "cannot zero inactive elements.\n");
> +      return false;
> +    }
> +
> +  /* For now just use the first available else value.
> +     get_supported_else_vals tries MASK_LOAD_ELSE_ZERO first so we will
> +     select it here if it is supported.  */
> +  if (elsvals.length ())
> +    maskload_elsval = *elsvals.begin ();
>  
>    if (!slp)
>      gcc_assert (memory_access_type
> @@ -10952,6 +11095,7 @@ vectorizable_load (vec_info *vinfo,
>      }
>  
>    tree vec_mask = NULL_TREE;
> +  tree vec_els = NULL_TREE;
>    if (memory_access_type == VMAT_LOAD_STORE_LANES)
>      {
>        gcc_assert (alignment_support_scheme == dr_aligned
> @@ -11042,6 +11186,14 @@ vectorizable_load (vec_info *vinfo,
>               }
>           }
>  
> +       if (final_mask)
> +         {
> +           vec_els = vect_get_mask_load_else (maskload_elsval, vectype);
> +           if (type_mode_padding_p
> +               && maskload_elsval != MASK_LOAD_ELSE_ZERO)
> +             need_zeroing = true;
> +         }
> +
>         gcall *call;
>         if (final_len && final_mask)
>           {
> @@ -11050,9 +11202,10 @@ vectorizable_load (vec_info *vinfo,
>                                                   VEC_MASK, LEN, BIAS).  */
>             unsigned int align = TYPE_ALIGN (TREE_TYPE (vectype));
>             tree alias_ptr = build_int_cst (ref_type, align);
> -           call = gimple_build_call_internal (IFN_MASK_LEN_LOAD_LANES, 5,
> +           call = gimple_build_call_internal (IFN_MASK_LEN_LOAD_LANES, 6,
>                                                dataref_ptr, alias_ptr,
> -                                              final_mask, final_len, bias);
> +                                              final_mask, vec_els,
> +                                              final_len, bias);
>           }
>         else if (final_mask)
>           {
> @@ -11061,9 +11214,9 @@ vectorizable_load (vec_info *vinfo,
>                                               VEC_MASK).  */
>             unsigned int align = TYPE_ALIGN (TREE_TYPE (vectype));
>             tree alias_ptr = build_int_cst (ref_type, align);
> -           call = gimple_build_call_internal (IFN_MASK_LOAD_LANES, 3,
> +           call = gimple_build_call_internal (IFN_MASK_LOAD_LANES, 4,
>                                                dataref_ptr, alias_ptr,
> -                                              final_mask);
> +                                              final_mask, vec_els);
>           }
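
So for the lanes variants the else value sits right after the mask and
before the len/bias operands, roughly (illustrative dump, names made up):

  vect_array = .MASK_LEN_LOAD_LANES (ptr, alias_ptr, mask, els, len, bias);
  vect_array = .MASK_LOAD_LANES (ptr, alias_ptr, mask, els);
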
>         else
>           {
> @@ -11082,7 +11235,8 @@ vectorizable_load (vec_info *vinfo,
>         for (unsigned i = 0; i < group_size; i++)
>           {
>             new_temp = read_vector_array (vinfo, stmt_info, gsi, scalar_dest,
> -                                         vec_array, i);
> +                                         vec_array, i, need_zeroing,
> +                                         final_mask);
>             if (slp)
>               slp_node->push_vec_def (new_temp);
>             else
> @@ -11212,25 +11366,36 @@ vectorizable_load (vec_info *vinfo,
>                       }
>                   }
>  
> +               if (final_mask)
> +                 {
> +                   vec_els = vect_get_mask_load_else
> +                     (maskload_elsval, vectype);
> +                   if (type_mode_padding_p
> +                       && maskload_elsval != MASK_LOAD_ELSE_ZERO)
> +                     need_zeroing = true;
> +                 }
> +
>                 gcall *call;
>                 if (final_len && final_mask)
>                   {
>                     if (VECTOR_TYPE_P (TREE_TYPE (vec_offset)))
>                       call = gimple_build_call_internal (
> -                       IFN_MASK_LEN_GATHER_LOAD, 7, dataref_ptr, vec_offset,
> -                       scale, zero, final_mask, final_len, bias);
> +                       IFN_MASK_LEN_GATHER_LOAD, 8, dataref_ptr, vec_offset,
> +                       scale, zero, final_mask, vec_els, final_len, bias);
>                     else
>                       /* Non-vector offset indicates that prefer to take
>                          MASK_LEN_STRIDED_LOAD instead of the
>                          MASK_LEN_GATHER_LOAD with direct stride arg.  */
>                       call = gimple_build_call_internal (
> -                       IFN_MASK_LEN_STRIDED_LOAD, 6, dataref_ptr, vec_offset,
> -                       zero, final_mask, final_len, bias);
> +                       IFN_MASK_LEN_STRIDED_LOAD, 7, dataref_ptr, vec_offset,
> +                       zero, final_mask, vec_els, final_len, bias);
>                   }
>                 else if (final_mask)
> -                 call = gimple_build_call_internal (IFN_MASK_GATHER_LOAD, 5,
> -                                                    dataref_ptr, vec_offset,
> -                                                    scale, zero, final_mask);
> +                 call = gimple_build_call_internal (IFN_MASK_GATHER_LOAD,
> +                                                    6, dataref_ptr,
> +                                                    vec_offset, scale,
> +                                                    zero, final_mask,
> +                                                    vec_els);
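
Likewise for the gather variants (illustrative dump, names made up):

  vect__1 = .MASK_LEN_GATHER_LOAD (ptr, offs, scale, zero, mask, els,
                                   len, bias);
  vect__1 = .MASK_GATHER_LOAD (ptr, offs, scale, zero, mask, els);

The unmasked IFN_GATHER_LOAD below keeps its four operands as there are
no inactive lanes to fill.
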
>                 else
>                   call = gimple_build_call_internal (IFN_GATHER_LOAD, 4,
>                                                      dataref_ptr, vec_offset,
> @@ -11441,10 +11606,28 @@ vectorizable_load (vec_info *vinfo,
>                 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
>                 new_stmt = gimple_build_assign (vec_dest, data_ref);
>               }
> -           new_temp = make_ssa_name (vec_dest, new_stmt);
> +           new_temp = need_zeroing
> +             ? make_ssa_name (vectype)
> +             : make_ssa_name (vec_dest, new_stmt);
>             gimple_set_lhs (new_stmt, new_temp);
>             vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
>  
> +           /* If we need to explicitly zero inactive elements, emit a
> +              VEC_COND_EXPR that does so.  */
> +           if (need_zeroing)
> +             {
> +               vec_els = vect_get_mask_load_else (MASK_LOAD_ELSE_ZERO,
> +                                                  vectype);
> +
> +               tree new_temp2 = make_ssa_name (vec_dest, new_stmt);
> +               new_stmt
> +                 = gimple_build_assign (new_temp2, VEC_COND_EXPR,
> +                                        final_mask, new_temp, vec_els);
> +               vect_finish_stmt_generation (vinfo, stmt_info, new_stmt,
> +                                            gsi);
> +               new_temp = new_temp2;
> +             }
> +
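
The fallback then reads like (illustrative dump, names made up):

  vect__5 = .MASK_LOAD (dataref_ptr, ptr, loop_mask, els);
  vect__6 = VEC_COND_EXPR <loop_mask, vect__5, { 0, ... }>;

i.e. when the chosen else value is not zero but the mode has padding
bits the inactive lanes get zeroed explicitly.
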
>             /* Store vector loads in the corresponding SLP_NODE.  */
>             if (slp)
>               slp_node->push_vec_def (new_stmt);
> @@ -11544,6 +11727,7 @@ vectorizable_load (vec_info *vinfo,
>         tree final_mask = NULL_TREE;
>         tree final_len = NULL_TREE;
>         tree bias = NULL_TREE;
> +
>         if (!costing_p)
>           {
>             if (mask)
> @@ -11636,15 +11820,24 @@ vectorizable_load (vec_info *vinfo,
>                   bias = build_int_cst (intQI_type_node, biasval);
>                 }
>  
> +             tree vec_els;
> +
>               if (final_len)
>                 {
>                   tree ptr = build_int_cst (ref_type, align * BITS_PER_UNIT);
>                   gcall *call;
>                   if (partial_ifn == IFN_MASK_LEN_LOAD)
> -                   call = gimple_build_call_internal (IFN_MASK_LEN_LOAD, 5,
> -                                                      dataref_ptr, ptr,
> -                                                      final_mask, final_len,
> -                                                      bias);
> +                   {
> +                     vec_els = vect_get_mask_load_else
> +                       (maskload_elsval, vectype);
> +                     if (type_mode_padding_p
> +                         && maskload_elsval != MASK_LOAD_ELSE_ZERO)
> +                       need_zeroing = true;
> +                     call = gimple_build_call_internal (IFN_MASK_LEN_LOAD,
> +                                                        6, dataref_ptr, ptr,
> +                                                        final_mask, vec_els,
> +                                                        final_len, bias);
> +                   }
>                   else
>                     call = gimple_build_call_internal (IFN_LEN_LOAD, 4,
>                                                        dataref_ptr, ptr,
> @@ -11671,9 +11864,15 @@ vectorizable_load (vec_info *vinfo,
>               else if (final_mask)
>                 {
>                   tree ptr = build_int_cst (ref_type, align * BITS_PER_UNIT);
> -                 gcall *call = gimple_build_call_internal (IFN_MASK_LOAD, 3,
> +                 vec_els = vect_get_mask_load_else
> +                   (maskload_elsval, vectype);
> +                 if (type_mode_padding_p
> +                     && maskload_elsval != MASK_LOAD_ELSE_ZERO)
> +                   need_zeroing = true;
> +                 gcall *call = gimple_build_call_internal (IFN_MASK_LOAD, 4,
>                                                             dataref_ptr, ptr,
> -                                                           final_mask);
> +                                                           final_mask,
> +                                                           vec_els);
>                   gimple_call_set_nothrow (call, true);
>                   new_stmt = call;
>                   data_ref = NULL_TREE;
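
For the plain masked case that is then (illustrative dump):

  vect__2 = .MASK_LEN_LOAD (dataref_ptr, ptr, mask, els, len, bias);
  vect__2 = .MASK_LOAD (dataref_ptr, ptr, mask, els);

while the unmasked IFN_LEN_LOAD path is unchanged.
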
> @@ -11954,9 +12153,28 @@ vectorizable_load (vec_info *vinfo,
>                 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
>                 new_stmt = gimple_build_assign (vec_dest, data_ref);
>               }
> -           new_temp = make_ssa_name (vec_dest, new_stmt);
> +
> +           new_temp = need_zeroing
> +             ? make_ssa_name (vectype)
> +             : make_ssa_name (vec_dest, new_stmt);
>             gimple_set_lhs (new_stmt, new_temp);
>             vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
> +
> +           /* If we need to explicitly zero inactive elements, emit a
> +              VEC_COND_EXPR that does so.  */
> +           if (need_zeroing)
> +             {
> +               vec_els = vect_get_mask_load_else (MASK_LOAD_ELSE_ZERO,
> +                                                  vectype);
> +
> +               tree new_temp2 = make_ssa_name (vec_dest, new_stmt);
> +               new_stmt
> +                 = gimple_build_assign (new_temp2, VEC_COND_EXPR,
> +                                        final_mask, new_temp, vec_els);
> +               vect_finish_stmt_generation (vinfo, stmt_info, new_stmt,
> +                                            gsi);
> +               new_temp = new_temp2;
> +             }
>           }
>  
>         /* 3. Handle explicit realignment if necessary/supported.
> diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
> index 24227a69d4a..0bd759a92ea 100644
> --- a/gcc/tree-vectorizer.h
> +++ b/gcc/tree-vectorizer.h
> @@ -2418,9 +2418,11 @@ extern bool vect_slp_analyze_instance_alignment (vec_info *, slp_instance);
>  extern opt_result vect_analyze_data_ref_accesses (vec_info *, vec<int> *);
>  extern opt_result vect_prune_runtime_alias_test_list (loop_vec_info);
>  extern bool vect_gather_scatter_fn_p (vec_info *, bool, bool, tree, tree,
> -                                   tree, int, internal_fn *, tree *);
> +                                   tree, int, internal_fn *, tree *,
> +                                   vec<int> * = nullptr);
>  extern bool vect_check_gather_scatter (stmt_vec_info, loop_vec_info,
> -                                    gather_scatter_info *);
> +                                    gather_scatter_info *,
> +                                    vec<int> * = nullptr);
>  extern opt_result vect_find_stmt_data_reference (loop_p, gimple *,
>                                                vec<data_reference_p> *,
>                                                vec<int> *, int);
> @@ -2438,7 +2440,8 @@ extern tree vect_create_destination_var (tree, tree);
>  extern bool vect_grouped_store_supported (tree, unsigned HOST_WIDE_INT);
>  extern internal_fn vect_store_lanes_supported (tree, unsigned HOST_WIDE_INT, bool);
>  extern bool vect_grouped_load_supported (tree, bool, unsigned HOST_WIDE_INT);
> -extern internal_fn vect_load_lanes_supported (tree, unsigned HOST_WIDE_INT, bool);
> +extern internal_fn vect_load_lanes_supported (tree, unsigned HOST_WIDE_INT,
> +                                           bool, vec<int> * = nullptr);
>  extern void vect_permute_store_chain (vec_info *, vec<tree> &,
>                                     unsigned int, stmt_vec_info,
>                                     gimple_stmt_iterator *, vec<tree> *);
> @@ -2584,6 +2587,7 @@ extern int vect_slp_child_index_for_operand (const gimple *, int op, bool);
>  
>  extern tree prepare_vec_mask (loop_vec_info, tree, tree, tree,
>                             gimple_stmt_iterator *);
> +extern tree vect_get_mask_load_else (int, tree);
>  
>  /* In tree-vect-patterns.cc.  */
>  extern void
> 
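
For the new defaulted vec<int> * parameters a caller interested in the
supported else values would do something like (sketch, untested;
variable names are placeholders):

  auto_vec<int> elsvals;
  internal_fn lanes_ifn
    = vect_load_lanes_supported (vectype, group_size, masked_p, &elsvals);
  /* On success ELSVALS holds the else values the target supports for
     the chosen IFN, with MASK_LOAD_ELSE_ZERO first when available.  */

and existing callers are unaffected thanks to the nullptr default.
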

-- 
Richard Biener <rguent...@suse.de>
SUSE Software Solutions Germany GmbH,
Frankenstrasse 146, 90461 Nuernberg, Germany;
GF: Ivo Totev, Andrew McDonald, Werner Knoblich; (HRB 36809, AG Nuernberg)
