On Fri, 18 Oct 2024, Robin Dapp wrote:

> This patch adds an else operand to vectorized masked load calls.
> The current implementation adds else-value arguments to the respective
> target-querying functions that are used to supply the vectorizer with
> the proper else value.
> 
> Right now, the only spot where a zero else value is actually enforced is
> tree-ifcvt.  Loop masking and other instances of masked loads in the
> vectorizer itself do not use vec_cond_exprs.
> 
> gcc/ChangeLog:
> 
>       * optabs-query.cc (supports_vec_convert_optab_p): Return icode.
>       (get_supported_else_vals): Return supported else values for
>       optab's operand at index.
>       (supports_vec_gather_load_p): Add else argument.
>       (supports_vec_scatter_store_p): Adjust to icode-returning helper.
>       * optabs-query.h (supports_vec_gather_load_p): Ditto.
>       (get_supported_else_vals): Ditto.
>       * optabs-tree.cc (target_supports_mask_load_store_p): Ditto.
>       (can_vec_mask_load_store_p): Ditto.
>       (target_supports_len_load_store_p): Ditto.
>       (get_len_load_store_mode): Ditto.
>       * optabs-tree.h (target_supports_mask_load_store_p): Ditto.
>       (can_vec_mask_load_store_p): Ditto.
>       * tree-vect-data-refs.cc (vect_lanes_optab_supported_p): Ditto.
>       (vect_gather_scatter_fn_p): Ditto.
>       (vect_check_gather_scatter): Ditto.
>       (vect_load_lanes_supported): Ditto.
>       * tree-vect-patterns.cc (vect_recog_gather_scatter_pattern):
>       Ditto.
>       * tree-vect-slp.cc (vect_get_operand_map): Adjust indices for
>       else operand.
>       (vect_slp_analyze_node_operations): Skip undefined else operand.
>       * tree-vect-stmts.cc (exist_non_indexing_operands_for_use_p):
>       Add else operand handling.
>       (vect_get_vec_defs_for_operand): Handle undefined else operand.
>       (check_load_store_for_partial_vectors): Add else argument.
>       (vect_truncate_gather_scatter_offset): Ditto.
>       (vect_use_strided_gather_scatters_p): Ditto.
>       (get_group_load_store_type): Ditto.
>       (get_load_store_type): Ditto.
>       (vect_get_mask_load_else): New function.
>       (vect_get_else_val_from_tree): New function.
>       (vect_build_one_gather_load_call): Add zero else operand.
>       (vectorizable_load): Use else operand.
>       * tree-vectorizer.h (vect_gather_scatter_fn_p): Add else
>       argument.
>       (vect_load_lanes_supported): Ditto.
>       (vect_get_mask_load_else): Ditto.
>       (vect_get_else_val_from_tree): Ditto.
> ---
>  gcc/optabs-query.cc        |  59 ++++++---
>  gcc/optabs-query.h         |   3 +-
>  gcc/optabs-tree.cc         |  62 ++++++---
>  gcc/optabs-tree.h          |   8 +-
>  gcc/tree-vect-data-refs.cc |  77 +++++++----
>  gcc/tree-vect-patterns.cc  |  18 ++-
>  gcc/tree-vect-slp.cc       |  22 +++-
>  gcc/tree-vect-stmts.cc     | 257 +++++++++++++++++++++++++++++--------
>  gcc/tree-vectorizer.h      |  11 +-
>  9 files changed, 394 insertions(+), 123 deletions(-)
> 
> diff --git a/gcc/optabs-query.cc b/gcc/optabs-query.cc
> index cc52bc0f5ea..347a1322479 100644
> --- a/gcc/optabs-query.cc
> +++ b/gcc/optabs-query.cc
> @@ -29,6 +29,9 @@ along with GCC; see the file COPYING3.  If not see
>  #include "rtl.h"
>  #include "recog.h"
>  #include "vec-perm-indices.h"
> +#include "internal-fn.h"
> +#include "memmodel.h"
> +#include "optabs.h"
>  
>  struct target_optabs default_target_optabs;
>  struct target_optabs *this_fn_optabs = &default_target_optabs;
> @@ -672,34 +675,48 @@ lshift_cheap_p (bool speed_p)
>     that mode, given that the second mode is always an integer vector.
>     If MODE is VOIDmode, return true if OP supports any vector mode.  */
>  
> -static bool
> +static enum insn_code
>  supports_vec_convert_optab_p (optab op, machine_mode mode)

The name is bad now.  supported_vec_convert_optab (...) maybe?

>  {
>    int start = mode == VOIDmode ? 0 : mode;
>    int end = mode == VOIDmode ? MAX_MACHINE_MODE - 1 : mode;
> +  enum insn_code icode = CODE_FOR_nothing;
>    for (int i = start; i <= end; ++i)
>      if (VECTOR_MODE_P ((machine_mode) i))
>        for (int j = MIN_MODE_VECTOR_INT; j < MAX_MODE_VECTOR_INT; ++j)
> -     if (convert_optab_handler (op, (machine_mode) i,
> -                                (machine_mode) j) != CODE_FOR_nothing)
> -       return true;
> +     {
> +       if ((icode
> +            = convert_optab_handler (op, (machine_mode) i,
> +                                     (machine_mode) j)) != CODE_FOR_nothing)
> +         return icode;
> +     }
>  
> -  return false;
> +  return icode;
>  }
>  
>  /* If MODE is not VOIDmode, return true if vec_gather_load is available for
>     that mode.  If MODE is VOIDmode, return true if gather_load is available
> -   for at least one vector mode.  */
> +   for at least one vector mode.
> +   In that case, and if ELSVALS is nonzero, store the supported else values
> +   into the vector it points to.  */
>  
>  bool
> -supports_vec_gather_load_p (machine_mode mode)
> +supports_vec_gather_load_p (machine_mode mode, auto_vec<int> *elsvals)
>  {
> -  if (!this_fn_optabs->supports_vec_gather_load[mode])
> -    this_fn_optabs->supports_vec_gather_load[mode]
> -      = (supports_vec_convert_optab_p (gather_load_optab, mode)
> -      || supports_vec_convert_optab_p (mask_gather_load_optab, mode)
> -      || supports_vec_convert_optab_p (mask_len_gather_load_optab, mode)
> -      ? 1 : -1);
> +  enum insn_code icode = CODE_FOR_nothing;
> +  if (!this_fn_optabs->supports_vec_gather_load[mode] || elsvals)
> +    {
> +      icode = supports_vec_convert_optab_p (gather_load_optab, mode);
> +      if (icode == CODE_FOR_nothing)
> +     icode = supports_vec_convert_optab_p (mask_gather_load_optab, mode);
> +      if (icode == CODE_FOR_nothing)
> +     icode = supports_vec_convert_optab_p (mask_len_gather_load_optab, mode);
> +      this_fn_optabs->supports_vec_gather_load[mode]
> +     = (icode != CODE_FOR_nothing) ? 1 : -1;
> +    }
> +
> +  if (elsvals && icode != CODE_FOR_nothing)
> +    get_supported_else_vals (icode, MASK_LOAD_GATHER_ELSE_IDX, *elsvals);
>  
>    return this_fn_optabs->supports_vec_gather_load[mode] > 0;
>  }
> @@ -711,12 +728,18 @@ supports_vec_gather_load_p (machine_mode mode)
>  bool
>  supports_vec_scatter_store_p (machine_mode mode)
>  {
> +  enum insn_code icode;
>    if (!this_fn_optabs->supports_vec_scatter_store[mode])
> -    this_fn_optabs->supports_vec_scatter_store[mode]
> -      = (supports_vec_convert_optab_p (scatter_store_optab, mode)
> -      || supports_vec_convert_optab_p (mask_scatter_store_optab, mode)
> -      || supports_vec_convert_optab_p (mask_len_scatter_store_optab, mode)
> -      ? 1 : -1);
> +    {
> +      icode = supports_vec_convert_optab_p (scatter_store_optab, mode);
> +      if (icode == CODE_FOR_nothing)
> +     icode = supports_vec_convert_optab_p (mask_scatter_store_optab, mode);
> +      if (icode == CODE_FOR_nothing)
> +     icode = supports_vec_convert_optab_p (mask_len_scatter_store_optab,
> +                                           mode);
> +      this_fn_optabs->supports_vec_scatter_store[mode]
> +     = (icode != CODE_FOR_nothing) ? 1 : -1;
> +    }
>  
>    return this_fn_optabs->supports_vec_scatter_store[mode] > 0;
>  }
> diff --git a/gcc/optabs-query.h b/gcc/optabs-query.h
> index 0cb2c21ba85..5e0f59ee4b9 100644
> --- a/gcc/optabs-query.h
> +++ b/gcc/optabs-query.h
> @@ -191,7 +191,8 @@ bool can_compare_and_swap_p (machine_mode, bool);
>  bool can_atomic_exchange_p (machine_mode, bool);
>  bool can_atomic_load_p (machine_mode);
>  bool lshift_cheap_p (bool);
> -bool supports_vec_gather_load_p (machine_mode = E_VOIDmode);
> +bool supports_vec_gather_load_p (machine_mode = E_VOIDmode,
> +                              auto_vec<int> * = nullptr);
>  bool supports_vec_scatter_store_p (machine_mode = E_VOIDmode);
>  bool can_vec_extract (machine_mode, machine_mode);
>  
> diff --git a/gcc/optabs-tree.cc b/gcc/optabs-tree.cc
> index b69a5bc3676..ebdb6051c14 100644
> --- a/gcc/optabs-tree.cc
> +++ b/gcc/optabs-tree.cc
> @@ -29,6 +29,7 @@ along with GCC; see the file COPYING3.  If not see
>  #include "optabs.h"
>  #include "optabs-tree.h"
>  #include "stor-layout.h"
> +#include "internal-fn.h"
>  
>  /* Return the optab used for computing the operation given by the tree code,
>     CODE and the tree EXP.  This function is not always usable (for example, 
> it
> @@ -552,24 +553,38 @@ target_supports_op_p (tree type, enum tree_code code,
>     or mask_len_{load,store}.
>     This helper function checks whether target supports masked
>     load/store and return corresponding IFN in the last argument
> -   (IFN_MASK_{LOAD,STORE} or IFN_MASK_LEN_{LOAD,STORE}).  */
> +   (IFN_MASK_{LOAD,STORE} or IFN_MASK_LEN_{LOAD,STORE}).
> +   If there is support and ELSVALS is nonzero add the possible else values
> +   to the vector it points to.  */
>  
> -static bool
> +bool
>  target_supports_mask_load_store_p (machine_mode mode, machine_mode mask_mode,
> -                                bool is_load, internal_fn *ifn)
> +                                bool is_load, internal_fn *ifn,
> +                                auto_vec<int> *elsvals)
>  {
>    optab op = is_load ? maskload_optab : maskstore_optab;
>    optab len_op = is_load ? mask_len_load_optab : mask_len_store_optab;
> -  if (convert_optab_handler (op, mode, mask_mode) != CODE_FOR_nothing)
> +  enum insn_code icode;
> +  if ((icode = convert_optab_handler (op, mode, mask_mode))
> +      != CODE_FOR_nothing)
>      {
>        if (ifn)
>       *ifn = is_load ? IFN_MASK_LOAD : IFN_MASK_STORE;
> +      if (elsvals)
> +     get_supported_else_vals (icode,
> +                              internal_fn_else_index (IFN_MASK_LOAD),
> +                              *elsvals);
>        return true;
>      }
> -  else if (convert_optab_handler (len_op, mode, mask_mode) != 
> CODE_FOR_nothing)
> +  else if ((icode = convert_optab_handler (len_op, mode, mask_mode))
> +        != CODE_FOR_nothing)
>      {
>        if (ifn)
>       *ifn = is_load ? IFN_MASK_LEN_LOAD : IFN_MASK_LEN_STORE;
> +      if (elsvals)
> +     get_supported_else_vals (icode,
> +                              internal_fn_else_index (IFN_MASK_LEN_LOAD),
> +                              *elsvals);
>        return true;
>      }
>    return false;
> @@ -584,13 +599,15 @@ bool
>  can_vec_mask_load_store_p (machine_mode mode,
>                          machine_mode mask_mode,
>                          bool is_load,
> -                        internal_fn *ifn)
> +                        internal_fn *ifn,
> +                        auto_vec<int> *elsvals)
>  {
>    machine_mode vmode;
>  
>    /* If mode is vector mode, check it directly.  */
>    if (VECTOR_MODE_P (mode))
> -    return target_supports_mask_load_store_p (mode, mask_mode, is_load, ifn);
> +    return target_supports_mask_load_store_p (mode, mask_mode, is_load, ifn,
> +                                           elsvals);
>  
>    /* Otherwise, return true if there is some vector mode with
>       the mask load/store supported.  */
> @@ -604,7 +621,8 @@ can_vec_mask_load_store_p (machine_mode mode,
>    vmode = targetm.vectorize.preferred_simd_mode (smode);
>    if (VECTOR_MODE_P (vmode)
>        && targetm.vectorize.get_mask_mode (vmode).exists (&mask_mode)
> -      && target_supports_mask_load_store_p (vmode, mask_mode, is_load, ifn))
> +      && target_supports_mask_load_store_p (vmode, mask_mode, is_load, ifn,
> +                                         elsvals))
>      return true;
>  
>    auto_vector_modes vector_modes;
> @@ -612,7 +630,8 @@ can_vec_mask_load_store_p (machine_mode mode,
>    for (machine_mode base_mode : vector_modes)
>      if (related_vector_mode (base_mode, smode).exists (&vmode)
>       && targetm.vectorize.get_mask_mode (vmode).exists (&mask_mode)
> -     && target_supports_mask_load_store_p (vmode, mask_mode, is_load, ifn))
> +     && target_supports_mask_load_store_p (vmode, mask_mode, is_load, ifn,
> +                                           elsvals))
>        return true;
>    return false;
>  }
> @@ -622,11 +641,13 @@ can_vec_mask_load_store_p (machine_mode mode,
>     or mask_len_{load,store}.
>     This helper function checks whether target supports len
>     load/store and return corresponding IFN in the last argument
> -   (IFN_LEN_{LOAD,STORE} or IFN_MASK_LEN_{LOAD,STORE}).  */
> +   (IFN_LEN_{LOAD,STORE} or IFN_MASK_LEN_{LOAD,STORE}).
> +   If there is support and ELSVALS is nonzero add the possible else values
> +   to the vector it points to.  */
>  
>  static bool
>  target_supports_len_load_store_p (machine_mode mode, bool is_load,
> -                               internal_fn *ifn)
> +                               internal_fn *ifn, auto_vec<int> *elsvals)
>  {
>    optab op = is_load ? len_load_optab : len_store_optab;
>    optab masked_op = is_load ? mask_len_load_optab : mask_len_store_optab;
> @@ -638,11 +659,17 @@ target_supports_len_load_store_p (machine_mode mode, 
> bool is_load,
>        return true;
>      }
>    machine_mode mask_mode;
> +  enum insn_code icode;
>    if (targetm.vectorize.get_mask_mode (mode).exists (&mask_mode)
> -      && convert_optab_handler (masked_op, mode, mask_mode) != 
> CODE_FOR_nothing)
> +      && ((icode = convert_optab_handler (masked_op, mode, mask_mode))
> +       != CODE_FOR_nothing))
>      {
>        if (ifn)
>       *ifn = is_load ? IFN_MASK_LEN_LOAD : IFN_MASK_LEN_STORE;
> +      if (elsvals)
> +     get_supported_else_vals (icode,
> +                              internal_fn_else_index (IFN_MASK_LEN_LOAD),
> +                              *elsvals);
>        return true;
>      }
>    return false;
> @@ -656,22 +683,25 @@ target_supports_len_load_store_p (machine_mode mode, 
> bool is_load,
>     VnQI to wrap the other supportable same size vector modes.
>     An additional output in the last argument which is the IFN pointer.
>     We set IFN as LEN_{LOAD,STORE} or MASK_LEN_{LOAD,STORE} according
> -   which optab is supported in the target.  */
> +   which optab is supported in the target.
> +   If there is support and ELSVALS is nonzero add the possible else values
> +   to the vector it points to.  */
>  
>  opt_machine_mode
> -get_len_load_store_mode (machine_mode mode, bool is_load, internal_fn *ifn)
> +get_len_load_store_mode (machine_mode mode, bool is_load, internal_fn *ifn,
> +                      auto_vec<int> *elsvals)
>  {
>    gcc_assert (VECTOR_MODE_P (mode));
>  
>    /* Check if length in lanes supported for this mode directly.  */
> -  if (target_supports_len_load_store_p (mode, is_load, ifn))
> +  if (target_supports_len_load_store_p (mode, is_load, ifn, elsvals))
>      return mode;
>  
>    /* Check if length in bytes supported for same vector size VnQI.  */
>    machine_mode vmode;
>    poly_uint64 nunits = GET_MODE_SIZE (mode);
>    if (related_vector_mode (mode, QImode, nunits).exists (&vmode)
> -      && target_supports_len_load_store_p (vmode, is_load, ifn))
> +      && target_supports_len_load_store_p (vmode, is_load, ifn, elsvals))
>      return vmode;
>  
>    return opt_machine_mode ();
> diff --git a/gcc/optabs-tree.h b/gcc/optabs-tree.h
> index f2b49991462..390954bf998 100644
> --- a/gcc/optabs-tree.h
> +++ b/gcc/optabs-tree.h
> @@ -47,9 +47,13 @@ bool expand_vec_cond_expr_p (tree, tree, enum tree_code);
>  void init_tree_optimization_optabs (tree);
>  bool target_supports_op_p (tree, enum tree_code,
>                          enum optab_subtype = optab_default);
> +bool target_supports_mask_load_store_p (machine_mode, machine_mode,
> +                                bool, internal_fn *, auto_vec<int> *);
>  bool can_vec_mask_load_store_p (machine_mode, machine_mode, bool,
> -                             internal_fn * = nullptr);
> +                             internal_fn * = nullptr,
> +                             auto_vec<int> * = nullptr);
>  opt_machine_mode get_len_load_store_mode (machine_mode, bool,
> -                                       internal_fn * = nullptr);
> +                                       internal_fn * = nullptr,
> +                                       auto_vec<int> * = nullptr);
>  
>  #endif
> diff --git a/gcc/tree-vect-data-refs.cc b/gcc/tree-vect-data-refs.cc
> index 202af7a8952..d9f608dd2c0 100644
> --- a/gcc/tree-vect-data-refs.cc
> +++ b/gcc/tree-vect-data-refs.cc
> @@ -55,13 +55,18 @@ along with GCC; see the file COPYING3.  If not see
>  #include "vec-perm-indices.h"
>  #include "internal-fn.h"
>  #include "gimple-fold.h"
> +#include "optabs-query.h"
>  
>  /* Return true if load- or store-lanes optab OPTAB is implemented for
> -   COUNT vectors of type VECTYPE.  NAME is the name of OPTAB.  */
> +   COUNT vectors of type VECTYPE.  NAME is the name of OPTAB.
> +
> +   If it is implemented and ELSVALS is nonzero add the possible else values
> +   to the vector it points to.  */
>  
>  static bool
>  vect_lanes_optab_supported_p (const char *name, convert_optab optab,
> -                           tree vectype, unsigned HOST_WIDE_INT count)
> +                           tree vectype, unsigned HOST_WIDE_INT count,
> +                           auto_vec<int> *elsvals = nullptr)
>  {
>    machine_mode mode, array_mode;
>    bool limit_p;
> @@ -81,7 +86,9 @@ vect_lanes_optab_supported_p (const char *name, 
> convert_optab optab,
>       }
>      }
>  
> -  if (convert_optab_handler (optab, array_mode, mode) == CODE_FOR_nothing)
> +  enum insn_code icode;
> +  if ((icode = convert_optab_handler (optab, array_mode, mode))
> +      == CODE_FOR_nothing)
>      {
>        if (dump_enabled_p ())
>       dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
> @@ -92,8 +99,13 @@ vect_lanes_optab_supported_p (const char *name, 
> convert_optab optab,
>  
>    if (dump_enabled_p ())
>      dump_printf_loc (MSG_NOTE, vect_location,
> -                     "can use %s<%s><%s>\n", name, GET_MODE_NAME 
> (array_mode),
> -                     GET_MODE_NAME (mode));
> +                  "can use %s<%s><%s>\n", name, GET_MODE_NAME (array_mode),
> +                  GET_MODE_NAME (mode));
> +
> +  if (elsvals)
> +    get_supported_else_vals (icode,
> +                          internal_fn_else_index (IFN_MASK_LEN_LOAD_LANES),
> +                          *elsvals);
>  
>    return true;
>  }
> @@ -4177,13 +4189,15 @@ vect_prune_runtime_alias_test_list (loop_vec_info 
> loop_vinfo)
>     be multiplied *after* it has been converted to address width.
>  
>     Return true if the function is supported, storing the function id in
> -   *IFN_OUT and the vector type for the offset in *OFFSET_VECTYPE_OUT.  */
> +   *IFN_OUT and the vector type for the offset in *OFFSET_VECTYPE_OUT.
> +
> +   If we can use gather and add the possible else values to ELSVALS.  */
>  
>  bool
>  vect_gather_scatter_fn_p (vec_info *vinfo, bool read_p, bool masked_p,
>                         tree vectype, tree memory_type, tree offset_type,
>                         int scale, internal_fn *ifn_out,
> -                       tree *offset_vectype_out)
> +                       tree *offset_vectype_out, auto_vec<int> *elsvals)

Do not use auto_vec<int> * here either; use vec<int> * in this case (and 
elsewhere).

>  {
>    unsigned int memory_bits = tree_to_uhwi (TYPE_SIZE (memory_type));
>    unsigned int element_bits = vector_element_bits (vectype);
> @@ -4221,7 +4235,8 @@ vect_gather_scatter_fn_p (vec_info *vinfo, bool read_p, 
> bool masked_p,
>  
>        /* Test whether the target supports this combination.  */
>        if (internal_gather_scatter_fn_supported_p (ifn, vectype, memory_type,
> -                                               offset_vectype, scale))
> +                                               offset_vectype, scale,
> +                                               elsvals))
>       {
>         *ifn_out = ifn;
>         *offset_vectype_out = offset_vectype;
> @@ -4231,7 +4246,7 @@ vect_gather_scatter_fn_p (vec_info *vinfo, bool read_p, 
> bool masked_p,
>              && internal_gather_scatter_fn_supported_p (alt_ifn, vectype,
>                                                         memory_type,
>                                                         offset_vectype,
> -                                                       scale))
> +                                                       scale, elsvals))
>       {
>         *ifn_out = alt_ifn;
>         *offset_vectype_out = offset_vectype;
> @@ -4239,7 +4254,8 @@ vect_gather_scatter_fn_p (vec_info *vinfo, bool read_p, 
> bool masked_p,
>       }
>        else if (internal_gather_scatter_fn_supported_p (alt_ifn2, vectype,
>                                                      memory_type,
> -                                                    offset_vectype, scale))
> +                                                    offset_vectype, scale,
> +                                                    elsvals))
>       {
>         *ifn_out = alt_ifn2;
>         *offset_vectype_out = offset_vectype;
> @@ -4278,11 +4294,13 @@ vect_describe_gather_scatter_call (stmt_vec_info 
> stmt_info,
>  }
>  
>  /* Return true if a non-affine read or write in STMT_INFO is suitable for a
> -   gather load or scatter store.  Describe the operation in *INFO if so.  */
> +   gather load or scatter store.  Describe the operation in *INFO if so.
> +   If it is suitable and ELSVALS is nonzero add the supported else values
> +   to the vector it points to.  */
>  
>  bool
>  vect_check_gather_scatter (stmt_vec_info stmt_info, loop_vec_info loop_vinfo,
> -                        gather_scatter_info *info)
> +                        gather_scatter_info *info, auto_vec<int> *elsvals)
>  {
>    HOST_WIDE_INT scale = 1;
>    poly_int64 pbitpos, pbitsize;
> @@ -4306,6 +4324,16 @@ vect_check_gather_scatter (stmt_vec_info stmt_info, 
> loop_vec_info loop_vinfo,
>        ifn = gimple_call_internal_fn (call);
>        if (internal_gather_scatter_fn_p (ifn))
>       {
> +       /* Extract the else value from a masked-load call.  This is
> +          necessary when we created a gather_scatter pattern from a
> +          maskload.  It is a bit cumbersome to basically create the
> +          same else value three times but it's probably acceptable until
> +          tree-ifcvt goes away.  */
> +       if (internal_fn_mask_index (ifn) >= 0 && elsvals)
> +         {
> +           tree els = gimple_call_arg (call, internal_fn_else_index (ifn));
> +           elsvals->safe_push (vect_get_else_val_from_tree (els));
> +         }
>         vect_describe_gather_scatter_call (stmt_info, info);
>         return true;
>       }
> @@ -4315,7 +4343,8 @@ vect_check_gather_scatter (stmt_vec_info stmt_info, 
> loop_vec_info loop_vinfo,
>    /* True if we should aim to use internal functions rather than
>       built-in functions.  */
>    bool use_ifn_p = (DR_IS_READ (dr)
> -                 ? supports_vec_gather_load_p (TYPE_MODE (vectype))
> +                 ? supports_vec_gather_load_p (TYPE_MODE (vectype),
> +                                               elsvals)
>                   : supports_vec_scatter_store_p (TYPE_MODE (vectype)));
>  
>    base = DR_REF (dr);
> @@ -4472,12 +4501,14 @@ vect_check_gather_scatter (stmt_vec_info stmt_info, 
> loop_vec_info loop_vinfo,
>                                               masked_p, vectype, memory_type,
>                                               signed_char_type_node,
>                                               new_scale, &ifn,
> -                                             &offset_vectype)
> +                                             &offset_vectype,
> +                                             elsvals)
>                 && !vect_gather_scatter_fn_p (loop_vinfo, DR_IS_READ (dr),
>                                               masked_p, vectype, memory_type,
>                                               unsigned_char_type_node,
>                                               new_scale, &ifn,
> -                                             &offset_vectype))
> +                                             &offset_vectype,
> +                                             elsvals))
>               break;
>             scale = new_scale;
>             off = op0;
> @@ -4500,7 +4531,7 @@ vect_check_gather_scatter (stmt_vec_info stmt_info, 
> loop_vec_info loop_vinfo,
>             && vect_gather_scatter_fn_p (loop_vinfo, DR_IS_READ (dr),
>                                          masked_p, vectype, memory_type,
>                                          TREE_TYPE (off), scale, &ifn,
> -                                        &offset_vectype))
> +                                        &offset_vectype, elsvals))
>           break;
>  
>         if (TYPE_PRECISION (TREE_TYPE (op0))
> @@ -4554,7 +4585,7 @@ vect_check_gather_scatter (stmt_vec_info stmt_info, 
> loop_vec_info loop_vinfo,
>      {
>        if (!vect_gather_scatter_fn_p (loop_vinfo, DR_IS_READ (dr), masked_p,
>                                    vectype, memory_type, offtype, scale,
> -                                  &ifn, &offset_vectype))
> +                                  &ifn, &offset_vectype, elsvals))
>       ifn = IFN_LAST;
>        decl = NULL_TREE;
>      }
> @@ -6391,27 +6422,29 @@ vect_grouped_load_supported (tree vectype, bool 
> single_element_p,
>  }
>  
>  /* Return FN if vec_{masked_,mask_len_}load_lanes is available for COUNT 
> vectors
> -   of type VECTYPE.  MASKED_P says whether the masked form is needed.  */
> +   of type VECTYPE.  MASKED_P says whether the masked form is needed.
> +   If it is available and ELSVALS is nonzero add the possible else values
> +   to the vector it points to.  */
>  
>  internal_fn
>  vect_load_lanes_supported (tree vectype, unsigned HOST_WIDE_INT count,
> -                        bool masked_p)
> +                        bool masked_p, auto_vec<int> *elsvals)
>  {
>    if (vect_lanes_optab_supported_p ("vec_mask_len_load_lanes",
>                                   vec_mask_len_load_lanes_optab, vectype,
> -                                 count))
> +                                 count, elsvals))
>      return IFN_MASK_LEN_LOAD_LANES;
>    else if (masked_p)
>      {
>        if (vect_lanes_optab_supported_p ("vec_mask_load_lanes",
>                                       vec_mask_load_lanes_optab, vectype,
> -                                     count))
> +                                     count, elsvals))
>       return IFN_MASK_LOAD_LANES;
>      }
>    else
>      {
>        if (vect_lanes_optab_supported_p ("vec_load_lanes", 
> vec_load_lanes_optab,
> -                                     vectype, count))
> +                                     vectype, count, elsvals))
>       return IFN_LOAD_LANES;
>      }
>    return IFN_LAST;
> diff --git a/gcc/tree-vect-patterns.cc b/gcc/tree-vect-patterns.cc
> index 746f100a084..184d150f96d 100644
> --- a/gcc/tree-vect-patterns.cc
> +++ b/gcc/tree-vect-patterns.cc
> @@ -6630,7 +6630,8 @@ vect_recog_gather_scatter_pattern (vec_info *vinfo,
>    /* Make sure that the target supports an appropriate internal
>       function for the gather/scatter operation.  */
>    gather_scatter_info gs_info;
> -  if (!vect_check_gather_scatter (stmt_info, loop_vinfo, &gs_info)
> +  auto_vec<int> elsvals;
> +  if (!vect_check_gather_scatter (stmt_info, loop_vinfo, &gs_info, &elsvals)
>        || gs_info.ifn == IFN_LAST)
>      return NULL;
>  
> @@ -6653,20 +6654,27 @@ vect_recog_gather_scatter_pattern (vec_info *vinfo,
>    tree offset = vect_add_conversion_to_pattern (vinfo, offset_type,
>                                               gs_info.offset, stmt_info);
>  
> +  tree vec_els = NULL_TREE;
>    /* Build the new pattern statement.  */
>    tree scale = size_int (gs_info.scale);
>    gcall *pattern_stmt;
> +  tree load_lhs;
>    if (DR_IS_READ (dr))
>      {
>        tree zero = build_zero_cst (gs_info.element_type);
>        if (mask != NULL)
> -     pattern_stmt = gimple_build_call_internal (gs_info.ifn, 5, base,
> -                                                offset, scale, zero, mask);
> +     {
> +       int elsval = *elsvals.begin ();
> +       vec_els = vect_get_mask_load_else (elsval, TREE_TYPE (gs_vectype));
> +       pattern_stmt = gimple_build_call_internal (gs_info.ifn, 6, base,
> +                                                  offset, scale, zero, mask,
> +                                                  vec_els);
> +     }
>        else
>       pattern_stmt = gimple_build_call_internal (gs_info.ifn, 4, base,
>                                                  offset, scale, zero);
> -      tree load_lhs = vect_recog_temp_ssa_var (gs_info.element_type, NULL);
> -      gimple_call_set_lhs (pattern_stmt, load_lhs);
> +      load_lhs = vect_recog_temp_ssa_var (gs_info.element_type, NULL);
> +      gimple_set_lhs (pattern_stmt, load_lhs);
>      }
>    else
>      {
> diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc
> index 8727246c27a..d161f28d62c 100644
> --- a/gcc/tree-vect-slp.cc
> +++ b/gcc/tree-vect-slp.cc
> @@ -511,13 +511,13 @@ static const int cond_expr_maps[3][5] = {
>  static const int no_arg_map[] = { 0 };
>  static const int arg0_map[] = { 1, 0 };
>  static const int arg1_map[] = { 1, 1 };
> -static const int arg2_map[] = { 1, 2 };
> -static const int arg1_arg4_map[] = { 2, 1, 4 };
> +static const int arg2_arg3_map[] = { 2, 2, 3 };
> +static const int arg1_arg4_arg5_map[] = { 3, 1, 4, 5 };
>  static const int arg3_arg2_map[] = { 2, 3, 2 };
>  static const int op1_op0_map[] = { 2, 1, 0 };
>  static const int off_map[] = { 1, -3 };
>  static const int off_op0_map[] = { 2, -3, 0 };
> -static const int off_arg2_map[] = { 2, -3, 2 };
> +static const int off_arg2_arg3_map[] = { 3, -3, 2, 3 };
>  static const int off_arg3_arg2_map[] = { 3, -3, 3, 2 };
>  static const int mask_call_maps[6][7] = {
>    { 1, 1, },
> @@ -564,14 +564,14 @@ vect_get_operand_map (const gimple *stmt, bool 
> gather_scatter_p = false,
>       switch (gimple_call_internal_fn (call))
>         {
>         case IFN_MASK_LOAD:
> -         return gather_scatter_p ? off_arg2_map : arg2_map;
> +         return gather_scatter_p ? off_arg2_arg3_map : arg2_arg3_map;
>  
>         case IFN_GATHER_LOAD:
>           return arg1_map;
>  
>         case IFN_MASK_GATHER_LOAD:
>         case IFN_MASK_LEN_GATHER_LOAD:
> -         return arg1_arg4_map;
> +         return arg1_arg4_arg5_map;
>  
>         case IFN_MASK_STORE:
>           return gather_scatter_p ? off_arg3_arg2_map : arg3_arg2_map;
> @@ -7775,6 +7775,18 @@ vect_slp_analyze_node_operations (vec_info *vinfo, 
> slp_tree node,
>         tree vector_type = SLP_TREE_VECTYPE (child);
>         if (!vector_type)
>           {
> +           /* Masked loads can have an undefined (default SSA definition)
> +              else operand.  We do not need to cost it.  */
> +           vec<tree> ops = SLP_TREE_SCALAR_OPS (child);
> +           if ((STMT_VINFO_TYPE (SLP_TREE_REPRESENTATIVE (node))
> +                == load_vec_info_type)
> +               && ((ops.length () &&
> +                    TREE_CODE (ops[0]) == SSA_NAME
> +                    && SSA_NAME_IS_DEFAULT_DEF (ops[0])
> +                    && VAR_P (SSA_NAME_VAR (ops[0])))
> +                   || SLP_TREE_DEF_TYPE (child) == vect_constant_def))
> +             continue;
> +
>             /* For shifts with a scalar argument we don't need
>                to cost or code-generate anything.
>                ???  Represent this more explicitely.  */
> diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
> index 9b14b96cb5a..74a437735a5 100644
> --- a/gcc/tree-vect-stmts.cc
> +++ b/gcc/tree-vect-stmts.cc
> @@ -58,6 +58,7 @@ along with GCC; see the file COPYING3.  If not see
>  #include "regs.h"
>  #include "attribs.h"
>  #include "optabs-libfuncs.h"
> +#include "tree-dfa.h"
>  
>  /* For lang_hooks.types.type_for_mode.  */
>  #include "langhooks.h"
> @@ -469,6 +470,10 @@ exist_non_indexing_operands_for_use_p (tree use, 
> stmt_vec_info stmt_info)
>         if (mask_index >= 0
>             && use == gimple_call_arg (call, mask_index))
>           return true;
> +       int els_index = internal_fn_else_index (ifn);
> +       if (els_index >= 0
> +           && use == gimple_call_arg (call, els_index))
> +         return true;
>         int stored_value_index = internal_fn_stored_value_index (ifn);
>         if (stored_value_index >= 0
>             && use == gimple_call_arg (call, stored_value_index))
> @@ -1280,7 +1285,17 @@ vect_get_vec_defs_for_operand (vec_info *vinfo, 
> stmt_vec_info stmt_vinfo,
>       vector_type = get_vectype_for_scalar_type (loop_vinfo, TREE_TYPE (op));
>  
>        gcc_assert (vector_type);
> -      tree vop = vect_init_vector (vinfo, stmt_vinfo, op, vector_type, NULL);
> +      /* A masked load can have a default SSA definition as else operand.
> +      We should "vectorize" this instead of creating a duplicate from the
> +      scalar default.  */
> +      tree vop;
> +      if (TREE_CODE (op) == SSA_NAME
> +       && SSA_NAME_IS_DEFAULT_DEF (op)
> +       && VAR_P (SSA_NAME_VAR (op)))
> +     vop = get_or_create_ssa_default_def (cfun,
> +                                          create_tmp_var (vector_type));
> +      else
> +     vop = vect_init_vector (vinfo, stmt_vinfo, op, vector_type, NULL);
>        while (ncopies--)
>       vec_oprnds->quick_push (vop);
>      }
> @@ -1492,7 +1507,10 @@ static tree permute_vec_elements (vec_info *, tree, 
> tree, tree, stmt_vec_info,
>  
>     Clear LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P if a loop using partial
>     vectors is not supported, otherwise record the required rgroup control
> -   types.  */
> +   types.
> +
> +   If partial vectors can be used and ELSVALS is nonzero the supported
> +   else values will be added to the vector ELSVALS points to.  */
>  
>  static void
>  check_load_store_for_partial_vectors (loop_vec_info loop_vinfo, tree vectype,
> @@ -1502,7 +1520,8 @@ check_load_store_for_partial_vectors (loop_vec_info 
> loop_vinfo, tree vectype,
>                                     vect_memory_access_type
>                                     memory_access_type,
>                                     gather_scatter_info *gs_info,
> -                                   tree scalar_mask)
> +                                   tree scalar_mask,
> +                                   auto_vec<int> *elsvals = nullptr)
>  {
>    /* Invariant loads need no special support.  */
>    if (memory_access_type == VMAT_INVARIANT)
> @@ -1518,7 +1537,8 @@ check_load_store_for_partial_vectors (loop_vec_info 
> loop_vinfo, tree vectype,
>        if (slp_node)
>       nvectors /= group_size;
>        internal_fn ifn
> -     = (is_load ? vect_load_lanes_supported (vectype, group_size, true)
> +     = (is_load ? vect_load_lanes_supported (vectype, group_size, true,
> +                                             elsvals)
>                  : vect_store_lanes_supported (vectype, group_size, true));
>        if (ifn == IFN_MASK_LEN_LOAD_LANES || ifn == IFN_MASK_LEN_STORE_LANES)
>       vect_record_loop_len (loop_vinfo, lens, nvectors, vectype, 1);
> @@ -1548,12 +1568,14 @@ check_load_store_for_partial_vectors (loop_vec_info 
> loop_vinfo, tree vectype,
>        if (internal_gather_scatter_fn_supported_p (len_ifn, vectype,
>                                                 gs_info->memory_type,
>                                                 gs_info->offset_vectype,
> -                                               gs_info->scale))
> +                                               gs_info->scale,
> +                                               elsvals))
>       vect_record_loop_len (loop_vinfo, lens, nvectors, vectype, 1);
>        else if (internal_gather_scatter_fn_supported_p (ifn, vectype,
>                                                      gs_info->memory_type,
>                                                      gs_info->offset_vectype,
> -                                                    gs_info->scale))
> +                                                    gs_info->scale,
> +                                                    elsvals))
>       vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype,
>                              scalar_mask);
>        else
> @@ -1607,7 +1629,8 @@ check_load_store_for_partial_vectors (loop_vec_info 
> loop_vinfo, tree vectype,
>    machine_mode mask_mode;
>    machine_mode vmode;
>    bool using_partial_vectors_p = false;
> -  if (get_len_load_store_mode (vecmode, is_load).exists (&vmode))
> +  if (get_len_load_store_mode
> +      (vecmode, is_load, nullptr, elsvals).exists (&vmode))
>      {
>        nvectors = group_memory_nvectors (group_size * vf, nunits);
>        unsigned factor = (vecmode == vmode) ? 1 : GET_MODE_UNIT_SIZE 
> (vecmode);
> @@ -1615,7 +1638,8 @@ check_load_store_for_partial_vectors (loop_vec_info 
> loop_vinfo, tree vectype,
>        using_partial_vectors_p = true;
>      }
>    else if (targetm.vectorize.get_mask_mode (vecmode).exists (&mask_mode)
> -        && can_vec_mask_load_store_p (vecmode, mask_mode, is_load))
> +        && can_vec_mask_load_store_p (vecmode, mask_mode, is_load, NULL,
> +                                      elsvals))
>      {
>        nvectors = group_memory_nvectors (group_size * vf, nunits);
>        vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype, 
> scalar_mask);
> @@ -1672,12 +1696,16 @@ prepare_vec_mask (loop_vec_info loop_vinfo, tree 
> mask_type, tree loop_mask,
>     without loss of precision, where X is STMT_INFO's DR_STEP.
>  
>     Return true if this is possible, describing the gather load or scatter
> -   store in GS_INFO.  MASKED_P is true if the load or store is conditional.  
> */
> +   store in GS_INFO.  MASKED_P is true if the load or store is conditional.
> +
> +   If we can use gather/scatter and ELSVALS is nonzero the supported
> +   else values will be added to the vector ELSVALS points to.  */
>  
>  static bool
>  vect_truncate_gather_scatter_offset (stmt_vec_info stmt_info,
>                                    loop_vec_info loop_vinfo, bool masked_p,
> -                                  gather_scatter_info *gs_info)
> +                                  gather_scatter_info *gs_info,
> +                                  auto_vec<int> *elsvals)
>  {
>    dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
>    data_reference *dr = dr_info->dr;
> @@ -1734,7 +1762,8 @@ vect_truncate_gather_scatter_offset (stmt_vec_info 
> stmt_info,
>        tree memory_type = TREE_TYPE (DR_REF (dr));
>        if (!vect_gather_scatter_fn_p (loop_vinfo, DR_IS_READ (dr), masked_p,
>                                    vectype, memory_type, offset_type, scale,
> -                                  &gs_info->ifn, &gs_info->offset_vectype)
> +                                  &gs_info->ifn, &gs_info->offset_vectype,
> +                                  elsvals)
>         || gs_info->ifn == IFN_LAST)
>       continue;
>  
> @@ -1762,17 +1791,21 @@ vect_truncate_gather_scatter_offset (stmt_vec_info 
> stmt_info,
>     vectorize STMT_INFO, which is a grouped or strided load or store.
>     MASKED_P is true if load or store is conditional.  When returning
>     true, fill in GS_INFO with the information required to perform the
> -   operation.  */
> +   operation.
> +
> +   If we can use gather/scatter and ELSVALS is nonzero the supported
> +   else values will be added to the vector ELSVALS points to.  */
>  
>  static bool
>  vect_use_strided_gather_scatters_p (stmt_vec_info stmt_info,
>                                   loop_vec_info loop_vinfo, bool masked_p,
> -                                 gather_scatter_info *gs_info)
> +                                 gather_scatter_info *gs_info,
> +                                 auto_vec<int> *elsvals)
>  {
> -  if (!vect_check_gather_scatter (stmt_info, loop_vinfo, gs_info)
> +  if (!vect_check_gather_scatter (stmt_info, loop_vinfo, gs_info, elsvals)
>        || gs_info->ifn == IFN_LAST)
>      return vect_truncate_gather_scatter_offset (stmt_info, loop_vinfo,
> -                                             masked_p, gs_info);
> +                                             masked_p, gs_info, elsvals);
>  
>    tree old_offset_type = TREE_TYPE (gs_info->offset);
>    tree new_offset_type = TREE_TYPE (gs_info->offset_vectype);
> @@ -1985,7 +2018,8 @@ get_group_load_store_type (vec_info *vinfo, 
> stmt_vec_info stmt_info,
>                          dr_alignment_support *alignment_support_scheme,
>                          int *misalignment,
>                          gather_scatter_info *gs_info,
> -                        internal_fn *lanes_ifn)
> +                        internal_fn *lanes_ifn,
> +                        auto_vec<int> *elsvals)
>  {
>    loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
>    class loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
> @@ -2074,7 +2108,8 @@ get_group_load_store_type (vec_info *vinfo, 
> stmt_vec_info stmt_info,
>         else if (slp_node->ldst_lanes
>                  && (*lanes_ifn
>                        = (vls_type == VLS_LOAD
> -                         ? vect_load_lanes_supported (vectype, group_size, 
> masked_p)
> +                         ? vect_load_lanes_supported (vectype, group_size,
> +                                                      masked_p, elsvals)
>                           : vect_store_lanes_supported (vectype, group_size,
>                                                         masked_p))) != 
> IFN_LAST)
>           *memory_access_type = VMAT_LOAD_STORE_LANES;
> @@ -2242,7 +2277,8 @@ get_group_load_store_type (vec_info *vinfo, 
> stmt_vec_info stmt_info,
>             /* Otherwise try using LOAD/STORE_LANES.  */
>             *lanes_ifn
>               = vls_type == VLS_LOAD
> -                 ? vect_load_lanes_supported (vectype, group_size, masked_p)
> +                 ? vect_load_lanes_supported (vectype, group_size, masked_p,
> +                                              elsvals)
>                   : vect_store_lanes_supported (vectype, group_size,
>                                                 masked_p);
>             if (*lanes_ifn != IFN_LAST)
> @@ -2276,7 +2312,7 @@ get_group_load_store_type (vec_info *vinfo, 
> stmt_vec_info stmt_info,
>        && single_element_p
>        && loop_vinfo
>        && vect_use_strided_gather_scatters_p (stmt_info, loop_vinfo,
> -                                          masked_p, gs_info))
> +                                          masked_p, gs_info, elsvals))
>      *memory_access_type = VMAT_GATHER_SCATTER;
>  
>    if (*memory_access_type == VMAT_GATHER_SCATTER
> @@ -2338,7 +2374,10 @@ get_group_load_store_type (vec_info *vinfo, 
> stmt_vec_info stmt_info,
>     SLP says whether we're performing SLP rather than loop vectorization.
>     MASKED_P is true if the statement is conditional on a vectorized mask.
>     VECTYPE is the vector type that the vectorized statements will use.
> -   NCOPIES is the number of vector statements that will be needed.  */
> +   NCOPIES is the number of vector statements that will be needed.
> +
> +   If ELSVALS is nonzero the supported else values will be added to the
> +   vector ELSVALS points to.  */
>  
>  static bool
>  get_load_store_type (vec_info  *vinfo, stmt_vec_info stmt_info,
> @@ -2350,7 +2389,8 @@ get_load_store_type (vec_info  *vinfo, stmt_vec_info 
> stmt_info,
>                    dr_alignment_support *alignment_support_scheme,
>                    int *misalignment,
>                    gather_scatter_info *gs_info,
> -                  internal_fn *lanes_ifn)
> +                  internal_fn *lanes_ifn,
> +                  auto_vec<int> *elsvals = nullptr)
>  {
>    loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
>    poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
> @@ -2359,7 +2399,8 @@ get_load_store_type (vec_info  *vinfo, stmt_vec_info 
> stmt_info,
>    if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
>      {
>        *memory_access_type = VMAT_GATHER_SCATTER;
> -      if (!vect_check_gather_scatter (stmt_info, loop_vinfo, gs_info))
> +      if (!vect_check_gather_scatter (stmt_info, loop_vinfo, gs_info,
> +                                   elsvals))
>       gcc_unreachable ();
>        /* When using internal functions, we rely on pattern recognition
>        to convert the type of the offset to the type that the target
> @@ -2413,7 +2454,8 @@ get_load_store_type (vec_info  *vinfo, stmt_vec_info 
> stmt_info,
>                                     masked_p,
>                                     vls_type, memory_access_type, poffset,
>                                     alignment_support_scheme,
> -                                   misalignment, gs_info, lanes_ifn))
> +                                   misalignment, gs_info, lanes_ifn,
> +                                   elsvals))
>       return false;
>      }
>    else if (STMT_VINFO_STRIDED_P (stmt_info))
> @@ -2421,7 +2463,7 @@ get_load_store_type (vec_info  *vinfo, stmt_vec_info 
> stmt_info,
>        gcc_assert (!slp_node);
>        if (loop_vinfo
>         && vect_use_strided_gather_scatters_p (stmt_info, loop_vinfo,
> -                                              masked_p, gs_info))
> +                                              masked_p, gs_info, elsvals))
>       *memory_access_type = VMAT_GATHER_SCATTER;
>        else
>       *memory_access_type = VMAT_ELEMENTWISE;
> @@ -2689,6 +2731,53 @@ vect_build_zero_merge_argument (vec_info *vinfo,
>    return vect_init_vector (vinfo, stmt_info, merge, vectype, NULL);
>  }
>  
> +/* Return a tree else operand of type TYPE representing the else value
> +   ELSVAL (one of the MASK_LOAD_ELSE_* constants): an undefined SSA
> +   default definition, an all-ones constant, or a zero constant.  */
> +
> +tree
> +vect_get_mask_load_else (int elsval, tree type)
> +{
> +  tree els;
> +  if (elsval == MASK_LOAD_ELSE_UNDEFINED)
> +    {
> +      tree tmp = create_tmp_var (type);
> +      /* No need to warn about anything.  */
> +      TREE_NO_WARNING (tmp) = 1;
> +      els = get_or_create_ssa_default_def (cfun, tmp);
> +    }
> +  else if (elsval == MASK_LOAD_ELSE_M1)
> +    els = build_minus_one_cst (type);
> +  else if (elsval == MASK_LOAD_ELSE_ZERO)
> +    els = build_zero_cst (type);
> +  else
> +    __builtin_unreachable ();

Shouldn't these be gcc_unreachable () rather than __builtin_unreachable ()
in GCC sources (here and in vect_get_else_val_from_tree below), so a
checking build traps on an unexpected else value?

> +
> +  return els;
> +}
> +
> +/* Return the integer constant (MASK_LOAD_ELSE_*) that the tree else
> +   This performs the inverse of vect_get_mask_load_else.  Refer to
> +   vect_check_gather_scatter for its usage rationale.  */
> +
> +int
> +vect_get_else_val_from_tree (tree els)
> +{
> +  if (TREE_CODE (els) == SSA_NAME
> +      && SSA_NAME_IS_DEFAULT_DEF (els)
> +      && TREE_CODE (SSA_NAME_VAR (els)) == VAR_DECL)
> +    return MASK_LOAD_ELSE_UNDEFINED;
> +  else
> +    {
> +      if (zerop (els))
> +     return MASK_LOAD_ELSE_ZERO;
> +      else if (integer_minus_onep (els))
> +     return MASK_LOAD_ELSE_M1;
> +      else
> +     __builtin_unreachable ();
> +    }
> +}
> +
>  /* Build a gather load call while vectorizing STMT_INFO.  Insert new
>     instructions before GSI and add them to VEC_STMT.  GS_INFO describes
>     the gather load operation.  If the load is conditional, MASK is the
> @@ -2770,8 +2859,14 @@ vect_build_one_gather_load_call (vec_info *vinfo, 
> stmt_vec_info stmt_info,
>      }
>  
>    tree scale = build_int_cst (scaletype, gs_info->scale);
> -  gimple *new_stmt = gimple_build_call (gs_info->decl, 5, src_op, ptr, op,
> -                                     mask_op, scale);
> +  gimple *new_stmt;
> +
> +  if (!mask)
> +    new_stmt = gimple_build_call (gs_info->decl, 5, src_op, ptr, op,
> +                               mask_op, scale);
> +  else
> +    new_stmt = gimple_build_call (gs_info->decl, 5, src_op, ptr, op,
> +                               mask_op, scale);

The two arms of this if are identical -- either the !mask distinction is
unnecessary or one arm is missing its intended difference?

>  
>    if (!useless_type_conversion_p (vectype, rettype))
>      {
> @@ -9967,6 +10062,7 @@ vectorizable_load (vec_info *vinfo,
>    gather_scatter_info gs_info;
>    tree ref_type;
>    enum vect_def_type mask_dt = vect_unknown_def_type;
> +  enum vect_def_type els_dt = vect_unknown_def_type;
>  
>    if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
>      return false;
> @@ -9979,8 +10075,12 @@ vectorizable_load (vec_info *vinfo,
>      return false;
>  
>    tree mask = NULL_TREE, mask_vectype = NULL_TREE;
> +  tree els = NULL_TREE; tree els_vectype = NULL_TREE;
> +
>    int mask_index = -1;
> +  int els_index = -1;
>    slp_tree slp_op = NULL;
> +  slp_tree els_op = NULL;
>    if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
>      {
>        scalar_dest = gimple_assign_lhs (assign);
> @@ -10020,6 +10120,15 @@ vectorizable_load (vec_info *vinfo,
>         && !vect_check_scalar_mask (vinfo, stmt_info, slp_node, mask_index,
>                                     &mask, &slp_op, &mask_dt, &mask_vectype))
>       return false;
> +
> +      els_index = internal_fn_else_index (ifn);
> +      if (els_index >= 0 && slp_node)
> +     els_index = vect_slp_child_index_for_operand
> +       (call, els_index, STMT_VINFO_GATHER_SCATTER_P (stmt_info));
> +      if (els_index >= 0
> +       && !vect_is_simple_use (vinfo, stmt_info, slp_node, els_index,
> +                               &els, &els_op, &els_dt, &els_vectype))
> +     return false;
>      }
>  
>    tree vectype = STMT_VINFO_VECTYPE (stmt_info);
> @@ -10122,10 +10231,11 @@ vectorizable_load (vec_info *vinfo,
>    int misalignment;
>    poly_int64 poffset;
>    internal_fn lanes_ifn;
> +  auto_vec<int> elsvals;
>    if (!get_load_store_type (vinfo, stmt_info, vectype, slp_node, mask, 
> VLS_LOAD,
>                           ncopies, &memory_access_type, &poffset,
>                           &alignment_support_scheme, &misalignment, &gs_info,
> -                         &lanes_ifn))
> +                         &lanes_ifn, &elsvals))
>      return false;
>  
>    /* ???  The following checks should really be part of
> @@ -10191,7 +10301,8 @@ vectorizable_load (vec_info *vinfo,
>         machine_mode vec_mode = TYPE_MODE (vectype);
>         if (!VECTOR_MODE_P (vec_mode)
>             || !can_vec_mask_load_store_p (vec_mode,
> -                                          TYPE_MODE (mask_vectype), true))
> +                                          TYPE_MODE (mask_vectype),
> +                                          true, NULL, &elsvals))
>           return false;
>       }
>        else if (memory_access_type != VMAT_LOAD_STORE_LANES
> @@ -10260,6 +10371,16 @@ vectorizable_load (vec_info *vinfo,
>  
>        STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
>      }
> +  else
> +    {
> +      /* Here just get the else values.  */
> +      if (loop_vinfo
> +       && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
> +     check_load_store_for_partial_vectors (loop_vinfo, vectype, slp_node,
> +                                           VLS_LOAD, group_size,
> +                                           memory_access_type, &gs_info,
> +                                           mask, &elsvals);
> +    }
>  
>    if (!slp)
>      gcc_assert (memory_access_type
> @@ -10930,6 +11051,7 @@ vectorizable_load (vec_info *vinfo,
>      }
>  
>    tree vec_mask = NULL_TREE;
> +  tree vec_els = NULL_TREE;
>    if (memory_access_type == VMAT_LOAD_STORE_LANES)
>      {
>        gcc_assert (alignment_support_scheme == dr_aligned
> @@ -11020,6 +11142,11 @@ vectorizable_load (vec_info *vinfo,
>               }
>           }
>  
> +       if (final_mask)
> +         vec_els = vect_get_mask_load_else
> +           (elsvals.contains (MASK_LOAD_ELSE_ZERO)
> +            ? MASK_LOAD_ELSE_ZERO : *elsvals.begin (), vectype);
> +
>         gcall *call;
>         if (final_len && final_mask)
>           {
> @@ -11028,9 +11155,10 @@ vectorizable_load (vec_info *vinfo,
>                                                   VEC_MASK, LEN, BIAS).  */
>             unsigned int align = TYPE_ALIGN (TREE_TYPE (vectype));
>             tree alias_ptr = build_int_cst (ref_type, align);
> -           call = gimple_build_call_internal (IFN_MASK_LEN_LOAD_LANES, 5,
> +           call = gimple_build_call_internal (IFN_MASK_LEN_LOAD_LANES, 6,
>                                                dataref_ptr, alias_ptr,
> -                                              final_mask, final_len, bias);
> +                                              final_mask, vec_els,
> +                                              final_len, bias);
>           }
>         else if (final_mask)
>           {
> @@ -11039,9 +11167,9 @@ vectorizable_load (vec_info *vinfo,
>                                               VEC_MASK).  */
>             unsigned int align = TYPE_ALIGN (TREE_TYPE (vectype));
>             tree alias_ptr = build_int_cst (ref_type, align);
> -           call = gimple_build_call_internal (IFN_MASK_LOAD_LANES, 3,
> +           call = gimple_build_call_internal (IFN_MASK_LOAD_LANES, 4,
>                                                dataref_ptr, alias_ptr,
> -                                              final_mask);
> +                                              final_mask, vec_els);
>           }
>         else
>           {
> @@ -11190,17 +11318,29 @@ vectorizable_load (vec_info *vinfo,
>                       }
>                   }
>  
> +               if (final_mask)
> +                 vec_els = vect_get_mask_load_else
> +                   (elsvals.contains (MASK_LOAD_ELSE_ZERO)
> +                    ? MASK_LOAD_ELSE_ZERO : *elsvals.begin (), vectype);
> +
>                 gcall *call;
>                 if (final_len && final_mask)
> -                 call
> -                   = gimple_build_call_internal (IFN_MASK_LEN_GATHER_LOAD, 7,
> -                                                 dataref_ptr, vec_offset,
> -                                                 scale, zero, final_mask,
> -                                                 final_len, bias);
> +                 {
> +                   call
> +                     = gimple_build_call_internal (IFN_MASK_LEN_GATHER_LOAD,
> +                                                   8, dataref_ptr,
> +                                                   vec_offset, scale, zero,
> +                                                   final_mask, vec_els,
> +                                                   final_len, bias);
> +                 }
>                 else if (final_mask)
> -                 call = gimple_build_call_internal (IFN_MASK_GATHER_LOAD, 5,
> -                                                    dataref_ptr, vec_offset,
> -                                                    scale, zero, final_mask);
> +                 {
> +                   call = gimple_build_call_internal (IFN_MASK_GATHER_LOAD,
> +                                                      6, dataref_ptr,
> +                                                      vec_offset, scale,
> +                                                      zero, final_mask,
> +                                                      vec_els);
> +                 }

why add these unneeded braces?

>                 else
>                   call = gimple_build_call_internal (IFN_GATHER_LOAD, 4,
>                                                      dataref_ptr, vec_offset,
> @@ -11514,6 +11654,7 @@ vectorizable_load (vec_info *vinfo,
>         tree final_mask = NULL_TREE;
>         tree final_len = NULL_TREE;
>         tree bias = NULL_TREE;
> +
>         if (!costing_p)
>           {
>             if (mask)
> @@ -11566,7 +11707,8 @@ vectorizable_load (vec_info *vinfo,
>               if (loop_lens)
>                 {
>                   opt_machine_mode new_ovmode
> -                   = get_len_load_store_mode (vmode, true, &partial_ifn);
> +                   = get_len_load_store_mode (vmode, true, &partial_ifn,
> +                                              &elsvals);
>                   new_vmode = new_ovmode.require ();
>                   unsigned factor
>                     = (new_ovmode == vmode) ? 1 : GET_MODE_UNIT_SIZE (vmode);
> @@ -11578,7 +11720,7 @@ vectorizable_load (vec_info *vinfo,
>                 {
>                   if (!can_vec_mask_load_store_p (
>                         vmode, TYPE_MODE (TREE_TYPE (final_mask)), true,
> -                       &partial_ifn))
> +                       &partial_ifn, &elsvals))
>                     gcc_unreachable ();
>                 }
>  
> @@ -11606,19 +11748,28 @@ vectorizable_load (vec_info *vinfo,
>                   bias = build_int_cst (intQI_type_node, biasval);
>                 }
>  
> +             tree vec_els;
> +
>               if (final_len)
>                 {
>                   tree ptr = build_int_cst (ref_type, align * BITS_PER_UNIT);
>                   gcall *call;
>                   if (partial_ifn == IFN_MASK_LEN_LOAD)
> -                   call = gimple_build_call_internal (IFN_MASK_LEN_LOAD, 5,
> -                                                      dataref_ptr, ptr,
> -                                                      final_mask, final_len,
> -                                                      bias);
> +                   {
> +                     vec_els = vect_get_mask_load_else
> +                       (elsvals.contains (MASK_LOAD_ELSE_ZERO)
> +                        ? MASK_LOAD_ELSE_ZERO : *elsvals.begin (), vectype);
> +                     call = gimple_build_call_internal (IFN_MASK_LEN_LOAD,
> +                                                        6, dataref_ptr, ptr,
> +                                                        final_mask, vec_els,
> +                                                        final_len, bias);
> +                   }
>                   else
> -                   call = gimple_build_call_internal (IFN_LEN_LOAD, 4,
> -                                                      dataref_ptr, ptr,
> -                                                      final_len, bias);
> +                   {
> +                     call = gimple_build_call_internal (IFN_LEN_LOAD, 4,
> +                                                        dataref_ptr, ptr,
> +                                                        final_len, bias);
> +                   }

Likewise.


Otherwise looks OK to me.

Richard.

>                   gimple_call_set_nothrow (call, true);
>                   new_stmt = call;
>                   data_ref = NULL_TREE;
> @@ -11641,9 +11792,13 @@ vectorizable_load (vec_info *vinfo,
>               else if (final_mask)
>                 {
>                   tree ptr = build_int_cst (ref_type, align * BITS_PER_UNIT);
> -                 gcall *call = gimple_build_call_internal (IFN_MASK_LOAD, 3,
> +                 vec_els = vect_get_mask_load_else
> +                   (elsvals.contains (MASK_LOAD_ELSE_ZERO)
> +                    ? MASK_LOAD_ELSE_ZERO : *elsvals.begin (), vectype);
> +                 gcall *call = gimple_build_call_internal (IFN_MASK_LOAD, 4,
>                                                             dataref_ptr, ptr,
> -                                                           final_mask);
> +                                                           final_mask,
> +                                                           vec_els);
>                   gimple_call_set_nothrow (call, true);
>                   new_stmt = call;
>                   data_ref = NULL_TREE;
> diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
> index b7f2708fec0..0b20c36a7fe 100644
> --- a/gcc/tree-vectorizer.h
> +++ b/gcc/tree-vectorizer.h
> @@ -2439,9 +2439,11 @@ extern bool vect_slp_analyze_instance_alignment 
> (vec_info *, slp_instance);
>  extern opt_result vect_analyze_data_ref_accesses (vec_info *, vec<int> *);
>  extern opt_result vect_prune_runtime_alias_test_list (loop_vec_info);
>  extern bool vect_gather_scatter_fn_p (vec_info *, bool, bool, tree, tree,
> -                                   tree, int, internal_fn *, tree *);
> +                                   tree, int, internal_fn *, tree *,
> +                                   auto_vec<int> * = nullptr);
>  extern bool vect_check_gather_scatter (stmt_vec_info, loop_vec_info,
> -                                    gather_scatter_info *);
> +                                    gather_scatter_info *,
> +                                    auto_vec<int> * = nullptr);
>  extern opt_result vect_find_stmt_data_reference (loop_p, gimple *,
>                                                vec<data_reference_p> *,
>                                                vec<int> *, int);
> @@ -2459,7 +2461,8 @@ extern tree vect_create_destination_var (tree, tree);
>  extern bool vect_grouped_store_supported (tree, unsigned HOST_WIDE_INT);
>  extern internal_fn vect_store_lanes_supported (tree, unsigned HOST_WIDE_INT, 
> bool);
>  extern bool vect_grouped_load_supported (tree, bool, unsigned HOST_WIDE_INT);
> -extern internal_fn vect_load_lanes_supported (tree, unsigned HOST_WIDE_INT, 
> bool);
> +extern internal_fn vect_load_lanes_supported (tree, unsigned HOST_WIDE_INT,
> +                                           bool, auto_vec<int> * = nullptr);
>  extern void vect_permute_store_chain (vec_info *, vec<tree> &,
>                                     unsigned int, stmt_vec_info,
>                                     gimple_stmt_iterator *, vec<tree> *);
> @@ -2605,6 +2608,8 @@ extern int vect_slp_child_index_for_operand (const 
> gimple *, int op, bool);
>  
>  extern tree prepare_vec_mask (loop_vec_info, tree, tree, tree,
>                             gimple_stmt_iterator *);
> +extern tree vect_get_mask_load_else (int, tree);
> +extern int vect_get_else_val_from_tree (tree els);
>  
>  /* In tree-vect-patterns.cc.  */
>  extern void
> 

-- 
Richard Biener <rguent...@suse.de>
SUSE Software Solutions Germany GmbH,
Frankenstrasse 146, 90461 Nuernberg, Germany;
GF: Ivo Totev, Andrew McDonald, Werner Knoblich; (HRB 36809, AG Nuernberg)

Reply via email to