On Fri, 18 Oct 2024, Robin Dapp wrote:

> When predicating a load we implicitly assume that the else value is
> zero.  This matters in case the loaded value is padded (like e.g.
> a Bool) and we must ensure that the padding bytes are zero on targets
> that don't implicitly zero inactive elements.
> 
> In order to formalize this this patch queries the target for
> its supported else operand and uses that for the maskload call.
> Subsequently, if the else operand is nonzero, a cond_expr enforcing
> a zero else value is emitted.
> 
> gcc/ChangeLog:
> 
>       * tree-if-conv.cc (predicate_load_or_store): Enforce zero else
>       value for padded types.
>       (predicate_statements): Use sequence instead of statement.
> ---
>  gcc/tree-if-conv.cc | 112 +++++++++++++++++++++++++++++++++++++-------
>  1 file changed, 94 insertions(+), 18 deletions(-)
> 
> diff --git a/gcc/tree-if-conv.cc b/gcc/tree-if-conv.cc
> index 90c754a4814..9623426e1e1 100644
> --- a/gcc/tree-if-conv.cc
> +++ b/gcc/tree-if-conv.cc
> @@ -2531,12 +2531,15 @@ mask_exists (int size, const vec<int> &vec)
>  
>  /* Helper function for predicate_statements.  STMT is a memory read or
>     write and it needs to be predicated by MASK.  Return a statement
> -   that does so.  */
> +   that does so.  SSA_NAMES is the set of SSA names defined earlier in
> +   STMT's block. */
>  
> -static gimple *
> -predicate_load_or_store (gimple_stmt_iterator *gsi, gassign *stmt, tree mask)
> +static gimple_seq
> +predicate_load_or_store (gimple_stmt_iterator *gsi, gassign *stmt, tree mask,
> +                      hash_set<tree_ssa_name_hash> *ssa_names)
>  {
> -  gcall *new_stmt;
> +  gimple_seq stmts = NULL;
> +  gcall *call_stmt;
>  
>    tree lhs = gimple_assign_lhs (stmt);
>    tree rhs = gimple_assign_rhs1 (stmt);
> @@ -2552,21 +2555,88 @@ predicate_load_or_store (gimple_stmt_iterator *gsi, 
> gassign *stmt, tree mask)
>                  ref);
>    if (TREE_CODE (lhs) == SSA_NAME)
>      {
> -      new_stmt
> -     = gimple_build_call_internal (IFN_MASK_LOAD, 3, addr,
> -                                   ptr, mask);
> -      gimple_call_set_lhs (new_stmt, lhs);
> -      gimple_set_vuse (new_stmt, gimple_vuse (stmt));
> +      /* Get the preferred vector mode and its corresponding mask for the
> +      masked load.  We need this to query the target's supported else
> +      operands.  */
> +      machine_mode mode = TYPE_MODE (TREE_TYPE (lhs));
> +      scalar_mode smode = as_a <scalar_mode> (mode);
> +
> +      machine_mode vmode = targetm.vectorize.preferred_simd_mode (smode);
> +      machine_mode mask_mode
> +     = targetm.vectorize.get_mask_mode (vmode).require ();
> +
> +      auto_vec<int> elsvals;
> +      internal_fn ifn;
> +      bool have_masked_load
> +     = target_supports_mask_load_store_p (vmode, mask_mode, true, &ifn,
> +                                          &elsvals);
> +
> +      /* We might need to explicitly zero inactive elements if there are
> +      padding bits in the type that might leak otherwise.
> +      Refer to PR115336.  */
> +      bool need_zero
> +     = TYPE_PRECISION (TREE_TYPE (lhs)) < GET_MODE_PRECISION (smode);
> +
> +      int elsval;
> +      bool implicit_zero = false;
> +      if (have_masked_load)
> +     {
> +       gcc_assert (elsvals.length ());
> +
> +       /* But not if the target already provide implicit zeroing of inactive
> +          elements.  */
> +       implicit_zero = elsvals.contains (MASK_LOAD_ELSE_ZERO);
> +
> +       /* For now, just use the first else value if zero is unsupported.  */
> +       elsval = implicit_zero ? MASK_LOAD_ELSE_ZERO : *elsvals.begin ();
> +     }
> +      else
> +     {
> +       /* We cannot vectorize this either way so just use a zero even
> +          if it is unsupported.  */
> +       elsval = MASK_LOAD_ELSE_ZERO;
> +     }
> +
> +      tree els = vect_get_mask_load_else (elsval, TREE_TYPE (lhs));
> +
> +      call_stmt
> +     = gimple_build_call_internal (IFN_MASK_LOAD, 4, addr,
> +                                   ptr, mask, els);
> +

So what happens if the vectorizer chooses a vector type that does
not support MASK_LOAD_ELSE_ZERO but the .MASK_LOAD we create here
uses it?

Since we need to cope with this situation I think the "fix" is to
make if-conversion simply always use MASK_LOAD_ELSE_ZERO and not
emit the COND_EXPR and for the vectorizer to properly handle
this by either not vectorizing (boo!) or by adding a VEC_COND_EXPR
(yay!).  I think GCN is the only target that would need this
handling?

Richard.

> +      /* Build the load call and, if the else value is nonzero,
> +      a COND_EXPR that enforces it.  */
> +      tree loadlhs;
> +      if (!need_zero || implicit_zero)
> +     gimple_call_set_lhs (call_stmt, gimple_get_lhs (stmt));
> +      else
> +     {
> +       loadlhs = make_temp_ssa_name (TREE_TYPE (lhs), NULL, "_ifc_");
> +       ssa_names->add (loadlhs);
> +       gimple_call_set_lhs (call_stmt, loadlhs);
> +     }
> +      gimple_set_vuse (call_stmt, gimple_vuse (stmt));
> +      gimple_seq_add_stmt (&stmts, call_stmt);
> +
> +      if (need_zero && !implicit_zero)
> +     {
> +       tree cond_rhs
> +         = fold_build_cond_expr (TREE_TYPE (loadlhs), mask, loadlhs,
> +                                 build_zero_cst (TREE_TYPE (loadlhs)));
> +       gassign *cond_stmt
> +         = gimple_build_assign (gimple_get_lhs (stmt), cond_rhs);
> +       gimple_seq_add_stmt (&stmts, cond_stmt);
> +     }
>      }
>    else
>      {
> -      new_stmt
> +      call_stmt
>       = gimple_build_call_internal (IFN_MASK_STORE, 4, addr, ptr,
>                                     mask, rhs);
> -      gimple_move_vops (new_stmt, stmt);
> +      gimple_move_vops (call_stmt, stmt);
> +      gimple_seq_add_stmt (&stmts, call_stmt);
>      }
> -  gimple_call_set_nothrow (new_stmt, true);
> -  return new_stmt;
> +  gimple_call_set_nothrow (call_stmt, true);
> +  return stmts;
>  }
>  
>  /* STMT uses OP_LHS.  Check whether it is equivalent to:
> @@ -2836,7 +2906,6 @@ predicate_statements (loop_p loop)
>           {
>             tree lhs = gimple_assign_lhs (stmt);
>             tree mask;
> -           gimple *new_stmt;
>             gimple_seq stmts = NULL;
>             machine_mode mode = TYPE_MODE (TREE_TYPE (lhs));
>             /* We checked before setting GF_PLF_2 that an equivalent
> @@ -2870,11 +2939,18 @@ predicate_statements (loop_p loop)
>                 vect_masks.safe_push (mask);
>               }
>             if (gimple_assign_single_p (stmt))
> -             new_stmt = predicate_load_or_store (&gsi, stmt, mask);
> -           else
> -             new_stmt = predicate_rhs_code (stmt, mask, cond, &ssa_names);
> +             {
> +               gimple_seq call_seq
> +                 = predicate_load_or_store (&gsi, stmt, mask, &ssa_names);
>  
> -           gsi_replace (&gsi, new_stmt, true);
> +               gsi_replace_with_seq (&gsi, call_seq, true);
> +             }
> +           else
> +             {
> +               gimple *new_stmt;
> +               new_stmt = predicate_rhs_code (stmt, mask, cond, &ssa_names);
> +               gsi_replace (&gsi, new_stmt, true);
> +             }
>           }
>         else if (((lhs = gimple_assign_lhs (stmt)), true)
>                  && (INTEGRAL_TYPE_P (TREE_TYPE (lhs))
> 

-- 
Richard Biener <rguent...@suse.de>
SUSE Software Solutions Germany GmbH,
Frankenstrasse 146, 90461 Nuernberg, Germany;
GF: Ivo Totev, Andrew McDonald, Werner Knoblich; (HRB 36809, AG Nuernberg)

Reply via email to