On Fri, 18 Oct 2024, Robin Dapp wrote: > When predicating a load we implicitly assume that the else value is > zero. This matters in case the loaded value is padded (like e.g. > a Bool) and we must ensure that the padding bytes are zero on targets > that don't implicitly zero inactive elements. > > In order to formalize this this patch queries the target for > its supported else operand and uses that for the maskload call. > Subsequently, if the else operand is nonzero, a cond_expr enforcing > a zero else value is emitted. > > gcc/ChangeLog: > > * tree-if-conv.cc (predicate_load_or_store): Enforce zero else > value for padded types. > (predicate_statements): Use sequence instead of statement. > --- > gcc/tree-if-conv.cc | 112 +++++++++++++++++++++++++++++++++++++------- > 1 file changed, 94 insertions(+), 18 deletions(-) > > diff --git a/gcc/tree-if-conv.cc b/gcc/tree-if-conv.cc > index 90c754a4814..9623426e1e1 100644 > --- a/gcc/tree-if-conv.cc > +++ b/gcc/tree-if-conv.cc > @@ -2531,12 +2531,15 @@ mask_exists (int size, const vec<int> &vec) > > /* Helper function for predicate_statements. STMT is a memory read or > write and it needs to be predicated by MASK. Return a statement > - that does so. */ > + that does so. SSA_NAMES is the set of SSA names defined earlier in > + STMT's block. */ > > -static gimple * > -predicate_load_or_store (gimple_stmt_iterator *gsi, gassign *stmt, tree mask) > +static gimple_seq > +predicate_load_or_store (gimple_stmt_iterator *gsi, gassign *stmt, tree mask, > + hash_set<tree_ssa_name_hash> *ssa_names) > { > - gcall *new_stmt; > + gimple_seq stmts = NULL; > + gcall *call_stmt; > > tree lhs = gimple_assign_lhs (stmt); > tree rhs = gimple_assign_rhs1 (stmt); > @@ -2552,21 +2555,88 @@ predicate_load_or_store (gimple_stmt_iterator *gsi, > gassign *stmt, tree mask) > ref); > if (TREE_CODE (lhs) == SSA_NAME) > { > - new_stmt > - = gimple_build_call_internal (IFN_MASK_LOAD, 3, addr, > - ptr, mask); > - gimple_call_set_lhs (new_stmt, lhs); > - gimple_set_vuse (new_stmt, gimple_vuse (stmt)); > + /* Get the preferred vector mode and its corresponding mask for the > + masked load. We need this to query the target's supported else > + operands. */ > + machine_mode mode = TYPE_MODE (TREE_TYPE (lhs)); > + scalar_mode smode = as_a <scalar_mode> (mode); > + > + machine_mode vmode = targetm.vectorize.preferred_simd_mode (smode); > + machine_mode mask_mode > + = targetm.vectorize.get_mask_mode (vmode).require (); > + > + auto_vec<int> elsvals; > + internal_fn ifn; > + bool have_masked_load > + = target_supports_mask_load_store_p (vmode, mask_mode, true, &ifn, > + &elsvals); > + > + /* We might need to explicitly zero inactive elements if there are > + padding bits in the type that might leak otherwise. > + Refer to PR115336. */ > + bool need_zero > + = TYPE_PRECISION (TREE_TYPE (lhs)) < GET_MODE_PRECISION (smode); > + > + int elsval; > + bool implicit_zero = false; > + if (have_masked_load) > + { > + gcc_assert (elsvals.length ()); > + > + /* But not if the target already provide implicit zeroing of inactive > + elements. */ > + implicit_zero = elsvals.contains (MASK_LOAD_ELSE_ZERO); > + > + /* For now, just use the first else value if zero is unsupported. */ > + elsval = implicit_zero ? MASK_LOAD_ELSE_ZERO : *elsvals.begin (); > + } > + else > + { > + /* We cannot vectorize this either way so just use a zero even > + if it is unsupported. */ > + elsval = MASK_LOAD_ELSE_ZERO; > + } > + > + tree els = vect_get_mask_load_else (elsval, TREE_TYPE (lhs)); > + > + call_stmt > + = gimple_build_call_internal (IFN_MASK_LOAD, 4, addr, > + ptr, mask, els); > +
So what happens if the vectorizer chooses a vector type that does not support MASK_LOAD_ELSE_ZERO but the .MASK_LOAD we create here uses it? Since we need to cope with this situation I think the "fix" is to make if-conversion simply always use MASK_LOAD_ELSE_ZERO and not emit the COND_EXPR and for the vectorizer to properly handle this by either not vectorizing (boo!) or by adding a VEC_COND_EXPR (yay!). I think GCN is the only target that would need this handling? Richard. > + /* Build the load call and, if the else value is nonzero, > + a COND_EXPR that enforces it. */ > + tree loadlhs; > + if (!need_zero || implicit_zero) > + gimple_call_set_lhs (call_stmt, gimple_get_lhs (stmt)); > + else > + { > + loadlhs = make_temp_ssa_name (TREE_TYPE (lhs), NULL, "_ifc_"); > + ssa_names->add (loadlhs); > + gimple_call_set_lhs (call_stmt, loadlhs); > + } > + gimple_set_vuse (call_stmt, gimple_vuse (stmt)); > + gimple_seq_add_stmt (&stmts, call_stmt); > + > + if (need_zero && !implicit_zero) > + { > + tree cond_rhs > + = fold_build_cond_expr (TREE_TYPE (loadlhs), mask, loadlhs, > + build_zero_cst (TREE_TYPE (loadlhs))); > + gassign *cond_stmt > + = gimple_build_assign (gimple_get_lhs (stmt), cond_rhs); > + gimple_seq_add_stmt (&stmts, cond_stmt); > + } > } > else > { > - new_stmt > + call_stmt > = gimple_build_call_internal (IFN_MASK_STORE, 4, addr, ptr, > mask, rhs); > - gimple_move_vops (new_stmt, stmt); > + gimple_move_vops (call_stmt, stmt); > + gimple_seq_add_stmt (&stmts, call_stmt); > } > - gimple_call_set_nothrow (new_stmt, true); > - return new_stmt; > + gimple_call_set_nothrow (call_stmt, true); > + return stmts; > } > > /* STMT uses OP_LHS. Check whether it is equivalent to: > @@ -2836,7 +2906,6 @@ predicate_statements (loop_p loop) > { > tree lhs = gimple_assign_lhs (stmt); > tree mask; > - gimple *new_stmt; > gimple_seq stmts = NULL; > machine_mode mode = TYPE_MODE (TREE_TYPE (lhs)); > /* We checked before setting GF_PLF_2 that an equivalent > @@ -2870,11 +2939,18 @@ predicate_statements (loop_p loop) > vect_masks.safe_push (mask); > } > if (gimple_assign_single_p (stmt)) > - new_stmt = predicate_load_or_store (&gsi, stmt, mask); > - else > - new_stmt = predicate_rhs_code (stmt, mask, cond, &ssa_names); > + { > + gimple_seq call_seq > + = predicate_load_or_store (&gsi, stmt, mask, &ssa_names); > > - gsi_replace (&gsi, new_stmt, true); > + gsi_replace_with_seq (&gsi, call_seq, true); > + } > + else > + { > + gimple *new_stmt; > + new_stmt = predicate_rhs_code (stmt, mask, cond, &ssa_names); > + gsi_replace (&gsi, new_stmt, true); > + } > } > else if (((lhs = gimple_assign_lhs (stmt)), true) > && (INTEGRAL_TYPE_P (TREE_TYPE (lhs)) > -- Richard Biener <rguent...@suse.de> SUSE Software Solutions Germany GmbH, Frankenstrasse 146, 90461 Nuernberg, Germany; GF: Ivo Totev, Andrew McDonald, Werner Knoblich; (HRB 36809, AG Nuernberg)