On Wed, 10 Jul 2024, Tamar Christina wrote:

> > > >
> > > > > +     }
> > > > > +
> > > > > +      if (new_code == ERROR_MARK)
> > > > > +     {
> > > > > +       /* We couldn't flip the condition, so invert the mask instead.  */
> > > > > +       itype = TREE_TYPE (cmp_ls);
> > > > > +       conv = gimple_build_assign (var, BIT_XOR_EXPR, cmp_ls,
> > > > > +                                   build_int_cst (itype, 1));
> > > > > +     }
> > > > > +
> > > > > +      mask_vec_type = get_mask_type_for_scalar_type (loop_vinfo, itype);
> > > > > +      append_pattern_def_seq (vinfo, stmt_vinfo, conv, mask_vec_type,
> > > > > +                              itype);
> > > > > +      /* Then prepare the boolean mask as the mask conversion pattern
> > > > > +         won't hit on the pattern statement.  */
> > > > > +      cmp_ls = build_mask_conversion (vinfo, var, gs_vectype, stmt_vinfo);
> > > >
> > > > Isn't this somewhat redundant with the below call?
> > > >
> > > > I fear bad [non-]interactions with bool pattern recognition, btw.
> > >
> > > So this is again another instance of the issue that patterns don't
> > > apply to newly produced patterns, and so they can't serve as the root
> > > for new patterns.  This is why the scatter/gather pattern addition
> > > refactored part of the work into these helper functions.
> > >
> > > I did actually try just adding a secondary loop that iterates over
> > > newly produced patterns, but you later run into problems where a new
> > > pattern completely cancels out an old pattern rather than just
> > > extending it.
> > >
> > > So at the moment, unless the code ends up being hybrid, whatever the
> > > bool recog pattern does is just ignored as irrelevant.
> > >
> > > But if we don't invert the compare then it should be simpler, as the
> > > original compare is never in a pattern.
> > >
> > > I'll respin with these changes.
> > 
> 
> Bootstrapped and regtested on aarch64-none-linux-gnu with no issues.
> 
> Ok for master?
> 
> Thanks,
> Tamar
> 
> gcc/ChangeLog:
> 
>       PR tree-optimization/115531
>       * tree-vect-patterns.cc (vect_cond_store_pattern_same_ref): New.
>       (vect_recog_cond_store_pattern): New.
>       (vect_vect_recog_func_ptrs): Use it.
>       * target.def (conditional_operation_is_expensive): New.
>       * doc/tm.texi: Regenerate.
>       * doc/tm.texi.in: Document it.
>       * targhooks.cc (default_conditional_operation_is_expensive): New.
>       * targhooks.h (default_conditional_operation_is_expensive): New.
>       * tree-vectorizer.h (may_be_nonaddressable_p): New.
It's declared in tree-ssa-loop-ivopts.h so just include that.
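
I.e. (sketch) add

  #include "tree-ssa-loop-ivopts.h"

to tree-vect-patterns.cc and drop the declaration from tree-vectorizer.h.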

> 
> -- inline copy of patch --
> 
> diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi
> index f10d9a59c6673a02823fc05132235af3a1ad7c65..c7535d07f4ddd16d55e0ab9b609a2bf95931a2f4 100644
> --- a/gcc/doc/tm.texi
> +++ b/gcc/doc/tm.texi
> @@ -6449,6 +6449,13 @@ The default implementation returns a @code{MODE_VECTOR_INT} with the
>  same size and number of elements as @var{mode}, if such a mode exists.
>  @end deftypefn
>  
> +@deftypefn {Target Hook} bool TARGET_VECTORIZE_CONDITIONAL_OPERATION_IS_EXPENSIVE (unsigned @var{ifn})
> +This hook returns true if masked operation @var{ifn} (really of
> +type @code{internal_fn}) should be considered more expensive to use than
> +implementing the same operation without masking.  GCC can then try to use
> +unconditional operations instead with extra selects.
> +@end deftypefn
> +
>  @deftypefn {Target Hook} bool TARGET_VECTORIZE_EMPTY_MASK_IS_EXPENSIVE (unsigned @var{ifn})
>  This hook returns true if masked internal function @var{ifn} (really of
>  type @code{internal_fn}) should be considered expensive when the mask is
> diff --git a/gcc/doc/tm.texi.in b/gcc/doc/tm.texi.in
> index 24596eb2f6b4e9ea3ea3464fda171d99155f4c0f..64cea3b1edaf8ec818c0e8095ab50b00ae0cb857 100644
> --- a/gcc/doc/tm.texi.in
> +++ b/gcc/doc/tm.texi.in
> @@ -4290,6 +4290,8 @@ address;  but often a machine-dependent strategy can generate better code.
>  
>  @hook TARGET_VECTORIZE_GET_MASK_MODE
>  
> +@hook TARGET_VECTORIZE_CONDITIONAL_OPERATION_IS_EXPENSIVE
> +
>  @hook TARGET_VECTORIZE_EMPTY_MASK_IS_EXPENSIVE
>  
>  @hook TARGET_VECTORIZE_CREATE_COSTS
> diff --git a/gcc/target.def b/gcc/target.def
> index ce4d1ecd58be0a1c8110c6993556a52a2c69168e..3de1aad4c84d3df0b171a411f97e1ce70b6f63b5 100644
> --- a/gcc/target.def
> +++ b/gcc/target.def
> @@ -2033,6 +2033,18 @@ same size and number of elements as @var{mode}, if such a mode exists.",
>   (machine_mode mode),
>   default_get_mask_mode)
>  
> +/* Function to say whether a conditional operation is expensive when
> +   compared to non-masked operations.  */
> +DEFHOOK
> +(conditional_operation_is_expensive,
> + "This hook returns true if masked operation @var{ifn} (really of\n\
> +type @code{internal_fn}) should be considered more expensive to use than\n\
> +implementing the same operation without masking.  GCC can then try to use\n\
> +unconditional operations instead with extra selects.",
> + bool,
> + (unsigned ifn),
> + default_conditional_operation_is_expensive)
> +
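
(For reference, a target override would look something like the
following -- hypothetical code, not part of this patch; say a target
that also finds masked loads expensive:

  /* Hypothetical override in a target's backend file.  */
  static bool
  example_conditional_operation_is_expensive (unsigned ifn)
  {
    return ifn == IFN_MASK_STORE || ifn == IFN_MASK_LOAD;
  }

  #undef TARGET_VECTORIZE_CONDITIONAL_OPERATION_IS_EXPENSIVE
  #define TARGET_VECTORIZE_CONDITIONAL_OPERATION_IS_EXPENSIVE \
    example_conditional_operation_is_expensive

defined before its TARGET_INITIALIZER.)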
>  /* Function to say whether a masked operation is expensive when the
>     mask is all zeros.  */
>  DEFHOOK
> diff --git a/gcc/targhooks.h b/gcc/targhooks.h
> index 3cbca0f13a5e5de893630c45a6bbe0616b725e86..2704d6008f14d2aa65671f002af886d3b802effa 100644
> --- a/gcc/targhooks.h
> +++ b/gcc/targhooks.h
> @@ -123,6 +123,7 @@ extern opt_machine_mode default_vectorize_related_mode (machine_mode,
>                                                       poly_uint64);
>  extern opt_machine_mode default_get_mask_mode (machine_mode);
>  extern bool default_empty_mask_is_expensive (unsigned);
> +extern bool default_conditional_operation_is_expensive (unsigned);
>  extern vector_costs *default_vectorize_create_costs (vec_info *, bool);
>  
>  /* OpenACC hooks.  */
> diff --git a/gcc/targhooks.cc b/gcc/targhooks.cc
> index b10104c363bf8432082d51c0ecb7e2a6811c2cc2..793932a77c60b0cd4bb670de50b7f7fdf2de2159 100644
> --- a/gcc/targhooks.cc
> +++ b/gcc/targhooks.cc
> @@ -1608,6 +1608,14 @@ default_get_mask_mode (machine_mode mode)
>  
>  /* By default consider masked stores to be expensive.  */
>  
> +bool
> +default_conditional_operation_is_expensive (unsigned ifn)
> +{
> +  return ifn == IFN_MASK_STORE;
> +}
> +
> +/* By default consider masked stores to be expensive.  */
> +
>  bool
>  default_empty_mask_is_expensive (unsigned ifn)
>  {
> diff --git a/gcc/tree-vect-patterns.cc b/gcc/tree-vect-patterns.cc
> index 86e893a1c4330ae6e8d1a54438c2977da623c4b5..68d0da05eb41afe070478664deae3b8330db16fe 100644
> --- a/gcc/tree-vect-patterns.cc
> +++ b/gcc/tree-vect-patterns.cc
> @@ -53,6 +53,7 @@ along with GCC; see the file COPYING3.  If not see
>  #include "tree-vector-builder.h"
>  #include "vec-perm-indices.h"
>  #include "gimple-range.h"
> +#include "alias.h"
>  
>  
>  /* TODO:  Note the vectorizer still builds COND_EXPRs with GENERIC compares
> @@ -6461,6 +6462,157 @@ vect_recog_gather_scatter_pattern (vec_info *vinfo,
>    return pattern_stmt;
>  }
>  
> +/* Helper of vect_recog_cond_store_pattern; checks whether COND_ARG
> +   points to a load statement that reads the same data as that of
> +   STORE_VINFO.  */
> +
> +static bool
> +vect_cond_store_pattern_same_ref (vec_info *vinfo,
> +                               stmt_vec_info store_vinfo, tree cond_arg)
> +{
> +  stmt_vec_info load_stmt_vinfo = vinfo->lookup_def (cond_arg);
> +  if (!load_stmt_vinfo
> +      || !STMT_VINFO_DATA_REF (load_stmt_vinfo)
> +      || DR_IS_WRITE (STMT_VINFO_DATA_REF (load_stmt_vinfo))
> +      || !same_data_refs (STMT_VINFO_DATA_REF (store_vinfo),
> +                       STMT_VINFO_DATA_REF (load_stmt_vinfo)))
> +    return false;
> +
> +  return true;
> +}
> +
> +/* Function vect_recog_cond_store_pattern
> +
> +   Try to find the following pattern:
> +
> +   x = *_3;
> +   c = a CMP b;
> +   y = c ? t_20 : x;
> +   *_3 = y;
> +
> +   where the store to *_3 happens on a conditional select of a value loaded
> +   from the same location.  In such a case we can elide the initial load if
> +   MASK_STORE is supported, and instead only conditionally write out the
> +   result.
> +
> +   The pattern produces for the above:
> +
> +   c = a CMP b;
> +   .MASK_STORE (_3, c, t_20)
> +
> +   Input:
> +
> +   * STMT_VINFO: The stmt from which the pattern search begins.  In the
> +   example above, the search begins when this function is called with the
> +   store to *_3.
> +
> +   Output:
> +
> +   * TYPE_OUT: The type of the output of this pattern.
> +
> +   * Return value: A new stmt that will be used to replace the sequence.  */
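
(For illustration -- example mine, not from the patch -- a loop like

  void
  f (int *restrict dest, int *restrict src, int n)
  {
    for (int i = 0; i < n; i++)
      if (src[i] > 0)
        dest[i] = src[i];  /* store guarded by the condition */
  }

is if-converted into exactly this load/select/store shape, with the
unconditional load of dest[i] feeding the else arm of the COND_EXPR.)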
> +
> +static gimple *
> +vect_recog_cond_store_pattern (vec_info *vinfo,
> +                            stmt_vec_info stmt_vinfo, tree *type_out)
> +{
> +  loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
> +  if (!loop_vinfo)
> +    return NULL;
> +
> +  gimple *store_stmt = STMT_VINFO_STMT (stmt_vinfo);
> +
> +  /* Needs to be a gimple store for which we have DR info.  */
> +  if (!STMT_VINFO_DATA_REF (stmt_vinfo)
> +      || !DR_IS_WRITE (STMT_VINFO_DATA_REF (stmt_vinfo))
> +      || !gimple_store_p (store_stmt))
> +    return NULL;
> +
> +  tree st_rhs = gimple_assign_rhs1 (store_stmt);
> +  tree st_lhs = gimple_assign_lhs (store_stmt);
> +
> +  if (TREE_CODE (st_rhs) != SSA_NAME)
> +    return NULL;
> +
> +  gassign *cond_stmt = dyn_cast<gassign *> (SSA_NAME_DEF_STMT (st_rhs));
> +  if (!cond_stmt || gimple_assign_rhs_code (cond_stmt) != COND_EXPR)
> +    return NULL;
> +
> +  /* Check if the else value matches the original loaded one.  */
> +  bool invert = false;
> +  tree cmp_ls = gimple_arg (cond_stmt, 0);
> +  tree cond_arg1 = gimple_arg (cond_stmt, 1);
> +  tree cond_arg2 = gimple_arg (cond_stmt, 2);
> +
> +  if (!vect_cond_store_pattern_same_ref (vinfo, stmt_vinfo, cond_arg2)
> +      && !(invert = vect_cond_store_pattern_same_ref (vinfo, stmt_vinfo,
> +                                                   cond_arg1)))
> +    return NULL;

You miss a check for intermediate stores that alias.  Consider

> +   x = *_3;
> +   c = a CMP b;
      *_3 = 1;
> +   y = c ? t_20 : x;
> +   *_3 = y;

The easiest would be to match up the gimple_vuse of the store and the
load.
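
I.e. something like this in vect_cond_store_pattern_same_ref (sketch,
using the stmt_vec_infos the helper already has):

  /* Reject the transform if a store may happen between the load and
     the conditional store, i.e. when they have different VUSEs.  */
  if (gimple_vuse (STMT_VINFO_STMT (load_stmt_vinfo))
      != gimple_vuse (STMT_VINFO_STMT (store_vinfo)))
    return false;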

> +  vect_pattern_detected ("vect_recog_cond_store_pattern", store_stmt);
> +
> +  tree scalar_type = TREE_TYPE (st_rhs);
> +  if (VECTOR_TYPE_P (scalar_type))
> +    return NULL;
> +
> +  tree vectype = get_vectype_for_scalar_type (vinfo, scalar_type);
> +  if (vectype == NULL_TREE)
> +    return NULL;
> +
> +  machine_mode mask_mode;
> +  machine_mode vecmode = TYPE_MODE (vectype);
> +  if (targetm.vectorize.conditional_operation_is_expensive (IFN_MASK_STORE)
> +      || !targetm.vectorize.get_mask_mode (vecmode).exists (&mask_mode)
> +      || !can_vec_mask_load_store_p (vecmode, mask_mode, false))
> +    return NULL;
> +
> +  /* Convert the mask to the right form.  */
> +  tree cond_store_arg = cond_arg1;
> +
> +  /* If we have to invert the condition, i.e. use the true argument rather
> +     than the false argument, we should check whether we can just invert the
> +     comparison or if we have to negate the result.  */
> +  if (invert)
> +    {
> +      /* We need to use the false parameter of the conditional select.  */
> +      cond_store_arg = cond_arg2;
> +      tree var = vect_recog_temp_ssa_var (boolean_type_node, NULL);
> +
> +      /* Invert the mask instead of flipping the condition.  */
> +      tree itype = TREE_TYPE (cmp_ls);
> +      gassign *conv = gimple_build_assign (var, BIT_XOR_EXPR, cmp_ls,
> +                                        build_int_cst (itype, 1));
> +
> +      tree mask_vec_type = get_mask_type_for_scalar_type (vinfo, itype);
> +      append_pattern_def_seq (vinfo, stmt_vinfo, conv, mask_vec_type, itype);
> +      cmp_ls = var;
> +    }
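
So for, e.g. (gimple illustration, mine):

  x_4 = *_3;
  c_5 = a_1 > b_2;
  y_6 = c_5 ? x_4 : t_20;   /* true arm is the loaded value */
  *_3 = y_6;

the pattern produces

  c_5 = a_1 > b_2;
  _7 = c_5 ^ 1;
  .MASK_STORE (_3, align, _7, t_20);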
> +
> +  tree base = DR_REF (STMT_VINFO_DATA_REF (stmt_vinfo));
> +  if (may_be_nonaddressable_p (base))
> +    return NULL;

Can you check this earlier, before building gimple stuff?

Otherwise looks OK.

> +
> +  if (TREE_CODE (base) != MEM_REF)
> +    base = build_fold_addr_expr (base);
> +
> +  tree ptr = build_int_cst (reference_alias_ptr_type (base),
> +                         get_object_alignment (base));
> +
> +  tree mask = vect_convert_mask_for_vectype (cmp_ls, vectype, stmt_vinfo,
> +                                          vinfo);
> +
> +  gcall *call
> +    = gimple_build_call_internal (IFN_MASK_STORE, 4, base, ptr, mask,
> +                               cond_store_arg);
> +  gimple_set_location (call, gimple_location (store_stmt));
> +
> +  /* Copy across relevant vectorization info and associate DR with the
> +     new pattern statement instead of the original statement.  */
> +  stmt_vec_info pattern_stmt_info = loop_vinfo->add_stmt (call);
> +  loop_vinfo->move_dr (pattern_stmt_info, stmt_vinfo);
> +
> +  *type_out = vectype;
> +  return call;
> +}
> +
>  /* Return true if TYPE is a non-boolean integer type.  These are the types
>     that we want to consider for narrowing.  */
>  
> @@ -7126,6 +7278,7 @@ static vect_recog_func vect_vect_recog_func_ptrs[] = {
>       of mask conversion that are needed for gather and scatter
>       internal functions.  */
>    { vect_recog_gather_scatter_pattern, "gather_scatter" },
> +  { vect_recog_cond_store_pattern, "cond_store" },
>    { vect_recog_mask_conversion_pattern, "mask_conversion" },
>    { vect_recog_widen_plus_pattern, "widen_plus" },
>    { vect_recog_widen_minus_pattern, "widen_minus" },
> diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
> index 8eb3ec4df86906b740560774a41eb48394f77bfb..dbe1c7c5c21e6bebc7114d4254b5c988ea969f9f 100644
> --- a/gcc/tree-vectorizer.h
> +++ b/gcc/tree-vectorizer.h
> @@ -2353,6 +2353,7 @@ extern opt_tree vect_get_mask_type_for_stmt (stmt_vec_info, unsigned int = 0);
>  
>  /* In tree-if-conv.cc.  */
>  extern bool ref_within_array_bound (gimple *, tree);
> +extern bool may_be_nonaddressable_p (tree);
>  
>  /* In tree-vect-data-refs.cc.  */
>  extern bool vect_can_force_dr_alignment_p (const_tree, poly_uint64);
> 

-- 
Richard Biener <rguent...@suse.de>
SUSE Software Solutions Germany GmbH,
Frankenstrasse 146, 90461 Nuernberg, Germany;
GF: Ivo Totev, Andrew McDonald, Werner Knoblich; (HRB 36809, AG Nuernberg)
