Robin Dapp <rd...@linux.ibm.com> writes:
> Hi Richard,
>
>> We already have code to probe the predicates of the underlying
>> define_expands/insns to see whether they support certain constant
>> IFN arguments; see e.g. internal_gather_scatter_fn_supported_p.
>> We could do something similar here: add an extra operand to the optab,
>> and an extra argument to the IFN, that gives a bias amount.
>> The PowerPC version would require 0, the System Z version would
>> require -1.  The vectoriser would probe to see which value
>> it should use.
>> 
>> Doing it that way ensures that the gimple is still self-describing.
>> It avoids gimple semantics depending on target hooks.
>
> As I don't have much previous exposure to the vectoriser code, I cobbled 
> together something pretty ad-hoc (attached).  Does this come somehow 
> close to what you have in mind?

Yeah, looks good.

> internal_len_load_supported_p should rather be called 
> internal_len_load_bias_supported_p or so I guess and the part where we 
> exclude multiple loop_lens is still missing.

Since we only support one bias, it might be better to make the
internal-fn.c function return the bias as an int (with some marker
value for “not supported”), so that the caller doesn't need to probe
both values.

> Would we also check for a viable bias there and then either accept
> multiple lens or not?

Yeah, I think so.

Thanks,
Richard

>
> Regards
>   Robin
>
> commit 2320dbfdfe1477b15a2ac59847d2a52e68de49ab
> Author: Robin Dapp <rd...@linux.ibm.com>
> Date:   Tue Oct 26 14:36:08 2021 +0200
>
>     bias1
>
> diff --git a/gcc/internal-fn.c b/gcc/internal-fn.c
> index 8312d08aab2..bf97d3e471a 100644
> --- a/gcc/internal-fn.c
> +++ b/gcc/internal-fn.c
> @@ -2696,9 +2696,9 @@ expand_call_mem_ref (tree type, gcall *stmt, int index)
>  static void
>  expand_partial_load_optab_fn (internal_fn, gcall *stmt, convert_optab optab)
>  {
> -  class expand_operand ops[3];
> -  tree type, lhs, rhs, maskt;
> -  rtx mem, target, mask;
> +  class expand_operand ops[4];
> +  tree type, lhs, rhs, maskt, biast;
> +  rtx mem, target, mask, bias;
>    insn_code icode;
>  
>    maskt = gimple_call_arg (stmt, 2);
> @@ -2727,7 +2727,18 @@ expand_partial_load_optab_fn (internal_fn, gcall 
> *stmt, convert_optab optab)
>                                TYPE_UNSIGNED (TREE_TYPE (maskt)));
>    else
>      create_input_operand (&ops[2], mask, TYPE_MODE (TREE_TYPE (maskt)));
> -  expand_insn (icode, 3, ops);
> +  if (optab == len_load_optab)
> +    {
> +      biast = gimple_call_arg (stmt, 3);
> +      bias = expand_normal (biast);
> +      create_input_operand (&ops[3], bias, SImode);
> +    }
> +
> +  if (optab != len_load_optab)
> +    expand_insn (icode, 3, ops);
> +  else
> +    expand_insn (icode, 4, ops);
> +
>    if (!rtx_equal_p (target, ops[0].value))
>      emit_move_insn (target, ops[0].value);
>  }
> @@ -2741,9 +2752,9 @@ expand_partial_load_optab_fn (internal_fn, gcall *stmt, 
> convert_optab optab)
>  static void
>  expand_partial_store_optab_fn (internal_fn, gcall *stmt, convert_optab optab)
>  {
> -  class expand_operand ops[3];
> -  tree type, lhs, rhs, maskt;
> -  rtx mem, reg, mask;
> +  class expand_operand ops[4];
> +  tree type, lhs, rhs, maskt, biast;
> +  rtx mem, reg, mask, bias;
>    insn_code icode;
>  
>    maskt = gimple_call_arg (stmt, 2);
> @@ -2770,7 +2781,17 @@ expand_partial_store_optab_fn (internal_fn, gcall 
> *stmt, convert_optab optab)
>                                TYPE_UNSIGNED (TREE_TYPE (maskt)));
>    else
>      create_input_operand (&ops[2], mask, TYPE_MODE (TREE_TYPE (maskt)));
> -  expand_insn (icode, 3, ops);
> +  if (optab == len_store_optab)
> +    {
> +      biast = gimple_call_arg (stmt, 4);
> +      bias = expand_normal (biast);
> +      create_input_operand (&ops[3], bias, SImode);
> +    }
> +
> +  if (optab != len_store_optab)
> +    expand_insn (icode, 3, ops);
> +  else
> +    expand_insn (icode, 4, ops);
>  }
>  
>  #define expand_mask_store_optab_fn expand_partial_store_optab_fn
> @@ -4154,6 +4175,25 @@ internal_gather_scatter_fn_supported_p (internal_fn 
> ifn, tree vector_type,
>         && insn_operand_matches (icode, 3 + output_ops, GEN_INT (scale)));
>  }
>  
> +bool
> +internal_len_load_supported_p (internal_fn ifn, tree load_type, int bias)
> +{
> +  if (bias > 0 || bias < -1)
> +    return false;
> +
> +  machine_mode mode = TYPE_MODE (load_type);
> +
> +  optab optab = direct_internal_fn_optab (ifn);
> +  insn_code icode = direct_optab_handler (optab, mode);
> +  int output_ops = internal_load_fn_p (ifn) ? 1 : 0;
> +
> +  if (icode != CODE_FOR_nothing
> +      && insn_operand_matches (icode, 2 + output_ops, GEN_INT (bias)))
> +    return true;
> +
> +  return false;
> +}
> +
>  /* Return true if the target supports IFN_CHECK_{RAW,WAR}_PTRS function IFN
>     for pointers of type TYPE when the accesses have LENGTH bytes and their
>     common byte alignment is ALIGN.  */
> diff --git a/gcc/internal-fn.h b/gcc/internal-fn.h
> index 19d0f849a5a..d0bf9941bcc 100644
> --- a/gcc/internal-fn.h
> +++ b/gcc/internal-fn.h
> @@ -225,6 +225,7 @@ extern int internal_fn_mask_index (internal_fn);
>  extern int internal_fn_stored_value_index (internal_fn);
>  extern bool internal_gather_scatter_fn_supported_p (internal_fn, tree,
>                                                   tree, tree, int);
> +extern bool internal_len_load_supported_p (internal_fn ifn, tree, int);
>  extern bool internal_check_ptrs_fn_supported_p (internal_fn, tree,
>                                               poly_uint64, unsigned int);
>  
> diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c
> index d7723b1a92a..50537763ace 100644
> --- a/gcc/tree-vect-stmts.c
> +++ b/gcc/tree-vect-stmts.c
> @@ -8272,12 +8272,14 @@ vectorizable_store (vec_info *vinfo,
>                 opt_machine_mode new_ovmode
>                   = get_len_load_store_mode (vmode, false);
>                 machine_mode new_vmode = new_ovmode.require ();
> +               tree vtype = vectype;
>                 /* Need conversion if it's wrapped with VnQI.  */
>                 if (vmode != new_vmode)
>                   {
>                     tree new_vtype
>                       = build_vector_type_for_mode (unsigned_intQI_type_node,
> -                                                   new_vmode);
> +                                               new_vmode);
> +                   vtype = new_vtype;
>                     tree var
>                       = vect_get_new_ssa_name (new_vtype, vect_simple_var);
>                     vec_oprnd
> @@ -8289,9 +8291,29 @@ vectorizable_store (vec_info *vinfo,
>                                                  gsi);
>                     vec_oprnd = var;
>                   }
> +
> +               /* Check which bias value to use.  Default is 0. */
> +               tree bias = build_int_cst (intSI_type_node, 0);
> +               tree new_len = final_len;
> +               if (!internal_len_load_supported_p (IFN_LEN_LOAD, vtype, 0)
> +                   && internal_len_load_supported_p (IFN_LEN_LOAD,
> +                                                     vtype, -1))
> +                 {
> +                   bias = build_int_cst (intSI_type_node, -1);
> +                   new_len = make_ssa_name (TREE_TYPE (final_len));
> +                   gassign *m1 = gimple_build_assign (new_len,
> +                                                      MINUS_EXPR,
> +                                                      final_len,
> +                                                      build_one_cst
> +                                                      (TREE_TYPE
> +                                                       (final_len)));
> +                   vect_finish_stmt_generation (vinfo, stmt_info, m1,
> +                                                gsi);
> +                 }
>                 gcall *call
> -                 = gimple_build_call_internal (IFN_LEN_STORE, 4, dataref_ptr,
> -                                               ptr, final_len, vec_oprnd);
> +                 = gimple_build_call_internal (IFN_LEN_STORE, 5, dataref_ptr,
> +                                               ptr, new_len, vec_oprnd,
> +                                               bias);
>                 gimple_call_set_nothrow (call, true);
>                 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
>                 new_stmt = call;
> @@ -9588,24 +9610,50 @@ vectorizable_load (vec_info *vinfo,
>                                              vec_num * j + i);
>                       tree ptr = build_int_cst (ref_type,
>                                                 align * BITS_PER_UNIT);
> +
> +                     machine_mode vmode = TYPE_MODE (vectype);
> +                     opt_machine_mode new_ovmode
> +                       = get_len_load_store_mode (vmode, true);
> +                     machine_mode new_vmode = new_ovmode.require ();
> +                     tree qi_type = unsigned_intQI_type_node;
> +                     tree new_vtype
> +                       = build_vector_type_for_mode (qi_type, new_vmode);
> +
> +                     tree vtype = vectype;
> +                     if (vmode != new_vmode)
> +                       vtype = new_vtype;
> +
> +                     /* Check which bias value to use.  Default is 0. */
> +                     tree bias = build_int_cst (intSI_type_node, 0);
> +                     tree new_len = final_len;
> +                     if (!internal_len_load_supported_p (IFN_LEN_LOAD,
> +                                                         vtype, 0)
> +                         && internal_len_load_supported_p (IFN_LEN_LOAD,
> +                                                           vtype, -1))
> +                       {
> +                         bias = build_int_cst (intSI_type_node, -1);
> +                         new_len = make_ssa_name (TREE_TYPE (final_len));
> +                         gassign *m1 = gimple_build_assign (new_len,
> +                                                            MINUS_EXPR,
> +                                                            final_len,
> +                                                            build_one_cst
> +                                                            (TREE_TYPE
> +                                                             (final_len)));
> +                         vect_finish_stmt_generation (vinfo, stmt_info, m1,
> +                                                      gsi);
> +                       }
> +
>                       gcall *call
> -                       = gimple_build_call_internal (IFN_LEN_LOAD, 3,
> +                       = gimple_build_call_internal (IFN_LEN_LOAD, 4,
>                                                       dataref_ptr, ptr,
> -                                                     final_len);
> +                                                     new_len, bias);
>                       gimple_call_set_nothrow (call, true);
>                       new_stmt = call;
>                       data_ref = NULL_TREE;
>  
>                       /* Need conversion if it's wrapped with VnQI.  */
> -                     machine_mode vmode = TYPE_MODE (vectype);
> -                     opt_machine_mode new_ovmode
> -                       = get_len_load_store_mode (vmode, true);
> -                     machine_mode new_vmode = new_ovmode.require ();
>                       if (vmode != new_vmode)
>                         {
> -                         tree qi_type = unsigned_intQI_type_node;
> -                         tree new_vtype
> -                           = build_vector_type_for_mode (qi_type, new_vmode);
>                           tree var = vect_get_new_ssa_name (new_vtype,
>                                                             vect_simple_var);
>                           gimple_set_lhs (call, var);

Reply via email to