Thanks Richard for comments.

> Please mention the full optab names.

Sure, I will adjust this manually before committing.

> There is documentation missing for doc/md.texi for the new optabs.

Ack, I will send a separate patch for the documentation.

> Otherwise looks OK.  I'll note that non-masked or non-len-only-masked
> variants are missing but this is OK I guess.

Yes, we can add the non-masked/non-len variants when we need them in the future.

Pan

-----Original Message-----
From: Richard Biener <richard.guent...@gmail.com> 
Sent: Tuesday, October 29, 2024 6:44 PM
To: Li, Pan2 <pan2...@intel.com>
Cc: gcc-patches@gcc.gnu.org; tamar.christ...@arm.com; juzhe.zh...@rivai.ai; 
kito.ch...@gmail.com; jeffreya...@gmail.com; rdapp....@gmail.com
Subject: Re: [PATCH 1/5] Internal-fn: Introduce new IFN 
MASK_LEN_STRIDED_LOAD{STORE}

On Wed, Oct 23, 2024 at 12:47 PM <pan2...@intel.com> wrote:
>
> From: Pan Li <pan2...@intel.com>
>
> This patch introduces new IFNs for strided load and store.
>
> LOAD:  v = MASK_LEN_STRIDED_LOAD (ptr, stride, mask, len, bias)
> STORE: MASK_LEN_STRIDED_STORE (ptr, stride, v, mask, len, bias)
>
> The IFNs target code similar to the example below.
>
> void foo (int * a, int * b, int stride, int n)
> {
>   for (int i = 0; i < n; i++)
>     a[i * stride] = b[i * stride];
> }
>
> The following test suites pass with this patch.
> * The rv64gcv full regression test.
> * The x86 bootstrap test.
> * The x86 full regression test.
>
> gcc/ChangeLog:
>
>         * internal-fn.cc (strided_load_direct): Add new define direct
>         for strided load.
>         (strided_store_direct): Ditto but for store.
>         (expand_strided_load_optab_fn): Add new func to expand the IFN
>         MASK_LEN_STRIDED_LOAD in middle-end.
>         (expand_strided_store_optab_fn): Ditto but for store.
>         (direct_strided_load_optab_supported_p): Add define for stride
>         load optab supported.
>         (direct_strided_store_optab_supported_p): Ditto but for store.
>         (internal_fn_len_index): Add strided load/store len index.
>         (internal_fn_mask_index): Ditto but for mask.
>         (internal_fn_stored_value_index): Add strided store value index.
>         * internal-fn.def (MASK_LEN_STRIDED_LOAD): Add new IFN for
>         strided load.
>         (MASK_LEN_STRIDED_STORE): Ditto but for store.
>         * optabs.def (OPTAB_D): Add strided load/store optab.

Please mention the full optab names.

There is documentation missing for doc/md.texi for the new optabs.

Otherwise looks OK.  I'll note that non-masked or non-len-only-masked
variants are missing but this is OK I guess.

Richard.

>
> Signed-off-by: Pan Li <pan2...@intel.com>
> Co-Authored-By: Juzhe-Zhong <juzhe.zh...@rivai.ai>
> ---
>  gcc/internal-fn.cc  | 71 +++++++++++++++++++++++++++++++++++++++++++++
>  gcc/internal-fn.def |  6 ++++
>  gcc/optabs.def      |  2 ++
>  3 files changed, 79 insertions(+)
>
> diff --git a/gcc/internal-fn.cc b/gcc/internal-fn.cc
> index d89a04fe412..bfbbba8e2dd 100644
> --- a/gcc/internal-fn.cc
> +++ b/gcc/internal-fn.cc
> @@ -159,6 +159,7 @@ init_internal_fns ()
>  #define load_lanes_direct { -1, -1, false }
>  #define mask_load_lanes_direct { -1, -1, false }
>  #define gather_load_direct { 3, 1, false }
> +#define strided_load_direct { -1, -1, false }
>  #define len_load_direct { -1, -1, false }
>  #define mask_len_load_direct { -1, 4, false }
>  #define mask_store_direct { 3, 2, false }
> @@ -168,6 +169,7 @@ init_internal_fns ()
>  #define vec_cond_mask_len_direct { 1, 1, false }
>  #define vec_cond_direct { 2, 0, false }
>  #define scatter_store_direct { 3, 1, false }
> +#define strided_store_direct { 1, 1, false }
>  #define len_store_direct { 3, 3, false }
>  #define mask_len_store_direct { 4, 5, false }
>  #define vec_set_direct { 3, 3, false }
> @@ -3712,6 +3714,64 @@ expand_gather_load_optab_fn (internal_fn, gcall *stmt, 
> direct_optab optab)
>    assign_call_lhs (lhs, lhs_rtx, &ops[0]);
>  }
>
> +/* Expand MASK_LEN_STRIDED_LOAD call STMT using optab OPTAB.  */
> +
> +static void
> +expand_strided_load_optab_fn (ATTRIBUTE_UNUSED internal_fn, gcall *stmt,
> +                             direct_optab optab)
> +{
> +  tree lhs = gimple_call_lhs (stmt);
> +  tree base = gimple_call_arg (stmt, 0);
> +  tree stride = gimple_call_arg (stmt, 1);
> +
> +  rtx lhs_rtx = expand_expr (lhs, NULL_RTX, VOIDmode, EXPAND_WRITE);
> +  rtx base_rtx = expand_normal (base);
> +  rtx stride_rtx = expand_normal (stride);
> +
> +  unsigned i = 0;
> +  class expand_operand ops[6];
> +  machine_mode mode = TYPE_MODE (TREE_TYPE (lhs));
> +
> +  create_output_operand (&ops[i++], lhs_rtx, mode);
> +  create_address_operand (&ops[i++], base_rtx);
> +  create_address_operand (&ops[i++], stride_rtx);
> +
> +  i = add_mask_and_len_args (ops, i, stmt);
> +  expand_insn (direct_optab_handler (optab, mode), i, ops);
> +
> +  if (!rtx_equal_p (lhs_rtx, ops[0].value))
> +    emit_move_insn (lhs_rtx, ops[0].value);
> +}
> +
> +/* Expand MASK_LEN_STRIDED_STORE call STMT using optab OPTAB.  */
> +
> +static void
> +expand_strided_store_optab_fn (ATTRIBUTE_UNUSED internal_fn, gcall *stmt,
> +                              direct_optab optab)
> +{
> +  internal_fn fn = gimple_call_internal_fn (stmt);
> +  int rhs_index = internal_fn_stored_value_index (fn);
> +
> +  tree base = gimple_call_arg (stmt, 0);
> +  tree stride = gimple_call_arg (stmt, 1);
> +  tree rhs = gimple_call_arg (stmt, rhs_index);
> +
> +  rtx base_rtx = expand_normal (base);
> +  rtx stride_rtx = expand_normal (stride);
> +  rtx rhs_rtx = expand_normal (rhs);
> +
> +  unsigned i = 0;
> +  class expand_operand ops[6];
> +  machine_mode mode = TYPE_MODE (TREE_TYPE (rhs));
> +
> +  create_address_operand (&ops[i++], base_rtx);
> +  create_address_operand (&ops[i++], stride_rtx);
> +  create_input_operand (&ops[i++], rhs_rtx, mode);
> +
> +  i = add_mask_and_len_args (ops, i, stmt);
> +  expand_insn (direct_optab_handler (optab, mode), i, ops);
> +}
> +
>  /* Helper for expand_DIVMOD.  Return true if the sequence starting with
>     INSN contains any call insns or insns with {,U}{DIV,MOD} rtxes.  */
>
> @@ -4101,6 +4161,7 @@ multi_vector_optab_supported_p (convert_optab optab, 
> tree_pair types,
>  #define direct_load_lanes_optab_supported_p multi_vector_optab_supported_p
>  #define direct_mask_load_lanes_optab_supported_p 
> multi_vector_optab_supported_p
>  #define direct_gather_load_optab_supported_p convert_optab_supported_p
> +#define direct_strided_load_optab_supported_p direct_optab_supported_p
>  #define direct_len_load_optab_supported_p direct_optab_supported_p
>  #define direct_mask_len_load_optab_supported_p convert_optab_supported_p
>  #define direct_mask_store_optab_supported_p convert_optab_supported_p
> @@ -4109,6 +4170,7 @@ multi_vector_optab_supported_p (convert_optab optab, 
> tree_pair types,
>  #define direct_vec_cond_mask_optab_supported_p convert_optab_supported_p
>  #define direct_vec_cond_optab_supported_p convert_optab_supported_p
>  #define direct_scatter_store_optab_supported_p convert_optab_supported_p
> +#define direct_strided_store_optab_supported_p direct_optab_supported_p
>  #define direct_len_store_optab_supported_p direct_optab_supported_p
>  #define direct_mask_len_store_optab_supported_p convert_optab_supported_p
>  #define direct_while_optab_supported_p convert_optab_supported_p
> @@ -4808,6 +4870,8 @@ internal_fn_len_index (internal_fn fn)
>      case IFN_COND_LEN_XOR:
>      case IFN_COND_LEN_SHL:
>      case IFN_COND_LEN_SHR:
> +    case IFN_MASK_LEN_STRIDED_LOAD:
> +    case IFN_MASK_LEN_STRIDED_STORE:
>        return 4;
>
>      case IFN_COND_LEN_NEG:
> @@ -4902,6 +4966,10 @@ internal_fn_mask_index (internal_fn fn)
>      case IFN_MASK_LEN_STORE:
>        return 2;
>
> +    case IFN_MASK_LEN_STRIDED_LOAD:
> +    case IFN_MASK_LEN_STRIDED_STORE:
> +      return 3;
> +
>      case IFN_MASK_GATHER_LOAD:
>      case IFN_MASK_SCATTER_STORE:
>      case IFN_MASK_LEN_GATHER_LOAD:
> @@ -4925,6 +4993,9 @@ internal_fn_stored_value_index (internal_fn fn)
>  {
>    switch (fn)
>      {
> +    case IFN_MASK_LEN_STRIDED_STORE:
> +      return 2;
> +
>      case IFN_MASK_STORE:
>      case IFN_MASK_STORE_LANES:
>      case IFN_SCATTER_STORE:
> diff --git a/gcc/internal-fn.def b/gcc/internal-fn.def
> index 23b4ab02b30..2d455938271 100644
> --- a/gcc/internal-fn.def
> +++ b/gcc/internal-fn.def
> @@ -56,6 +56,7 @@ along with GCC; see the file COPYING3.  If not see
>     - mask_load_lanes: currently just vec_mask_load_lanes
>     - mask_len_load_lanes: currently just vec_mask_len_load_lanes
>     - gather_load: used for {mask_,mask_len_,}gather_load
> +   - strided_load: currently just mask_len_strided_load
>     - len_load: currently just len_load
>     - mask_len_load: currently just mask_len_load
>
> @@ -64,6 +65,7 @@ along with GCC; see the file COPYING3.  If not see
>     - mask_store_lanes: currently just vec_mask_store_lanes
>     - mask_len_store_lanes: currently just vec_mask_len_store_lanes
>     - scatter_store: used for {mask_,mask_len_,}scatter_store
> +   - strided_store: currently just mask_len_strided_store
>     - len_store: currently just len_store
>     - mask_len_store: currently just mask_len_store
>
> @@ -212,6 +214,8 @@ DEF_INTERNAL_OPTAB_FN (MASK_GATHER_LOAD, ECF_PURE,
>                        mask_gather_load, gather_load)
>  DEF_INTERNAL_OPTAB_FN (MASK_LEN_GATHER_LOAD, ECF_PURE,
>                        mask_len_gather_load, gather_load)
> +DEF_INTERNAL_OPTAB_FN (MASK_LEN_STRIDED_LOAD, ECF_PURE,
> +                      mask_len_strided_load, strided_load)
>
>  DEF_INTERNAL_OPTAB_FN (LEN_LOAD, ECF_PURE, len_load, len_load)
>  DEF_INTERNAL_OPTAB_FN (MASK_LEN_LOAD, ECF_PURE, mask_len_load, mask_len_load)
> @@ -221,6 +225,8 @@ DEF_INTERNAL_OPTAB_FN (MASK_SCATTER_STORE, 0,
>                        mask_scatter_store, scatter_store)
>  DEF_INTERNAL_OPTAB_FN (MASK_LEN_SCATTER_STORE, 0,
>                        mask_len_scatter_store, scatter_store)
> +DEF_INTERNAL_OPTAB_FN (MASK_LEN_STRIDED_STORE, 0,
> +                      mask_len_strided_store, strided_store)
>
>  DEF_INTERNAL_OPTAB_FN (MASK_STORE, 0, maskstore, mask_store)
>  DEF_INTERNAL_OPTAB_FN (STORE_LANES, ECF_CONST, vec_store_lanes, store_lanes)
> diff --git a/gcc/optabs.def b/gcc/optabs.def
> index b48e2e5a5ac..90be40f74d5 100644
> --- a/gcc/optabs.def
> +++ b/gcc/optabs.def
> @@ -548,6 +548,8 @@ OPTAB_DC (vec_series_optab, "vec_series$a", VEC_SERIES)
>  OPTAB_D (vec_shl_insert_optab, "vec_shl_insert_$a")
>  OPTAB_D (len_load_optab, "len_load_$a")
>  OPTAB_D (len_store_optab, "len_store_$a")
> +OPTAB_D (mask_len_strided_load_optab, "mask_len_strided_load_$a")
> +OPTAB_D (mask_len_strided_store_optab, "mask_len_strided_store_$a")
>  OPTAB_D (select_vl_optab, "select_vl$a")
>  OPTAB_D (andn_optab, "andn$a3")
>  OPTAB_D (iorn_optab, "iorn$a3")
> --
> 2.43.0
>

Reply via email to