Thanks, Richard, for the comments. > Please mention the full optab names.
Sure, I will adjust this manually before committing. > There is documentation missing for doc/md.texi for the new optabs. Ack, I will send a separate patch for the documentation. > Otherwise looks OK. I'll note that non-masked or non-len-only-masked > variants are missing but this is OK I guess. Yes, we can add non-masked/non-len variants when we need them in the future. Pan -----Original Message----- From: Richard Biener <richard.guent...@gmail.com> Sent: Tuesday, October 29, 2024 6:44 PM To: Li, Pan2 <pan2...@intel.com> Cc: gcc-patches@gcc.gnu.org; tamar.christ...@arm.com; juzhe.zh...@rivai.ai; kito.ch...@gmail.com; jeffreya...@gmail.com; rdapp....@gmail.com Subject: Re: [PATCH 1/5] Internal-fn: Introduce new IFN MASK_LEN_STRIDED_LOAD{STORE} On Wed, Oct 23, 2024 at 12:47 PM <pan2...@intel.com> wrote: > > From: Pan Li <pan2...@intel.com> > > This patch would like to introduce new IFNs for strided load and store. > > LOAD: v = MASK_LEN_STRIDED_LOAD (ptr, stride, mask, len, bias) > STORE: MASK_LEN_STRIDED_STORE (ptr, stride, v, mask, len, bias) > > The IFNs target code similar to the example below > > void foo (int * a, int * b, int stride, int n) > { > for (int i = 0; i < n; i++) > a[i * stride] = b[i * stride]; > } > > The below test suites are passed for this patch. > * The rv64gcv full regression test. > * The x86 bootstrap test. > * The x86 full regression test. > > gcc/ChangeLog: > > * internal-fn.cc (strided_load_direct): Add new define direct > for strided load. > (strided_store_direct): Ditto but for store. > (expand_strided_load_optab_fn): Add new func to expand the IFN > MASK_LEN_STRIDED_LOAD in middle-end. > (expand_strided_store_optab_fn): Ditto but for store. > (direct_strided_load_optab_supported_p): Add define for stride > load optab supported. > (direct_strided_store_optab_supported_p): Ditto but for store. > (internal_fn_len_index): Add strided load/store len index. > (internal_fn_mask_index): Ditto but for mask. > (internal_fn_stored_value_index): Add strided store value index. 
> * internal-fn.def (MASK_LEN_STRIDED_LOAD): Add new IFN for > strided load. > (MASK_LEN_STRIDED_STORE): Ditto but for store. > * optabs.def (OPTAB_D): Add strided load/store optab. Please mention the full optab names. There is documentation missing for doc/md.texi for the new optabs. Otherwise looks OK. I'll note that non-masked or non-len-only-masked variants are missing but this is OK I guess. Richard. > > Signed-off-by: Pan Li <pan2...@intel.com> > Co-Authored-By: Juzhe-Zhong <juzhe.zh...@rivai.ai> > --- > gcc/internal-fn.cc | 71 +++++++++++++++++++++++++++++++++++++++++++++ > gcc/internal-fn.def | 6 ++++ > gcc/optabs.def | 2 ++ > 3 files changed, 79 insertions(+) > > diff --git a/gcc/internal-fn.cc b/gcc/internal-fn.cc > index d89a04fe412..bfbbba8e2dd 100644 > --- a/gcc/internal-fn.cc > +++ b/gcc/internal-fn.cc > @@ -159,6 +159,7 @@ init_internal_fns () > #define load_lanes_direct { -1, -1, false } > #define mask_load_lanes_direct { -1, -1, false } > #define gather_load_direct { 3, 1, false } > +#define strided_load_direct { -1, -1, false } > #define len_load_direct { -1, -1, false } > #define mask_len_load_direct { -1, 4, false } > #define mask_store_direct { 3, 2, false } > @@ -168,6 +169,7 @@ init_internal_fns () > #define vec_cond_mask_len_direct { 1, 1, false } > #define vec_cond_direct { 2, 0, false } > #define scatter_store_direct { 3, 1, false } > +#define strided_store_direct { 1, 1, false } > #define len_store_direct { 3, 3, false } > #define mask_len_store_direct { 4, 5, false } > #define vec_set_direct { 3, 3, false } > @@ -3712,6 +3714,64 @@ expand_gather_load_optab_fn (internal_fn, gcall *stmt, > direct_optab optab) > assign_call_lhs (lhs, lhs_rtx, &ops[0]); > } > > +/* Expand MASK_LEN_STRIDED_LOAD call CALL by optab OPTAB. 
*/ > + > +static void > +expand_strided_load_optab_fn (ATTRIBUTE_UNUSED internal_fn, gcall *stmt, > + direct_optab optab) > +{ > + tree lhs = gimple_call_lhs (stmt); > + tree base = gimple_call_arg (stmt, 0); > + tree stride = gimple_call_arg (stmt, 1); > + > + rtx lhs_rtx = expand_expr (lhs, NULL_RTX, VOIDmode, EXPAND_WRITE); > + rtx base_rtx = expand_normal (base); > + rtx stride_rtx = expand_normal (stride); > + > + unsigned i = 0; > + class expand_operand ops[6]; > + machine_mode mode = TYPE_MODE (TREE_TYPE (lhs)); > + > + create_output_operand (&ops[i++], lhs_rtx, mode); > + create_address_operand (&ops[i++], base_rtx); > + create_address_operand (&ops[i++], stride_rtx); > + > + i = add_mask_and_len_args (ops, i, stmt); > + expand_insn (direct_optab_handler (optab, mode), i, ops); > + > + if (!rtx_equal_p (lhs_rtx, ops[0].value)) > + emit_move_insn (lhs_rtx, ops[0].value); > +} > + > +/* Expand MASK_LEN_STRIDED_STORE call CALL by optab OPTAB. */ > + > +static void > +expand_strided_store_optab_fn (ATTRIBUTE_UNUSED internal_fn, gcall *stmt, > + direct_optab optab) > +{ > + internal_fn fn = gimple_call_internal_fn (stmt); > + int rhs_index = internal_fn_stored_value_index (fn); > + > + tree base = gimple_call_arg (stmt, 0); > + tree stride = gimple_call_arg (stmt, 1); > + tree rhs = gimple_call_arg (stmt, rhs_index); > + > + rtx base_rtx = expand_normal (base); > + rtx stride_rtx = expand_normal (stride); > + rtx rhs_rtx = expand_normal (rhs); > + > + unsigned i = 0; > + class expand_operand ops[6]; > + machine_mode mode = TYPE_MODE (TREE_TYPE (rhs)); > + > + create_address_operand (&ops[i++], base_rtx); > + create_address_operand (&ops[i++], stride_rtx); > + create_input_operand (&ops[i++], rhs_rtx, mode); > + > + i = add_mask_and_len_args (ops, i, stmt); > + expand_insn (direct_optab_handler (optab, mode), i, ops); > +} > + > /* Helper for expand_DIVMOD. Return true if the sequence starting with > INSN contains any call insns or insns with {,U}{DIV,MOD} rtxes. 
*/ > > @@ -4101,6 +4161,7 @@ multi_vector_optab_supported_p (convert_optab optab, > tree_pair types, > #define direct_load_lanes_optab_supported_p multi_vector_optab_supported_p > #define direct_mask_load_lanes_optab_supported_p > multi_vector_optab_supported_p > #define direct_gather_load_optab_supported_p convert_optab_supported_p > +#define direct_strided_load_optab_supported_p direct_optab_supported_p > #define direct_len_load_optab_supported_p direct_optab_supported_p > #define direct_mask_len_load_optab_supported_p convert_optab_supported_p > #define direct_mask_store_optab_supported_p convert_optab_supported_p > @@ -4109,6 +4170,7 @@ multi_vector_optab_supported_p (convert_optab optab, > tree_pair types, > #define direct_vec_cond_mask_optab_supported_p convert_optab_supported_p > #define direct_vec_cond_optab_supported_p convert_optab_supported_p > #define direct_scatter_store_optab_supported_p convert_optab_supported_p > +#define direct_strided_store_optab_supported_p direct_optab_supported_p > #define direct_len_store_optab_supported_p direct_optab_supported_p > #define direct_mask_len_store_optab_supported_p convert_optab_supported_p > #define direct_while_optab_supported_p convert_optab_supported_p > @@ -4808,6 +4870,8 @@ internal_fn_len_index (internal_fn fn) > case IFN_COND_LEN_XOR: > case IFN_COND_LEN_SHL: > case IFN_COND_LEN_SHR: > + case IFN_MASK_LEN_STRIDED_LOAD: > + case IFN_MASK_LEN_STRIDED_STORE: > return 4; > > case IFN_COND_LEN_NEG: > @@ -4902,6 +4966,10 @@ internal_fn_mask_index (internal_fn fn) > case IFN_MASK_LEN_STORE: > return 2; > > + case IFN_MASK_LEN_STRIDED_LOAD: > + case IFN_MASK_LEN_STRIDED_STORE: > + return 3; > + > case IFN_MASK_GATHER_LOAD: > case IFN_MASK_SCATTER_STORE: > case IFN_MASK_LEN_GATHER_LOAD: > @@ -4925,6 +4993,9 @@ internal_fn_stored_value_index (internal_fn fn) > { > switch (fn) > { > + case IFN_MASK_LEN_STRIDED_STORE: > + return 2; > + > case IFN_MASK_STORE: > case IFN_MASK_STORE_LANES: > case IFN_SCATTER_STORE: > 
diff --git a/gcc/internal-fn.def b/gcc/internal-fn.def > index 23b4ab02b30..2d455938271 100644 > --- a/gcc/internal-fn.def > +++ b/gcc/internal-fn.def > @@ -56,6 +56,7 @@ along with GCC; see the file COPYING3. If not see > - mask_load_lanes: currently just vec_mask_load_lanes > - mask_len_load_lanes: currently just vec_mask_len_load_lanes > - gather_load: used for {mask_,mask_len_,}gather_load > + - strided_load: currently just mask_len_strided_load > - len_load: currently just len_load > - mask_len_load: currently just mask_len_load > > @@ -64,6 +65,7 @@ along with GCC; see the file COPYING3. If not see > - mask_store_lanes: currently just vec_mask_store_lanes > - mask_len_store_lanes: currently just vec_mask_len_store_lanes > - scatter_store: used for {mask_,mask_len_,}scatter_store > + - strided_store: currently just mask_len_strided_store > - len_store: currently just len_store > - mask_len_store: currently just mask_len_store > > @@ -212,6 +214,8 @@ DEF_INTERNAL_OPTAB_FN (MASK_GATHER_LOAD, ECF_PURE, > mask_gather_load, gather_load) > DEF_INTERNAL_OPTAB_FN (MASK_LEN_GATHER_LOAD, ECF_PURE, > mask_len_gather_load, gather_load) > +DEF_INTERNAL_OPTAB_FN (MASK_LEN_STRIDED_LOAD, ECF_PURE, > + mask_len_strided_load, strided_load) > > DEF_INTERNAL_OPTAB_FN (LEN_LOAD, ECF_PURE, len_load, len_load) > DEF_INTERNAL_OPTAB_FN (MASK_LEN_LOAD, ECF_PURE, mask_len_load, mask_len_load) > @@ -221,6 +225,8 @@ DEF_INTERNAL_OPTAB_FN (MASK_SCATTER_STORE, 0, > mask_scatter_store, scatter_store) > DEF_INTERNAL_OPTAB_FN (MASK_LEN_SCATTER_STORE, 0, > mask_len_scatter_store, scatter_store) > +DEF_INTERNAL_OPTAB_FN (MASK_LEN_STRIDED_STORE, 0, > + mask_len_strided_store, strided_store) > > DEF_INTERNAL_OPTAB_FN (MASK_STORE, 0, maskstore, mask_store) > DEF_INTERNAL_OPTAB_FN (STORE_LANES, ECF_CONST, vec_store_lanes, store_lanes) > diff --git a/gcc/optabs.def b/gcc/optabs.def > index b48e2e5a5ac..90be40f74d5 100644 > --- a/gcc/optabs.def > +++ b/gcc/optabs.def > @@ -548,6 +548,8 @@ OPTAB_DC 
(vec_series_optab, "vec_series$a", VEC_SERIES) > OPTAB_D (vec_shl_insert_optab, "vec_shl_insert_$a") > OPTAB_D (len_load_optab, "len_load_$a") > OPTAB_D (len_store_optab, "len_store_$a") > +OPTAB_D (mask_len_strided_load_optab, "mask_len_strided_load_$a") > +OPTAB_D (mask_len_strided_store_optab, "mask_len_strided_store_$a") > OPTAB_D (select_vl_optab, "select_vl$a") > OPTAB_D (andn_optab, "andn$a3") > OPTAB_D (iorn_optab, "iorn$a3") > -- > 2.43.0 >