On Fri, Sep 18, 2020 at 8:18 AM Xiong Hu Luo <luo...@linux.ibm.com> wrote: > > This patch enables transformation from ARRAY_REF(VIEW_CONVERT_EXPR) to > VEC_SET internal function in gimple-isel pass if target supports > vec_set with variable index by checking can_vec_set_var_idx_p. > > gcc/ChangeLog: > > 2020-09-18 Xionghu Luo <luo...@linux.ibm.com> > > * gimple-isel.cc (gimple_expand_vec_set_expr): New function. > (gimple_expand_vec_cond_exprs): Call gimple_expand_vec_set_expr. > * internal-fn.c (vec_set_direct): New define. > (expand_vec_set_optab_fn): New function. > (direct_vec_set_optab_supported_p): New define. > * internal-fn.def (VEC_SET): New DEF_INTERNAL_OPTAB_FN. > * optabs.c (can_vec_set_var_idx_p): New function. > * optabs.h (can_vec_set_var_idx_p): New declare. > --- > gcc/gimple-isel.cc | 116 +++++++++++++++++++++++++++++++++++++++++++- > gcc/internal-fn.c | 36 ++++++++++++++ > gcc/internal-fn.def | 2 + > gcc/optabs.c | 17 +++++++ > gcc/optabs.h | 3 ++ > 5 files changed, 172 insertions(+), 2 deletions(-) > > diff --git a/gcc/gimple-isel.cc b/gcc/gimple-isel.cc > index b330cf4c20e..bc61e2895be 100644 > --- a/gcc/gimple-isel.cc > +++ b/gcc/gimple-isel.cc > @@ -35,6 +35,80 @@ along with GCC; see the file COPYING3. If not see > #include "tree-cfg.h" > #include "bitmap.h" > #include "tree-ssa-dce.h" > +#include "fold-const.h" > +#include "gimple-fold.h" > +#include "memmodel.h" > +#include "optabs.h" > + > +/* Expand all ARRAY_REF(VIEW_CONVERT_EXPR) gimple assignments into calls to > + internal function based on vector type of selected expansion. > + i.e.: > + VIEW_CONVERT_EXPR<int[4]>(u)[_1] = = i_4(D); > + => > + _7 = u; > + _8 = .VEC_SET (_7, i_4(D), _1); > + u = _8; */ > + > +static gimple * > +gimple_expand_vec_set_expr (gimple_stmt_iterator *gsi) > +{ > + enum tree_code code; > + gcall *new_stmt = NULL; > + gassign *ass_stmt = NULL; > + > + /* Only consider code == GIMPLE_ASSIGN. */ > + gassign *stmt = dyn_cast<gassign *> (gsi_stmt (*gsi)); > + if (!stmt) > + return NULL; > + > + code = TREE_CODE (gimple_assign_lhs (stmt));
do the lhs = gimple_assign_lhs (stmt) before and elide cond, putting the TREE_CODE into the if below. > + if (code != ARRAY_REF) > + return NULL; > + > + tree lhs = gimple_assign_lhs (stmt); > + tree val = gimple_assign_rhs1 (stmt); > + > + tree type = TREE_TYPE (lhs); > + tree op0 = TREE_OPERAND (lhs, 0); > + if (TREE_CODE (op0) == VIEW_CONVERT_EXPR So I think we want to have an exact structural match first here, so if (TREE_CODE (op0) == VIEW_CONVERT_EXPR && DECL_P (TREE_OPERAND (op0, 0)) && VECTOR_TYPE_P (TREE_TYPE (TREE_OPERAND (op0, 0))) && TYPE_MODE (TREE_TYPE (lhs)) == TYPE_MODE (TREE_TYPE (TREE_TYPE (TREE_OPERAND (op0, 0)))) which means we're sure to do an element extract from a vector type (and we know all vector types have sane element types). > + && tree_fits_uhwi_p (TYPE_SIZE (type))) > + { > + tree pos = TREE_OPERAND (lhs, 1); > + tree view_op0 = TREE_OPERAND (op0, 0); > + machine_mode outermode = TYPE_MODE (TREE_TYPE (view_op0)); > + scalar_mode innermode = GET_MODE_INNER (outermode); > + tree_code code = TREE_CODE (TREE_TYPE(view_op0)); > + if (!is_global_var (view_op0) && code == VECTOR_TYPE > + && tree_fits_uhwi_p (TYPE_SIZE (TREE_TYPE (view_op0))) why did you need those TYPE_SIZE checks? As said earlier you want !TREE_ADDRESSABLE (view_op0) and eventually the stronger auto_var_in_fn_p (view_op0, cfun) rather than !is_global_var. > + && can_vec_set_var_idx_p (code, outermode, innermode, > + TYPE_MODE (TREE_TYPE (pos)))) > + { > + location_t loc = gimple_location (stmt); > + tree var_src = make_ssa_name (TREE_TYPE (view_op0)); > + tree var_dst = make_ssa_name (TREE_TYPE (view_op0)); > + > + ass_stmt = gimple_build_assign (var_src, view_op0); > + gimple_set_vuse (ass_stmt, gimple_vuse (stmt)); > + gimple_set_location (ass_stmt, loc); > + gsi_insert_before (gsi, ass_stmt, GSI_SAME_STMT); > + > + new_stmt > + = gimple_build_call_internal (IFN_VEC_SET, 3, var_src, val, pos); > + gimple_call_set_lhs (new_stmt, var_dst); > + gimple_set_location (new_stmt, loc); > + gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT); > + > + ass_stmt = gimple_build_assign (view_op0, var_dst); > + gimple_set_location (ass_stmt, loc); > + gsi_insert_before (gsi, ass_stmt, GSI_SAME_STMT); > + > + gimple_move_vops (ass_stmt, stmt); > + gsi_remove (gsi, true); > + } > + } > + > + return ass_stmt; > +} > > /* Expand all VEC_COND_EXPR gimple assignments into calls to internal > function based on type of selected expansion. */ > @@ -187,8 +261,25 @@ gimple_expand_vec_cond_exprs (void) > { > for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) > { > - gimple *g = gimple_expand_vec_cond_expr (&gsi, > - &vec_cond_ssa_name_uses); > + gassign *stmt = dyn_cast<gassign *> (gsi_stmt (gsi)); > + if (!stmt) > + continue; > + > + enum tree_code code; > + gimple *g = NULL; > + code = gimple_assign_rhs_code (stmt); > + switch (code) > + { > + case VEC_COND_EXPR: > + g = gimple_expand_vec_cond_expr (&gsi, &vec_cond_ssa_name_uses); > + break; > + case ARRAY_REF: > + /* TODO: generate IFN for vec_extract with variable index. */ so why not do this here? > + break; > + default: > + break; > + } > + > if (g != NULL) > { > tree lhs = gimple_assign_lhs (gsi_stmt (gsi)); > @@ -204,6 +295,27 @@ gimple_expand_vec_cond_exprs (void) > > simple_dce_from_worklist (dce_ssa_names); > > + FOR_EACH_BB_FN (bb, cfun) but in a separate loop? > + { > + for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) > + { > + gassign *stmt = dyn_cast<gassign *> (gsi_stmt (gsi)); > + if (!stmt) > + continue; > + > + enum tree_code code; > + code = TREE_CODE (gimple_assign_lhs (stmt)); > + switch (code) > + { > + case ARRAY_REF: > + gimple_expand_vec_set_expr (&gsi); > + break; > + default: > + break; > + } > + } > + } > + > return 0; > } > > diff --git a/gcc/internal-fn.c b/gcc/internal-fn.c > index 8efc77d986b..36837381c04 100644 > --- a/gcc/internal-fn.c > +++ b/gcc/internal-fn.c > @@ -115,6 +115,7 @@ init_internal_fns () > #define vec_condeq_direct { 0, 0, false } > #define scatter_store_direct { 3, 1, false } > #define len_store_direct { 3, 3, false } > +#define vec_set_direct { 3, 3, false } > #define unary_direct { 0, 0, true } > #define binary_direct { 0, 0, true } > #define ternary_direct { 0, 0, true } > @@ -2658,6 +2659,40 @@ expand_vect_cond_mask_optab_fn (internal_fn, gcall > *stmt, convert_optab optab) > > #define expand_vec_cond_mask_optab_fn expand_vect_cond_mask_optab_fn > > +static void > +expand_vec_set_optab_fn (internal_fn, gcall *stmt, convert_optab optab) all new functions require a function level comment > +{ > + tree lhs = gimple_call_lhs (stmt); > + tree op0 = gimple_call_arg (stmt, 0); > + tree op1 = gimple_call_arg (stmt, 1); > + tree op2 = gimple_call_arg (stmt, 2); > + rtx target = expand_expr (lhs, NULL_RTX, VOIDmode, EXPAND_WRITE); > + rtx src = expand_expr (op0, NULL_RTX, VOIDmode, EXPAND_WRITE); > + > + machine_mode outermode = TYPE_MODE (TREE_TYPE (op0)); > + scalar_mode innermode = GET_MODE_INNER (outermode); > + > + rtx value = expand_expr (op1, NULL_RTX, VOIDmode, EXPAND_NORMAL); > + rtx pos = expand_expr (op2, NULL_RTX, VOIDmode, EXPAND_NORMAL); > + > + class expand_operand ops[3]; > + enum insn_code icode = optab_handler (optab, outermode); > + > + if (icode != CODE_FOR_nothing) > + { > + pos = convert_to_mode (E_SImode, pos, 0); > + > + create_fixed_operand (&ops[0], src); > + create_input_operand (&ops[1], value, innermode); > + create_input_operand (&ops[2], pos, GET_MODE (pos)); > + if (maybe_expand_insn (icode, 3, ops)) > + { > + emit_move_insn (target, src); I think you need to assert that we end up here. > + return; > + } > + } > +} > + > static void > expand_ABNORMAL_DISPATCHER (internal_fn, gcall *) > { > @@ -3253,6 +3288,7 @@ multi_vector_optab_supported_p (convert_optab optab, > tree_pair types, > #define direct_fold_left_optab_supported_p direct_optab_supported_p > #define direct_mask_fold_left_optab_supported_p direct_optab_supported_p > #define direct_check_ptrs_optab_supported_p direct_optab_supported_p > +#define direct_vec_set_optab_supported_p direct_optab_supported_p > > /* Return the optab used by internal function FN. */ > > diff --git a/gcc/internal-fn.def b/gcc/internal-fn.def > index 13e60828fcf..e6cfe1b6159 100644 > --- a/gcc/internal-fn.def > +++ b/gcc/internal-fn.def > @@ -145,6 +145,8 @@ DEF_INTERNAL_OPTAB_FN (VCONDU, 0, vcondu, vec_condu) > DEF_INTERNAL_OPTAB_FN (VCONDEQ, 0, vcondeq, vec_condeq) > DEF_INTERNAL_OPTAB_FN (VCOND_MASK, 0, vcond_mask, vec_cond_mask) > > +DEF_INTERNAL_OPTAB_FN (VEC_SET, 0, vec_set, vec_set) > + > DEF_INTERNAL_OPTAB_FN (LEN_STORE, 0, len_store, len_store) > > DEF_INTERNAL_OPTAB_FN (WHILE_ULT, ECF_CONST | ECF_NOTHROW, while_ult, while) > diff --git a/gcc/optabs.c b/gcc/optabs.c > index 184827fdf4e..c8125670d2d 100644 > --- a/gcc/optabs.c > +++ b/gcc/optabs.c > @@ -3841,6 +3841,23 @@ can_vcond_compare_p (enum rtx_code code, machine_mode > value_mode, > && insn_operand_matches (icode, 3, test); > } > > +bool > +can_vec_set_var_idx_p (enum tree_code code, machine_mode vec_mode, > + machine_mode value_mode, machine_mode idx_mode) toplevel comment missing > +{ > + gcc_assert (code == VECTOR_TYPE); what's the point of pasing 'code' here then? Since the optab only has a single mode, the vector mode, the value_mode is redundant as well. And I guess we might want to handle "arbitrary" index modes? That is, the .md expanders should not restrict its mode - I guess it simply uses VOIDmode at the moment (for integer constants). Not sure how to best do this without an explicit mode in the optab ... > + > + rtx reg1 = alloca_raw_REG (vec_mode, LAST_VIRTUAL_REGISTER + 1); > + rtx reg2 = alloca_raw_REG (value_mode, LAST_VIRTUAL_REGISTER + 2); > + rtx reg3 = alloca_raw_REG (idx_mode, LAST_VIRTUAL_REGISTER + 3); > + > + enum insn_code icode = optab_handler (vec_set_optab, vec_mode); > + > + return icode != CODE_FOR_nothing && insn_operand_matches (icode, 0, reg1) > + && insn_operand_matches (icode, 1, reg2) > + && insn_operand_matches (icode, 2, reg3); > +} > + > /* This function is called when we are going to emit a compare instruction > that > compares the values found in X and Y, using the rtl operator COMPARISON. > > diff --git a/gcc/optabs.h b/gcc/optabs.h > index 7c2ec257cb0..c018d127756 100644 > --- a/gcc/optabs.h > +++ b/gcc/optabs.h > @@ -249,6 +249,9 @@ extern int can_compare_p (enum rtx_code, machine_mode, > VALUE_MODE. */ > extern bool can_vcond_compare_p (enum rtx_code, machine_mode, machine_mode); > > +extern bool can_vec_set_var_idx_p (enum tree_code, machine_mode, > machine_mode, > + machine_mode); > + > extern rtx prepare_operand (enum insn_code, rtx, int, machine_mode, > machine_mode, int); > /* Emit a pair of rtl insns to compare two rtx's and to jump > -- > 2.27.0.90.geebb51ba8c >