https://gcc.gnu.org/g:79e4f386ad836b7f9e1bdaf6aded4585035432ef
commit r16-3044-g79e4f386ad836b7f9e1bdaf6aded4585035432ef Author: Richard Biener <rguent...@suse.de> Date: Mon Aug 4 11:27:54 2025 +0200 Record gather/scatter scale and base in the SLP tree The main gather/scatter discovery happens at SLP discovery time, but the base address and the offset scale are currently not explicitly represented in the SLP tree. This requires re-discovery of them during vectorizable_store/load. The following fixes this by recording this info into the SLP tree. This allows the main vect_check_gather_scatter call to be elided from get_load_store_type and replaced with target support checks for IFN/decl or fallback emulated mode. There's still a vect_check_gather_scatter call left in the path using gather/scatter for strided load/store. I hope to deal with this later. * tree-vectorizer.h (_slp_tree::gs_scale): New. (_slp_tree::gs_base): Likewise. (SLP_TREE_GS_SCALE): Likewise. (SLP_TREE_GS_BASE): Likewise. (vect_describe_gather_scatter_call): Declare. * tree-vect-slp.cc (_slp_tree::_slp_tree): Initialize new members. (vect_build_slp_tree_2): Record gather/scatter base and scale. (vect_get_and_check_slp_defs): For gather/scatter IFNs describe the call to first_gs_info. * tree-vect-data-refs.cc (vect_gather_scatter_fn_p): Add mode of operation with fixed offset vector type. (vect_describe_gather_scatter_call): Export. * tree-vect-stmts.cc (get_load_store_type): Do not call vect_check_gather_scatter to fill gs_info; instead populate it from the SLP tree. Check which of IFN, decl or fallback is supported and record that decision. 
Diff: --- gcc/tree-vect-data-refs.cc | 24 ++++++++++++++++------ gcc/tree-vect-slp.cc | 22 ++++++++++++++++++++ gcc/tree-vect-stmts.cc | 51 +++++++++++++++++++++++++++------------------- gcc/tree-vectorizer.h | 9 ++++++++ 4 files changed, 79 insertions(+), 27 deletions(-) diff --git a/gcc/tree-vect-data-refs.cc b/gcc/tree-vect-data-refs.cc index dc82bf688aba..4db0ec8abae6 100644 --- a/gcc/tree-vect-data-refs.cc +++ b/gcc/tree-vect-data-refs.cc @@ -4423,8 +4423,9 @@ vect_prune_runtime_alias_test_list (loop_vec_info loop_vinfo) MASKED_P is true if the load or store is conditional. MEMORY_TYPE is the type of the memory elements being loaded or stored. OFFSET_TYPE is the type of the offset that is being applied to the invariant - base address. SCALE is the amount by which the offset should - be multiplied *after* it has been converted to address width. + base address. If OFFSET_TYPE is scalar the function chooses an + appropriate vector type for it. SCALE is the amount by which the + offset should be multiplied *after* it has been converted to address width. Return true if the function is supported, storing the function id in *IFN_OUT and the vector type for the offset in *OFFSET_VECTYPE_OUT. @@ -4467,9 +4468,15 @@ vect_gather_scatter_fn_p (vec_info *vinfo, bool read_p, bool masked_p, for (;;) { - tree offset_vectype = get_vectype_for_scalar_type (vinfo, offset_type); - if (!offset_vectype) - return false; + tree offset_vectype; + if (VECTOR_TYPE_P (offset_type)) + offset_vectype = offset_type; + else + { + offset_vectype = get_vectype_for_scalar_type (vinfo, offset_type); + if (!offset_vectype) + return false; + } /* Test whether the target supports this combination. */ if (internal_gather_scatter_fn_supported_p (ifn, vectype, memory_type, @@ -4500,10 +4507,15 @@ vect_gather_scatter_fn_p (vec_info *vinfo, bool read_p, bool masked_p, return true; } + /* For fixed offset vector type we're done. 
*/ + if (VECTOR_TYPE_P (offset_type)) + return false; + if (TYPE_PRECISION (offset_type) >= POINTER_SIZE && TYPE_PRECISION (offset_type) >= element_bits) return false; + /* Try a larger offset vector type. */ offset_type = build_nonstandard_integer_type (TYPE_PRECISION (offset_type) * 2, TYPE_UNSIGNED (offset_type)); } @@ -4512,7 +4524,7 @@ vect_gather_scatter_fn_p (vec_info *vinfo, bool read_p, bool masked_p, /* STMT_INFO is a call to an internal gather load or scatter store function. Describe the operation in INFO. */ -static void +void vect_describe_gather_scatter_call (stmt_vec_info stmt_info, gather_scatter_info *info) { diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc index 794a073b3758..59acbc1aeb57 100644 --- a/gcc/tree-vect-slp.cc +++ b/gcc/tree-vect-slp.cc @@ -120,6 +120,8 @@ _slp_tree::_slp_tree () SLP_TREE_LANE_PERMUTATION (this) = vNULL; SLP_TREE_DEF_TYPE (this) = vect_uninitialized_def; SLP_TREE_CODE (this) = ERROR_MARK; + SLP_TREE_GS_SCALE (this) = 0; + SLP_TREE_GS_BASE (this) = NULL_TREE; this->ldst_lanes = false; this->avoid_stlf_fail = false; SLP_TREE_VECTYPE (this) = NULL_TREE; @@ -680,6 +682,15 @@ vect_get_and_check_slp_defs (vec_info *vinfo, unsigned char swap, { internal_fn ifn = gimple_call_internal_fn (stmt); commutative_op = first_commutative_argument (ifn); + if (internal_gather_scatter_fn_p (ifn)) + { + vect_describe_gather_scatter_call + (stmt_info, + first ? &(*oprnds_info)[0]->first_gs_info : &gs_info); + if (first) + (*oprnds_info)[0]->first_gs_p = true; + gs_op = 0; + } } } else if (gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt)) @@ -2720,6 +2731,9 @@ out: stmt_info = stmts[0]; + int gs_scale = 0; + tree gs_base = NULL_TREE; + /* Create SLP_TREE nodes for the definition node/s. 
*/ FOR_EACH_VEC_ELT (oprnds_info, i, oprnd_info) { @@ -2742,6 +2756,12 @@ out: continue; } + if (oprnd_info->first_gs_p) + { + gs_scale = oprnd_info->first_gs_info.scale; + gs_base = oprnd_info->first_gs_info.base; + } + if (is_a <bb_vec_info> (vinfo) && oprnd_info->first_dt == vect_internal_def && !oprnd_info->any_pattern) @@ -3131,6 +3151,8 @@ fail: node = vect_create_new_slp_node (node, stmts, nops); SLP_TREE_VECTYPE (node) = vectype; SLP_TREE_CHILDREN (node).splice (children); + SLP_TREE_GS_SCALE (node) = gs_scale; + SLP_TREE_GS_BASE (node) = gs_base; return node; } diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc index 3bf2deca8eaa..a6f4db43a495 100644 --- a/gcc/tree-vect-stmts.cc +++ b/gcc/tree-vect-stmts.cc @@ -2019,31 +2019,40 @@ get_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info, else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info)) { *memory_access_type = VMAT_GATHER_SCATTER; - if (!vect_check_gather_scatter (stmt_info, loop_vinfo, gs_info, - elsvals)) - gcc_unreachable (); slp_tree offset_node = SLP_TREE_CHILDREN (slp_node)[0]; tree offset_vectype = SLP_TREE_VECTYPE (offset_node); + memset (gs_info, 0, sizeof (gather_scatter_info)); gs_info->offset_vectype = offset_vectype; - /* When using internal functions, we rely on pattern recognition - to convert the type of the offset to the type that the target - requires, with the result being a call to an internal function. - If that failed for some reason (e.g. because another pattern - took priority), just handle cases in which the offset already - has the right type. */ - if (GATHER_SCATTER_IFN_P (*gs_info) - && !is_gimple_call (stmt_info->stmt) - && !tree_nop_conversion_p (TREE_TYPE (gs_info->offset), - TREE_TYPE (offset_vectype))) - { - if (dump_enabled_p ()) - dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, - "%s offset requires a conversion\n", - vls_type == VLS_LOAD ? 
"gather" : "scatter"); - return false; - } - else if (GATHER_SCATTER_EMULATED_P (*gs_info)) + gs_info->scale = SLP_TREE_GS_SCALE (slp_node); + gs_info->base = SLP_TREE_GS_BASE (slp_node); + gs_info->memory_type = TREE_TYPE (DR_REF (first_dr_info->dr)); + gs_info->decl = NULL_TREE; + gs_info->ifn = IFN_LAST; + tree tem; + if (vect_gather_scatter_fn_p (loop_vinfo, vls_type == VLS_LOAD, + masked_p, vectype, + gs_info->memory_type, + offset_vectype, gs_info->scale, + &gs_info->ifn, &tem, + elsvals)) + /* GATHER_SCATTER_IFN_P. */; + else if (vls_type == VLS_LOAD + ? (targetm.vectorize.builtin_gather + && (gs_info->decl + = targetm.vectorize.builtin_gather (vectype, + TREE_TYPE + (offset_vectype), + gs_info->scale))) + : (targetm.vectorize.builtin_scatter + && (gs_info->decl + = targetm.vectorize.builtin_scatter (vectype, + TREE_TYPE + (offset_vectype), + gs_info->scale)))) + /* GATHER_SCATTER_LEGACY_P. */; + else { + /* GATHER_SCATTER_EMULATED_P. */ if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant () || !TYPE_VECTOR_SUBPARTS (offset_vectype).is_constant () || VECTOR_BOOLEAN_TYPE_P (offset_vectype) diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h index 52e1075f7e49..9653496362ff 100644 --- a/gcc/tree-vectorizer.h +++ b/gcc/tree-vectorizer.h @@ -306,6 +306,11 @@ struct _slp_tree { unsigned int lanes; /* The operation of this node. */ enum tree_code code; + /* For gather/scatter memory operations the scale each offset element + should be multiplied by before being added to the base. */ + int gs_scale; + /* For gather/scatter memory operations the loop-invariant base value. */ + tree gs_base; /* Whether uses of this load or feeders of this store are suitable for load/store-lanes. 
*/ bool ldst_lanes; @@ -412,6 +417,8 @@ public: #define SLP_TREE_CODE(S) (S)->code #define SLP_TREE_MEMORY_ACCESS_TYPE(S) (S)->memory_access_type #define SLP_TREE_TYPE(S) (S)->type +#define SLP_TREE_GS_SCALE(S) (S)->gs_scale +#define SLP_TREE_GS_BASE(S) (S)->gs_base enum vect_partial_vector_style { vect_partial_vectors_none, @@ -2550,6 +2557,8 @@ extern bool vect_gather_scatter_fn_p (vec_info *, bool, bool, tree, tree, extern bool vect_check_gather_scatter (stmt_vec_info, loop_vec_info, gather_scatter_info *, vec<int> * = nullptr); +extern void vect_describe_gather_scatter_call (stmt_vec_info, + gather_scatter_info *); extern opt_result vect_find_stmt_data_reference (loop_p, gimple *, vec<data_reference_p> *, vec<int> *, int);