This patch supports generating MASK_LEN_STRIDED_LOAD/MASK_LEN_STRIDED_STORE
IR for memory accesses with an invariant stride.

It is a special optimization for targets like RVV.  RVV has both indexed
load/store and strided load/store instructions, i.e. we always have the
gather/scatter and strided optabs available at the same time.

E.g.

void foo (int *__restrict a, int * __restrict b, int n, int *__restrict indice)
{
  for (int i = 0; i < n; i++)
    a[indice[i]] = b[indice[i]] + a[i];
}

For such a loop, RVV uses indexed load/store instructions for the
gather/scatter accesses.

E.g.

void foo (int *__restrict a, int * __restrict b, int n, int m)
{
  for (int i = 0; i < n; i++)
    a[i] = b[i * m] + a[i];
}

For such a loop, RVV uses strided load/store instructions.

We only need to support the direct mask_len_strided_xxx optabs for an
invariant stride.
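As a rough illustration (not part of the patch; the SSA names below are made
up, but the operand order follows the internal-function calls built here),
the intended GIMPLE difference for the second loop above is approximately:

  /* Currently: the strided access is emulated as a gather; the offset
     vector {0, m, 2*m, ...} is built with VEC_SERIES and combined with
     the element-size scale.  */
  offsets_1 = VEC_SERIES_EXPR <0, m_2>;
  vect_b_3 = .MASK_LEN_GATHER_LOAD (b_4, offsets_1, 4, { 0, ... },
                                    mask_5, len_6, bias_7);

  /* With this patch: the invariant byte stride (DR_STEP, here m * 4)
     is passed directly as a scalar operand.  */
  vect_b_3 = .MASK_LEN_STRIDED_LOAD (b_4, stride_8, { 0, ... },
                                     mask_5, len_6, bias_7);

i.e. no VEC_SERIES offset vector has to be materialized when the stride is
loop invariant.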
gcc/ChangeLog:

	* tree-vect-stmts.cc (vect_get_strided_load_store_ops): Add
	MASK_LEN_STRIDED_LOAD/MASK_LEN_STRIDED_STORE.
	(vectorizable_store): Ditto.
	(vectorizable_load): Ditto.

---
 gcc/tree-vect-stmts.cc | 47 ++++++++++++++++++++++++++++++++++--------
 1 file changed, 38 insertions(+), 9 deletions(-)

diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
index ee89f47c468..9c65b688510 100644
--- a/gcc/tree-vect-stmts.cc
+++ b/gcc/tree-vect-stmts.cc
@@ -2863,6 +2863,17 @@ vect_get_strided_load_store_ops (stmt_vec_info stmt_info,
       *dataref_bump = cse_and_gimplify_to_preheader (loop_vinfo, bump);
     }
 
+  /* If the target supports strided load/store directly, use DR_STEP as the
+     stride instead of building VEC_OFFSET with VEC_SERIES.  */
+  internal_fn ifn
+    = DR_IS_READ (dr) ? IFN_MASK_LEN_STRIDED_LOAD : IFN_MASK_LEN_STRIDED_STORE;
+  if (direct_internal_fn_supported_p (ifn, vectype, OPTIMIZE_FOR_SPEED))
+    {
+      *vec_offset = cse_and_gimplify_to_preheader (loop_vinfo,
+						   unshare_expr (DR_STEP (dr)));
+      return;
+    }
+
   /* The offset given in GS_INFO can have pointer type, so use the element
      type of the vector instead.  */
   tree offset_type = TREE_TYPE (gs_info->offset_vectype);
@@ -9012,10 +9023,20 @@ vectorizable_store (vec_info *vinfo,
 
 	      gcall *call;
 	      if (final_len && final_mask)
-		call = gimple_build_call_internal
-		  (IFN_MASK_LEN_SCATTER_STORE, 7, dataref_ptr,
-		   vec_offset, scale, vec_oprnd, final_mask,
-		   final_len, bias);
+		{
+		  if (VECTOR_TYPE_P (TREE_TYPE (vec_offset)))
+		    call = gimple_build_call_internal (
+		      IFN_MASK_LEN_SCATTER_STORE, 7, dataref_ptr,
+		      vec_offset, scale, vec_oprnd, final_mask, final_len,
+		      bias);
+		  else
+		    /* A non-vector offset means that the target prefers
+		       MASK_LEN_STRIDED_STORE with a direct stride argument
+		       over MASK_LEN_SCATTER_STORE.  */
+		    call = gimple_build_call_internal (
+		      IFN_MASK_LEN_STRIDED_STORE, 6, dataref_ptr,
+		      vec_offset, vec_oprnd, final_mask, final_len, bias);
+		}
 	      else if (final_mask)
 		call = gimple_build_call_internal
 		  (IFN_MASK_SCATTER_STORE, 5, dataref_ptr,
@@ -10956,11 +10977,19 @@ vectorizable_load (vec_info *vinfo,
 
 	      gcall *call;
 	      if (final_len && final_mask)
-		call
-		  = gimple_build_call_internal (IFN_MASK_LEN_GATHER_LOAD, 7,
-						dataref_ptr, vec_offset,
-						scale, zero, final_mask,
-						final_len, bias);
+		{
+		  if (VECTOR_TYPE_P (TREE_TYPE (vec_offset)))
+		    call = gimple_build_call_internal (
+		      IFN_MASK_LEN_GATHER_LOAD, 7, dataref_ptr, vec_offset,
+		      scale, zero, final_mask, final_len, bias);
+		  else
+		    /* A non-vector offset means that the target prefers
+		       MASK_LEN_STRIDED_LOAD with a direct stride argument
+		       over MASK_LEN_GATHER_LOAD.  */
+		    call = gimple_build_call_internal (
+		      IFN_MASK_LEN_STRIDED_LOAD, 6, dataref_ptr, vec_offset,
+		      zero, final_mask, final_len, bias);
+		}
 	      else if (final_mask)
 		call = gimple_build_call_internal (IFN_MASK_GATHER_LOAD, 5,
 						   dataref_ptr, vec_offset,
-- 
2.36.3