Hi, Commit r10-7415 introduced scalar type consideration to eliminate epilogue peeling for gaps, but it exposed a problem: the current handling doesn't consider the memory access type VMAT_CONTIGUOUS_REVERSE, for which the overrun happens on the low-address side. This patch makes the code handle that case by updating the offset and the order of the constructor elements accordingly.
Bootstrapped/regtested on powerpc64le-linux-gnu P8 and aarch64-linux-gnu. BR, Kewen ----------- gcc/ChangeLog 2020-04-02 Kewen Lin <li...@gcc.gnu.org> PR tree-optimization/94401 * tree-vect-stmts.c (vectorizable_load): Handle VMAT_CONTIGUOUS_REVERSE access type when loading halves of vector to avoid peeling for gaps.
diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c index 12beef6978c..3d27f59ba22 100644 --- a/gcc/tree-vect-stmts.c +++ b/gcc/tree-vect-stmts.c @@ -9590,11 +9590,20 @@ vectorizable_load (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi, if (new_vtype != NULL_TREE) ltype = half_vtype; } + tree offset = dataref_offset + ? dataref_offset + : build_int_cst (ref_type, 0); + if (ltype != vectype + && memory_access_type == VMAT_CONTIGUOUS_REVERSE) + offset = size_binop ( + PLUS_EXPR, + build_int_cst (ref_type, + DR_GROUP_GAP (first_stmt_info) + * tree_to_uhwi ( + TYPE_SIZE_UNIT (elem_type))), + offset); data_ref - = fold_build2 (MEM_REF, ltype, dataref_ptr, - dataref_offset - ? dataref_offset - : build_int_cst (ref_type, 0)); + = fold_build2 (MEM_REF, ltype, dataref_ptr, offset); if (alignment_support_scheme == dr_aligned) ; else if (DR_MISALIGNMENT (first_dr_info) == -1) @@ -9607,16 +9616,27 @@ vectorizable_load (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi, TYPE_ALIGN (elem_type)); if (ltype != vectype) { - vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr)); + vect_copy_ref_info (data_ref, + DR_REF (first_dr_info->dr)); tree tem = make_ssa_name (ltype); new_stmt = gimple_build_assign (tem, data_ref); - vect_finish_stmt_generation (stmt_info, new_stmt, gsi); + vect_finish_stmt_generation (stmt_info, new_stmt, + gsi); data_ref = NULL; vec<constructor_elt, va_gc> *v; vec_alloc (v, 2); - CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, tem); - CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, - build_zero_cst (ltype)); + if (memory_access_type == VMAT_CONTIGUOUS_REVERSE) + { + CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, + build_zero_cst (ltype)); + CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, tem); + } + else + { + CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, tem); + CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, + build_zero_cst (ltype)); + } gcc_assert (new_vtype != NULL_TREE); if (new_vtype == vectype) new_stmt = gimple_build_assign (