Hi, 

Commit r10-7415 brought scalar type consideration into
eliminating epilogue peeling for gaps, but it exposed one
problem: the current handling doesn't consider the memory
access type VMAT_CONTIGUOUS_REVERSE, for which the overrun
happens on the low-address side.  This patch makes the code
take care of it by updating the offset and the constructor
element order accordingly.
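To illustrate the idea (a standalone sketch with a made-up layout and
gap value, not the vectorizer code itself): with a group of four int
elements and a gap of two, only half of the vector may be loaded
without overrunning valid memory.  For a forward contiguous access the
gap is above the valid elements, so the half is loaded at offset 0 and
zero-padded; for a reversed access the gap is below them, so the load
offset is bumped by gap * element size and the loaded half goes into
the high constructor positions.

/* Hypothetical sketch: -1 marks the inaccessible gap elements.  */
#include <stdio.h>
#include <string.h>

int
main (void)
{
  int half[2], vec[4];

  /* Forward contiguous: the gap is above the valid elements, so load
     the half at offset 0 and zero-pad the high half.  */
  int fwd_mem[4] = {10, 11, -1, -1};
  memcpy (half, fwd_mem, sizeof half);
  vec[0] = half[0]; vec[1] = half[1]; vec[2] = 0; vec[3] = 0;
  printf ("forward: {%d, %d, %d, %d}\n", vec[0], vec[1], vec[2], vec[3]);

  /* Contiguous reverse: the gap is below the valid elements, so bump
     the load offset by gap * element size and place the loaded half as
     the second (high) constructor element; the later reverse permute
     then puts the valid elements first.  */
  int rev_mem[4] = {-1, -1, 12, 13};
  const int gap = 2;
  memcpy (half, rev_mem + gap, sizeof half);
  vec[0] = 0; vec[1] = 0; vec[2] = half[0]; vec[3] = half[1];
  printf ("reverse: {%d, %d, %d, %d}\n", vec[0], vec[1], vec[2], vec[3]);

  return 0;
}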

Bootstrapped/regtested on powerpc64le-linux-gnu P8
and aarch64-linux-gnu.

BR,
Kewen
-----------
gcc/ChangeLog

2020-04-02  Kewen Lin  <li...@gcc.gnu.org>

        PR tree-optimization/94401
        * tree-vect-stmts.c (vectorizable_load): Handle VMAT_CONTIGUOUS_REVERSE
        access type when loading halves of a vector to avoid peeling for gaps.
diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c
index 12beef6978c..3d27f59ba22 100644
--- a/gcc/tree-vect-stmts.c
+++ b/gcc/tree-vect-stmts.c
@@ -9590,11 +9590,20 @@ vectorizable_load (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
                            if (new_vtype != NULL_TREE)
                              ltype = half_vtype;
                          }
+                       tree offset = dataref_offset
+                                       ? dataref_offset
+                                       : build_int_cst (ref_type, 0);
+                       if (ltype != vectype
+                           && memory_access_type == VMAT_CONTIGUOUS_REVERSE)
+                         offset = size_binop (
+                           PLUS_EXPR,
+                           build_int_cst (ref_type,
+                                          DR_GROUP_GAP (first_stmt_info)
+                                            * tree_to_uhwi (
+                                              TYPE_SIZE_UNIT (elem_type))),
+                           offset);
                        data_ref
-                         = fold_build2 (MEM_REF, ltype, dataref_ptr,
-                                        dataref_offset
-                                        ? dataref_offset
-                                        : build_int_cst (ref_type, 0));
+                         = fold_build2 (MEM_REF, ltype, dataref_ptr, offset);
                        if (alignment_support_scheme == dr_aligned)
                          ;
                        else if (DR_MISALIGNMENT (first_dr_info) == -1)
@@ -9607,16 +9616,27 @@ vectorizable_load (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
                                                  TYPE_ALIGN (elem_type));
                        if (ltype != vectype)
                          {
-                           vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
+                           vect_copy_ref_info (data_ref,
+                                               DR_REF (first_dr_info->dr));
                            tree tem = make_ssa_name (ltype);
                            new_stmt = gimple_build_assign (tem, data_ref);
-                           vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
+                           vect_finish_stmt_generation (stmt_info, new_stmt,
+                                                        gsi);
                            data_ref = NULL;
                            vec<constructor_elt, va_gc> *v;
                            vec_alloc (v, 2);
-                           CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, tem);
-                           CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
-                                                   build_zero_cst (ltype));
+                           if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
+                             {
+                               CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
+                                                       build_zero_cst (ltype));
+                               CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, tem);
+                             }
+                           else
+                             {
+                               CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, tem);
+                               CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
+                                                       build_zero_cst (ltype));
+                             }
                            gcc_assert (new_vtype != NULL_TREE);
                            if (new_vtype == vectype)
                              new_stmt = gimple_build_assign (
