This allows all permutations we can generate (according to the target).

Bootstrap and regtest pending on x86_64-unknown-linux-gnu.

Richard.

2015-06-03  Richard Biener  <rguent...@suse.de>

        * tree-vect-stmts.c (vectorizable_load): Compute the pointer
        adjustment for gaps at the end of a SLP load group properly.
        * tree-vect-slp.c (vect_supported_load_permutation_p): Allow
        all permutations we can generate.

        * gcc.dg/vect/slp-perm-10.c: New testcase.
        * gcc.dg/vect/slp-23.c: Adjust.

Index: gcc/tree-vect-stmts.c
===================================================================
*** gcc/tree-vect-stmts.c       (revision 224061)
--- gcc/tree-vect-stmts.c       (working copy)
*************** vectorizable_load (gimple stmt, gimple_s
*** 5807,5813 ****
    gimple ptr_incr = NULL;
    int nunits = TYPE_VECTOR_SUBPARTS (vectype);
    int ncopies;
!   int i, j, group_size = -1, group_gap;
    tree msq = NULL_TREE, lsq;
    tree offset = NULL_TREE;
    tree byte_offset = NULL_TREE;
--- 5807,5813 ----
    gimple ptr_incr = NULL;
    int nunits = TYPE_VECTOR_SUBPARTS (vectype);
    int ncopies;
!   int i, j, group_size = -1, group_gap_adj;
    tree msq = NULL_TREE, lsq;
    tree offset = NULL_TREE;
    tree byte_offset = NULL_TREE;
*************** vectorizable_load (gimple stmt, gimple_s
*** 6402,6413 ****
        {
          grouped_load = false;
          vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
!         group_gap = GROUP_GAP (vinfo_for_stmt (first_stmt));
        }
        else
        {
          vec_num = group_size;
!         group_gap = 0;
        }
      }
    else
--- 6402,6413 ----
        {
          grouped_load = false;
          vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
!         group_gap_adj = vf * group_size - nunits * vec_num;
        }
        else
        {
          vec_num = group_size;
!         group_gap_adj = 0;
        }
      }
    else
*************** vectorizable_load (gimple stmt, gimple_s
*** 6415,6421 ****
        first_stmt = stmt;
        first_dr = dr;
        group_size = vec_num = 1;
!       group_gap = 0;
      }
  
    alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
--- 6415,6421 ----
        first_stmt = stmt;
        first_dr = dr;
        group_size = vec_num = 1;
!       group_gap_adj = 0;
      }
  
    alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
*************** vectorizable_load (gimple stmt, gimple_s
*** 6832,6842 ****
                SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
            }
          /* Bump the vector pointer to account for a gap.  */
!         if (slp && group_gap != 0)
            {
              tree bump = size_binop (MULT_EXPR,
                                      TYPE_SIZE_UNIT (elem_type),
!                                     size_int (group_gap));
              dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
                                             stmt, bump);
            }
--- 6832,6842 ----
                SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
            }
          /* Bump the vector pointer to account for a gap.  */
!         if (group_gap_adj != 0)
            {
              tree bump = size_binop (MULT_EXPR,
                                      TYPE_SIZE_UNIT (elem_type),
!                                     size_int (group_gap_adj));
              dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
                                             stmt, bump);
            }
Index: gcc/tree-vect-slp.c
===================================================================
*** gcc/tree-vect-slp.c (revision 224061)
--- gcc/tree-vect-slp.c (working copy)
*************** vect_supported_load_permutation_p (slp_i
*** 1506,1552 ****
        return true;
      }
  
!   /* FORNOW: the only supported permutation is 0..01..1.. of length equal to
!      GROUP_SIZE and where each sequence of same drs is of GROUP_SIZE length as
!      well (unless it's reduction).  */
!   if (SLP_INSTANCE_LOADS (slp_instn).length () != group_size)
!     return false;
!   FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
!     if (!node->load_permutation.exists ())
!       return false;
! 
!   load_index = sbitmap_alloc (group_size);
!   bitmap_clear (load_index);
!   FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
!     {
!       unsigned int lidx = node->load_permutation[0];
!       if (bitmap_bit_p (load_index, lidx))
!       {
!         sbitmap_free (load_index);
!         return false;
!       }
!       bitmap_set_bit (load_index, lidx);
!       FOR_EACH_VEC_ELT (node->load_permutation, j, k)
!       if (k != lidx)
!         {
!           sbitmap_free (load_index);
!           return false;
!         }
!     }
!   for (i = 0; i < group_size; i++)
!     if (!bitmap_bit_p (load_index, i))
!       {
!       sbitmap_free (load_index);
!       return false;
!       }
!   sbitmap_free (load_index);
! 
    FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
      if (node->load_permutation.exists ()
        && !vect_transform_slp_perm_load
              (node, vNULL, NULL,
               SLP_INSTANCE_UNROLLING_FACTOR (slp_instn), slp_instn, true))
        return false;
    return true;
  }
  
--- 1504,1517 ----
        return true;
      }
  
!   /* For loop vectorization verify we can generate the permutation.  */
    FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
      if (node->load_permutation.exists ()
        && !vect_transform_slp_perm_load
              (node, vNULL, NULL,
               SLP_INSTANCE_UNROLLING_FACTOR (slp_instn), slp_instn, true))
        return false;
+ 
    return true;
  }
  
Index: gcc/testsuite/gcc.dg/vect/slp-23.c
===================================================================
*** gcc/testsuite/gcc.dg/vect/slp-23.c  (revision 224061)
--- gcc/testsuite/gcc.dg/vect/slp-23.c  (working copy)
*************** int main (void)
*** 108,112 ****
  
  /* { dg-final { scan-tree-dump-times "vectorized 2 loops" 1 "vect" { target { vect_strided8 && { ! { vect_no_align} } } } } } */
  /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { ! { vect_strided8 || vect_no_align } } } } } */
! /* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" } } */
    
--- 108,113 ----
  
  /* { dg-final { scan-tree-dump-times "vectorized 2 loops" 1 "vect" { target { vect_strided8 && { ! { vect_no_align} } } } } } */
  /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { ! { vect_strided8 || vect_no_align } } } } } */
! /* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target { ! vect_perm } } } } */
! /* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" { target vect_perm } } } */
    
Index: gcc/testsuite/gcc.dg/vect/slp-perm-10.c
===================================================================
*** gcc/testsuite/gcc.dg/vect/slp-perm-10.c     (revision 0)
--- gcc/testsuite/gcc.dg/vect/slp-perm-10.c     (working copy)
***************
*** 0 ****
--- 1,53 ----
+ /* { dg-require-effective-target vect_int } */
+ 
+ #include "tree-vect.h"
+ 
+ int a[256], b[256];
+ 
+ void __attribute__((noinline))
+ foo (void)
+ {
+   int i;
+   for (i = 0; i < 32; ++i)
+     {
+       b[i*8+0] = a[i*8+0];
+       b[i*8+1] = a[i*8+0];
+       b[i*8+2] = a[i*8+3];
+       b[i*8+3] = a[i*8+3];
+       b[i*8+4] = a[i*8+4];
+       b[i*8+5] = a[i*8+6];
+       b[i*8+6] = a[i*8+4];
+       b[i*8+7] = a[i*8+6];
+     }
+ }
+ 
+ int main ()
+ {
+   int i;
+ 
+   check_vect ();
+ 
+   for (i = 0; i < 256; ++i)
+     {
+       a[i] = i;
+       __asm__ volatile ("");
+     }
+ 
+   foo ();
+ 
+   for (i = 0; i < 32; ++i)
+     if (b[i*8+0] != i*8+0
+       || b[i*8+1] != i*8+0
+       || b[i*8+2] != i*8+3
+       || b[i*8+3] != i*8+3
+       || b[i*8+4] != i*8+4
+       || b[i*8+5] != i*8+6
+       || b[i*8+6] != i*8+4
+       || b[i*8+7] != i*8+6)
+       abort ();
+ 
+   return 0;
+ }
+ 
+ /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_perm } } } */
+ /* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target vect_perm } } } */

Reply via email to