Currently the vectorizer forces unrolling for grouped loads whose DR_STEP is
not constant, loading the elements via the strided-load support.  The
following patch enhances that machinery to handle SLP-used groups with
non-constant DR_STEP, avoiding the excessive unrolling (and (un-)packing).
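
To illustrate the idea (this is only a rough, hand-written sketch, not what
the vectorizer literally emits): for the loop shape in the new slp-41.c
testcase below, assuming a 4-element int vector (e.g. SSE) so the whole
group fits one vector, the SLP strided path now does one unaligned vector
load per group instead of unrolling and gathering scalars.  The function
name and the use of GCC vector extensions here are just for illustration.

typedef int v4si __attribute__ ((vector_size (16)));

void
testi_sketch (int *p, short *q, int stride, int n)
{
  int i;
  for (i = 0; i < n; ++i)
    {
      v4si tmp;
      /* One unaligned vector load covering the whole 4-element group,
         advancing by the runtime stride each iteration.  */
      __builtin_memcpy (&tmp, p + (long) i * stride, sizeof tmp);
      /* The narrowing conversion and store are kept scalar in this sketch;
         the vectorizer handles them with its usual pack/truncate support.  */
      q[i*4+0] = tmp[0];
      q[i*4+1] = tmp[1];
      q[i*4+2] = tmp[2];
      q[i*4+3] = tmp[3];
    }
}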

Bootstrapped and tested on x86_64-unknown-linux-gnu, applied to trunk.

Richard.

2015-05-08  Richard Biener  <rguent...@suse.de>

        * tree-vect-data-refs.c (vect_compute_data_ref_alignment):
        Handle strided group loads.
        (vect_verify_datarefs_alignment): Likewise.
        (vect_enhance_data_refs_alignment): Likewise.
        (vect_analyze_group_access): Likewise.
        (vect_analyze_data_ref_access): Likewise.
        (vect_analyze_data_ref_accesses): Likewise.
        * tree-vect-stmts.c (vect_model_load_cost): Likewise.
        (vectorizable_load): Likewise.

        * gcc.dg/vect/slp-41.c: New testcase.

Index: gcc/tree-vect-data-refs.c
===================================================================
*** gcc/tree-vect-data-refs.c.orig      2015-05-08 13:24:31.797746925 +0200
--- gcc/tree-vect-data-refs.c   2015-05-08 13:26:23.839725349 +0200
*************** vect_compute_data_ref_alignment (struct
*** 671,677 ****
    tree vectype;
    tree base, base_addr;
    bool base_aligned;
!   tree misalign;
    tree aligned_to;
    unsigned HOST_WIDE_INT alignment;
  
--- 671,677 ----
    tree vectype;
    tree base, base_addr;
    bool base_aligned;
!   tree misalign = NULL_TREE;
    tree aligned_to;
    unsigned HOST_WIDE_INT alignment;
  
*************** vect_compute_data_ref_alignment (struct
*** 687,696 ****
  
    /* Strided loads perform only component accesses, misalignment information
       is irrelevant for them.  */
!   if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
      return true;
  
!   misalign = DR_INIT (dr);
    aligned_to = DR_ALIGNED_TO (dr);
    base_addr = DR_BASE_ADDRESS (dr);
    vectype = STMT_VINFO_VECTYPE (stmt_info);
--- 687,698 ----
  
    /* Strided loads perform only component accesses, misalignment information
       is irrelevant for them.  */
!   if (STMT_VINFO_STRIDE_LOAD_P (stmt_info)
!       && !STMT_VINFO_GROUPED_ACCESS (stmt_info))
      return true;
  
!   if (tree_fits_shwi_p (DR_STEP (dr)))
!     misalign = DR_INIT (dr);
    aligned_to = DR_ALIGNED_TO (dr);
    base_addr = DR_BASE_ADDRESS (dr);
    vectype = STMT_VINFO_VECTYPE (stmt_info);
*************** vect_compute_data_ref_alignment (struct
*** 704,712 ****
    if (loop && nested_in_vect_loop_p (loop, stmt))
      {
        tree step = DR_STEP (dr);
-       HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
  
!       if (dr_step % GET_MODE_SIZE (TYPE_MODE (vectype)) == 0)
          {
            if (dump_enabled_p ())
              dump_printf_loc (MSG_NOTE, vect_location,
--- 706,714 ----
    if (loop && nested_in_vect_loop_p (loop, stmt))
      {
        tree step = DR_STEP (dr);
  
!       if (tree_fits_shwi_p (step)
!         && tree_to_shwi (step) % GET_MODE_SIZE (TYPE_MODE (vectype)) == 0)
          {
            if (dump_enabled_p ())
              dump_printf_loc (MSG_NOTE, vect_location,
*************** vect_compute_data_ref_alignment (struct
*** 732,740 ****
    if (!loop)
      {
        tree step = DR_STEP (dr);
-       HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
  
!       if (dr_step % GET_MODE_SIZE (TYPE_MODE (vectype)) != 0)
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
--- 734,742 ----
    if (!loop)
      {
        tree step = DR_STEP (dr);
  
!       if (tree_fits_shwi_p (step)
!         && tree_to_shwi (step) % GET_MODE_SIZE (TYPE_MODE (vectype)) != 0)
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
*************** vect_verify_datarefs_alignment (loop_vec
*** 964,970 ****
  
        /* Strided loads perform only component accesses, alignment is
         irrelevant for them.  */
!       if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
        continue;
  
        supportable_dr_alignment = vect_supportable_dr_alignment (dr, false);
--- 966,973 ----
  
        /* Strided loads perform only component accesses, alignment is
         irrelevant for them.  */
!       if (STMT_VINFO_STRIDE_LOAD_P (stmt_info)
!         && !STMT_VINFO_GROUPED_ACCESS (stmt_info))
        continue;
  
        supportable_dr_alignment = vect_supportable_dr_alignment (dr, false);
*************** vect_enhance_data_refs_alignment (loop_v
*** 1431,1437 ****
  
        /* Strided loads perform only component accesses, alignment is
         irrelevant for them.  */
!       if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
        continue;
  
        supportable_dr_alignment = vect_supportable_dr_alignment (dr, true);
--- 1434,1441 ----
  
        /* Strided loads perform only component accesses, alignment is
         irrelevant for them.  */
!       if (STMT_VINFO_STRIDE_LOAD_P (stmt_info)
!         && !STMT_VINFO_GROUPED_ACCESS (stmt_info))
        continue;
  
        supportable_dr_alignment = vect_supportable_dr_alignment (dr, true);
*************** vect_enhance_data_refs_alignment (loop_v
*** 1723,1729 ****
  
          /* Strided loads perform only component accesses, alignment is
             irrelevant for them.  */
!         if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
            continue;
  
          save_misalignment = DR_MISALIGNMENT (dr);
--- 1727,1734 ----
  
          /* Strided loads perform only component accesses, alignment is
             irrelevant for them.  */
!         if (STMT_VINFO_STRIDE_LOAD_P (stmt_info)
!             && !STMT_VINFO_GROUPED_ACCESS (stmt_info))
            continue;
  
          save_misalignment = DR_MISALIGNMENT (dr);
*************** vect_enhance_data_refs_alignment (loop_v
*** 1841,1850 ****
                  && GROUP_FIRST_ELEMENT (stmt_info) != stmt))
            continue;
  
-         /* Strided loads perform only component accesses, alignment is
-            irrelevant for them.  */
          if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
!           continue;
  
          supportable_dr_alignment = vect_supportable_dr_alignment (dr, false);
  
--- 1846,1860 ----
                  && GROUP_FIRST_ELEMENT (stmt_info) != stmt))
            continue;
  
          if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
!           {
!             /* Strided loads perform only component accesses, alignment is
!                irrelevant for them.  */
!             if (!STMT_VINFO_GROUPED_ACCESS (stmt_info))
!               continue;
!             do_versioning = false;
!             break;
!           }
  
          supportable_dr_alignment = vect_supportable_dr_alignment (dr, false);
  
*************** vect_analyze_group_access (struct data_r
*** 2057,2063 ****
    stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
    loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
    bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
!   HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
    HOST_WIDE_INT groupsize, last_accessed_element = 1;
    bool slp_impossible = false;
    struct loop *loop = NULL;
--- 2067,2073 ----
    stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
    loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
    bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
!   HOST_WIDE_INT dr_step = -1;
    HOST_WIDE_INT groupsize, last_accessed_element = 1;
    bool slp_impossible = false;
    struct loop *loop = NULL;
*************** vect_analyze_group_access (struct data_r
*** 2067,2073 ****
  
    /* For interleaving, GROUPSIZE is STEP counted in elements, i.e., the
       size of the interleaving group (including gaps).  */
!   groupsize = absu_hwi (dr_step) / type_size;
  
    /* Not consecutive access is possible only if it is a part of interleaving. 
 */
    if (!GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
--- 2077,2089 ----
  
    /* For interleaving, GROUPSIZE is STEP counted in elements, i.e., the
       size of the interleaving group (including gaps).  */
!   if (tree_fits_shwi_p (step))
!     {
!       dr_step = tree_to_shwi (step);
!       groupsize = absu_hwi (dr_step) / type_size;
!     }
!   else
!     groupsize = 0;
  
    /* Not consecutive access is possible only if it is a part of interleaving. 
 */
    if (!GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
*************** vect_analyze_group_access (struct data_r
*** 2142,2148 ****
        tree prev_init = DR_INIT (data_ref);
        gimple prev = stmt;
        HOST_WIDE_INT diff, gaps = 0;
-       unsigned HOST_WIDE_INT count_in_bytes;
  
        while (next)
          {
--- 2158,2163 ----
*************** vect_analyze_group_access (struct data_r
*** 2211,2240 ****
            count++;
          }
  
!       /* COUNT is the number of accesses found, we multiply it by the size of
!          the type to get COUNT_IN_BYTES.  */
!       count_in_bytes = type_size * count;
! 
!       /* Check that the size of the interleaving (including gaps) is not
!          greater than STEP.  */
!       if (dr_step != 0
!         && absu_hwi (dr_step) < count_in_bytes + gaps * type_size)
!         {
!           if (dump_enabled_p ())
!             {
!               dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
!                                "interleaving size is greater than step for ");
!               dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
!                                  DR_REF (dr));
!               dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
!             }
!           return false;
!         }
  
!       /* Check that the size of the interleaving is equal to STEP for stores,
           i.e., that there are no gaps.  */
!       if (dr_step != 0
!         && absu_hwi (dr_step) != count_in_bytes)
          {
            if (DR_IS_READ (dr))
              {
--- 2226,2237 ----
            count++;
          }
  
!       if (groupsize == 0)
!         groupsize = count + gaps;
  
!       /* Check that the size of the interleaving is equal to count for stores,
           i.e., that there are no gaps.  */
!       if (groupsize != count)
          {
            if (DR_IS_READ (dr))
              {
*************** vect_analyze_group_access (struct data_r
*** 2253,2278 ****
              }
          }
  
-       /* Check that STEP is a multiple of type size.  */
-       if (dr_step != 0
-         && (dr_step % type_size) != 0)
-         {
-           if (dump_enabled_p ())
-             {
-               dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
-                                "step is not a multiple of type size: step ");
-               dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, step);
-               dump_printf (MSG_MISSED_OPTIMIZATION, " size ");
-               dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
-                                  TYPE_SIZE_UNIT (scalar_type));
-               dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
-             }
-           return false;
-         }
- 
-       if (groupsize == 0)
-         groupsize = count + gaps;
- 
        GROUP_SIZE (vinfo_for_stmt (stmt)) = groupsize;
        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
--- 2250,2255 ----
*************** vect_analyze_data_ref_access (struct dat
*** 2392,2400 ****
        return false;
      }
  
    /* Assume this is a DR handled by non-constant strided load case.  */
    if (TREE_CODE (step) != INTEGER_CST)
!     return STMT_VINFO_STRIDE_LOAD_P (stmt_info);
  
    /* Not consecutive access - check if it's a part of interleaving group.  */
    return vect_analyze_group_access (dr);
--- 2369,2380 ----
        return false;
      }
  
+ 
    /* Assume this is a DR handled by non-constant strided load case.  */
    if (TREE_CODE (step) != INTEGER_CST)
!     return (STMT_VINFO_STRIDE_LOAD_P (stmt_info)
!           && (!STMT_VINFO_GROUPED_ACCESS (stmt_info)
!               || vect_analyze_group_access (dr)));
  
    /* Not consecutive access - check if it's a part of interleaving group.  */
    return vect_analyze_group_access (dr);
*************** vect_analyze_data_ref_accesses (loop_vec
*** 2596,2610 ****
              || !gimple_assign_single_p (DR_STMT (drb)))
            break;
  
!         /* Check that the data-refs have the same constant size and step.  */
          tree sza = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dra)));
          tree szb = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (drb)));
          if (!tree_fits_uhwi_p (sza)
              || !tree_fits_uhwi_p (szb)
!             || !tree_int_cst_equal (sza, szb)
!             || !tree_fits_shwi_p (DR_STEP (dra))
!             || !tree_fits_shwi_p (DR_STEP (drb))
!             || !tree_int_cst_equal (DR_STEP (dra), DR_STEP (drb)))
            break;
  
          /* Do not place the same access in the interleaving chain twice.  */
--- 2576,2591 ----
              || !gimple_assign_single_p (DR_STMT (drb)))
            break;
  
!         /* Check that the data-refs have the same constant size.  */
          tree sza = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dra)));
          tree szb = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (drb)));
          if (!tree_fits_uhwi_p (sza)
              || !tree_fits_uhwi_p (szb)
!             || !tree_int_cst_equal (sza, szb))
!           break;
! 
!         /* Check that the data-refs have the same step.  */
!         if (!operand_equal_p (DR_STEP (dra), DR_STEP (drb), 0))
            break;
  
          /* Do not place the same access in the interleaving chain twice.  */
*************** vect_analyze_data_ref_accesses (loop_vec
*** 2637,2647 ****
                  != type_size_a))
            break;
  
!         /* The step (if not zero) is greater than the difference between
!            data-refs' inits.  This splits groups into suitable sizes.  */
!         HOST_WIDE_INT step = tree_to_shwi (DR_STEP (dra));
!         if (step != 0 && step <= (init_b - init_a))
!           break;
  
          if (dump_enabled_p ())
            {
--- 2618,2632 ----
                  != type_size_a))
            break;
  
!         /* If the step (if not zero or non-constant) is greater than the
!            difference between data-refs' inits this splits groups into
!            suitable sizes.  */
!         if (tree_fits_shwi_p (DR_STEP (dra)))
!           {
!             HOST_WIDE_INT step = tree_to_shwi (DR_STEP (dra));
!             if (step != 0 && step <= (init_b - init_a))
!               break;
!           }
  
          if (dump_enabled_p ())
            {
Index: gcc/tree-vect-stmts.c
===================================================================
*** gcc/tree-vect-stmts.c.orig  2015-05-08 13:24:31.797746925 +0200
--- gcc/tree-vect-stmts.c       2015-05-08 13:28:00.920573458 +0200
*************** vect_model_load_cost (stmt_vec_info stmt
*** 1112,1118 ****
       equivalent to the cost of GROUP_SIZE separate loads.  If a grouped
       access is instead being provided by a load-and-permute operation,
       include the cost of the permutes.  */
!   if (!load_lanes_p && group_size > 1)
      {
        /* Uses an even and odd extract operations or shuffle operations
         for each needed permute.  */
--- 1112,1119 ----
       equivalent to the cost of GROUP_SIZE separate loads.  If a grouped
       access is instead being provided by a load-and-permute operation,
       include the cost of the permutes.  */
!   if (!load_lanes_p && group_size > 1
!       && !STMT_VINFO_STRIDE_LOAD_P (stmt_info))
      {
        /* Uses an even and odd extract operations or shuffle operations
         for each needed permute.  */
*************** vect_model_load_cost (stmt_vec_info stmt
*** 1127,1141 ****
      }
  
    /* The loads themselves.  */
!   if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
      {
        /* N scalar loads plus gathering them into a vector.  */
        tree vectype = STMT_VINFO_VECTYPE (stmt_info);
        inside_cost += record_stmt_cost (body_cost_vec,
                                       ncopies * TYPE_VECTOR_SUBPARTS (vectype),
                                       scalar_load, stmt_info, 0, vect_body);
-       inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_construct,
-                                      stmt_info, 0, vect_body);
      }
    else
      vect_get_load_cost (first_dr, ncopies,
--- 1128,1141 ----
      }
  
    /* The loads themselves.  */
!   if (STMT_VINFO_STRIDE_LOAD_P (stmt_info)
!       && !STMT_VINFO_GROUPED_ACCESS (stmt_info))
      {
        /* N scalar loads plus gathering them into a vector.  */
        tree vectype = STMT_VINFO_VECTYPE (stmt_info);
        inside_cost += record_stmt_cost (body_cost_vec,
                                       ncopies * TYPE_VECTOR_SUBPARTS (vectype),
                                       scalar_load, stmt_info, 0, vect_body);
      }
    else
      vect_get_load_cost (first_dr, ncopies,
*************** vect_model_load_cost (stmt_vec_info stmt
*** 1143,1148 ****
--- 1143,1151 ----
                         || group_size > 1 || slp_node),
                        &inside_cost, &prologue_cost, 
                        prologue_cost_vec, body_cost_vec, true);
+   if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
+       inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_construct,
+                                      stmt_info, 0, vect_body);
  
    if (dump_enabled_p ())
      dump_printf_loc (MSG_NOTE, vect_location,
*************** vectorizable_load (gimple stmt, gimple_s
*** 5657,5663 ****
    gimple ptr_incr = NULL;
    int nunits = TYPE_VECTOR_SUBPARTS (vectype);
    int ncopies;
!   int i, j, group_size, group_gap;
    tree msq = NULL_TREE, lsq;
    tree offset = NULL_TREE;
    tree byte_offset = NULL_TREE;
--- 5660,5666 ----
    gimple ptr_incr = NULL;
    int nunits = TYPE_VECTOR_SUBPARTS (vectype);
    int ncopies;
!   int i, j, group_size = -1, group_gap;
    tree msq = NULL_TREE, lsq;
    tree offset = NULL_TREE;
    tree byte_offset = NULL_TREE;
*************** vectorizable_load (gimple stmt, gimple_s
*** 5790,5798 ****
          return false;
        }
  
!       if (!slp && !PURE_SLP_STMT (stmt_info))
        {
-         group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
          if (vect_load_lanes_supported (vectype, group_size))
            load_lanes_p = true;
          else if (!vect_grouped_load_supported (vectype, group_size))
--- 5793,5803 ----
          return false;
        }
  
!       group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
!       if (!slp
!         && !PURE_SLP_STMT (stmt_info)
!         && !STMT_VINFO_STRIDE_LOAD_P (stmt_info))
        {
          if (vect_load_lanes_supported (vectype, group_size))
            load_lanes_p = true;
          else if (!vect_grouped_load_supported (vectype, group_size))
*************** vectorizable_load (gimple stmt, gimple_s
*** 5847,5853 ****
        }
      }
    else if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
!     ;
    else
      {
        negative = tree_int_cst_compare (nested_in_vect_loop
--- 5852,5873 ----
        }
      }
    else if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
!     {
!       if ((grouped_load
!          && (slp || PURE_SLP_STMT (stmt_info)))
!         && (group_size > nunits
!             || nunits % group_size != 0
!             /* ???  During analysis phase we are not called with the
!                slp node/instance we are in so whether we'll end up
!                with a permutation we don't know.  Still we don't
!                support load permutations.  */
!             || slp_perm))
!       {
!         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
!                          "unhandled strided group load\n");
!         return false;
!       }
!     }
    else
      {
        negative = tree_int_cst_compare (nested_in_vect_loop
*************** vectorizable_load (gimple stmt, gimple_s
*** 6136,6169 ****
        prev_stmt_info = NULL;
        running_off = offvar;
        alias_off = build_int_cst (reference_alias_ptr_type (DR_REF (dr)), 0);
        for (j = 0; j < ncopies; j++)
        {
          tree vec_inv;
  
!         vec_alloc (v, nunits);
!         for (i = 0; i < nunits; i++)
            {
!             tree newref, newoff;
!             gimple incr;
!             newref = build2 (MEM_REF, TREE_TYPE (vectype),
!                              running_off, alias_off);
! 
!             newref = force_gimple_operand_gsi (gsi, newref, true,
!                                                NULL_TREE, true,
!                                                GSI_SAME_STMT);
!             CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, newref);
!             newoff = copy_ssa_name (running_off);
!             incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
                                          running_off, stride_step);
              vect_finish_stmt_generation (stmt, incr, gsi);
  
              running_off = newoff;
            }
  
!         vec_inv = build_constructor (vectype, v);
!         new_temp = vect_init_vector (stmt, vec_inv, vectype, gsi);
!         new_stmt = SSA_NAME_DEF_STMT (new_temp);
! 
          if (j == 0)
            STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
          else
--- 6156,6220 ----
        prev_stmt_info = NULL;
        running_off = offvar;
        alias_off = build_int_cst (reference_alias_ptr_type (DR_REF (dr)), 0);
+       int nloads = nunits;
+       tree ltype = TREE_TYPE (vectype);
+       if (slp)
+       {
+         nloads = nunits / group_size;
+         if (group_size < nunits)
+           ltype = build_vector_type (TREE_TYPE (vectype), group_size);
+         else
+           ltype = vectype;
+         ltype = build_aligned_type (ltype, TYPE_ALIGN (TREE_TYPE (vectype)));
+         ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
+         gcc_assert (!slp_perm);
+       }
        for (j = 0; j < ncopies; j++)
        {
          tree vec_inv;
  
!         if (nloads > 1)
            {
!             vec_alloc (v, nloads);
!             for (i = 0; i < nloads; i++)
!               {
!                 tree newref, newoff;
!                 gimple incr;
!                 newref = build2 (MEM_REF, ltype, running_off, alias_off);
! 
!                 newref = force_gimple_operand_gsi (gsi, newref, true,
!                                                    NULL_TREE, true,
!                                                    GSI_SAME_STMT);
!                 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, newref);
!                 newoff = copy_ssa_name (running_off);
!                 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
!                                             running_off, stride_step);
!                 vect_finish_stmt_generation (stmt, incr, gsi);
! 
!                 running_off = newoff;
!               }
! 
!             vec_inv = build_constructor (vectype, v);
!             new_temp = vect_init_vector (stmt, vec_inv, vectype, gsi);
!             new_stmt = SSA_NAME_DEF_STMT (new_temp);
!           }
!         else
!           {
!             new_stmt = gimple_build_assign (make_ssa_name (ltype),
!                                             build2 (MEM_REF, ltype,
!                                                     running_off, alias_off));
!             vect_finish_stmt_generation (stmt, new_stmt, gsi);
! 
!             tree newoff = copy_ssa_name (running_off);
!             gimple incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
                                          running_off, stride_step);
              vect_finish_stmt_generation (stmt, incr, gsi);
  
              running_off = newoff;
            }
  
!         if (slp)
!           SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
          if (j == 0)
            STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
          else
Index: gcc/testsuite/gcc.dg/vect/slp-41.c
===================================================================
*** /dev/null   1970-01-01 00:00:00.000000000 +0000
--- gcc/testsuite/gcc.dg/vect/slp-41.c  2015-05-08 13:26:23.916726022 +0200
***************
*** 0 ****
--- 1,69 ----
+ /* { dg-require-effective-target vect_int } */
+ /* { dg-require-effective-target vect_pack_trunc } */
+ /* { dg-require-effective-target vect_unpack } */
+ /* { dg-require-effective-target vect_hw_misalign } */
+ 
+ #include "tree-vect.h"
+ 
+ void __attribute__((noinline,noclone))
+ testi (int *p, short *q, int stride, int n)
+ {
+   int i;
+   for (i = 0; i < n; ++i)
+     {
+       q[i*4+0] = p[i*stride+0];
+       q[i*4+1] = p[i*stride+1];
+       q[i*4+2] = p[i*stride+2];
+       q[i*4+3] = p[i*stride+3];
+     }
+ }
+ 
+ void __attribute__((noinline,noclone))
+ testi2 (int *q, short *p, int stride, int n)
+ {
+   int i;
+   for (i = 0; i < n; ++i)
+     {
+       q[i*4+0] = p[i*stride+0];
+       q[i*4+1] = p[i*stride+1];
+       q[i*4+2] = p[i*stride+2];
+       q[i*4+3] = p[i*stride+3];
+     }
+ }
+ 
+ int ia[256];
+ short sa[256];
+ 
+ extern void abort (void);
+ 
+ int main()
+ {
+   int i;
+ 
+   check_vect ();
+ 
+   for (i = 0; i < 256; ++i)
+     {
+       ia[i] = sa[i] = i;
+        __asm__ volatile ("");
+     }
+   testi (ia, sa, 8, 32);
+   for (i = 0; i < 128; ++i)
+     if (sa[i] != ia[(i / 4) * 8 + i % 4])
+       abort ();
+ 
+   for (i = 0; i < 256; ++i)
+     {
+       ia[i] = sa[i] = i;
+        __asm__ volatile ("");
+     }
+   testi2 (ia, sa, 8, 32);
+   for (i = 0; i < 128; ++i)
+     if (ia[i] != sa[(i / 4) * 8 + i % 4])
+       abort ();
+ 
+   return 0;
+ }
+ 
+ /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" } } */
+ /* { dg-final { cleanup-tree-dump "vect" } } */

Reply via email to