On Wed, 2015-09-16 at 09:16 -0400, Trevor Saunders wrote:
> Hi,
> 
> I gave changing from gimple to gimple * a shot last week.  It turned out
> to be not too hard.  As you might expect, the patch is huge, so it's
> attached compressed.
> 
> The patch was bootstrapped and regtested on x86_64-linux-gnu, and run through
> config-list.mk.  However, I needed to update it somewhat for changes made
> while testing.  Do people want to make this change now?  If so, I'll try
> to commit the patch over the weekend when less is changing.


FWIW there are some big changes in gcc/tree-vect-stmts.c:vectorizable_load
that look like unrelated whitespace changes, e.g. the following (and
there are some follow-up hunks).  Did something change underneath, or was
there a stray whitespace cleanup here?  (I skimmed through the patch,
and this was the only file where I spotted something that looked wrong.)
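
For context, my understanding of the change being reviewed: "gimple" stops
being a pointer typedef and becomes the statement type itself, so every
pointer gets spelled out explicitly.  A toy sketch of the pattern (made-up
names, not lines from the patch):

  /* Self-contained illustration of the typedef change; not code from the patch.  */
  struct gimple_statement_base { int code; };

  /* Old style: "gimple" is itself a pointer typedef, so "gimple stmt" is
     already a pointer and an output statement is passed as "gimple *".  */
  typedef gimple_statement_base *gimple_old;
  void old_style (gimple_old stmt, gimple_old *vec_stmt);

  /* New style: "gimple" names the statement type, so pointers are written
     explicitly: "gimple *stmt", "gimple **vec_stmt".  */
  typedef gimple_statement_base gimple_new;
  void new_style (gimple_new *stmt, gimple_new **vec_stmt);

Presumably the signature in the hunk header below goes from
"vectorizable_load (gimple stmt, ..., gimple *vec_stmt, ...)" to
"vectorizable_load (gimple *stmt, ..., gimple **vec_stmt, ...)".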


@@ -6755,473 +6754,473 @@ vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
   gcc_assert (alignment_support_scheme);
   /* Targets with load-lane instructions must not require explicit
      realignment.  */
-  gcc_assert (!load_lanes_p
-             || alignment_support_scheme == dr_aligned
-             || alignment_support_scheme == dr_unaligned_supported);
-
-  /* In case the vectorization factor (VF) is bigger than the number
-     of elements that we can fit in a vectype (nunits), we have to generate
-     more than one vector stmt - i.e - we need to "unroll" the
-     vector stmt by a factor VF/nunits.  In doing so, we record a pointer
-     from one copy of the vector stmt to the next, in the field
-     STMT_VINFO_RELATED_STMT.  This is necessary in order to allow following
-     stages to find the correct vector defs to be used when vectorizing
-     stmts that use the defs of the current stmt.  The example below
-     illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
-     need to create 4 vectorized stmts):
-
-     before vectorization:
-                                RELATED_STMT    VEC_STMT
-        S1:     x = memref      -               -
-        S2:     z = x + 1       -               -
-
-     step 1: vectorize stmt S1:
-        We first create the vector stmt VS1_0, and, as usual, record a
-        pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
-        Next, we create the vector stmt VS1_1, and record a pointer to
-        it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
-        Similarly, for VS1_2 and VS1_3.  This is the resulting chain of
-        stmts and pointers:
-                                RELATED_STMT    VEC_STMT
-        VS1_0:  vx0 = memref0   VS1_1           -
-        VS1_1:  vx1 = memref1   VS1_2           -
-        VS1_2:  vx2 = memref2   VS1_3           -
-        VS1_3:  vx3 = memref3   -               -
-        S1:     x = load        -               VS1_0
-        S2:     z = x + 1       -               -
-
-     See in documentation in vect_get_vec_def_for_stmt_copy for how the
-     information we recorded in RELATED_STMT field is used to vectorize
-     stmt S2.  */
-
-  /* In case of interleaving (non-unit grouped access):
-
-     S1:  x2 = &base + 2
-     S2:  x0 = &base
-     S3:  x1 = &base + 1
-     S4:  x3 = &base + 3
-
-     Vectorized loads are created in the order of memory accesses
-     starting from the access of the first stmt of the chain:
-
-     VS1: vx0 = &base
-     VS2: vx1 = &base + vec_size*1
-     VS3: vx3 = &base + vec_size*2
-     VS4: vx4 = &base + vec_size*3
-
-     Then permutation statements are generated:
-
-     VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
-     VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >
-       ...
-
-     And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
-     (the order of the data-refs in the output of vect_permute_load_chain
-     corresponds to the order of scalar stmts in the interleaving chain - see
-     the documentation of vect_permute_load_chain()).
-     The generation of permutation stmts and recording them in
-     STMT_VINFO_VEC_STMT is done in vect_transform_grouped_load().
-
-     In case of both multiple types and interleaving, the vector loads and
-     permutation stmts above are created for every copy.  The result vector
-     stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
-     corresponding STMT_VINFO_RELATED_STMT for the next copies.  */
-
-  /* If the data reference is aligned (dr_aligned) or potentially unaligned
-     on a target that supports unaligned accesses (dr_unaligned_supported)
-     we generate the following code:
-         p = initial_addr;
-         indx = 0;
-         loop {
-          p = p + indx * vectype_size;
-           vec_dest = *(p);
-           indx = indx + 1;
-         }
-
-     Otherwise, the data reference is potentially unaligned on a target that
-     does not support unaligned accesses (dr_explicit_realign_optimized) -
-     then generate the following code, in which the data in each iteration is
-     obtained by two vector loads, one from the previous iteration, and one
-     from the current iteration:
-         p1 = initial_addr;
-         msq_init = *(floor(p1))
-         p2 = initial_addr + VS - 1;
-         realignment_token = call target_builtin;
-         indx = 0;
-         loop {
-           p2 = p2 + indx * vectype_size
-           lsq = *(floor(p2))
-           vec_dest = realign_load (msq, lsq, realignment_token)
-           indx = indx + 1;
-           msq = lsq;
-         }   */
-
-  /* If the misalignment remains the same throughout the execution of the
-     loop, we can create the init_addr and permutation mask at the loop
-     preheader.  Otherwise, it needs to be created inside the loop.
-     This can only occur when vectorizing memory accesses in the inner-loop
-     nested within an outer-loop that is being vectorized.  */
-
-  if (nested_in_vect_loop
-      && (TREE_INT_CST_LOW (DR_STEP (dr))
-         % GET_MODE_SIZE (TYPE_MODE (vectype)) != 0))
-    {
-      gcc_assert (alignment_support_scheme != dr_explicit_realign_optimized);
-      compute_in_loop = true;
-    }
-
-  if ((alignment_support_scheme == dr_explicit_realign_optimized
-       || alignment_support_scheme == dr_explicit_realign)
-      && !compute_in_loop)
-    {
-      msq = vect_setup_realignment (first_stmt, gsi, &realignment_token,
-                                   alignment_support_scheme, NULL_TREE,
-                                   &at_loop);
-      if (alignment_support_scheme == dr_explicit_realign_optimized)
-       {
-         phi = as_a <gphi *> (SSA_NAME_DEF_STMT (msq));
-         byte_offset = size_binop (MINUS_EXPR, TYPE_SIZE_UNIT (vectype),
-                                   size_one_node);
-       }
-    }
-  else
-    at_loop = loop;
-
-  if (negative)
-    offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
-
-  if (load_lanes_p)
-    aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
-  else
-    aggr_type = vectype;
+             gcc_assert (!load_lanes_p
+                         || alignment_support_scheme == dr_aligned
+                         || alignment_support_scheme == dr_unaligned_supported);
+
+             /* In case the vectorization factor (VF) is bigger than the number
+                of elements that we can fit in a vectype (nunits), we have to generate
+                more than one vector stmt - i.e - we need to "unroll" the
+                vector stmt by a factor VF/nunits.  In doing so, we record a pointer
+                from one copy of the vector stmt to the next, in the field
+                STMT_VINFO_RELATED_STMT.  This is necessary in order to allow following
+                stages to find the correct vector defs to be used when vectorizing
+                stmts that use the defs of the current stmt.  The example below
+                illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
+                need to create 4 vectorized stmts):
+
+                before vectorization:
+                RELATED_STMT    VEC_STMT
+                S1:     x = memref      -               -
+S2:     z = x + 1       -               -
+
+step 1: vectorize stmt S1:
+We first create the vector stmt VS1_0, and, as usual, record a
+pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
+Next, we create the vector stmt VS1_1, and record a pointer to
+it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
+Similarly, for VS1_2 and VS1_3.  This is the resulting chain of
+stmts and pointers:
+RELATED_STMT    VEC_STMT
+VS1_0:  vx0 = memref0   VS1_1           -
+VS1_1:  vx1 = memref1   VS1_2           -
+VS1_2:  vx2 = memref2   VS1_3           -
+VS1_3:  vx3 = memref3   -               -
+S1:     x = load        -               VS1_0
+S2:     z = x + 1       -               -
+
+See in documentation in vect_get_vec_def_for_stmt_copy for how the
+information we recorded in RELATED_STMT field is used to vectorize
+stmt S2.  */
+
+         /* In case of interleaving (non-unit grouped access):
+
+S1:  x2 = &base + 2
+S2:  x0 = &base
+S3:  x1 = &base + 1
+S4:  x3 = &base + 3
+
+Vectorized loads are created in the order of memory accesses
+starting from the access of the first stmt of the chain:
+
+VS1: vx0 = &base
+VS2: vx1 = &base + vec_size*1
+VS3: vx3 = &base + vec_size*2
+VS4: vx4 = &base + vec_size*3
+
+Then permutation statements are generated:
+
+VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
+VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >
+...
+
+And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
+(the order of the data-refs in the output of vect_permute_load_chain
+corresponds to the order of scalar stmts in the interleaving chain - see
+the documentation of vect_permute_load_chain()).
+The generation of permutation stmts and recording them in
+STMT_VINFO_VEC_STMT is done in vect_transform_grouped_load().
+
+In case of both multiple types and interleaving, the vector loads and
+permutation stmts above are created for every copy.  The result vector
+stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
+corresponding STMT_VINFO_RELATED_STMT for the next copies.  */
+
+             /* If the data reference is aligned (dr_aligned) or potentially unaligned
+                on a target that supports unaligned accesses (dr_unaligned_supported)
+                we generate the following code:
+                p = initial_addr;
+                indx = 0;
+                loop {
+                p = p + indx * vectype_size;
+                vec_dest = *(p);
+                indx = indx + 1;
+                }
+
+                Otherwise, the data reference is potentially unaligned on a target that
+                does not support unaligned accesses (dr_explicit_realign_optimized) -
+                then generate the following code, in which the data in each iteration is
+                obtained by two vector loads, one from the previous iteration, and one
+                from the current iteration:
+                p1 = initial_addr;
+                msq_init = *(floor(p1))
+                p2 = initial_addr + VS - 1;
+                realignment_token = call target_builtin;
+                indx = 0;
+                loop {
+                p2 = p2 + indx * vectype_size
+                lsq = *(floor(p2))
+                vec_dest = realign_load (msq, lsq, realignment_token)
+                indx = indx + 1;
+                msq = lsq;
+                }   */
+
+             /* If the misalignment remains the same throughout the execution of the
+                loop, we can create the init_addr and permutation mask at the loop
+                preheader.  Otherwise, it needs to be created inside the loop.
+                This can only occur when vectorizing memory accesses in the inner-loop
+                nested within an outer-loop that is being vectorized.  */
+
+       if (nested_in_vect_loop
+           && (TREE_INT_CST_LOW (DR_STEP (dr))
+               % GET_MODE_SIZE (TYPE_MODE (vectype)) != 0))
+         {
+           gcc_assert (alignment_support_scheme != dr_explicit_realign_optimized);
+           compute_in_loop = true;
+         }
 
-  prev_stmt_info = NULL;
-  for (j = 0; j < ncopies; j++)
-    {
-      /* 1. Create the vector or array pointer update chain.  */
-      if (j == 0)
+      if ((alignment_support_scheme == dr_explicit_realign_optimized
+          || alignment_support_scheme == dr_explicit_realign)
+         && !compute_in_loop)
        {
-         bool simd_lane_access_p
-           = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info);
-         if (simd_lane_access_p
-             && TREE_CODE (DR_BASE_ADDRESS (first_dr)) == ADDR_EXPR
-             && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr), 0))
-             && integer_zerop (DR_OFFSET (first_dr))
-             && integer_zerop (DR_INIT (first_dr))
-             && alias_sets_conflict_p (get_alias_set (aggr_type),
-                                       get_alias_set (DR_REF (first_dr)))
-             && (alignment_support_scheme == dr_aligned
-                 || alignment_support_scheme == dr_unaligned_supported))
+         msq = vect_setup_realignment (first_stmt, gsi, &realignment_token,
+                                       alignment_support_scheme, NULL_TREE,
+                                       &at_loop);
+         if (alignment_support_scheme == dr_explicit_realign_optimized)
            {
-             dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr));
-             dataref_offset = build_int_cst (reference_alias_ptr_type
-                                             (DR_REF (first_dr)), 0);
-             inv_p = false;
+             phi = as_a <gphi *> (SSA_NAME_DEF_STMT (msq));
+             byte_offset = size_binop (MINUS_EXPR, TYPE_SIZE_UNIT (vectype),
+                                       size_one_node);
            }
-         else
-           dataref_ptr
-             = vect_create_data_ref_ptr (first_stmt, aggr_type, at_loop,
-                                         offset, &dummy, gsi, &ptr_incr,
-                                         simd_lane_access_p, &inv_p,
-                                         byte_offset);
        }
-      else if (dataref_offset)
-       dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset,
-                                         TYPE_SIZE_UNIT (aggr_type));
       else
-        dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
-                                      TYPE_SIZE_UNIT (aggr_type));
+       at_loop = loop;
 
-      if (grouped_load || slp_perm)
-       dr_chain.create (vec_num);
+      if (negative)
+       offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
 
       if (load_lanes_p)
-       {
-         tree vec_array;
-
-         vec_array = create_vector_array (vectype, vec_num);
-
-         /* Emit:
-              VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]).  */
-         data_ref = create_array_ref (aggr_type, dataref_ptr, first_dr);
-         new_stmt = gimple_build_call_internal (IFN_LOAD_LANES, 1, data_ref);
-         gimple_call_set_lhs (new_stmt, vec_array);
-         vect_finish_stmt_generation (stmt, new_stmt, gsi);
+       aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
+      else
+       aggr_type = vectype;
 
-         /* Extract each vector into an SSA_NAME.  */
-         for (i = 0; i < vec_num; i++)
+      prev_stmt_info = NULL;
+      for (j = 0; j < ncopies; j++)
+       {
+         /* 1. Create the vector or array pointer update chain.  */
+         if (j == 0)
            {
-             new_temp = read_vector_array (stmt, gsi, scalar_dest,
-                                           vec_array, i);
-             dr_chain.quick_push (new_temp);
+             bool simd_lane_access_p
+               = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info);
+             if (simd_lane_access_p
+                 && TREE_CODE (DR_BASE_ADDRESS (first_dr)) == ADDR_EXPR
+                 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr), 0))
+                 && integer_zerop (DR_OFFSET (first_dr))
+                 && integer_zerop (DR_INIT (first_dr))
+                 && alias_sets_conflict_p (get_alias_set (aggr_type),
+                                           get_alias_set (DR_REF (first_dr)))
+                 && (alignment_support_scheme == dr_aligned
+                     || alignment_support_scheme == dr_unaligned_supported))
+               {
+                 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr));
+                 dataref_offset = build_int_cst (reference_alias_ptr_type
+                                                 (DR_REF (first_dr)), 0);
+                 inv_p = false;
+               }
+             else
+               dataref_ptr
+                 = vect_create_data_ref_ptr (first_stmt, aggr_type, at_loop,
+                                             offset, &dummy, gsi, &ptr_incr,
+                                             simd_lane_access_p, &inv_p,
+                                             byte_offset);
            }
+         else if (dataref_offset)
+           dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset,
+                                             TYPE_SIZE_UNIT (aggr_type));
+         else
+           dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
+                                          TYPE_SIZE_UNIT (aggr_type));
 
-         /* Record the mapping between SSA_NAMEs and statements.  */
-         vect_record_grouped_load_vectors (stmt, dr_chain);
-       }
-      else
-       {
-         for (i = 0; i < vec_num; i++)
+         if (grouped_load || slp_perm)
+           dr_chain.create (vec_num);
+
+         if (load_lanes_p)
            {
-             if (i > 0)
-               dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
-                                              stmt, NULL_TREE);
+             tree vec_array;
 
-             /* 2. Create the vector-load in the loop.  */
-             switch (alignment_support_scheme)
-               {
-               case dr_aligned:
-               case dr_unaligned_supported:
-                 {
-                   unsigned int align, misalign;
-
-                   data_ref
-                     = fold_build2 (MEM_REF, vectype, dataref_ptr,
-                                    dataref_offset
-                                    ? dataref_offset
-                                    : build_int_cst (reference_alias_ptr_type
-                                                     (DR_REF (first_dr)), 0));
-                   align = TYPE_ALIGN_UNIT (vectype);
-                   if (alignment_support_scheme == dr_aligned)
-                     {
-                       gcc_assert (aligned_access_p (first_dr));
-                       misalign = 0;
-                     }
-                   else if (DR_MISALIGNMENT (first_dr) == -1)
-                     {
-                       if (DR_VECT_AUX (first_dr)->base_element_aligned)
-                         align = TYPE_ALIGN_UNIT (elem_type);
-                       else
-                         align = (get_object_alignment (DR_REF (first_dr))
-                                  / BITS_PER_UNIT);
-                       misalign = 0;
-                       TREE_TYPE (data_ref)
-                         = build_aligned_type (TREE_TYPE (data_ref),
-                                               align * BITS_PER_UNIT);
-                     }
-                   else
-                     {
-                       TREE_TYPE (data_ref)
-                         = build_aligned_type (TREE_TYPE (data_ref),
-                                               TYPE_ALIGN (elem_type));
-                       misalign = DR_MISALIGNMENT (first_dr);
-                     }
-                   if (dataref_offset == NULL_TREE
-                       && TREE_CODE (dataref_ptr) == SSA_NAME)
-                     set_ptr_info_alignment (get_ptr_info (dataref_ptr),
-                                             align, misalign);
-                   break;
-                 }
-               case dr_explicit_realign:
-                 {
-                   tree ptr, bump;
-
-                   tree vs = size_int (TYPE_VECTOR_SUBPARTS (vectype));
-
-                   if (compute_in_loop)
-                     msq = vect_setup_realignment (first_stmt, gsi,
-                                                   &realignment_token,
-                                                   dr_explicit_realign,
-                                                   dataref_ptr, NULL);
-
-                   if (TREE_CODE (dataref_ptr) == SSA_NAME)
-                     ptr = copy_ssa_name (dataref_ptr);
-                   else
-                     ptr = make_ssa_name (TREE_TYPE (dataref_ptr));
-                   new_stmt = gimple_build_assign
-                                (ptr, BIT_AND_EXPR, dataref_ptr,
-                                 build_int_cst
-                                 (TREE_TYPE (dataref_ptr),
-                                  -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
-                   vect_finish_stmt_generation (stmt, new_stmt, gsi);
-                   data_ref
-                     = build2 (MEM_REF, vectype, ptr,
-                               build_int_cst (reference_alias_ptr_type
-                                                (DR_REF (first_dr)), 0));
-                   vec_dest = vect_create_destination_var (scalar_dest,
-                                                           vectype);
-                   new_stmt = gimple_build_assign (vec_dest, data_ref);
-                   new_temp = make_ssa_name (vec_dest, new_stmt);
-                   gimple_assign_set_lhs (new_stmt, new_temp);
-                   gimple_set_vdef (new_stmt, gimple_vdef (stmt));
-                   gimple_set_vuse (new_stmt, gimple_vuse (stmt));
-                   vect_finish_stmt_generation (stmt, new_stmt, gsi);
-                   msq = new_temp;
-
-                   bump = size_binop (MULT_EXPR, vs,
-                                      TYPE_SIZE_UNIT (elem_type));
-                   bump = size_binop (MINUS_EXPR, bump, size_one_node);
-                   ptr = bump_vector_ptr (dataref_ptr, NULL, gsi, stmt, bump);
-                   new_stmt = gimple_build_assign
-                                (NULL_TREE, BIT_AND_EXPR, ptr,
-                                 build_int_cst
-                                 (TREE_TYPE (ptr),
-                                  -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
-                   ptr = copy_ssa_name (ptr, new_stmt);
-                   gimple_assign_set_lhs (new_stmt, ptr);
-                   vect_finish_stmt_generation (stmt, new_stmt, gsi);
-                   data_ref
-                     = build2 (MEM_REF, vectype, ptr,
-                               build_int_cst (reference_alias_ptr_type
-                                                (DR_REF (first_dr)), 0));
-                   break;
-                 }
-               case dr_explicit_realign_optimized:
-                 if (TREE_CODE (dataref_ptr) == SSA_NAME)
-                   new_temp = copy_ssa_name (dataref_ptr);
-                 else
-                   new_temp = make_ssa_name (TREE_TYPE (dataref_ptr));
-                 new_stmt = gimple_build_assign
-                              (new_temp, BIT_AND_EXPR, dataref_ptr,
-                               build_int_cst
-                                 (TREE_TYPE (dataref_ptr),
-                                  -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
-                 vect_finish_stmt_generation (stmt, new_stmt, gsi);
-                 data_ref
-                   = build2 (MEM_REF, vectype, new_temp,
-                             build_int_cst (reference_alias_ptr_type
-                                              (DR_REF (first_dr)), 0));
-                 break;
-               default:
-                 gcc_unreachable ();
-               }
-             vec_dest = vect_create_destination_var (scalar_dest, vectype);
-             new_stmt = gimple_build_assign (vec_dest, data_ref);
-             new_temp = make_ssa_name (vec_dest, new_stmt);
-             gimple_assign_set_lhs (new_stmt, new_temp);
+             vec_array = create_vector_array (vectype, vec_num);
+
+             /* Emit:
+                VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]).  */
+             data_ref = create_array_ref (aggr_type, dataref_ptr, first_dr);
+             new_stmt = gimple_build_call_internal (IFN_LOAD_LANES, 1, data_ref);
+             gimple_call_set_lhs (new_stmt, vec_array);
              vect_finish_stmt_generation (stmt, new_stmt, gsi);
 
-             /* 3. Handle explicit realignment if necessary/supported.
-                Create in loop:
-                  vec_dest = realign_load (msq, lsq, realignment_token)  */
-             if (alignment_support_scheme == dr_explicit_realign_optimized
-                 || alignment_support_scheme == dr_explicit_realign)
+             /* Extract each vector into an SSA_NAME.  */
+             for (i = 0; i < vec_num; i++)
                {
-                 lsq = gimple_assign_lhs (new_stmt);
-                 if (!realignment_token)
-                   realignment_token = dataref_ptr;
-                 vec_dest = vect_create_destination_var (scalar_dest, vectype);
-                 new_stmt = gimple_build_assign (vec_dest, REALIGN_LOAD_EXPR,
-                                                 msq, lsq, realignment_token);
-                 new_temp = make_ssa_name (vec_dest, new_stmt);
-                 gimple_assign_set_lhs (new_stmt, new_temp);
-                 vect_finish_stmt_generation (stmt, new_stmt, gsi);
-
-                 if (alignment_support_scheme == dr_explicit_realign_optimized)
-                   {
-                     gcc_assert (phi);
-                     if (i == vec_num - 1 && j == ncopies - 1)
-                       add_phi_arg (phi, lsq,
-                                    loop_latch_edge (containing_loop),
-                                    UNKNOWN_LOCATION);
-                     msq = lsq;
-                   }
+                 new_temp = read_vector_array (stmt, gsi, scalar_dest,
+                                               vec_array, i);
+                 dr_chain.quick_push (new_temp);
                }
 
-             /* 4. Handle invariant-load.  */
-             if (inv_p && !bb_vinfo)
+             /* Record the mapping between SSA_NAMEs and statements.  */
+             vect_record_grouped_load_vectors (stmt, dr_chain);
+           }
+         else
+           {
+             for (i = 0; i < vec_num; i++)
                {
-                 gcc_assert (!grouped_load);
-                 /* If we have versioned for aliasing or the loop doesn't
-                    have any data dependencies that would preclude this,
-                    then we are sure this is a loop invariant load and
-                    thus we can insert it on the preheader edge.  */
-                 if (LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo)
-                     && !nested_in_vect_loop
-                     && hoist_defs_of_uses (stmt, loop))
+                 if (i > 0)
+                   dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
+                                                  stmt, NULL_TREE);
+
+                 /* 2. Create the vector-load in the loop.  */
+                 switch (alignment_support_scheme)
                    {
-                     if (dump_enabled_p ())
+                   case dr_aligned:
+                   case dr_unaligned_supported:
                        {
-                         dump_printf_loc (MSG_NOTE, vect_location,
-                                          "hoisting out of the vectorized "
-                                          "loop: ");
-                         dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
+                         unsigned int align, misalign;
+
+                         data_ref
+                           = fold_build2 (MEM_REF, vectype, dataref_ptr,
+                                          dataref_offset
+                                          ? dataref_offset
+                           : build_int_cst (reference_alias_ptr_type
+                                                           (DR_REF (first_dr)), 0));
+                         align = TYPE_ALIGN_UNIT (vectype);
+                         if (alignment_support_scheme == dr_aligned)
+                           {
+                             gcc_assert (aligned_access_p (first_dr));
+                             misalign = 0;
+                           }
+                         else if (DR_MISALIGNMENT (first_dr) == -1)
+                           {
+                             if (DR_VECT_AUX (first_dr)->base_element_aligned)
+                               align = TYPE_ALIGN_UNIT (elem_type);
+                             else
+                               align = (get_object_alignment (DR_REF (first_dr))
+                                        / BITS_PER_UNIT);
+                             misalign = 0;
+                             TREE_TYPE (data_ref)
+                               = build_aligned_type (TREE_TYPE (data_ref),
+                                                     align * BITS_PER_UNIT);
+                           }
+                         else
+                           {
+                             TREE_TYPE (data_ref)
+                               = build_aligned_type (TREE_TYPE (data_ref),
+                                                     TYPE_ALIGN (elem_type));
+                             misalign = DR_MISALIGNMENT (first_dr);
+                           }
+                         if (dataref_offset == NULL_TREE
+                             && TREE_CODE (dataref_ptr) == SSA_NAME)
+                           set_ptr_info_alignment (get_ptr_info (dataref_ptr),
+                                                   align, misalign);
+                         break;
                        }
-                     tree tem = copy_ssa_name (scalar_dest);
-                     gsi_insert_on_edge_immediate
-                       (loop_preheader_edge (loop),
-                        gimple_build_assign (tem,
-                                             unshare_expr
-                                               (gimple_assign_rhs1 (stmt))));
-                     new_temp = vect_init_vector (stmt, tem, vectype, NULL);
-                   }
-                 else
-                   {
-                     gimple_stmt_iterator gsi2 = *gsi;
-                     gsi_next (&gsi2);
-                     new_temp = vect_init_vector (stmt, scalar_dest,
-                                                  vectype, &gsi2);
-                   }
-                 new_stmt = SSA_NAME_DEF_STMT (new_temp);
-                 set_vinfo_for_stmt (new_stmt,
-                                     new_stmt_vec_info (new_stmt, loop_vinfo,
-                                                        bb_vinfo));
-               }
+                   case dr_explicit_realign:
+                       {
+                         tree ptr, bump;
 
-             if (negative)
-               {
-                 tree perm_mask = perm_mask_for_reverse (vectype);
-                 new_temp = permute_vec_elements (new_temp, new_temp,
-                                                  perm_mask, stmt, gsi);
-                 new_stmt = SSA_NAME_DEF_STMT (new_temp);
-               }
+                         tree vs = size_int (TYPE_VECTOR_SUBPARTS (vectype));
 
-             /* Collect vector loads and later create their permutation in
-                vect_transform_grouped_load ().  */
-             if (grouped_load || slp_perm)
-               dr_chain.quick_push (new_temp);
+                         if (compute_in_loop)
+                           msq = vect_setup_realignment (first_stmt, gsi,
+                                                         &realignment_token,
+                                                         dr_explicit_realign,
+                                                         dataref_ptr, NULL);
 
-             /* Store vector loads in the corresponding SLP_NODE.  */
-             if (slp && !slp_perm)
-               SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
-           }
-         /* Bump the vector pointer to account for a gap or for excess
-            elements loaded for a permuted SLP load.  */
-         if (group_gap_adj != 0)
-           {
-             bool ovf;
-             tree bump
-               = wide_int_to_tree (sizetype,
-                                   wi::smul (TYPE_SIZE_UNIT (elem_type),
-                                             group_gap_adj, &ovf));
-             dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
-                                            stmt, bump);
+                         if (TREE_CODE (dataref_ptr) == SSA_NAME)
+                           ptr = copy_ssa_name (dataref_ptr);
+                         else
+                           ptr = make_ssa_name (TREE_TYPE (dataref_ptr));
+                         new_stmt = gimple_build_assign
+                           (ptr, BIT_AND_EXPR, dataref_ptr,
+                            build_int_cst
+                            (TREE_TYPE (dataref_ptr),
+                             -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
+                         vect_finish_stmt_generation (stmt, new_stmt, gsi);
+                         data_ref
+                           = build2 (MEM_REF, vectype, ptr,
+                                     build_int_cst (reference_alias_ptr_type
+                                                    (DR_REF (first_dr)), 0));
+                         vec_dest = vect_create_destination_var (scalar_dest,
+                                                                 vectype);
+                         new_stmt = gimple_build_assign (vec_dest, data_ref);
+                         new_temp = make_ssa_name (vec_dest, new_stmt);
+                         gimple_assign_set_lhs (new_stmt, new_temp);
+                         gimple_set_vdef (new_stmt, gimple_vdef (stmt));
+                         gimple_set_vuse (new_stmt, gimple_vuse (stmt));
+                         vect_finish_stmt_generation (stmt, new_stmt, gsi);
+                         msq = new_temp;
+
+                         bump = size_binop (MULT_EXPR, vs,
+                                            TYPE_SIZE_UNIT (elem_type));
+                         bump = size_binop (MINUS_EXPR, bump, size_one_node);
+                         ptr = bump_vector_ptr (dataref_ptr, NULL, gsi, stmt, bump);
+                         new_stmt = gimple_build_assign
+                           (NULL_TREE, BIT_AND_EXPR, ptr,
+                            build_int_cst
+                            (TREE_TYPE (ptr),
+                             -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
+                         ptr = copy_ssa_name (ptr, new_stmt);
+                         gimple_assign_set_lhs (new_stmt, ptr);
+                         vect_finish_stmt_generation (stmt, new_stmt, gsi);
+                         data_ref
+                           = build2 (MEM_REF, vectype, ptr,
+                                     build_int_cst (reference_alias_ptr_type
+                                                    (DR_REF (first_dr)), 0));
+                         break;
+                       }
+                   case dr_explicit_realign_optimized:
+                     if (TREE_CODE (dataref_ptr) == SSA_NAME)
+                       new_temp = copy_ssa_name (dataref_ptr);
+                     else
+                       new_temp = make_ssa_name (TREE_TYPE (dataref_ptr));
+                     new_stmt = gimple_build_assign
+                       (new_temp, BIT_AND_EXPR, dataref_ptr,
+                        build_int_cst
+                        (TREE_TYPE (dataref_ptr),
+                         -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
+                     vect_finish_stmt_generation (stmt, new_stmt, gsi);
+                     data_ref
+                       = build2 (MEM_REF, vectype, new_temp,
+                                 build_int_cst (reference_alias_ptr_type
+                                                (DR_REF (first_dr)), 0));
+                     break;
+                   default:
+                     gcc_unreachable ();
+                   }
+                 vec_dest = vect_create_destination_var (scalar_dest, vectype);
+                 new_stmt = gimple_build_assign (vec_dest, data_ref);
+                 new_temp = make_ssa_name (vec_dest, new_stmt);
+                 gimple_assign_set_lhs (new_stmt, new_temp);
+                 vect_finish_stmt_generation (stmt, new_stmt, gsi);
+
+                 /* 3. Handle explicit realignment if necessary/supported.
+                    Create in loop:
+                    vec_dest = realign_load (msq, lsq, realignment_token)  */
+                         if (alignment_support_scheme == dr_explicit_realign_optimized
+                             || alignment_support_scheme == dr_explicit_realign)
+                           {
+                             lsq = gimple_assign_lhs (new_stmt);
+                             if (!realignment_token)
+                               realignment_token = dataref_ptr;
+                             vec_dest = vect_create_destination_var (scalar_dest, vectype);
+                             new_stmt = gimple_build_assign (vec_dest, REALIGN_LOAD_EXPR,
+                                                             msq, lsq, realignment_token);
+                             new_temp = make_ssa_name (vec_dest, new_stmt);
+                             gimple_assign_set_lhs (new_stmt, new_temp);
+                             vect_finish_stmt_generation (stmt, new_stmt, gsi);
+
+                             if (alignment_support_scheme == dr_explicit_realign_optimized)
+                               {
+                                 gcc_assert (phi);
+                                 if (i == vec_num - 1 && j == ncopies - 1)
+                                   add_phi_arg (phi, lsq,
+                                                loop_latch_edge (containing_loop),
+                                                UNKNOWN_LOCATION);
+                                 msq = lsq;
+                               }
+                           }
+
+                         /* 4. Handle invariant-load.  */
+                         if (inv_p && !bb_vinfo)
+                           {
+                             gcc_assert (!grouped_load);
+                             /* If we have versioned for aliasing or the loop doesn't
+                                have any data dependencies that would preclude this,
+                                then we are sure this is a loop invariant load and
+                                thus we can insert it on the preheader edge.  */
+                             if (LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo)
+                                 && !nested_in_vect_loop
+                                 && hoist_defs_of_uses (stmt, loop))
+                               {
+                                 if (dump_enabled_p ())
+                                   {
+                                     dump_printf_loc (MSG_NOTE, vect_location,
+                                                      "hoisting out of the vectorized "
+                                                      "loop: ");
+                                     dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
+                                   }
+                                 tree tem = copy_ssa_name (scalar_dest);
+                                 gsi_insert_on_edge_immediate
+                                   (loop_preheader_edge (loop),
+                                    gimple_build_assign (tem,
+                                                         unshare_expr
+                                                         (gimple_assign_rhs1 
(stmt))));
+                                 new_temp = vect_init_vector (stmt, tem, 
vectype, NULL);
+                               }
+                             else
+                               {
+                                 gimple_stmt_iterator gsi2 = *gsi;
+                                 gsi_next (&gsi2);
+                                 new_temp = vect_init_vector (stmt, scalar_dest,
+                                                              vectype, &gsi2);
+                               }
+                             new_stmt = SSA_NAME_DEF_STMT (new_temp);
+                             set_vinfo_for_stmt (new_stmt,
+                                                 new_stmt_vec_info (new_stmt, loop_vinfo,
+                                                                    bb_vinfo));
+                           }
+
+                         if (negative)
+                           {
+                             tree perm_mask = perm_mask_for_reverse (vectype);
+                             new_temp = permute_vec_elements (new_temp, new_temp,
+                                                              perm_mask, stmt, gsi);
+                             new_stmt = SSA_NAME_DEF_STMT (new_temp);
+                           }
+
+                         /* Collect vector loads and later create their permutation in
+                            vect_transform_grouped_load ().  */
+                         if (grouped_load || slp_perm)
+                           dr_chain.quick_push (new_temp);
+
+                         /* Store vector loads in the corresponding SLP_NODE.  */
+                         if (slp && !slp_perm)
+                           SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
+               }
+             /* Bump the vector pointer to account for a gap or for excess
+                elements loaded for a permuted SLP load.  */
+                             if (group_gap_adj != 0)
+                               {
+                                 bool ovf;
+                                 tree bump
+                                   = wide_int_to_tree (sizetype,
+                                                       wi::smul (TYPE_SIZE_UNIT (elem_type),
+                                                                 group_gap_adj, &ovf));
+                                 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
+                                                                stmt, bump);
+                               }
            }
-       }
 
-      if (slp && !slp_perm)
-       continue;
+         if (slp && !slp_perm)
+           continue;
 
-      if (slp_perm)
-        {
-          if (!vect_transform_slp_perm_load (slp_node, dr_chain, gsi, vf,
-                                             slp_node_instance, false))
-            {
-              dr_chain.release ();
-              return false;
-            }
-        }
-      else
-        {
-          if (grouped_load)
-           {
-             if (!load_lanes_p)
-               vect_transform_grouped_load (stmt, dr_chain, group_size, gsi);
-             *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
+         if (slp_perm)
+           {
+             if (!vect_transform_slp_perm_load (slp_node, dr_chain, gsi, vf,
+                                                slp_node_instance, false))
+               {
+                 dr_chain.release ();
+                 return false;
+               }
            }
-          else
+         else
            {
-             if (j == 0)
-               STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
+             if (grouped_load)
+               {
+                 if (!load_lanes_p)
+                   vect_transform_grouped_load (stmt, dr_chain, group_size, gsi);
+                 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
+               }
              else
-               STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
-             prev_stmt_info = vinfo_for_stmt (new_stmt);
+               {
+                 if (j == 0)
+                   STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
+                 else
+                   STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
+                 prev_stmt_info = vinfo_for_stmt (new_stmt);
+               }
            }
-        }
-      dr_chain.release ();
-    }
+         dr_chain.release ();
+       }
 
-  return true;
+      return true;
 }
 
 /* Function vect_is_simple_cond.

