The following removes the non-SLP store interleaving support, which
was already almost unused.

Bootstrapped on x86_64-unknown-linux-gnu, testing in progress.

Richard.

        * tree-vectorizer.h (vect_grouped_store_supported): Remove.
        (vect_permute_store_chain): Likewise.
        * tree-vect-data-refs.cc (vect_grouped_store_supported): Remove.
        (vect_permute_store_chain): Likewise.
        * tree-vect-stmts.cc (vectorizable_store): Remove comment
        about store interleaving.
        * tree-vect-loop.cc (vect_analyze_loop_2): Do not consider
        store interleaving when disregarding single-lane SLP.
---
 gcc/tree-vect-data-refs.cc | 318 -------------------------------------
 gcc/tree-vect-loop.cc      |   3 +-
 gcc/tree-vect-stmts.cc     |  33 ----
 gcc/tree-vectorizer.h      |   4 -
 4 files changed, 1 insertion(+), 357 deletions(-)

diff --git a/gcc/tree-vect-data-refs.cc b/gcc/tree-vect-data-refs.cc
index b38eecd7901..27be3202fec 100644
--- a/gcc/tree-vect-data-refs.cc
+++ b/gcc/tree-vect-data-refs.cc
@@ -5954,126 +5954,6 @@ vect_create_destination_var (tree scalar_dest, tree vectype)
   return vec_dest;
 }
 
-/* Function vect_grouped_store_supported.
-
-   Returns TRUE if interleave high and interleave low permutations
-   are supported, and FALSE otherwise.  */
-
-bool
-vect_grouped_store_supported (tree vectype, unsigned HOST_WIDE_INT count)
-{
-  machine_mode mode = TYPE_MODE (vectype);
-
-  /* vect_permute_store_chain requires the group size to be equal to 3 or
-     be a power of two.  */
-  if (count != 3 && exact_log2 (count) == -1)
-    {
-      if (dump_enabled_p ())
-       dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
-                        "the size of the group of accesses"
-                        " is not a power of 2 or not eqaul to 3\n");
-      return false;
-    }
-
-  /* Check that the permutation is supported.  */
-  if (VECTOR_MODE_P (mode))
-    {
-      unsigned int i;
-      if (count == 3)
-       {
-         unsigned int j0 = 0, j1 = 0, j2 = 0;
-         unsigned int i, j;
-
-         unsigned int nelt;
-         if (!GET_MODE_NUNITS (mode).is_constant (&nelt))
-           {
-             if (dump_enabled_p ())
-               dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
-                                "cannot handle groups of 3 stores for"
-                                " variable-length vectors\n");
-             return false;
-           }
-
-         vec_perm_builder sel (nelt, nelt, 1);
-         sel.quick_grow (nelt);
-         vec_perm_indices indices;
-         for (j = 0; j < 3; j++)
-           {
-             int nelt0 = ((3 - j) * nelt) % 3;
-             int nelt1 = ((3 - j) * nelt + 1) % 3;
-             int nelt2 = ((3 - j) * nelt + 2) % 3;
-             for (i = 0; i < nelt; i++)
-               {
-                 if (3 * i + nelt0 < nelt)
-                   sel[3 * i + nelt0] = j0++;
-                 if (3 * i + nelt1 < nelt)
-                   sel[3 * i + nelt1] = nelt + j1++;
-                 if (3 * i + nelt2 < nelt)
-                   sel[3 * i + nelt2] = 0;
-               }
-             indices.new_vector (sel, 2, nelt);
-             if (!can_vec_perm_const_p (mode, mode, indices))
-               {
-                 if (dump_enabled_p ())
-                   dump_printf (MSG_MISSED_OPTIMIZATION,
-                                "permutation op not supported by target.\n");
-                 return false;
-               }
-
-             for (i = 0; i < nelt; i++)
-               {
-                 if (3 * i + nelt0 < nelt)
-                   sel[3 * i + nelt0] = 3 * i + nelt0;
-                 if (3 * i + nelt1 < nelt)
-                   sel[3 * i + nelt1] = 3 * i + nelt1;
-                 if (3 * i + nelt2 < nelt)
-                   sel[3 * i + nelt2] = nelt + j2++;
-               }
-             indices.new_vector (sel, 2, nelt);
-             if (!can_vec_perm_const_p (mode, mode, indices))
-               {
-                 if (dump_enabled_p ())
-                   dump_printf (MSG_MISSED_OPTIMIZATION,
-                                "permutation op not supported by target.\n");
-                 return false;
-               }
-           }
-         return true;
-       }
-      else
-       {
-         /* If length is not equal to 3 then only power of 2 is supported.  */
-         gcc_assert (pow2p_hwi (count));
-         poly_uint64 nelt = GET_MODE_NUNITS (mode);
-
-         /* The encoding has 2 interleaved stepped patterns.  */
-         if(!multiple_p (nelt, 2))
-           return false;
-         vec_perm_builder sel (nelt, 2, 3);
-         sel.quick_grow (6);
-         for (i = 0; i < 3; i++)
-           {
-             sel[i * 2] = i;
-             sel[i * 2 + 1] = i + nelt;
-           }
-         vec_perm_indices indices (sel, 2, nelt);
-         if (can_vec_perm_const_p (mode, mode, indices))
-           {
-             for (i = 0; i < 6; i++)
-               sel[i] += exact_div (nelt, 2);
-             indices.new_vector (sel, 2, nelt);
-             if (can_vec_perm_const_p (mode, mode, indices))
-               return true;
-           }
-       }
-    }
-
-  if (dump_enabled_p ())
-    dump_printf (MSG_MISSED_OPTIMIZATION,
-                "permutation op not supported by target.\n");
-  return false;
-}
-
 /* Return FN if vec_{mask_,mask_len_}store_lanes is available for COUNT vectors
    of type VECTYPE.  MASKED_P says whether the masked form is needed.  */
 
@@ -6102,204 +5982,6 @@ vect_store_lanes_supported (tree vectype, unsigned HOST_WIDE_INT count,
 }
 
 
-/* Function vect_permute_store_chain.
-
-   Given a chain of interleaved stores in DR_CHAIN of LENGTH that must be
-   a power of 2 or equal to 3, generate interleave_high/low stmts to reorder
-   the data correctly for the stores.  Return the final references for stores
-   in RESULT_CHAIN.
-
-   E.g., LENGTH is 4 and the scalar type is short, i.e., VF is 8.
-   The input is 4 vectors each containing 8 elements.  We assign a number to
-   each element, the input sequence is:
-
-   1st vec:   0  1  2  3  4  5  6  7
-   2nd vec:   8  9 10 11 12 13 14 15
-   3rd vec:  16 17 18 19 20 21 22 23
-   4th vec:  24 25 26 27 28 29 30 31
-
-   The output sequence should be:
-
-   1st vec:  0  8 16 24  1  9 17 25
-   2nd vec:  2 10 18 26  3 11 19 27
-   3rd vec:  4 12 20 28  5 13 21 30
-   4th vec:  6 14 22 30  7 15 23 31
-
-   i.e., we interleave the contents of the four vectors in their order.
-
-   We use interleave_high/low instructions to create such output.  The input of
-   each interleave_high/low operation is two vectors:
-   1st vec    2nd vec
-   0 1 2 3    4 5 6 7
-   the even elements of the result vector are obtained left-to-right from the
-   high/low elements of the first vector.  The odd elements of the result are
-   obtained left-to-right from the high/low elements of the second vector.
-   The output of interleave_high will be:   0 4 1 5
-   and of interleave_low:                   2 6 3 7
-
-
-   The permutation is done in log LENGTH stages.  In each stage interleave_high
-   and interleave_low stmts are created for each pair of vectors in DR_CHAIN,
-   where the first argument is taken from the first half of DR_CHAIN and the
-   second argument from it's second half.
-   In our example,
-
-   I1: interleave_high (1st vec, 3rd vec)
-   I2: interleave_low (1st vec, 3rd vec)
-   I3: interleave_high (2nd vec, 4th vec)
-   I4: interleave_low (2nd vec, 4th vec)
-
-   The output for the first stage is:
-
-   I1:  0 16  1 17  2 18  3 19
-   I2:  4 20  5 21  6 22  7 23
-   I3:  8 24  9 25 10 26 11 27
-   I4: 12 28 13 29 14 30 15 31
-
-   The output of the second stage, i.e. the final result is:
-
-   I1:  0  8 16 24  1  9 17 25
-   I2:  2 10 18 26  3 11 19 27
-   I3:  4 12 20 28  5 13 21 30
-   I4:  6 14 22 30  7 15 23 31.  */
-
-void
-vect_permute_store_chain (vec_info *vinfo, vec<tree> &dr_chain,
-                         unsigned int length,
-                         stmt_vec_info stmt_info,
-                         gimple_stmt_iterator *gsi,
-                         vec<tree> *result_chain)
-{
-  tree vect1, vect2, high, low;
-  gimple *perm_stmt;
-  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
-  tree perm_mask_low, perm_mask_high;
-  tree data_ref;
-  tree perm3_mask_low, perm3_mask_high;
-  unsigned int i, j, n, log_length = exact_log2 (length);
-
-  result_chain->quick_grow (length);
-  memcpy (result_chain->address (), dr_chain.address (),
-         length * sizeof (tree));
-
-  if (length == 3)
-    {
-      /* vect_grouped_store_supported ensures that this is constant.  */
-      unsigned int nelt = TYPE_VECTOR_SUBPARTS (vectype).to_constant ();
-      unsigned int j0 = 0, j1 = 0, j2 = 0;
-
-      vec_perm_builder sel (nelt, nelt, 1);
-      sel.quick_grow (nelt);
-      vec_perm_indices indices;
-      for (j = 0; j < 3; j++)
-        {
-         int nelt0 = ((3 - j) * nelt) % 3;
-         int nelt1 = ((3 - j) * nelt + 1) % 3;
-         int nelt2 = ((3 - j) * nelt + 2) % 3;
-
-         for (i = 0; i < nelt; i++)
-           {
-             if (3 * i + nelt0 < nelt)
-               sel[3 * i + nelt0] = j0++;
-             if (3 * i + nelt1 < nelt)
-               sel[3 * i + nelt1] = nelt + j1++;
-             if (3 * i + nelt2 < nelt)
-               sel[3 * i + nelt2] = 0;
-           }
-         indices.new_vector (sel, 2, nelt);
-         perm3_mask_low = vect_gen_perm_mask_checked (vectype, indices);
-
-         for (i = 0; i < nelt; i++)
-           {
-             if (3 * i + nelt0 < nelt)
-               sel[3 * i + nelt0] = 3 * i + nelt0;
-             if (3 * i + nelt1 < nelt)
-               sel[3 * i + nelt1] = 3 * i + nelt1;
-             if (3 * i + nelt2 < nelt)
-               sel[3 * i + nelt2] = nelt + j2++;
-           }
-         indices.new_vector (sel, 2, nelt);
-         perm3_mask_high = vect_gen_perm_mask_checked (vectype, indices);
-
-         vect1 = dr_chain[0];
-         vect2 = dr_chain[1];
-
-         /* Create interleaving stmt:
-            low = VEC_PERM_EXPR <vect1, vect2,
-                                 {j, nelt, *, j + 1, nelt + j + 1, *,
-                                  j + 2, nelt + j + 2, *, ...}>  */
-         data_ref = make_temp_ssa_name (vectype, NULL, "vect_shuffle3_low");
-         perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, vect1,
-                                          vect2, perm3_mask_low);
-         vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt, gsi);
-
-         vect1 = data_ref;
-         vect2 = dr_chain[2];
-         /* Create interleaving stmt:
-            low = VEC_PERM_EXPR <vect1, vect2,
-                                 {0, 1, nelt + j, 3, 4, nelt + j + 1,
-                                  6, 7, nelt + j + 2, ...}>  */
-         data_ref = make_temp_ssa_name (vectype, NULL, "vect_shuffle3_high");
-         perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, vect1,
-                                          vect2, perm3_mask_high);
-         vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt, gsi);
-         (*result_chain)[j] = data_ref;
-       }
-    }
-  else
-    {
-      /* If length is not equal to 3 then only power of 2 is supported.  */
-      gcc_assert (pow2p_hwi (length));
-
-      /* The encoding has 2 interleaved stepped patterns.  */
-      poly_uint64 nelt = TYPE_VECTOR_SUBPARTS (vectype);
-      vec_perm_builder sel (nelt, 2, 3);
-      sel.quick_grow (6);
-      for (i = 0; i < 3; i++)
-       {
-         sel[i * 2] = i;
-         sel[i * 2 + 1] = i + nelt;
-       }
-       vec_perm_indices indices (sel, 2, nelt);
-       perm_mask_high = vect_gen_perm_mask_checked (vectype, indices);
-
-       for (i = 0; i < 6; i++)
-         sel[i] += exact_div (nelt, 2);
-       indices.new_vector (sel, 2, nelt);
-       perm_mask_low = vect_gen_perm_mask_checked (vectype, indices);
-
-       for (i = 0, n = log_length; i < n; i++)
-         {
-           for (j = 0; j < length/2; j++)
-             {
-               vect1 = dr_chain[j];
-               vect2 = dr_chain[j+length/2];
-
-               /* Create interleaving stmt:
-                  high = VEC_PERM_EXPR <vect1, vect2, {0, nelt, 1, nelt+1,
-                                                       ...}>  */
-               high = make_temp_ssa_name (vectype, NULL, "vect_inter_high");
-               perm_stmt = gimple_build_assign (high, VEC_PERM_EXPR, vect1,
-                                                vect2, perm_mask_high);
-               vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt, gsi);
-               (*result_chain)[2*j] = high;
-
-               /* Create interleaving stmt:
-                  low = VEC_PERM_EXPR <vect1, vect2,
-                                       {nelt/2, nelt*3/2, nelt/2+1, nelt*3/2+1,
-                                        ...}>  */
-               low = make_temp_ssa_name (vectype, NULL, "vect_inter_low");
-               perm_stmt = gimple_build_assign (low, VEC_PERM_EXPR, vect1,
-                                                vect2, perm_mask_low);
-               vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt, gsi);
-               (*result_chain)[2*j+1] = low;
-             }
-           memcpy (dr_chain.address (), result_chain->address (),
-                   length * sizeof (tree));
-         }
-    }
-}
-
 /* Function vect_setup_realignment
 
    This function is called when vectorizing an unaligned load using
diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc
index cb315e6bbf9..cbbe613930c 100644
--- a/gcc/tree-vect-loop.cc
+++ b/gcc/tree-vect-loop.cc
@@ -2883,8 +2883,7 @@ again:
       unsigned int size = DR_GROUP_SIZE (vinfo);
       tree vectype = STMT_VINFO_VECTYPE (vinfo);
       if (vect_store_lanes_supported (vectype, size, false) == IFN_LAST
-        && ! known_eq (TYPE_VECTOR_SUBPARTS (vectype), 1U)
-        && ! vect_grouped_store_supported (vectype, size))
+        && ! known_eq (TYPE_VECTOR_SUBPARTS (vectype), 1U))
        return opt_result::failure_at (vinfo->stmt,
                                       "unsupported grouped store\n");
       FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (instance), j, node)
diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
index 3b8b98978d3..7a115dbcfcb 100644
--- a/gcc/tree-vect-stmts.cc
+++ b/gcc/tree-vect-stmts.cc
@@ -8413,39 +8413,6 @@ vectorizable_store (vec_info *vinfo,
      more than one vector stmt - i.e - we need to "unroll" the
      vector stmt by a factor VF/nunits.  */
 
-  /* In case of interleaving (non-unit grouped access):
-
-        S1:  &base + 2 = x2
-        S2:  &base = x0
-        S3:  &base + 1 = x1
-        S4:  &base + 3 = x3
-
-     We create vectorized stores starting from base address (the access of the
-     first stmt in the chain (S2 in the above example), when the last store stmt
-     of the chain (S4) is reached:
-
-        VS1: &base = vx2
-       VS2: &base + vec_size*1 = vx0
-       VS3: &base + vec_size*2 = vx1
-       VS4: &base + vec_size*3 = vx3
-
-     Then permutation statements are generated:
-
-       VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
-       VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
-       ...
-
-     And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
-     (the order of the data-refs in the output of vect_permute_store_chain
-     corresponds to the order of scalar stmts in the interleaving chain - see
-     the documentation of vect_permute_store_chain()).
-
-     In case of both multiple types and interleaving, above vector stores and
-     permutation stmts are created for every copy.  The result vector stmts are
-     put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
-     STMT_VINFO_RELATED_STMT for the next copies.
-  */
-
   auto_vec<tree> dr_chain (group_size);
   auto_vec<tree> vec_masks;
   tree vec_mask = NULL;
diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
index 082e27c04d4..c58b9c2328b 100644
--- a/gcc/tree-vectorizer.h
+++ b/gcc/tree-vectorizer.h
@@ -2567,13 +2567,9 @@ extern tree bump_vector_ptr (vec_info *, tree, gimple *, gimple_stmt_iterator *,
                             stmt_vec_info, tree);
 extern void vect_copy_ref_info (tree, tree);
 extern tree vect_create_destination_var (tree, tree);
-extern bool vect_grouped_store_supported (tree, unsigned HOST_WIDE_INT);
 extern internal_fn vect_store_lanes_supported (tree, unsigned HOST_WIDE_INT, bool);
 extern internal_fn vect_load_lanes_supported (tree, unsigned HOST_WIDE_INT,
                                              bool, vec<int> * = nullptr);
-extern void vect_permute_store_chain (vec_info *, vec<tree> &,
-                                     unsigned int, stmt_vec_info,
-                                     gimple_stmt_iterator *, vec<tree> *);
 extern tree vect_setup_realignment (vec_info *,
                                    stmt_vec_info, gimple_stmt_iterator *,
                                    tree *, enum dr_alignment_support, tree,
-- 
2.43.0

Reply via email to