The following implements the simple case of single-lane SLP when
using partial vectors, which can use the VEC_EXTRACT_LAST code
generation without changes.  I'll keep the PR open for further
enhancements.

This avoids FAILs of gcc.target/aarch64/sve/live_1.c when using
single-lane SLP for non-grouped stores.
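
For reference, the kind of loop live_1.c covers has a scalar value
defined in the loop that is only used after it, roughly like the
reduced sketch below (not the actual testcase):

  int __attribute__ ((noipa))
  last_elem (int *x, int n)
  {
    int last = 0;
    for (int i = 0; i < n; ++i)
      {
        x[i] += 1;
        last = x[i];   /* live outside the loop */
      }
    return last;
  }

With partial vectors the final value of 'last' has to be taken from the
last active lane, which is what the EXTRACT_LAST / loop-len paths below
now also handle for single-lane SLP.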

        PR tree-optimization/116609
        * tree-vect-loop.cc (vectorizable_live_operation_1): Support
        partial vectors for single-lane SLP.
        (vectorizable_live_operation): Likewise.
---
 gcc/tree-vect-loop.cc | 18 +++++++++++-------
 1 file changed, 11 insertions(+), 7 deletions(-)

diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc
index 242d5e2d916..31cdc4bf53d 100644
--- a/gcc/tree-vect-loop.cc
+++ b/gcc/tree-vect-loop.cc
@@ -10961,7 +10961,8 @@ vectorizable_live_operation_1 (loop_vec_info loop_vinfo,
 
         where VEC_LHS is the vectorized live-out result and MASK is
         the loop mask for the final iteration.  */
-      gcc_assert (ncopies == 1 && !slp_node);
+      gcc_assert (ncopies == 1
+                 && (!slp_node || SLP_TREE_LANES (slp_node) == 1));
       gimple_seq tem = NULL;
       gimple_stmt_iterator gsi = gsi_last (tem);
       tree len = vect_get_loop_len (loop_vinfo, &gsi,
@@ -10995,7 +10996,7 @@ vectorizable_live_operation_1 (loop_vec_info loop_vinfo,
 
         where VEC_LHS is the vectorized live-out result and MASK is
         the loop mask for the final iteration.  */
-      gcc_assert (!slp_node);
+      gcc_assert (!slp_node || SLP_TREE_LANES (slp_node) == 1);
       tree scalar_type = TREE_TYPE (STMT_VINFO_VECTYPE (stmt_info));
       gimple_seq tem = NULL;
       gimple_stmt_iterator gsi = gsi_last (tem);
@@ -11147,7 +11148,7 @@ vectorizable_live_operation (vec_info *vinfo, stmt_vec_info stmt_info,
       /* No transformation required.  */
       if (loop_vinfo && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
        {
-         if (slp_node)
+         if (slp_node && SLP_TREE_LANES (slp_node) != 1)
            {
              if (dump_enabled_p ())
                dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
@@ -11156,7 +11157,8 @@ vectorizable_live_operation (vec_info *vinfo, stmt_vec_info stmt_info,
                                 "the loop.\n");
              LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
            }
-         else if (ncopies > 1)
+         else if (ncopies > 1
+                  || (slp_node && SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) > 1))
            {
              if (dump_enabled_p ())
                dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
@@ -11166,7 +11168,8 @@ vectorizable_live_operation (vec_info *vinfo, stmt_vec_info stmt_info,
            }
          else
            {
-             gcc_assert (ncopies == 1 && !slp_node);
+             gcc_assert (ncopies == 1
+                         && (!slp_node || SLP_TREE_LANES (slp_node) == 1));
              if (direct_internal_fn_supported_p (IFN_EXTRACT_LAST, vectype,
                                                  OPTIMIZE_FOR_SPEED))
                vect_record_loop_mask (loop_vinfo,
@@ -11213,8 +11216,9 @@ vectorizable_live_operation (vec_info *vinfo, stmt_vec_info stmt_info,
   if (slp_node)
     {
       gcc_assert (!loop_vinfo
-                 || (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
-                     && !LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo)));
+                 || ((!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
+                      && !LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo))
+                     || SLP_TREE_LANES (slp_node) == 1));
 
       /* Get the correct slp vectorized stmt.  */
       vec_lhs = SLP_TREE_VEC_DEFS (slp_node)[vec_entry];
-- 
2.43.0
