gcc/ChangeLog

2020-MM-DD  Kewen Lin  <li...@gcc.gnu.org>

        * doc/invoke.texi (vect-with-length-scope): Document new option.
        * params.opt (vect-with-length-scope): New.
        * tree-vect-loop-manip.c (vect_set_loop_lens_directly): New function.
        (vect_set_loop_condition_len): Likewise.
        (vect_set_loop_condition): Call vect_set_loop_condition_len for loops
        with length.
        (vect_gen_vector_loop_niters): Use VF as the step for loops with
        length.
        (vect_do_peeling): Adjust for loops with length.
        * tree-vect-loop.c (_loop_vec_info::_loop_vec_info): Initialize
        can_with_length_p and fully_with_length_p.
        (release_vec_loop_lens): New function.
        (_loop_vec_info::~_loop_vec_info): Use it to free the loop lens.
        (vect_verify_loop_lens): New function.
        (vect_analyze_loop_costing): Adjust for loops fully with length.
        (determine_peel_for_niter): Don't peel if the loop is fully with
        length.
        (vect_analyze_loop_2): Save LOOP_VINFO_CAN_WITH_LENGTH_P around
        retries, and free the length rgroups before retrying.  Check
        loop-wide reasons for disabling loops with length.  Make the final
        decision about whether to use vector access with length.
        (vect_analyze_loop): Add handling for the epilogue of a loop that
        can use vector access with length but does not.
        (vect_estimate_min_profitable_iters): Adjust for loops with length.
        (vectorizable_reduction): Disable loops with length.
        (vectorizable_live_operation): Likewise.
        (vect_record_loop_len): New function.
        (vect_get_loop_len): Likewise.
        (vect_transform_loop): Flag that the final loop iteration could be
        a partial vector for loops with length.
        * tree-vect-stmts.c (check_load_store_with_len): New function.
        (vectorizable_store): Handle vector loops with length.
        (vectorizable_load): Likewise.
        (vect_gen_len): New function.
        * tree-vectorizer.h (struct rgroup_lens): New structure.
        (vec_loop_lens): New typedef.
        (_loop_vec_info): Add lens, can_with_length_p and fully_with_length_p.
        (LOOP_VINFO_CAN_WITH_LENGTH_P): New macro.
        (LOOP_VINFO_FULLY_WITH_LENGTH_P): Likewise.
        (LOOP_VINFO_LENS): Likewise.
        (vect_record_loop_len): New declaration.
        (vect_get_loop_len): Likewise.
        (vect_gen_len): Likewise.


---
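A quick illustration of what the patch enables (this example is only a
sketch for reviewers, not part of the patch): with
--param vect-with-length-scope=2 on a target that provides the
lenload/lenstore optabs used here, a simple copy loop such as

  void
  copy (unsigned char *restrict dst, unsigned char *restrict src, int n)
  {
    for (int i = 0; i < n; i++)
      dst[i] = src[i];
  }

can be vectorized without a scalar epilogue: the final iteration is allowed
to be a partial vector whose active byte count comes from the generated
length IV.
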
 gcc/doc/invoke.texi        |   7 +
 gcc/params.opt             |   4 +
 gcc/tree-vect-loop-manip.c | 268 ++++++++++++++++++++++++++++++++++++-
 gcc/tree-vect-loop.c       | 241 ++++++++++++++++++++++++++++++++-
 gcc/tree-vect-stmts.c      | 152 +++++++++++++++++++++
 gcc/tree-vectorizer.h      |  32 +++++
 6 files changed, 697 insertions(+), 7 deletions(-)
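
For reference, vect_gen_len (new in tree-vect-stmts.c below) computes

  LEN = MIN (END_INDEX - MIN (START_INDEX, END_INDEX), VECTOR_SIZE)

so with a 16-byte vector, a zero-based byte IV and 300 bytes to process,
the first iteration gets LEN = MIN (300 - 0, 16) = 16, while the last one
(IV = 296) gets LEN = MIN (300 - 296, 16) = 4.  The vectorized body then
uses the new internal functions roughly like this (an illustrative sketch
of the expected IL, not actual dump output):

  vect__1 = .LEN_LOAD (src_ptr, 16B, loop_len);
  .LEN_STORE (dst_ptr, 16B, loop_len, vect__1);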

diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index 8b9935dfe65..ac765feab13 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -13079,6 +13079,13 @@ by the copy loop headers pass.
 @item vect-epilogues-nomask
 Enable loop epilogue vectorization using smaller vector size.
 
+@item vect-with-length-scope
+Control the scope of vector memory access with length exploitation.  0 means we
+don't exploit any vector memory access with length, 1 means we only exploit
+vector memory access with length for loops whose number of iterations is
+less than VF, such as very small loops or epilogues, 2 means we want to exploit
+vector memory access with length for any loop where possible.
+
 @item slp-max-insns-in-bb
 Maximum number of instructions in basic block to be
 considered for SLP vectorization.
diff --git a/gcc/params.opt b/gcc/params.opt
index 4aec480798b..d4309101067 100644
--- a/gcc/params.opt
+++ b/gcc/params.opt
@@ -964,4 +964,8 @@ Bound on number of runtime checks inserted by the vectorizer's loop versioning f
 Common Joined UInteger Var(param_vect_max_version_for_alignment_checks) Init(6) Param Optimization
 Bound on number of runtime checks inserted by the vectorizer's loop versioning for alignment check.
 
+-param=vect-with-length-scope=
+Common Joined UInteger Var(param_vect_with_length_scope) Init(0) IntegerRange(0, 2) Param Optimization
+Control the scope of vector memory access with length exploitation.
+
 ; This comment is to ensure we retain the blank line above.
diff --git a/gcc/tree-vect-loop-manip.c b/gcc/tree-vect-loop-manip.c
index 8c5e696b995..3d5dec6f65c 100644
--- a/gcc/tree-vect-loop-manip.c
+++ b/gcc/tree-vect-loop-manip.c
@@ -747,6 +747,263 @@ vect_set_loop_condition_masked (class loop *loop, loop_vec_info loop_vinfo,
   return cond_stmt;
 }
 
+/* Helper for vect_set_loop_condition_len.  Like vect_set_loop_masks_directly,
+   generate definitions for all the lengths in RGL and return a length that is
+   nonzero when the loop needs to iterate.  Add any new preheader statements to
+   PREHEADER_SEQ.  Use LOOP_COND_GSI to insert code before the exit gcond.
+
+   RGL belongs to loop LOOP.  The loop originally iterated NITERS
+   times and has been vectorized according to LOOP_VINFO.  Each iteration
+   of the vectorized loop handles VF iterations of the scalar loop.
+
+   IV_LIMIT is the limit that the induction variable can reach, and is used
+   to check whether the induction variable can wrap before hitting NITERS.  */
+
+static tree
+vect_set_loop_lens_directly (class loop *loop, loop_vec_info loop_vinfo,
+                             gimple_seq *preheader_seq,
+                             gimple_stmt_iterator loop_cond_gsi,
+                             rgroup_lens *rgl, tree niters, widest_int iv_limit)
+{
+  scalar_int_mode len_mode = targetm.vectorize.length_mode;
+  unsigned int len_prec = GET_MODE_PRECISION (len_mode);
+  tree len_type = build_nonstandard_integer_type (len_prec, true);
+
+  tree vec_type = rgl->vec_type;
+  unsigned int nbytes_per_iter = rgl->nbytes_per_iter;
+  poly_uint64 vector_size = GET_MODE_SIZE (TYPE_MODE (vec_type));
+  poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
+  tree vec_size = build_int_cst (len_type, vector_size);
+
+  /* See whether the zero-based IV would ever generate a zero length
+     before wrapping around.  */
+  bool might_wrap_p = (iv_limit == -1);
+  if (!might_wrap_p)
+    {
+      widest_int iv_limit_max = iv_limit * nbytes_per_iter;
+      might_wrap_p = wi::min_precision (iv_limit_max, UNSIGNED) > len_prec;
+    }
+
+  /* Calculate the maximum number of bytes of scalars that the rgroup
+     handles in total, and the number that it handles for each iteration
+     of the vector loop.  */
+  tree nbytes_total = niters;
+  tree nbytes_step = build_int_cst (len_type, vf);
+  if (nbytes_per_iter != 1)
+    {
+      tree factor = build_int_cst (len_type, nbytes_per_iter);
+      nbytes_total = gimple_build (preheader_seq, MULT_EXPR, len_type,
+                                  nbytes_total, factor);
+      nbytes_step = gimple_build (preheader_seq, MULT_EXPR, len_type,
+                                 nbytes_step, factor);
+    }
+
+  /* Create an induction variable that counts the processed bytes of scalars.  */
+  tree index_before_incr, index_after_incr;
+  gimple_stmt_iterator incr_gsi;
+  bool insert_after;
+  standard_iv_increment_position (loop, &incr_gsi, &insert_after);
+  create_iv (build_int_cst (len_type, 0), nbytes_step, NULL_TREE, loop,
+            &incr_gsi, insert_after, &index_before_incr, &index_after_incr);
+
+  tree zero_index = build_int_cst (len_type, 0);
+  tree test_index, test_limit, first_limit;
+  gimple_stmt_iterator *test_gsi;
+
+  /* For the first iteration it doesn't matter whether the IV hits
+     a value above NBYTES_TOTAL.  That only matters for the latch
+     condition.  */
+  first_limit = nbytes_total;
+
+  if (might_wrap_p)
+    {
+      test_index = index_before_incr;
+      tree adjust = gimple_convert (preheader_seq, len_type, nbytes_step);
+      test_limit = gimple_build (preheader_seq, MAX_EXPR, len_type,
+                                nbytes_total, adjust);
+      test_limit = gimple_build (preheader_seq, MINUS_EXPR, len_type,
+                                test_limit, adjust);
+      test_gsi = &incr_gsi;
+    }
+  else
+    {
+      /* Test the incremented IV, which will always hit a value above
+        the bound before wrapping.  */
+      test_index = index_after_incr;
+      test_limit = nbytes_total;
+      test_gsi = &loop_cond_gsi;
+    }
+
+  /* Provide a definition of each length in the group.  */
+  tree next_len = NULL_TREE;
+  tree len;
+  unsigned int i;
+  FOR_EACH_VEC_ELT_REVERSE (rgl->lens, i, len)
+    {
+      /* Previous lengths will cover BIAS bytes of scalars.  This length
+        covers the next batch; each batch's length should be VECTOR_SIZE.  */
+      poly_uint64 bias = vector_size * i;
+      tree bias_tree = build_int_cst (len_type, bias);
+
+      /* See whether the first iteration of the vector loop is known
+        to have a full vector size.  */
+      poly_uint64 const_limit;
+      bool first_iteration_full
+       = (poly_int_tree_p (first_limit, &const_limit)
+          && known_ge (const_limit, (i + 1) * vector_size));
+
+      /* Rather than have a new IV that starts at BIAS and goes up to
+        TEST_LIMIT, prefer to use the same 0-based IV for each length
+        and adjust the bound down by BIAS.  */
+      tree this_test_limit = test_limit;
+      if (i != 0)
+       {
+         this_test_limit = gimple_build (preheader_seq, MAX_EXPR, len_type,
+                                         this_test_limit, bias_tree);
+         this_test_limit = gimple_build (preheader_seq, MINUS_EXPR, len_type,
+                                         this_test_limit, bias_tree);
+       }
+
+      /* Create the initial length.  First include all scalar bytes that
+        are within the loop limit.  */
+      tree init_len = NULL_TREE;
+      if (!first_iteration_full)
+       {
+         tree start, end;
+         if (first_limit == test_limit)
+           {
+             /* Use a natural test between zero (the initial IV value)
+                and the loop limit.  The "else" block would be valid too,
+                but this choice can avoid the need to load BIAS_TREE into
+                a register.  */
+             start = zero_index;
+             end = this_test_limit;
+           }
+         else
+           {
+             /* FIRST_LIMIT is the maximum number of scalar bytes handled by
+                the first iteration of the vector loop.  Test the portion
+                associated with this length.  */
+             start = bias_tree;
+             end = first_limit;
+           }
+
+         init_len = make_temp_ssa_name (len_type, NULL, "max_len");
+         gimple_seq seq = vect_gen_len (init_len, start, end, vec_size);
+         gimple_seq_add_seq (preheader_seq, seq);
+       }
+
+      /* First iteration is full.  */
+      if (!init_len)
+       init_len = vec_size;
+
+      /* Get the length value for the next iteration of the loop.  */
+      next_len = make_temp_ssa_name (len_type, NULL, "next_len");
+      tree end = this_test_limit;
+      gimple_seq seq = vect_gen_len (next_len, test_index, end, vec_size);
+      gsi_insert_seq_before (test_gsi, seq, GSI_SAME_STMT);
+
+      /* Use the mask setup routine to install this length's PHI.  */
+      vect_set_loop_mask (loop, len, init_len, next_len);
+    }
+
+  return next_len;
+}
+
+/* Like vect_set_loop_condition_masked, but handle the case of vector access
+   with length.  */
+
+static gcond *
+vect_set_loop_condition_len (class loop *loop, loop_vec_info loop_vinfo,
+                               tree niters, tree final_iv,
+                               bool niters_maybe_zero,
+                               gimple_stmt_iterator loop_cond_gsi)
+{
+  gimple_seq preheader_seq = NULL;
+  gimple_seq header_seq = NULL;
+  tree orig_niters = niters;
+
+  /* Type of the initial value of NITERS.  */
+  tree ni_actual_type = TREE_TYPE (niters);
+  unsigned int ni_actual_prec = TYPE_PRECISION (ni_actual_type);
+
+  /* Obtain target supported length type.  */
+  scalar_int_mode len_mode = targetm.vectorize.length_mode;
+  unsigned int len_prec = GET_MODE_PRECISION (len_mode);
+  tree len_type = build_nonstandard_integer_type (len_prec, true);
+
+  /* Calculate the value that the induction variable must be able to hit in
+     order to ensure that we end the loop with a zero length.  */
+  widest_int iv_limit = -1;
+  unsigned HOST_WIDE_INT max_vf = vect_max_vf (loop_vinfo);
+  if (max_loop_iterations (loop, &iv_limit))
+    {
+      /* Round this value down to the previous vector alignment boundary and
+        then add an extra full iteration.  */
+      poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
+      iv_limit = (iv_limit & -(int) known_alignment (vf)) + max_vf;
+    }
+
+  /* Convert NITERS to the same size as the length.  */
+  if (niters_maybe_zero || (len_prec > ni_actual_prec))
+    {
+      /* We know that there is always at least one iteration, so if the
+        count is zero then it must have wrapped.  Cope with this by
+        subtracting 1 before the conversion and adding 1 to the result.  */
+      gcc_assert (TYPE_UNSIGNED (ni_actual_type));
+      niters = gimple_build (&preheader_seq, PLUS_EXPR, ni_actual_type, niters,
+                            build_minus_one_cst (ni_actual_type));
+      niters = gimple_convert (&preheader_seq, len_type, niters);
+      niters = gimple_build (&preheader_seq, PLUS_EXPR, len_type, niters,
+                            build_one_cst (len_type));
+    }
+  else
+    niters = gimple_convert (&preheader_seq, len_type, niters);
+
+  /* Iterate over all the rgroups and fill in their lengths.  We could use
+     the first length from any rgroup for the loop condition; here we
+     arbitrarily pick the last.  */
+  tree test_len = NULL_TREE;
+  rgroup_lens *rgl;
+  unsigned int i;
+  vec_loop_lens *lens = &LOOP_VINFO_LENS (loop_vinfo);
+
+  FOR_EACH_VEC_ELT (*lens, i, rgl)
+    if (!rgl->lens.is_empty ())
+      /* Set up all lens for this group.  */
+      test_len
+       = vect_set_loop_lens_directly (loop, loop_vinfo, &preheader_seq,
+                                      loop_cond_gsi, rgl, niters, iv_limit);
+
+  /* Emit all accumulated statements.  */
+  add_preheader_seq (loop, preheader_seq);
+  add_header_seq (loop, header_seq);
+
+  /* Get a boolean result that tells us whether to iterate.  */
+  edge exit_edge = single_exit (loop);
+  tree_code code = (exit_edge->flags & EDGE_TRUE_VALUE) ? EQ_EXPR : NE_EXPR;
+  tree zero_len = build_zero_cst (TREE_TYPE (test_len));
+  gcond *cond_stmt
+    = gimple_build_cond (code, test_len, zero_len, NULL_TREE, NULL_TREE);
+  gsi_insert_before (&loop_cond_gsi, cond_stmt, GSI_SAME_STMT);
+
+  /* The loop iterates (NITERS - 1) / VF + 1 times.
+     Subtract one from this to get the latch count.  */
+  tree step = build_int_cst (len_type, LOOP_VINFO_VECT_FACTOR (loop_vinfo));
+  tree niters_minus_one
+    = fold_build2 (PLUS_EXPR, len_type, niters, build_minus_one_cst (len_type));
+  loop->nb_iterations
+    = fold_build2 (TRUNC_DIV_EXPR, len_type, niters_minus_one, step);
+
+  if (final_iv)
+    {
+      gassign *assign = gimple_build_assign (final_iv, orig_niters);
+      gsi_insert_on_edge_immediate (single_exit (loop), assign);
+    }
+
+  return cond_stmt;
+}
+
 /* Like vect_set_loop_condition, but handle the case in which there
    are no loop masks.  */
 
@@ -916,6 +1173,10 @@ vect_set_loop_condition (class loop *loop, loop_vec_info loop_vinfo,
     cond_stmt = vect_set_loop_condition_masked (loop, loop_vinfo, niters,
                                                final_iv, niters_maybe_zero,
                                                loop_cond_gsi);
+  else if (loop_vinfo && LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo))
+    cond_stmt = vect_set_loop_condition_len (loop, loop_vinfo, niters,
+                                               final_iv, niters_maybe_zero,
+                                               loop_cond_gsi);
   else
     cond_stmt = vect_set_loop_condition_unmasked (loop, niters, step,
                                                  final_iv, niters_maybe_zero,
@@ -1939,7 +2200,8 @@ vect_gen_vector_loop_niters (loop_vec_info loop_vinfo, tree niters,
 
   unsigned HOST_WIDE_INT const_vf;
   if (vf.is_constant (&const_vf)
-      && !LOOP_VINFO_FULLY_MASKED_P (loop_vinfo))
+      && !LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
+      && !LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo))
     {
       /* Create: niters >> log2(vf) */
       /* If it's known that niters == number of latch executions + 1 doesn't
@@ -2472,6 +2734,7 @@ vect_do_peeling (loop_vec_info loop_vinfo, tree niters, tree nitersm1,
   poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
   poly_uint64 bound_epilog = 0;
   if (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
+      && !LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo)
       && LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo))
     bound_epilog += vf - 1;
   if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo))
@@ -2567,7 +2830,8 @@ vect_do_peeling (loop_vec_info loop_vinfo, tree niters, tree nitersm1,
   if (vect_epilogues
       && LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
       && prolog_peeling >= 0
-      && known_eq (vf, lowest_vf))
+      && known_eq (vf, lowest_vf)
+      && !LOOP_VINFO_FULLY_WITH_LENGTH_P (epilogue_vinfo))
     {
       unsigned HOST_WIDE_INT eiters
        = (LOOP_VINFO_INT_NITERS (loop_vinfo)
diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c
index 80e33b61be7..d61f46becfd 100644
--- a/gcc/tree-vect-loop.c
+++ b/gcc/tree-vect-loop.c
@@ -815,6 +815,8 @@ _loop_vec_info::_loop_vec_info (class loop *loop_in, vec_info_shared *shared)
     vectorizable (false),
     can_fully_mask_p (true),
     fully_masked_p (false),
+    can_with_length_p (param_vect_with_length_scope != 0),
+    fully_with_length_p (false),
     peeling_for_gaps (false),
     peeling_for_niter (false),
     no_data_dependencies (false),
@@ -887,6 +889,18 @@ release_vec_loop_masks (vec_loop_masks *masks)
   masks->release ();
 }
 
+/* Free all levels of LENS.  */
+
+void
+release_vec_loop_lens (vec_loop_lens *lens)
+{
+  rgroup_lens *rgl;
+  unsigned int i;
+  FOR_EACH_VEC_ELT (*lens, i, rgl)
+    rgl->lens.release ();
+  lens->release ();
+}
+
 /* Free all memory used by the _loop_vec_info, as well as all the
    stmt_vec_info structs of all the stmts in the loop.  */
 
@@ -895,6 +909,7 @@ _loop_vec_info::~_loop_vec_info ()
   free (bbs);
 
   release_vec_loop_masks (&masks);
+  release_vec_loop_lens (&lens);
   delete ivexpr_map;
   delete scan_map;
   epilogue_vinfos.release ();
@@ -1056,6 +1071,44 @@ vect_verify_full_masking (loop_vec_info loop_vinfo)
   return true;
 }
 
+/* Check whether we can use vector access with length based on precision
+   comparison.  So far, to keep it simple, we only allow the case that the
+   precision of the target supported length is at least as large as the
+   precision required by the loop niters.  */
+
+static bool
+vect_verify_loop_lens (loop_vec_info loop_vinfo)
+{
+  class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
+  vec_loop_lens *lens = &LOOP_VINFO_LENS (loop_vinfo);
+
+  if (LOOP_VINFO_LENS (loop_vinfo).is_empty ())
+    return false;
+
+  /* Get the maximum number of iterations that is representable
+     in the counter type.  */
+  tree ni_type = TREE_TYPE (LOOP_VINFO_NITERSM1 (loop_vinfo));
+  widest_int max_ni = wi::to_widest (TYPE_MAX_VALUE (ni_type)) + 1;
+
+  /* Get a more refined estimate for the number of iterations.  */
+  widest_int max_back_edges;
+  if (max_loop_iterations (loop, &max_back_edges))
+    max_ni = wi::smin (max_ni, max_back_edges + 1);
+
+  /* Account for rgroup lengths, where each iteration counts NBYTES_PER_ITER times.  */
+  rgroup_lens *rgl = &(*lens)[lens->length () - 1];
+  max_ni *= rgl->nbytes_per_iter;
+
+  /* Work out how many bits we need to represent the limit.  */
+  unsigned int min_ni_width = wi::min_precision (max_ni, UNSIGNED);
+
+  unsigned len_bits = GET_MODE_PRECISION (targetm.vectorize.length_mode);
+  if (len_bits < min_ni_width)
+    return false;
+
+  return true;
+}
+
 /* Calculate the cost of one scalar iteration of the loop.  */
 static void
 vect_compute_single_scalar_iteration_cost (loop_vec_info loop_vinfo)
@@ -1630,7 +1683,8 @@ vect_analyze_loop_costing (loop_vec_info loop_vinfo)
 
   /* Only fully-masked loops can have iteration counts less than the
      vectorization factor.  */
-  if (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo))
+  if (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
+      && !LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo))
     {
       if (known_niters_smaller_than_vf (loop_vinfo))
        {
@@ -1858,7 +1912,8 @@ determine_peel_for_niter (loop_vec_info loop_vinfo)
     th = LOOP_VINFO_COST_MODEL_THRESHOLD (LOOP_VINFO_ORIG_LOOP_INFO
                                          (loop_vinfo));
 
-  if (LOOP_VINFO_FULLY_MASKED_P (loop_vinfo))
+  if (LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
+      || LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo))
     /* The main loop handles all iterations.  */
     LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo) = false;
   else if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
@@ -2048,6 +2103,7 @@ vect_analyze_loop_2 (loop_vec_info loop_vinfo, bool &fatal, unsigned *n_stmts)
     }
 
   bool saved_can_fully_mask_p = LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo);
+  bool saved_can_with_length_p = LOOP_VINFO_CAN_WITH_LENGTH_P (loop_vinfo);
 
   /* We don't expect to have to roll back to anything other than an empty
      set of rgroups.  */
@@ -2144,6 +2200,71 @@ start_over:
                         "not using a fully-masked loop.\n");
     }
 
+  /* Decide whether we can use vector access with length.  */
+
+  if ((LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)
+       || LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo))
+      && LOOP_VINFO_CAN_WITH_LENGTH_P (loop_vinfo))
+    {
+      if (dump_enabled_p ())
+       dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+                        "can't use vector access with length because peeling"
+                        " for alignment or gaps is required.\n");
+      LOOP_VINFO_CAN_WITH_LENGTH_P (loop_vinfo) = false;
+    }
+
+  if (LOOP_VINFO_CAN_WITH_LENGTH_P (loop_vinfo)
+      && !vect_verify_loop_lens (loop_vinfo))
+    {
+      if (dump_enabled_p ())
+       dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+                        "can't use vector access with length because the"
+                        " length precision verification fails.\n");
+      LOOP_VINFO_CAN_WITH_LENGTH_P (loop_vinfo) = false;
+    }
+
+  if (LOOP_VINFO_FULLY_MASKED_P (loop_vinfo))
+    {
+      if (dump_enabled_p ())
+       dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+                        "can't use vector access with length because the"
+                        " loop will be fully-masked.\n");
+      LOOP_VINFO_CAN_WITH_LENGTH_P (loop_vinfo) = false;
+    }
+
+  if (LOOP_VINFO_CAN_WITH_LENGTH_P (loop_vinfo))
+    {
+      /* As a special case, a loop whose maximum niters is less than VF
+        can simply be vectorized fully with length.  */
+      if (param_vect_with_length_scope == 1)
+       {
+         /* This is the epilogue; its niters is always less than VF.  */
+         if (LOOP_VINFO_EPILOGUE_P (loop_vinfo))
+           LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo) = true;
+         /* Otherwise, check that the niters is known to be less than VF.  */
+         else if (known_niters_smaller_than_vf (loop_vinfo))
+           LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo) = true;
+       }
+      else
+       {
+         gcc_assert (param_vect_with_length_scope == 2);
+         LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo) = true;
+       }
+    }
+  else
+    /* Always reset it to false in case a previous try set it.  */
+    LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo) = false;
+
+  if (dump_enabled_p ())
+    {
+      if (LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo))
+       dump_printf_loc (MSG_NOTE, vect_location, "using vector access with"
+                                                 " length for the loop fully.\n");
+      else
+       dump_printf_loc (MSG_NOTE, vect_location, "not using vector access with"
+                                                 " length for the loop fully.\n");
+    }
+
   /* If epilog loop is required because of data accesses with gaps,
      one additional iteration needs to be peeled.  Check if there is
      enough iterations for vectorization.  */
@@ -2164,6 +2285,7 @@ start_over:
      loop or a loop that has a lower VF than the main loop.  */
   if (LOOP_VINFO_EPILOGUE_P (loop_vinfo)
       && !LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
+      && !LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo)
       && maybe_ge (LOOP_VINFO_VECT_FACTOR (loop_vinfo),
                   LOOP_VINFO_VECT_FACTOR (orig_loop_vinfo)))
     return opt_result::failure_at (vect_location,
@@ -2362,12 +2484,14 @@ again:
     = init_cost (LOOP_VINFO_LOOP (loop_vinfo));
   /* Reset accumulated rgroup information.  */
   release_vec_loop_masks (&LOOP_VINFO_MASKS (loop_vinfo));
+  release_vec_loop_lens (&LOOP_VINFO_LENS (loop_vinfo));
   /* Reset assorted flags.  */
   LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo) = false;
   LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = false;
   LOOP_VINFO_COST_MODEL_THRESHOLD (loop_vinfo) = 0;
   LOOP_VINFO_VERSIONING_THRESHOLD (loop_vinfo) = 0;
   LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = saved_can_fully_mask_p;
+  LOOP_VINFO_CAN_WITH_LENGTH_P (loop_vinfo) = saved_can_with_length_p;
 
   goto start_over;
 }
@@ -2646,8 +2770,11 @@ vect_analyze_loop (class loop *loop, vec_info_shared *shared)
              if (ordered_p (lowest_th, th))
                lowest_th = ordered_min (lowest_th, th);
            }
-         else
-           delete loop_vinfo;
+         else
+           {
+             delete loop_vinfo;
+             loop_vinfo = opt_loop_vec_info::success (NULL);
+           }
 
          /* Only vectorize epilogues if PARAM_VECT_EPILOGUES_NOMASK is
             enabled, SIMDUID is not set, it is the innermost loop and we have
@@ -2672,6 +2798,7 @@ vect_analyze_loop (class loop *loop, vec_info_shared *shared)
       else
        {
          delete loop_vinfo;
+         loop_vinfo = opt_loop_vec_info::success (NULL);
          if (fatal)
            {
              gcc_checking_assert (first_loop_vinfo == NULL);
@@ -2679,6 +2806,21 @@ vect_analyze_loop (class loop *loop, vec_info_shared *shared)
            }
        }
 
+      /* If the original loop can use vector access with length but we still
+        get here with vect_epilogues true, retry the analysis of the epilogue
+        with the same vector mode, this time with vector access with length.  */
+      if (vect_epilogues && loop_vinfo
+         && LOOP_VINFO_CAN_WITH_LENGTH_P (loop_vinfo))
+       {
+         gcc_assert (!LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo));
+         if (dump_enabled_p ())
+           dump_printf_loc (MSG_NOTE, vect_location,
+                            "***** Re-trying analysis with same vector"
+                            " mode %s for epilogue with length.\n",
+                            GET_MODE_NAME (loop_vinfo->vector_mode));
+         continue;
+       }
+
       if (mode_i < vector_modes.length ()
          && VECTOR_MODE_P (autodetected_vector_mode)
          && (related_vector_mode (vector_modes[mode_i],
@@ -3519,6 +3661,11 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo,
                            target_cost_data, num_masks - 1, vector_stmt,
                            NULL, NULL_TREE, 0, vect_body);
     }
+  else if (LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo))
+    {
+      peel_iters_prologue = 0;
+      peel_iters_epilogue = 0;
+    }
   else if (npeel < 0)
     {
       peel_iters_prologue = assumed_vf / 2;
@@ -3809,6 +3956,7 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo,
                 min_profitable_iters);
 
   if (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
+      && !LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo)
       && min_profitable_iters < (assumed_vf + peel_iters_prologue))
     /* We want the vectorized loop to execute at least once.  */
     min_profitable_iters = assumed_vf + peel_iters_prologue;
@@ -6761,6 +6909,16 @@ vectorizable_reduction (loop_vec_info loop_vinfo,
     dump_printf_loc (MSG_NOTE, vect_location,
                     "using an in-order (fold-left) reduction.\n");
   STMT_VINFO_TYPE (orig_stmt_of_analysis) = cycle_phi_info_type;
+
+  if (loop_vinfo && LOOP_VINFO_CAN_WITH_LENGTH_P (loop_vinfo))
+    {
+      if (dump_enabled_p ())
+       dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+                        "can't use vector access with length due to"
+                        " a reduction operation.\n");
+      LOOP_VINFO_CAN_WITH_LENGTH_P (loop_vinfo) = false;
+    }
+
   /* All but single defuse-cycle optimized, lane-reducing and fold-left
      reductions go through their own vectorizable_* routines.  */
   if (!single_defuse_cycle
@@ -8041,6 +8199,16 @@ vectorizable_live_operation (loop_vec_info loop_vinfo,
                                     1, vectype, NULL);
            }
        }
+
+      if (loop_vinfo && LOOP_VINFO_CAN_WITH_LENGTH_P (loop_vinfo))
+       {
+         LOOP_VINFO_CAN_WITH_LENGTH_P (loop_vinfo) = false;
+         if (dump_enabled_p ())
+           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+                            "can't use vector access with length due to"
+                            " a live operation.\n");
+       }
+
       return true;
     }
 
@@ -8354,6 +8522,66 @@ vect_get_loop_mask (gimple_stmt_iterator *gsi, vec_loop_masks *masks,
   return mask;
 }
 
+/* Record that LOOP_VINFO would need LENS to contain a sequence of NVECTORS
+   lengths for vector access with length, each of which controls a vector of
+   type VECTYPE.  */
+
+void
+vect_record_loop_len (loop_vec_info loop_vinfo, vec_loop_lens *lens,
+                      unsigned int nvectors, tree vectype)
+{
+  gcc_assert (nvectors != 0);
+  if (lens->length () < nvectors)
+    lens->safe_grow_cleared (nvectors);
+  rgroup_lens *rgl = &(*lens)[nvectors - 1];
+
+  /* The number of scalars per iteration, the total bytes of them and the
+     number of vectors are all compile-time constants.  */
+  poly_uint64 vector_size = GET_MODE_SIZE (TYPE_MODE (vectype));
+  poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
+  unsigned int nbytes_per_iter
+    = exact_div (nvectors * vector_size, vf).to_constant ();
+
+  /* Rgroups recorded with the same number of vectors should have the same
+     number of bytes per iteration.  */
+  if (!rgl->vec_type)
+    {
+      rgl->vec_type = vectype;
+      rgl->nbytes_per_iter = nbytes_per_iter;
+    }
+  else
+    gcc_assert (rgl->nbytes_per_iter == nbytes_per_iter);
+}
+
+/* Given a complete set of length LENS, extract length number INDEX for an
+   rgroup that operates on NVECTORS vectors, where 0 <= INDEX < NVECTORS.  */
+
+tree
+vect_get_loop_len (vec_loop_lens *lens, unsigned int nvectors, unsigned int index)
+{
+  rgroup_lens *rgl = &(*lens)[nvectors - 1];
+
+  /* Populate the rgroup's len array, if this is the first time we've
+     used it.  */
+  if (rgl->lens.is_empty ())
+    {
+      rgl->lens.safe_grow_cleared (nvectors);
+      for (unsigned int i = 0; i < nvectors; ++i)
+       {
+         scalar_int_mode len_mode = targetm.vectorize.length_mode;
+         unsigned int len_prec = GET_MODE_PRECISION (len_mode);
+         tree len_type = build_nonstandard_integer_type (len_prec, true);
+         tree len = make_temp_ssa_name (len_type, NULL, "loop_len");
+
+         /* Provide a dummy definition until the real one is available.  */
+         SSA_NAME_DEF_STMT (len) = gimple_build_nop ();
+         rgl->lens[i] = len;
+       }
+    }
+
+  return rgl->lens[index];
+}
+
 /* Scale profiling counters by estimation for LOOP which is vectorized
    by factor VF.  */
 
@@ -8714,6 +8942,7 @@ vect_transform_loop (loop_vec_info loop_vinfo, gimple *loop_vectorized_call)
     {
       if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
          && !LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
+         && !LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo)
          && known_eq (lowest_vf, vf))
        {
          niters_vector
@@ -8881,7 +9110,9 @@ vect_transform_loop (loop_vec_info loop_vinfo, gimple *loop_vectorized_call)
 
   /* True if the final iteration might not handle a full vector's
      worth of scalar iterations.  */
-  bool final_iter_may_be_partial = LOOP_VINFO_FULLY_MASKED_P (loop_vinfo);
+  bool final_iter_may_be_partial
+    = LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
+      || LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo);
   /* The minimum number of iterations performed by the epilogue.  This
      is 1 when peeling for gaps because we always need a final scalar
      iteration.  */
diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c
index e7822c44951..d6be39e1831 100644
--- a/gcc/tree-vect-stmts.c
+++ b/gcc/tree-vect-stmts.c
@@ -1879,6 +1879,66 @@ check_load_store_masking (loop_vec_info loop_vinfo, tree vectype,
     gcc_unreachable ();
 }
 
+/* Check whether a load or store statement in the loop described by
+   LOOP_VINFO can use vector access with length.  This is testing whether
+   the vectorizer pass has the appropriate support, as well as whether
+   the target does.
+
+   VLS_TYPE says whether the statement is a load or store and VECTYPE
+   is the type of the vector being loaded or stored.  MEMORY_ACCESS_TYPE
+   says how the load or store is going to be implemented and GROUP_SIZE
+   is the number of load or store statements in the containing group.
+
+   Clear LOOP_VINFO_CAN_WITH_LENGTH_P if the access can't go with length;
+   otherwise record the required length types.  */
+
+static void
+check_load_store_with_len (loop_vec_info loop_vinfo, tree vectype,
+                     vec_load_store_type vls_type, int group_size,
+                     vect_memory_access_type memory_access_type)
+{
+  /* Invariant loads need no special support.  */
+  if (memory_access_type == VMAT_INVARIANT)
+    return;
+
+  if (memory_access_type != VMAT_CONTIGUOUS
+      && memory_access_type != VMAT_CONTIGUOUS_PERMUTE)
+    {
+      if (dump_enabled_p ())
+       dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+                        "can't use vector access with length"
+                        " because an access isn't contiguous.\n");
+      LOOP_VINFO_CAN_WITH_LENGTH_P (loop_vinfo) = false;
+      return;
+    }
+
+  machine_mode vecmode = TYPE_MODE (vectype);
+  bool is_load = (vls_type == VLS_LOAD);
+  optab op = is_load ? lenload_optab : lenstore_optab;
+
+  if (!VECTOR_MODE_P (vecmode)
+      || !convert_optab_handler (op, vecmode, targetm.vectorize.length_mode))
+    {
+      if (dump_enabled_p ())
+       dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+                        "can't use vector access with length because"
+                        " the target doesn't have the appropriate"
+                        " load or store with length.\n");
+      LOOP_VINFO_CAN_WITH_LENGTH_P (loop_vinfo) = false;
+      return;
+    }
+
+  vec_loop_lens *lens = &LOOP_VINFO_LENS (loop_vinfo);
+  poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
+  poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
+  unsigned int nvectors;
+
+  if (can_div_away_from_zero_p (group_size * vf, nunits, &nvectors))
+    vect_record_loop_len (loop_vinfo, lens, nvectors, vectype);
+  else
+    gcc_unreachable ();
+}
+
 /* Return the mask input to a masked load or store.  VEC_MASK is the vectorized
    form of the scalar mask condition and LOOP_MASK, if nonnull, is the mask
    that needs to be applied to all loads and stores in a vectorized loop.
@@ -7532,6 +7592,10 @@ vectorizable_store (vec_info *vinfo,
        check_load_store_masking (loop_vinfo, vectype, vls_type, group_size,
                                  memory_access_type, &gs_info, mask);
 
+      if (loop_vinfo && LOOP_VINFO_CAN_WITH_LENGTH_P (loop_vinfo))
+       check_load_store_with_len (loop_vinfo, vectype, vls_type, group_size,
+                                     memory_access_type);
+
       if (slp_node
          && !vect_maybe_update_slp_op_vectype (SLP_TREE_CHILDREN (slp_node)[0],
                                                vectype))
@@ -8068,6 +8132,15 @@ vectorizable_store (vec_info *vinfo,
     = (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
        ? &LOOP_VINFO_MASKS (loop_vinfo)
        : NULL);
+
+  vec_loop_lens *loop_lens
+    = (loop_vinfo && LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo)
+        ? &LOOP_VINFO_LENS (loop_vinfo)
+        : NULL);
+
+  /* Shouldn't go with length if fully masked.  */
+  gcc_assert (!loop_lens || !loop_masks);
+
   /* Targets with store-lane instructions must not require explicit
      realignment.  vect_supportable_dr_alignment always returns either
      dr_aligned or dr_unaligned_supported for masked operations.  */
@@ -8320,10 +8393,15 @@ vectorizable_store (vec_info *vinfo,
              unsigned HOST_WIDE_INT align;
 
              tree final_mask = NULL_TREE;
+             tree final_len = NULL_TREE;
              if (loop_masks)
                final_mask = vect_get_loop_mask (gsi, loop_masks,
                                                 vec_num * ncopies,
                                                 vectype, vec_num * j + i);
+             else if (loop_lens)
+               final_len = vect_get_loop_len (loop_lens, vec_num * ncopies,
+                                              vec_num * j + i);
+
              if (vec_mask)
                final_mask = prepare_load_store_mask (mask_vectype, final_mask,
                                                      vec_mask, gsi);
@@ -8403,6 +8481,17 @@ vectorizable_store (vec_info *vinfo,
                  new_stmt_info
                    = vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
                }
+             else if (final_len)
+               {
+                 align = least_bit_hwi (misalign | align);
+                 tree ptr = build_int_cst (ref_type, align);
+                 gcall *call
+                   = gimple_build_call_internal (IFN_LEN_STORE, 4, dataref_ptr,
+                                                 ptr, final_len, vec_oprnd);
+                 gimple_call_set_nothrow (call, true);
+                 new_stmt_info
+                   = vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
+               }
              else
                {
                  data_ref = fold_build2 (MEM_REF, vectype,
@@ -8839,6 +8928,10 @@ vectorizable_load (vec_info *vinfo,
        check_load_store_masking (loop_vinfo, vectype, VLS_LOAD, group_size,
                                  memory_access_type, &gs_info, mask);
 
+      if (loop_vinfo && LOOP_VINFO_CAN_WITH_LENGTH_P (loop_vinfo))
+       check_load_store_with_len (loop_vinfo, vectype, VLS_LOAD, group_size,
+                                     memory_access_type);
+
       STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
       vect_model_load_cost (vinfo, stmt_info, ncopies, vf, memory_access_type,
                            slp_node, cost_vec);
@@ -8937,6 +9030,7 @@ vectorizable_load (vec_info *vinfo,
 
       gcc_assert (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo));
       gcc_assert (!nested_in_vect_loop);
+      gcc_assert (!LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo));
 
       if (grouped_load)
        {
@@ -9234,6 +9328,15 @@ vectorizable_load (vec_info *vinfo,
     = (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
        ? &LOOP_VINFO_MASKS (loop_vinfo)
        : NULL);
+
+  vec_loop_lens *loop_lens
+    = (loop_vinfo && LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo)
+        ? &LOOP_VINFO_LENS (loop_vinfo)
+        : NULL);
+
+  /* Shouldn't go with length if fully masked.  */
+  gcc_assert (!loop_lens || !loop_masks);
+
   /* Targets with store-lane instructions must not require explicit
      realignment.  vect_supportable_dr_alignment always returns either
      dr_aligned or dr_unaligned_supported for masked operations.  */
@@ -9555,15 +9658,19 @@
          for (i = 0; i < vec_num; i++)
            {
              tree final_mask = NULL_TREE;
+             tree final_len = NULL_TREE;
              if (loop_masks
                  && memory_access_type != VMAT_INVARIANT)
                final_mask = vect_get_loop_mask (gsi, loop_masks,
                                                 vec_num * ncopies,
                                                 vectype, vec_num * j + i);
+             else if (loop_lens && memory_access_type != VMAT_INVARIANT)
+               final_len = vect_get_loop_len (loop_lens, vec_num * ncopies,
+                                              vec_num * j + i);
              if (vec_mask)
                final_mask = prepare_load_store_mask (mask_vectype, final_mask,
                                                      vec_mask, gsi);
 
              if (i > 0)
                dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr,
                                               gsi, stmt_info, bump);
@@ -9629,6 +9737,18 @@ vectorizable_load (vec_info *vinfo,
                        new_stmt = call;
                        data_ref = NULL_TREE;
                      }
+                   else if (final_len)
+                     {
+                       align = least_bit_hwi (misalign | align);
+                       tree ptr = build_int_cst (ref_type, align);
+                       gcall *call
+                         = gimple_build_call_internal (IFN_LEN_LOAD, 3,
+                                                       dataref_ptr, ptr,
+                                                       final_len);
+                       gimple_call_set_nothrow (call, true);
+                       new_stmt = call;
+                       data_ref = NULL_TREE;
+                     }
                    else
                      {
                        tree ltype = vectype;
@@ -12480,3 +12600,34 @@ vect_get_vector_types_for_stmt (vec_info *vinfo, stmt_vec_info stmt_info,
   *nunits_vectype_out = nunits_vectype;
   return opt_result::success ();
 }
+
+/* Generate and return statement sequence that sets vector length LEN that is:
+
+   min_of_start_and_end = min (START_INDEX, END_INDEX);
+   left_bytes = END_INDEX - min_of_start_and_end;
+   rhs = min (left_bytes, VECTOR_SIZE);
+   LEN = rhs;
+
+   TODO: For now the vector with length support on rs6000 only looks at the
+   low 8 bits of the length, so a LEFT_BYTES larger than 255 can't be
+   saturated to the vector size.  A target hook can be provided if other
+   ports don't have this restriction.  */
+
+gimple_seq
+vect_gen_len (tree len, tree start_index, tree end_index, tree vector_size)
+{
+  gimple_seq stmts = NULL;
+  tree len_type = TREE_TYPE (len);
+  gcc_assert (TREE_TYPE (start_index) == len_type);
+
+  tree min = fold_build2 (MIN_EXPR, len_type, start_index, end_index);
+  tree left_bytes = fold_build2 (MINUS_EXPR, len_type, end_index, min);
+  left_bytes = fold_build2 (MIN_EXPR, len_type, left_bytes, vector_size);
+
+  tree rhs = force_gimple_operand (left_bytes, &stmts, true, NULL_TREE);
+  gimple *new_stmt = gimple_build_assign (len, rhs);
+  gimple_stmt_iterator i = gsi_last (stmts);
+  gsi_insert_after_without_update (&i, new_stmt, GSI_CONTINUE_LINKING);
+
+  return stmts;
+}
diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
index 2eb3ab5d280..774d5025639 100644
--- a/gcc/tree-vectorizer.h
+++ b/gcc/tree-vectorizer.h
@@ -476,6 +476,21 @@ struct rgroup_masks {
 
 typedef auto_vec<rgroup_masks> vec_loop_masks;
 
+/* Similar to masks above, the lengths needed by rgroups with nV vectors.  */
+struct rgroup_lens
+{
+  /* The total number of bytes handled per scalar iteration.  */
+  unsigned int nbytes_per_iter;
+
+  /* A representative vector type that uses these lengths.  */
+  tree vec_type;
+
+  /* A vector of nV lengths, in iteration order.  */
+  vec<tree> lens;
+};
+
+typedef auto_vec<rgroup_lens> vec_loop_lens;
+
 typedef auto_vec<std::pair<data_reference*, tree> > drs_init_vec;
 
 /*-----------------------------------------------------------------*/
@@ -523,6 +538,10 @@ public:
      on inactive scalars.  */
   vec_loop_masks masks;
 
+  /* The lengths that a loop with length should use to avoid operating
+     on inactive scalars.  */
+  vec_loop_lens lens;
+
   /* Set of scalar conditions that have loop mask applied.  */
   scalar_cond_masked_set_type scalar_cond_masked_set;
 
@@ -626,6 +645,12 @@ public:
   /* True if have decided to use a fully-masked loop.  */
   bool fully_masked_p;
 
+  /* Records whether we still have the option of using a length access loop.  */
+  bool can_with_length_p;
+
+  /* True if we have decided to use vector access with length fully.  */
+  bool fully_with_length_p;
+
   /* When we have grouped data accesses with gaps, we may introduce invalid
      memory accesses.  We peel the last iteration of the loop to prevent
      this.  */
@@ -689,6 +714,9 @@ public:
 #define LOOP_VINFO_VECTORIZABLE_P(L)       (L)->vectorizable
 #define LOOP_VINFO_CAN_FULLY_MASK_P(L)     (L)->can_fully_mask_p
 #define LOOP_VINFO_FULLY_MASKED_P(L)       (L)->fully_masked_p
+#define LOOP_VINFO_CAN_WITH_LENGTH_P(L)    (L)->can_with_length_p
+#define LOOP_VINFO_FULLY_WITH_LENGTH_P(L)  (L)->fully_with_length_p
+#define LOOP_VINFO_LENS(L)                 (L)->lens
 #define LOOP_VINFO_VECT_FACTOR(L)          (L)->vectorization_factor
 #define LOOP_VINFO_MAX_VECT_FACTOR(L)      (L)->max_vectorization_factor
 #define LOOP_VINFO_MASKS(L)                (L)->masks
@@ -1842,6 +1870,10 @@ extern void vect_record_loop_mask (loop_vec_info, vec_loop_masks *,
                                   unsigned int, tree, tree);
 extern tree vect_get_loop_mask (gimple_stmt_iterator *, vec_loop_masks *,
                                unsigned int, tree, unsigned int);
+extern void vect_record_loop_len (loop_vec_info, vec_loop_lens *, unsigned int,
+                                 tree);
+extern tree vect_get_loop_len (vec_loop_lens *, unsigned int, unsigned int);
+extern gimple_seq vect_gen_len (tree, tree, tree, tree);
 extern stmt_vec_info info_for_reduction (vec_info *, stmt_vec_info);
 
 /* Drive for loop transformation stage.  */
-- 
