On Wed, 20 Dec 2023, Tamar Christina wrote:

> > > +       /* If we've moved a VDEF, extract the defining MEM and update
> > > +          usages of it.   */
> > > +       tree vdef;
> > > +       /* This statement is to be moved.  */
> > > +       if ((vdef = gimple_vdef (stmt)))
> > > +         LOOP_VINFO_EARLY_BRK_CONFLICT_STMTS
> > (loop_vinfo).safe_push (
> > > +             stmt);
> > 
> > I'm also unsure why you need 'chain' at all given you have the vector
> > of stores to be moved?
> > 
> 
> Yeah, so originally I wanted to move statements other than stores.  While 
> stores
> are needed for correctness, the other statements would be so we didn't extend 
> the
> live range too much for intermediate values.
> 
> This proved difficult but eventually I got it to work, but as you saw it was 
> meh code.
> Instead I guess the better approach is to teach sched1 in GCC 15 to schedule 
> across
> branches in loops.
> 
> With that in mind, I changed it to move only stores.  Since stores never 
> produce a value
> and are sinks, I don't really need fixed nor chain.
> 
> So here's a much cleaned up patch.
> 
> Bootstrapped Regtested on aarch64-none-linux-gnu and
> x86_64-pc-linux-gnu no issues.
> 
> Ok for master?

OK.

Thanks,
Richard.
 
> Thanks,
> Tamar
> 
> gcc/ChangeLog:
> 
>       * tree-if-conv.cc (ref_within_array_bound): Expose.
>       * tree-vect-data-refs.cc (vect_analyze_early_break_dependences): New.
>       (vect_analyze_data_ref_dependences): Use them.
>       * tree-vect-loop.cc (_loop_vec_info::_loop_vec_info): Initialize
>       early_breaks.
>       (move_early_exit_stmts): New.
>       (vect_transform_loop): Use it.
>       * tree-vect-stmts.cc (vect_is_simple_use): Use vect_early_exit_def.
>       * tree-vectorizer.h (enum vect_def_type): Add vect_early_exit_def.
>       (ref_within_array_bound): New.
>       (class _loop_vec_info): Add early_breaks, early_break_conflict,
>       early_break_vuses.
>       (LOOP_VINFO_EARLY_BREAKS): New.
>       (LOOP_VINFO_EARLY_BRK_STORES): New.
>       (LOOP_VINFO_EARLY_BRK_DEST_BB): New.
>       (LOOP_VINFO_EARLY_BRK_VUSES): New.
> 
> gcc/testsuite/ChangeLog:
> 
>       * gcc.dg/vect/vect-early-break_57.c: Update.
>       * gcc.dg/vect/vect-early-break_79.c: New test.
>       * gcc.dg/vect/vect-early-break_80.c: New test.
>       * gcc.dg/vect/vect-early-break_81.c: New test.
>       * gcc.dg/vect/vect-early-break_83.c: New test.
> 
> --- inline copy of patch ---
> 
> diff --git a/gcc/testsuite/gcc.dg/vect/vect-early-break_57.c 
> b/gcc/testsuite/gcc.dg/vect/vect-early-break_57.c
> index 
> be4a0c7426093059ce37a9f824defb7ae270094d..9a4e795f92b7a8577ac71827f5cb0bd15d88ebe1
>  100644
> --- a/gcc/testsuite/gcc.dg/vect/vect-early-break_57.c
> +++ b/gcc/testsuite/gcc.dg/vect/vect-early-break_57.c
> @@ -5,6 +5,7 @@
>  /* { dg-additional-options "-Ofast" } */
>  
>  /* { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" } } */
> +/* { dg-final { scan-tree-dump "epilog loop required" "vect" } } */
>  
>  void abort ();
>  
> diff --git a/gcc/testsuite/gcc.dg/vect/vect-early-break_79.c 
> b/gcc/testsuite/gcc.dg/vect/vect-early-break_79.c
> new file mode 100644
> index 
> 0000000000000000000000000000000000000000..a26011ef1ba5aa000692babc90d46621efc2f8b5
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/vect/vect-early-break_79.c
> @@ -0,0 +1,27 @@
> +/* { dg-do compile } */
> +/* { dg-require-effective-target vect_early_break } */
> +/* { dg-require-effective-target vect_int } */
> +
> +/* { dg-additional-options "-Ofast" } */
> +
> +/* { dg-final { scan-tree-dump-not "LOOP VECTORIZED" "vect" } } */
> +
> +#undef N
> +#define N 32
> +
> +unsigned vect_a[N];
> +unsigned vect_b[N];
> +  
> +unsigned test4(unsigned x)
> +{
> + unsigned ret = 0;
> + for (int i = 0; i < 1024; i++)
> + {
> +   vect_b[i] = x + i;
> +   if (vect_a[i] > x)
> +     break;
> +   vect_a[i] = x;
> +   
> + }
> + return ret;
> +}
> diff --git a/gcc/testsuite/gcc.dg/vect/vect-early-break_80.c 
> b/gcc/testsuite/gcc.dg/vect/vect-early-break_80.c
> new file mode 100644
> index 
> 0000000000000000000000000000000000000000..ddf504e0c8787ae33a0e98045c1c91f2b9f533a9
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/vect/vect-early-break_80.c
> @@ -0,0 +1,43 @@
> +/* { dg-require-effective-target vect_early_break } */
> +/* { dg-require-effective-target vect_int } */
> +
> +/* { dg-additional-options "-Ofast" } */
> +
> +/* { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" } } */
> +
> +extern void abort ();
> +
> +int x;
> +__attribute__ ((noinline, noipa))
> +void foo (int *a, int *b)
> +{
> +  int local_x = x;
> +  for (int i = 0; i < 1024; ++i)
> +    {
> +      if (i + local_x == 13)
> +        break;
> +      a[i] = 2 * b[i];
> +    }
> +}
> +
> +int main ()
> +{
> +  int a[1024] = {0};
> +  int b[1024] = {0};
> +
> +  for (int i = 0; i < 1024; i++)
> +    b[i] = i;
> +
> +  x = -512;
> +  foo (a, b);
> +
> +  if (a[524] != 1048)
> +    abort ();
> +
> +  if (a[525] != 0)
> +    abort ();
> +
> +  if (a[1023] != 0)
> +    abort ();
> +  return 0;
> +}
> diff --git a/gcc/testsuite/gcc.dg/vect/vect-early-break_81.c 
> b/gcc/testsuite/gcc.dg/vect/vect-early-break_81.c
> new file mode 100644
> index 
> 0000000000000000000000000000000000000000..c38e394ad87863f0702d422cb58018b979c9fba6
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/vect/vect-early-break_81.c
> @@ -0,0 +1,30 @@
> +/* { dg-do compile } */
> +/* { dg-require-effective-target vect_early_break } */
> +/* { dg-require-effective-target vect_int } */
> +
> +/* { dg-additional-options "-Ofast" } */
> +
> +/* { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" } } */
> +/* { dg-final { scan-tree-dump "epilog loop required" "vect" } } */
> +void abort ();
> +
> +unsigned short sa[32];
> +unsigned short sc[32] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,
> +  16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31};
> +unsigned short sb[32] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,
> +  16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31};
> +unsigned int ia[32];
> +unsigned int ic[32] = {0,3,6,9,12,15,18,21,24,27,30,33,36,39,42,45,
> +        0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
> +unsigned int ib[32] = {0,3,6,9,12,15,18,21,24,27,30,33,36,39,42,45,
> +        0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
> +
> +int main2 (int n)
> +{
> +  int i;
> +  for (i = 0; i < n - 3; i++)
> +    {
> +      if (sa[i+3] != sb[i] + sc[i] || ia[i+3] != ib[i] + ic[i])
> +        abort ();
> +    }
> +}
> diff --git a/gcc/testsuite/gcc.dg/vect/vect-early-break_83.c 
> b/gcc/testsuite/gcc.dg/vect/vect-early-break_83.c
> new file mode 100644
> index 
> 0000000000000000000000000000000000000000..227dcf1b7ab2ace149e692a6aab41cdd5d47d098
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/vect/vect-early-break_83.c
> @@ -0,0 +1,28 @@
> +/* { dg-do compile } */
> +/* { dg-require-effective-target vect_early_break } */
> +/* { dg-require-effective-target vect_int } */
> +
> +/* { dg-additional-options "-Ofast" } */
> +
> +/* { dg-final { scan-tree-dump-not "LOOP VECTORIZED" "vect" } } */
> +
> +#include <complex.h>
> +
> +#define N 1024
> +complex double vect_a[N];
> +complex double vect_b[N];
> +  
> +complex double test4(complex double x)
> +{
> + complex double ret = 0;
> + for (int i = 0; i < N; i++)
> + {
> +   volatile complex double z = vect_b[i];
> +   vect_b[i] = x + i + z;
> +   if (vect_a[i] == x)
> +     return i;
> +   vect_a[i] += x * vect_b[i];
> +   
> + }
> + return ret;
> +}
> diff --git a/gcc/tree-if-conv.cc b/gcc/tree-if-conv.cc
> index 
> 0bde281c2468d8e7f43afc4fe0f757e221ad5edb..a31e3d5161684878a79817d30a6955c8370444d8
>  100644
> --- a/gcc/tree-if-conv.cc
> +++ b/gcc/tree-if-conv.cc
> @@ -844,7 +844,7 @@ idx_within_array_bound (tree ref, tree *idx, void *dta)
>  
>  /* Return TRUE if ref is a within bound array reference.  */
>  
> -static bool
> +bool
>  ref_within_array_bound (gimple *stmt, tree ref)
>  {
>    class loop *loop = loop_containing_stmt (stmt);
> diff --git a/gcc/tree-vect-data-refs.cc b/gcc/tree-vect-data-refs.cc
> index 
> d5c9c4a11c2e5d8fd287f412bfa86d081c2f8325..85ae75ff2eb12b4299e8b7b91d0cf16e4549d08e
>  100644
> --- a/gcc/tree-vect-data-refs.cc
> +++ b/gcc/tree-vect-data-refs.cc
> @@ -613,6 +613,241 @@ vect_analyze_data_ref_dependence (struct 
> data_dependence_relation *ddr,
>    return opt_result::success ();
>  }
>  
> +/* Function vect_analyze_early_break_dependences.
> +
> +   Examine all the data references in the loop and make sure that if we have
> +   multiple exits that we are able to safely move stores such that they 
> become
> +   safe for vectorization.  The function also calculates the place where to 
> move
> +   the instructions to and computes what the new vUSE chain should be.
> +
> +   This works in tandem with the CFG that will be produced by
> +   slpeel_tree_duplicate_loop_to_edge_cfg later on.
> +
> +   This function tries to validate whether an early break vectorization
> +   is possible for the current instruction sequence. Returns True if
> +   possible, otherwise False.
> +
> +   Requirements:
> +     - Any memory access must be to a fixed size buffer.
> +     - There must not be any loads and stores to the same object.
> +     - Multiple loads are allowed as long as they don't alias.
> +
> +   NOTE:
> +     This implementation is very conservative. Any overlapping loads/stores
> +     that take place before the early break statement get rejected aside 
> from
> +     WAR dependencies.
> +
> +     i.e.:
> +
> +     a[i] = 8
> +     c = a[i]
> +     if (b[i])
> +       ...
> +
> +     is not allowed, but
> +
> +     c = a[i]
> +     a[i] = 8
> +     if (b[i])
> +       ...
> +
> +     is allowed, which is the common case.  */
> +
> +static opt_result
> +vect_analyze_early_break_dependences (loop_vec_info loop_vinfo)
> +{
> +  DUMP_VECT_SCOPE ("vect_analyze_early_break_dependences");
> +
> +  /* List of all load data references found during traversal.  */
> +  auto_vec<data_reference *> bases;
> +  basic_block dest_bb = NULL;
> +
> +  hash_set <gimple *> visited;
> +  class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
> +  class loop *loop_nest = loop_outer (loop);
> +
> +  if (dump_enabled_p ())
> +    dump_printf_loc (MSG_NOTE, vect_location,
> +                  "loop contains multiple exits, analyzing"
> +                  " statement dependencies.\n");
> +
> +  for (gimple *c : LOOP_VINFO_LOOP_CONDS (loop_vinfo))
> +    {
> +      stmt_vec_info loop_cond_info = loop_vinfo->lookup_stmt (c);
> +      if (STMT_VINFO_TYPE (loop_cond_info) != loop_exit_ctrl_vec_info_type)
> +     continue;
> +
> +      gimple_stmt_iterator gsi = gsi_for_stmt (c);
> +
> +      /* Now analyze all the remaining statements and try to determine which
> +      instructions are allowed/needed to be moved.  */
> +      while (!gsi_end_p (gsi))
> +     {
> +       gimple *stmt = gsi_stmt (gsi);
> +       gsi_prev (&gsi);
> +       if (!gimple_has_ops (stmt)
> +           || is_gimple_debug (stmt))
> +         continue;
> +
> +       stmt_vec_info stmt_vinfo = loop_vinfo->lookup_stmt (stmt);
> +       auto dr_ref = STMT_VINFO_DATA_REF (stmt_vinfo);
> +       if (!dr_ref)
> +         continue;
> +
> +       /* We currently only support statically allocated objects due to
> +          not having first-faulting loads support or peeling for
> +          alignment support.  Compute the size of the referenced object
> +          (it could be dynamically allocated).  */
> +       tree obj = DR_BASE_ADDRESS (dr_ref);
> +       if (!obj || TREE_CODE (obj) != ADDR_EXPR)
> +         {
> +           if (dump_enabled_p ())
> +             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
> +                              "early breaks only supported on statically"
> +                              " allocated objects.\n");
> +           return opt_result::failure_at (c,
> +                              "can't safely apply code motion to "
> +                              "dependencies of %G to vectorize "
> +                              "the early exit.\n", c);
> +         }
> +
> +       tree refop = TREE_OPERAND (obj, 0);
> +       tree refbase = get_base_address (refop);
> +       if (!refbase || !DECL_P (refbase) || !DECL_SIZE (refbase)
> +           || TREE_CODE (DECL_SIZE (refbase)) != INTEGER_CST)
> +         {
> +           if (dump_enabled_p ())
> +             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
> +                              "early breaks only supported on"
> +                              " statically allocated objects.\n");
> +           return opt_result::failure_at (c,
> +                              "can't safely apply code motion to "
> +                              "dependencies of %G to vectorize "
> +                              "the early exit.\n", c);
> +         }
> +
> +       /* Check if vector accesses to the object will be within bounds.
> +          must be a constant or assume loop will be versioned or niters
> +          bounded by VF so accesses are within range.  */
> +       if (!ref_within_array_bound (stmt, DR_REF (dr_ref)))
> +         {
> +           if (dump_enabled_p ())
> +             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
> +                              "early breaks not supported: vectorization "
> +                              "would %s beyond size of obj.",
> +                              DR_IS_READ (dr_ref) ? "read" : "write");
> +           return opt_result::failure_at (c,
> +                              "can't safely apply code motion to "
> +                              "dependencies of %G to vectorize "
> +                              "the early exit.\n", c);
> +         }
> +
> +       if (DR_IS_READ (dr_ref))
> +         bases.safe_push (dr_ref);
> +       else if (DR_IS_WRITE (dr_ref))
> +         {
> +           /* We are moving writes down in the CFG.  To be sure that this
> +              is valid after vectorization we have to check all the loads
> +              we are sinking the stores past to see if any of them may
> +              alias or are the same object.
> +
> +              Same objects will not be an issue because unless the store
> +              is marked volatile the value can be forwarded.  If the
> +              store is marked volatile we don't vectorize the loop
> +              anyway.
> +
> +              That leaves the check for aliasing.  We don't really need
> +              to care about the stores aliasing with each other since the
> +              stores are moved in order so the effects are still observed
> +              correctly.  This leaves the check for WAR dependencies
> +              which we would be introducing here if the DR can alias.
> +              The check is quadratic in loads/stores but I have not found
> +              a better API to do this.  I believe all loads and stores
> +              must be checked.  We also must check them when we
> +              encountered the store, since we don't care about loads past
> +              the store.  */
> +
> +           for (auto dr_read : bases)
> +             if (dr_may_alias_p (dr_ref, dr_read, loop_nest))
> +               {
> +                 if (dump_enabled_p ())
> +                   dump_printf_loc (MSG_MISSED_OPTIMIZATION,
> +                                    vect_location,
> +                                    "early breaks not supported: "
> +                                    "overlapping loads and stores "
> +                                    "found before the break "
> +                                    "statement.\n");
> +
> +                 return opt_result::failure_at (stmt,
> +                          "can't safely apply code motion to dependencies"
> +                          " to vectorize the early exit. %G may alias with"
> +                          " %G\n", stmt, dr_read->stmt);
> +               }
> +         }
> +
> +       if (gimple_vdef (stmt))
> +         {
> +           if (dump_enabled_p ())
> +             dump_printf_loc (MSG_NOTE, vect_location,
> +                              "==> recording stmt %G", stmt);
> +
> +           LOOP_VINFO_EARLY_BRK_STORES (loop_vinfo).safe_push (stmt);
> +         }
> +       else if (gimple_vuse (stmt))
> +         {
> +           LOOP_VINFO_EARLY_BRK_VUSES (loop_vinfo).safe_insert (0, stmt);
> +           if (dump_enabled_p ())
> +             dump_printf_loc (MSG_NOTE, vect_location,
> +                              "marked statement for vUSE update: %G", stmt);
> +         }
> +     }
> +
> +      /* Save destination as we go, BB are visited in order and the last one
> +     is where statements should be moved to.  */
> +      if (!dest_bb)
> +     dest_bb = gimple_bb (c);
> +      else
> +     {
> +       basic_block curr_bb = gimple_bb (c);
> +       if (dominated_by_p (CDI_DOMINATORS, curr_bb, dest_bb))
> +         dest_bb = curr_bb;
> +     }
> +
> +      /* Mark the statement as a condition.  */
> +      STMT_VINFO_DEF_TYPE (loop_cond_info) = vect_condition_def;
> +    }
> +
> +  basic_block dest_bb0 = EDGE_SUCC (dest_bb, 0)->dest;
> +  basic_block dest_bb1 = EDGE_SUCC (dest_bb, 1)->dest;
> +  dest_bb = flow_bb_inside_loop_p (loop, dest_bb0) ? dest_bb0 : dest_bb1;
> +  /* We don't allow outer -> inner loop transitions which should have been
> +     trapped already during loop form analysis.  */
> +  gcc_assert (dest_bb->loop_father == loop);
> +
> +  gcc_assert (dest_bb);
> +  LOOP_VINFO_EARLY_BRK_DEST_BB (loop_vinfo) = dest_bb;
> +
> +  if (!LOOP_VINFO_EARLY_BRK_VUSES (loop_vinfo).is_empty ())
> +    {
> +      /* All uses shall be updated to that of the first load.  Entries are
> +      stored in reverse order.  */
> +      tree vuse = gimple_vuse (LOOP_VINFO_EARLY_BRK_VUSES (loop_vinfo).last 
> ());
> +      for (auto g : LOOP_VINFO_EARLY_BRK_VUSES (loop_vinfo))
> +     {
> +       if (dump_enabled_p ())
> +       dump_printf_loc (MSG_NOTE, vect_location,
> +                        "will update use: %T, mem_ref: %G", vuse, g);
> +     }
> +    }
> +
> +  if (dump_enabled_p ())
> +    dump_printf_loc (MSG_NOTE, vect_location,
> +                  "recorded statements to be moved to BB %d\n",
> +                  LOOP_VINFO_EARLY_BRK_DEST_BB (loop_vinfo)->index);
> +
> +  return opt_result::success ();
> +}
> +
>  /* Function vect_analyze_data_ref_dependences.
>  
>     Examine all the data references in the loop, and make sure there do not
> @@ -657,6 +892,11 @@ vect_analyze_data_ref_dependences (loop_vec_info 
> loop_vinfo,
>         return res;
>        }
>  
> +  /* If we have early break statements in the loop, check to see if they
> +     are of a form we can vectorize.  */
> +  if (LOOP_VINFO_EARLY_BREAKS (loop_vinfo))
> +    return vect_analyze_early_break_dependences (loop_vinfo);
> +
>    return opt_result::success ();
>  }
>  
> diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc
> index 
> fb8d999ee6bfaff551ac06ac2f3aea5354914659..900826567fee36206c0711ea51495602a7a031a1
>  100644
> --- a/gcc/tree-vect-loop.cc
> +++ b/gcc/tree-vect-loop.cc
> @@ -1040,6 +1040,7 @@ _loop_vec_info::_loop_vec_info (class loop *loop_in, 
> vec_info_shared *shared)
>      partial_load_store_bias (0),
>      peeling_for_gaps (false),
>      peeling_for_niter (false),
> +    early_breaks (false),
>      no_data_dependencies (false),
>      has_mask_store (false),
>      scalar_loop_scaling (profile_probability::uninitialized ()),
> @@ -11548,6 +11549,56 @@ update_epilogue_loop_vinfo (class loop *epilogue, 
> tree advance)
>    epilogue_vinfo->shared->save_datarefs ();
>  }
>  
> +/*  When vectorizing early break statements instructions that happen before
> +    the early break in the current BB need to be moved to after the early
> +    break.  This function deals with that and assumes that any validity
> +    checks has already been performed.
> +
> +    While moving the instructions if it encounters a VUSE or VDEF it then
> +    corrects the VUSES as it moves the statements along.  GDEST is the 
> location
> +    in which to insert the new statements.  */
> +
> +static void
> +move_early_exit_stmts (loop_vec_info loop_vinfo)
> +{
> +  DUMP_VECT_SCOPE ("move_early_exit_stmts");
> +
> +  if (LOOP_VINFO_EARLY_BRK_STORES (loop_vinfo).is_empty ())
> +    return;
> +
> +  /* Move all stmts that need moving.  */
> +  basic_block dest_bb = LOOP_VINFO_EARLY_BRK_DEST_BB (loop_vinfo);
> +  gimple_stmt_iterator dest_gsi = gsi_start_bb (dest_bb);
> +
> +  for (gimple *stmt : LOOP_VINFO_EARLY_BRK_STORES (loop_vinfo))
> +    {
> +      /* Check to see if statement is still required for vect or has been
> +      elided.  */
> +      auto stmt_info = loop_vinfo->lookup_stmt (stmt);
> +      if (!stmt_info)
> +     continue;
> +
> +      if (dump_enabled_p ())
> +     dump_printf_loc (MSG_NOTE, vect_location, "moving stmt %G", stmt);
> +
> +      gimple_stmt_iterator stmt_gsi = gsi_for_stmt (stmt);
> +      gsi_move_before (&stmt_gsi, &dest_gsi);
> +      gsi_prev (&dest_gsi);
> +    }
> +
> +  /* Update all the stmts with their new reaching VUSES.  */
> +  tree vuse
> +    = gimple_vuse (LOOP_VINFO_EARLY_BRK_STORES (loop_vinfo).last ());
> +  for (auto p : LOOP_VINFO_EARLY_BRK_VUSES (loop_vinfo))
> +    {
> +      if (dump_enabled_p ())
> +       dump_printf_loc (MSG_NOTE, vect_location,
> +                        "updating vuse to %T for load %G", vuse, p);
> +      gimple_set_vuse (p, vuse);
> +      update_stmt (p);
> +    }
> +}
> +
>  /* Function vect_transform_loop.
>  
>     The analysis phase has determined that the loop is vectorizable.
> @@ -11697,6 +11748,11 @@ vect_transform_loop (loop_vec_info loop_vinfo, 
> gimple *loop_vectorized_call)
>        vect_schedule_slp (loop_vinfo, LOOP_VINFO_SLP_INSTANCES (loop_vinfo));
>      }
>  
> +  /* Handle any code motion that we need to for early-break vectorization 
> after
> +     we've done peeling but just before we start vectorizing.  */
> +  if (LOOP_VINFO_EARLY_BREAKS (loop_vinfo))
> +    move_early_exit_stmts (loop_vinfo);
> +
>    /* FORNOW: the vectorizer supports only loops which body consist
>       of one basic block (header + empty latch). When the vectorizer will
>       support more involved loop forms, the order by which the BBs are
> diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
> index 
> 96e4a6cffadebb43946c5cb7e9849c915da589bc..b3a09c0a804a38e17ef32b6ce13b98b077459fc7
>  100644
> --- a/gcc/tree-vect-stmts.cc
> +++ b/gcc/tree-vect-stmts.cc
> @@ -359,8 +359,8 @@ vect_stmt_relevant_p (stmt_vec_info stmt_info, 
> loop_vec_info loop_vinfo,
>    *live_p = false;
>  
>    /* cond stmt other than loop exit cond.  */
> -  if (is_ctrl_stmt (stmt_info->stmt)
> -      && STMT_VINFO_TYPE (stmt_info) != loop_exit_ctrl_vec_info_type)
> +  gimple *stmt = STMT_VINFO_STMT (stmt_info);
> +  if (dyn_cast <gcond *> (stmt))
>      *relevant = vect_used_in_scope;
>  
>    /* changing memory.  */
> @@ -13530,6 +13530,9 @@ vect_is_simple_use (tree operand, vec_info *vinfo, 
> enum vect_def_type *dt,
>       case vect_first_order_recurrence:
>         dump_printf (MSG_NOTE, "first order recurrence\n");
>         break;
> +     case vect_condition_def:
> +       dump_printf (MSG_NOTE, "control flow\n");
> +       break;
>       case vect_unknown_def_type:
>         dump_printf (MSG_NOTE, "unknown\n");
>         break;
> diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
> index 
> e4d7ab4567cef3c018b958f98eeff045d3477725..744cdc86c969a62574be488df4f9c222b68f7994
>  100644
> --- a/gcc/tree-vectorizer.h
> +++ b/gcc/tree-vectorizer.h
> @@ -66,6 +66,7 @@ enum vect_def_type {
>    vect_double_reduction_def,
>    vect_nested_cycle,
>    vect_first_order_recurrence,
> +  vect_condition_def,
>    vect_unknown_def_type
>  };
>  
> @@ -888,6 +889,10 @@ public:
>       we need to peel off iterations at the end to form an epilogue loop.  */
>    bool peeling_for_niter;
>  
> +  /* When the loop has early breaks that we can vectorize we need to peel
> +     the loop for the break finding loop.  */
> +  bool early_breaks;
> +
>    /* List of loop additional IV conditionals found in the loop.  */
>    auto_vec<gcond *> conds;
>  
> @@ -942,6 +947,20 @@ public:
>    /* The controlling loop IV for the scalar loop being vectorized.  This IV
>       controls the natural exits of the loop.  */
>    edge scalar_loop_iv_exit;
> +
> +  /* Used to store the list of stores needing to be moved if doing early
> +     break vectorization as they would violate the scalar loop semantics if
> +     vectorized in their current location.  These are stored in order that 
> they
> +     need to be moved.  */
> +  auto_vec<gimple *> early_break_stores;
> +
> +  /* The final basic block where to move statements to.  In the case of
> +     multiple exits this could be pretty far away.  */
> +  basic_block early_break_dest_bb;
> +
> +  /* Statements whose VUSES need updating if early break vectorization is to
> +     happen.  */
> +  auto_vec<gimple*> early_break_vuses;
>  } *loop_vec_info;
>  
>  /* Access Functions.  */
> @@ -996,6 +1015,10 @@ public:
>  #define LOOP_VINFO_REDUCTION_CHAINS(L)     (L)->reduction_chains
>  #define LOOP_VINFO_PEELING_FOR_GAPS(L)     (L)->peeling_for_gaps
>  #define LOOP_VINFO_PEELING_FOR_NITER(L)    (L)->peeling_for_niter
> +#define LOOP_VINFO_EARLY_BREAKS(L)         (L)->early_breaks
> +#define LOOP_VINFO_EARLY_BRK_STORES(L)     (L)->early_break_stores
> +#define LOOP_VINFO_EARLY_BRK_DEST_BB(L)    (L)->early_break_dest_bb
> +#define LOOP_VINFO_EARLY_BRK_VUSES(L)      (L)->early_break_vuses
>  #define LOOP_VINFO_LOOP_CONDS(L)           (L)->conds
>  #define LOOP_VINFO_LOOP_IV_COND(L)         (L)->loop_iv_cond
>  #define LOOP_VINFO_NO_DATA_DEPENDENCIES(L) (L)->no_data_dependencies
> @@ -2298,6 +2321,9 @@ extern opt_result vect_get_vector_types_for_stmt 
> (vec_info *,
>                                                 tree *, unsigned int = 0);
>  extern opt_tree vect_get_mask_type_for_stmt (stmt_vec_info, unsigned int = 
> 0);
>  
> +/* In tree-if-conv.cc.  */
> +extern bool ref_within_array_bound (gimple *, tree);
> +
>  /* In tree-vect-data-refs.cc.  */
>  extern bool vect_can_force_dr_alignment_p (const_tree, poly_uint64);
>  extern enum dr_alignment_support vect_supportable_dr_alignment
> 

-- 
Richard Biener <rguent...@suse.de>
SUSE Software Solutions Germany GmbH,
Frankenstrasse 146, 90461 Nuernberg, Germany;
GF: Ivo Totev, Andrew McDonald, Werner Knoblich; (HRB 36809, AG Nuernberg)

Reply via email to