On Fri, Jul 21, 2023 at 1:53 PM Jan Hubicka via Gcc-patches
<gcc-patches@gcc.gnu.org> wrote:
>
> Hi,
> currently loop-ch skips all do-while loops.  But when loop is not do-while
> in addition to original goal of turining it to do-while it can do additional
> things:
>  1) move out loop invariant computations
>  2) duplicate loop invariant conditionals and eliminate them in loop body.
>  3) prove that some exits are always true in first iteration
>     and can be skipped
>
> Most of time 1 can be done by lim (exception is when the invariant computation
> is conditional). For 2 we however don't really have other place doing it 
> except
> for loop unswitching that is more expensive (it will duplicate the loop and
> then optimize out one path to non-loop).
> 3 can be done by loop peeling but it is also more expensive by duplicating 
> full
> loop body.
>
> This patch improves heuristics by not giving up on do-while loops and trying
> to find sequence of BBs to duplicate to obtain one of goals:
>  - turn loop to do-while
>  - eliminate invariant conditional in loop body
>  - do partial "peeling" as long as code optimizes enough so this does not
>    increase code size.
> This can be improved upon, but I think this patch should finally get
> heuristics into shape that it does not do weird things.
>
> The patch requires bit of testsuite changes
>  - I disabled ch in loop-unswitch-17.c since it tests unswitching of
>    loop invariant conditional.
>  - pr103079.c needs ch disabled to trigger vrp situation it tests for
>    (otherwise we optimize stuff earlier and better)
>  - copy-headers-7.c now gets only 2 basic blocks duplicated since
>    last conditional does not seem to benefit from duplicating,
>    so I reordered them.
> copy-headers-9 tests the new logic.
>
> Bootstrapped/regtested x86_64-linux, OK?

OK.  In case the size heuristics are a bit too optimistic we could avoid the
peeling in the -Os case?  Did you do any stats on TUs to see whether code
actually increases in the end?

Thanks,
Richard.

> gcc/ChangeLog:
>
>         * tree-ssa-loop-ch.cc (enum ch_decision): New enum.
>         (should_duplicate_loop_header_p): Return info on profitability.
>         (do_while_loop_p): Watch for constant conditionals.
>         (update_profile_after_ch): Do not sanity check that all
>         static exits are taken.
>         (ch_base::copy_headers): Run on all loops.
>         (pass_ch::process_loop_p): Improve heuristics by handling also
>         do_while loop and duplicating shortest sequence containing all
>         winning blocks.
>
> gcc/testsuite/ChangeLog:
>
>         * gcc.dg/loop-unswitch-17.c: Disable ch.
>         * gcc.dg/pr103079.c: Disable ch.
>         * gcc.dg/tree-ssa/copy-headers-7.c: Update so ch behaves
>         as expected.
>         * gcc.dg/tree-ssa/copy-headers.c: Update template.
>         * gcc.dg/tree-ssa/copy-headers-9.c: New test.
>
> diff --git a/gcc/testsuite/gcc.dg/loop-unswitch-17.c 
> b/gcc/testsuite/gcc.dg/loop-unswitch-17.c
> index 8655e09a51c..4b806c475b1 100644
> --- a/gcc/testsuite/gcc.dg/loop-unswitch-17.c
> +++ b/gcc/testsuite/gcc.dg/loop-unswitch-17.c
> @@ -1,5 +1,5 @@
>  /* { dg-do compile } */
> -/* { dg-options "-O2 -funswitch-loops -fdump-tree-unswitch-optimized" } */
> +/* { dg-options "-O2 -funswitch-loops -fdump-tree-unswitch-optimized 
> -fno-tree-ch" } */
>
>  int foo (int a)
>  {
> diff --git a/gcc/testsuite/gcc.dg/pr103079.c b/gcc/testsuite/gcc.dg/pr103079.c
> index 7f6632fc669..7b107544725 100644
> --- a/gcc/testsuite/gcc.dg/pr103079.c
> +++ b/gcc/testsuite/gcc.dg/pr103079.c
> @@ -1,5 +1,5 @@
>  /* { dg-do compile } */
> -/* { dg-options "-Os -fdump-tree-vrp2" } */
> +/* { dg-options "-Os -fdump-tree-vrp2 -fno-tree-ch" } */
>
>  int a, b = -2;
>  int main() {
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/copy-headers-7.c 
> b/gcc/testsuite/gcc.dg/tree-ssa/copy-headers-7.c
> index e2a6c75f2e9..b3df3b6398e 100644
> --- a/gcc/testsuite/gcc.dg/tree-ssa/copy-headers-7.c
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/copy-headers-7.c
> @@ -4,7 +4,7 @@
>  int is_sorted(int *a, int n, int m, int k)
>  {
>    if (k > 0)
> -    for (int i = 0; i < n - 1 && m && k > i; i++)
> +    for (int i = 0; k > i && m && i < n - 1 ; i++)
>        if (a[i] > a[i + 1])
>         return 0;
>    return 1;
> @@ -17,5 +17,4 @@ int is_sorted(int *a, int n, int m, int k)
>  /* { dg-final { scan-tree-dump-times "Conditional combines static and 
> invariant" 0 "ch2" } } */
>  /* { dg-final { scan-tree-dump-times "Will elliminate invariant exit" 1 
> "ch2" } } */
>  /* { dg-final { scan-tree-dump-times "Will eliminate peeled conditional" 1 
> "ch2" } } */
> -/* { dg-final { scan-tree-dump-times "Not duplicating bb .: condition based 
> on non-IV loop variant." 1 "ch2" } } */
>  /* { dg-final { scan-tree-dump-times "Will duplicate bb" 3 "ch2" } } */
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/copy-headers-9.c 
> b/gcc/testsuite/gcc.dg/tree-ssa/copy-headers-9.c
> new file mode 100644
> index 00000000000..7cc162ca94d
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/copy-headers-9.c
> @@ -0,0 +1,20 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -fdump-tree-ch-details" } */
> +int a[100];
> +void test (int m, int n)
> +{
> +       int i = 0;
> +       do
> +       {
> +               if (m)
> +                       break;
> +               i++;
> +               a[i]=0;
> +       }
> +       while (i<10);
> +}
> +/* { dg-final { scan-tree-dump-times "Duplicating bb . is a win" 1 "ch2" } } 
> */
> +/* { dg-final { scan-tree-dump-times "May duplicate bb" 1 "ch2" } } */
> +/* { dg-final { scan-tree-dump-times "Duplicating additional BB to obtain 
> do-while loop" 1 "ch2" } } */
> +/* { dg-final { scan-tree-dump-times "Will duplicate bb" 2 "ch2" } } */
> +/* { dg-final { scan-tree-dump "is now do-while loop" "ch2" } } */
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/copy-headers.c 
> b/gcc/testsuite/gcc.dg/tree-ssa/copy-headers.c
> index a5a82121ff2..f4f2b2aa70b 100644
> --- a/gcc/testsuite/gcc.dg/tree-ssa/copy-headers.c
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/copy-headers.c
> @@ -12,4 +12,4 @@ void bla (void)
>  }
>
>  /* There should be a header duplicated.  */
> -/* { dg-final { scan-tree-dump-times "Duplicating header" 1 "ch2"} } */
> +/* { dg-final { scan-tree-dump-times "Duplicating header of the" 1 "ch2"} } 
> */
> diff --git a/gcc/tree-ssa-loop-ch.cc b/gcc/tree-ssa-loop-ch.cc
> index a87ebc58e3d..6cdb87a762f 100644
> --- a/gcc/tree-ssa-loop-ch.cc
> +++ b/gcc/tree-ssa-loop-ch.cc
> @@ -168,11 +168,28 @@ loop_combined_static_and_iv_p (class loop *loop,
>    return gimple_uid (SSA_NAME_DEF_STMT (op)) & 4;
>  }
>
> +/* Decision about posibility of copying a given header.  */
> +
> +enum ch_decision
> +{
> +  /* We do not want to copy this header.  */
> +  ch_impossible,
> +  /* We can copy it if it enables wins.  */
> +  ch_possible,
> +  /* We can "copy" it if it enables wins and doing
> +     so will introduce no new code.  */
> +  ch_possible_zero_cost,
> +  /* We want to copy.  */
> +  ch_win,
> +  /* We want to copy and we will eliminate loop exit.  */
> +  ch_win_invariant_exit
> +};
> +
>  /* Check whether we should duplicate HEADER of LOOP.  At most *LIMIT
>     instructions should be duplicated, limit is decreased by the actual
>     amount.  */
>
> -static bool
> +static ch_decision
>  should_duplicate_loop_header_p (basic_block header, class loop *loop,
>                                 gimple_ranger *ranger,
>                                 int *limit,
> @@ -190,7 +207,7 @@ should_duplicate_loop_header_p (basic_block header, class 
> loop *loop,
>         fprintf (dump_file,
>                  "  Not duplicating bb %i: it is single succ.\n",
>                  header->index);
> -      return false;
> +      return ch_impossible;
>      }
>
>    if (flow_bb_inside_loop_p (loop, EDGE_SUCC (header, 0)->dest)
> @@ -200,7 +217,7 @@ should_duplicate_loop_header_p (basic_block header, class 
> loop *loop,
>         fprintf (dump_file,
>                  "  Not duplicating bb %i: both successors are in loop.\n",
>                  loop->num);
> -      return false;
> +      return ch_impossible;
>      }
>
>    /* If this is not the original loop header, we want it to have just
> @@ -211,7 +228,7 @@ should_duplicate_loop_header_p (basic_block header, class 
> loop *loop,
>         fprintf (dump_file,
>                  "  Not duplicating bb %i: it has mutiple predecestors.\n",
>                  header->index);
> -      return false;
> +      return ch_impossible;
>      }
>
>    gcond *last = safe_dyn_cast <gcond *> (*gsi_last_bb (header));
> @@ -221,7 +238,7 @@ should_duplicate_loop_header_p (basic_block header, class 
> loop *loop,
>         fprintf (dump_file,
>                  "  Not duplicating bb %i: it does not end by conditional.\n",
>                  header->index);
> -      return false;
> +      return ch_impossible;
>      }
>
>    path_range_query *query = NULL;
> @@ -233,6 +250,7 @@ should_duplicate_loop_header_p (basic_block header, class 
> loop *loop,
>                                             query, psi.phi ());
>         gimple_set_uid (psi.phi (), static_p ? 2 : 0);
>        }
> +  bool code_size_cost = false;
>
>    /* Count number of instructions and punt on calls.
>       Populate stmts INV flag to later apply heuristics to the
> @@ -268,7 +286,7 @@ should_duplicate_loop_header_p (basic_block header, class 
> loop *loop,
>                      header->index);
>           if (query)
>             delete query;
> -         return false;
> +         return ch_impossible;
>         }
>
>        /* Classify the stmt is invariant in the loop.  */
> @@ -343,6 +361,8 @@ should_duplicate_loop_header_p (basic_block header, class 
> loop *loop,
>
>        int insns = estimate_num_insns (last, &eni_size_weights);
>        *limit -= insns;
> +      if (insns)
> +       code_size_cost = true;
>        if (dump_file && (dump_flags & TDF_DETAILS))
>         fprintf (dump_file,
>                  "    Acconting stmt as %i insns\n", insns);
> @@ -354,7 +374,7 @@ should_duplicate_loop_header_p (basic_block header, class 
> loop *loop,
>                      header->index);
>           if (query)
>             delete query;
> -         return false;
> +         return ch_impossible;
>         }
>      }
>
> @@ -403,7 +423,7 @@ should_duplicate_loop_header_p (basic_block header, class 
> loop *loop,
>        if (dump_file && (dump_flags & TDF_DETAILS))
>         fprintf (dump_file,
>                  "  Conditional combines static and invariant op.\n");
> -     return true;
> +     return ch_win;
>     }
>
>    edge static_exit = static_loop_exit (loop, header, *ranger, query);
> @@ -435,11 +455,16 @@ should_duplicate_loop_header_p (basic_block header, 
> class loop *loop,
>                 invariant_exits->add (e);
>               }
>         }
> -      return true;
> +      return ch_win_invariant_exit;
>      }
>
> +  /* If the static exit fully optimize out, it is win to "duplicate"
> +     it.
> +
> +     TODO: Even if duplication costs some size we may opt to do so in case
> +     exit probability is significant enough (do partial peeling).  */
>    if (static_exit)
> -    return true;
> +    return code_size_cost ? ch_possible_zero_cost : ch_win;
>
>    /* We was not able to prove that conditional will be eliminated.  */
>    int insns = estimate_num_insns (last, &eni_size_weights);
> @@ -453,19 +478,10 @@ should_duplicate_loop_header_p (basic_block header, 
> class loop *loop,
>         fprintf (dump_file,
>                  "  Not duplicating bb %i contains too many insns.\n",
>                  header->index);
> -      return false;
> -    }
> -
> -  if (header != loop->header)
> -    {
> -      if (dump_file && (dump_flags & TDF_DETAILS))
> -       fprintf (dump_file,
> -                "  Not duplicating bb %i: condition based on non-IV loop"
> -                " variant.\n", header->index);
> -      return false;
> +      return ch_impossible;
>      }
>
> -  return true;
> +  return ch_possible;
>  }
>
>  /* Checks whether LOOP is a do-while style loop.  */
> @@ -496,10 +512,11 @@ do_while_loop_p (class loop *loop)
>                  "predecessors.\n", loop->num);
>        return false;
>      }
> +  basic_block pred = single_pred (loop->latch);
>
>    /* If the latch predecessor doesn't exit the loop, it is not a
>       do-while loop.  */
> -  if (!loop_exits_from_bb_p (loop, single_pred (loop->latch)))
> +  if (!loop_exits_from_bb_p (loop, pred))
>      {
>        if (dump_file && (dump_flags & TDF_DETAILS))
>         fprintf (dump_file,
> @@ -507,6 +524,18 @@ do_while_loop_p (class loop *loop)
>                  "does not exit loop.\n", loop->num);
>        return false;
>      }
> +  gcond *last = safe_dyn_cast <gcond *> (*gsi_last_bb (pred));
> +  if (last
> +      && (gimple_cond_lhs (last) == boolean_false_node
> +         || gimple_cond_lhs (last) == boolean_true_node)
> +      && gimple_cond_rhs (last) == boolean_false_node)
> +    {
> +      if (dump_file && (dump_flags & TDF_DETAILS))
> +       fprintf (dump_file,
> +                "Loop %i is not do-while loop: latch predecessor "
> +                "contains exit we optimized out.\n", loop->num);
> +      return false;
> +    }
>
>    if (dump_file && (dump_flags & TDF_DETAILS))
>      fprintf (dump_file, "Loop %i is do-while loop\n", loop->num);
> @@ -634,9 +663,9 @@ update_profile_after_ch (class loop *loop,
>         }
>        entry_count = e_copy->count ();
>      }
> -  /* Be sure that we seen all edges we are supposed to update.  */
> -  gcc_checking_assert (static_exits->is_empty ()
> -                      && invariant_exits->is_empty ());
> +  /* Be sure that we seen all invariant exit edges we are supposed to update.
> +     We may have recorded some static exists we decided to not duplicate.  */
> +  gcc_checking_assert (invariant_exits->is_empty ());
>  }
>
>  namespace {
> @@ -792,16 +821,43 @@ ch_base::copy_headers (function *fun)
>          the header to have just a single successor and copying up to
>          postdominator.  */
>        int nheaders = 0;
> +      int last_win_nheaders = 0;
> +      bool last_win_invariant_exit = false;
> +      ch_decision ret;
>        hash_set <edge> *invariant_exits = new hash_set <edge>;
>        hash_set <edge> *static_exits = new hash_set <edge>;
> -      while (should_duplicate_loop_header_p (header, loop, ranger,
> -                                            &remaining_limit,
> -                                            invariant_exits,
> -                                            static_exits))
> +      while ((ret = should_duplicate_loop_header_p (header, loop, ranger,
> +                                                   &remaining_limit,
> +                                                   invariant_exits,
> +                                                   static_exits))
> +            != ch_impossible)
>         {
>           nheaders++;
> -         if (dump_file && (dump_flags & TDF_DETAILS))
> -           fprintf (dump_file, "    Will duplicate bb %i\n", header->index);
> +         if (ret >= ch_win)
> +           {
> +             last_win_nheaders = nheaders;
> +             last_win_invariant_exit = (ret == ch_win_invariant_exit);
> +             if (dump_file && (dump_flags & TDF_DETAILS))
> +               fprintf (dump_file, "    Duplicating bb %i is a win\n",
> +                        header->index);
> +           }
> +         /* Duplicate BB if has zero cost but be sure it will not
> +            imply duplication of other BBs.  */
> +         else if (ret == ch_possible_zero_cost
> +                  && (last_win_nheaders == nheaders - 1
> +                      || (last_win_nheaders == nheaders - 2
> +                          && last_win_invariant_exit)))
> +           {
> +             last_win_nheaders = nheaders;
> +             last_win_invariant_exit = false;
> +             if (dump_file && (dump_flags & TDF_DETAILS))
> +               fprintf (dump_file,
> +                        "    Duplicating bb %i is a win; it has zero cost\n",
> +                        header->index);
> +           }
> +         else
> +           if (dump_file && (dump_flags & TDF_DETAILS))
> +             fprintf (dump_file, "    May duplicate bb %i\n", header->index);
>
>           /* Find a successor of header that is inside a loop; i.e. the new
>              header after the condition is copied.  */
> @@ -811,8 +867,27 @@ ch_base::copy_headers (function *fun)
>             header = EDGE_SUCC (header, 1)->dest;
>         }
>
> -      if (nheaders)
> -       candidates.safe_push ({loop, nheaders, invariant_exits, 
> static_exits});
> +      /* Try to turn loop into do-while.  This means ensuring that
> +        last duplicated header will not have loop invariant exit.  */
> +      if (last_win_nheaders && last_win_invariant_exit
> +         && nheaders > last_win_nheaders)
> +       {
> +         last_win_nheaders++;
> +         if (dump_file && (dump_flags & TDF_DETAILS))
> +           fprintf (dump_file,
> +                    "    Duplicating additional BB to obtain do-while 
> loop\n");
> +       }
> +      else if (!last_win_nheaders && nheaders && !do_while_loop_p (loop))
> +       {
> +         last_win_nheaders = 1;
> +         if (dump_file && (dump_flags & TDF_DETAILS))
> +           fprintf (dump_file,
> +                    "    Duplicating header BB to obtain do-while loop\n");
> +       }
> +
> +      if (last_win_nheaders)
> +       candidates.safe_push ({loop, last_win_nheaders,
> +                             invariant_exits, static_exits});
>        else
>         {
>           delete invariant_exits;
> @@ -844,6 +919,8 @@ ch_base::copy_headers (function *fun)
>           entry_count += e->count ();
>        while (n_bbs < candidate.nheaders)
>         {
> +           if (dump_file && (dump_flags & TDF_DETAILS))
> +             fprintf (dump_file, "    Will duplicate bb %i\n", 
> header->index);
>           /* Find a successor of header that is inside a loop; i.e. the new
>              header after the condition is copied.  */
>           if (flow_bb_inside_loop_p (loop, EDGE_SUCC (header, 0)->dest))
> @@ -1111,9 +1188,9 @@ pass_ch_vect::execute (function *fun)
>  /* Apply header copying according to a very simple test of do-while shape.  
> */
>
>  bool
> -pass_ch::process_loop_p (class loop *loop)
> +pass_ch::process_loop_p (class loop *)
>  {
> -  return !do_while_loop_p (loop);
> +  return true;
>  }
>
>  /* Apply header-copying to loops where we might enable vectorization.  */

Reply via email to