On Fri, Jul 21, 2023 at 1:53 PM Jan Hubicka via Gcc-patches <gcc-patches@gcc.gnu.org> wrote: > > Hi, > currently loop-ch skips all do-while loops. But when loop is not do-while > in addition to original goal of turining it to do-while it can do additional > things: > 1) move out loop invariant computations > 2) duplicate loop invariant conditionals and eliminate them in loop body. > 3) prove that some exits are always true in first iteration > and can be skipped > > Most of time 1 can be done by lim (exception is when the invariant computation > is conditional). For 2 we however don't really have other place doing it > except > for loop unswitching that is more expensive (it will duplicate the loop and > then optimize out one path to non-loop). > 3 can be done by loop peeling but it is also more expensive by duplicating > full > loop body. > > This patch improves heuristics by not giving up on do-while loops and trying > to find sequence of BBs to duplicate to obtain one of goals: > - turn loop to do-while > - eliminate invariant conditional in loop body > - do partial "peeling" as long as code optimizes enough so this does not > increase code size. > This can be improved upon, but I think this patch should finally get > heuristics into shape that it does not do weird things. > > The patch requires bit of testsuite changes > - I disabled ch in loop-unswitch-17.c since it tests unswitching of > loop invariant conditional. > - pr103079.c needs ch disabled to trigger vrp situation it tests for > (otherwise we optimize stuff earlier and better) > - copy-headers-7.c now gets only 2 basic blocks duplicated since > last conditional does not seem to benefit from duplicating, > so I reordered them. > copy-headers-9 tests the new logic. > > Bootstrapped/regtested x86_64-linux, OK?
OK. In case the size heuristics are a bit too optimistic we could avoid the peeling in the -Os case? Did you do any stats on TUs to see whether code actually increases in the end? Thanks, Richard. > gcc/ChangeLog: > > * tree-ssa-loop-ch.cc (enum ch_decision): New enum. > (should_duplicate_loop_header_p): Return info on profitability. > (do_while_loop_p): Watch for constant conditionals. > (update_profile_after_ch): Do not sanity check that all > static exits are taken. > (ch_base::copy_headers): Run on all loops. > (pass_ch::process_loop_p): Improve heuristics by handling also > do_while loop and duplicating shortest sequence containing all > winning blocks. > > gcc/testsuite/ChangeLog: > > * gcc.dg/loop-unswitch-17.c: Disable ch. > * gcc.dg/pr103079.c: Disable ch. > * gcc.dg/tree-ssa/copy-headers-7.c: Update so ch behaves > as expected. > * gcc.dg/tree-ssa/copy-headers.c: Update template. > * gcc.dg/tree-ssa/copy-headers-9.c: New test. > > diff --git a/gcc/testsuite/gcc.dg/loop-unswitch-17.c > b/gcc/testsuite/gcc.dg/loop-unswitch-17.c > index 8655e09a51c..4b806c475b1 100644 > --- a/gcc/testsuite/gcc.dg/loop-unswitch-17.c > +++ b/gcc/testsuite/gcc.dg/loop-unswitch-17.c > @@ -1,5 +1,5 @@ > /* { dg-do compile } */ > -/* { dg-options "-O2 -funswitch-loops -fdump-tree-unswitch-optimized" } */ > +/* { dg-options "-O2 -funswitch-loops -fdump-tree-unswitch-optimized > -fno-tree-ch" } */ > > int foo (int a) > { > diff --git a/gcc/testsuite/gcc.dg/pr103079.c b/gcc/testsuite/gcc.dg/pr103079.c > index 7f6632fc669..7b107544725 100644 > --- a/gcc/testsuite/gcc.dg/pr103079.c > +++ b/gcc/testsuite/gcc.dg/pr103079.c > @@ -1,5 +1,5 @@ > /* { dg-do compile } */ > -/* { dg-options "-Os -fdump-tree-vrp2" } */ > +/* { dg-options "-Os -fdump-tree-vrp2 -fno-tree-ch" } */ > > int a, b = -2; > int main() { > diff --git a/gcc/testsuite/gcc.dg/tree-ssa/copy-headers-7.c > b/gcc/testsuite/gcc.dg/tree-ssa/copy-headers-7.c > index e2a6c75f2e9..b3df3b6398e 100644 > --- a/gcc/testsuite/gcc.dg/tree-ssa/copy-headers-7.c > +++ b/gcc/testsuite/gcc.dg/tree-ssa/copy-headers-7.c > @@ -4,7 +4,7 @@ > int is_sorted(int *a, int n, int m, int k) > { > if (k > 0) > - for (int i = 0; i < n - 1 && m && k > i; i++) > + for (int i = 0; k > i && m && i < n - 1 ; i++) > if (a[i] > a[i + 1]) > return 0; > return 1; > @@ -17,5 +17,4 @@ int is_sorted(int *a, int n, int m, int k) > /* { dg-final { scan-tree-dump-times "Conditional combines static and > invariant" 0 "ch2" } } */ > /* { dg-final { scan-tree-dump-times "Will elliminate invariant exit" 1 > "ch2" } } */ > /* { dg-final { scan-tree-dump-times "Will eliminate peeled conditional" 1 > "ch2" } } */ > -/* { dg-final { scan-tree-dump-times "Not duplicating bb .: condition based > on non-IV loop variant." 1 "ch2" } } */ > /* { dg-final { scan-tree-dump-times "Will duplicate bb" 3 "ch2" } } */ > diff --git a/gcc/testsuite/gcc.dg/tree-ssa/copy-headers-9.c > b/gcc/testsuite/gcc.dg/tree-ssa/copy-headers-9.c > new file mode 100644 > index 00000000000..7cc162ca94d > --- /dev/null > +++ b/gcc/testsuite/gcc.dg/tree-ssa/copy-headers-9.c > @@ -0,0 +1,20 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O2 -fdump-tree-ch-details" } */ > +int a[100]; > +void test (int m, int n) > +{ > + int i = 0; > + do > + { > + if (m) > + break; > + i++; > + a[i]=0; > + } > + while (i<10); > +} > +/* { dg-final { scan-tree-dump-times "Duplicating bb . is a win" 1 "ch2" } } > */ > +/* { dg-final { scan-tree-dump-times "May duplicate bb" 1 "ch2" } } */ > +/* { dg-final { scan-tree-dump-times "Duplicating additional BB to obtain > do-while loop" 1 "ch2" } } */ > +/* { dg-final { scan-tree-dump-times "Will duplicate bb" 2 "ch2" } } */ > +/* { dg-final { scan-tree-dump "is now do-while loop" "ch2" } } */ > diff --git a/gcc/testsuite/gcc.dg/tree-ssa/copy-headers.c > b/gcc/testsuite/gcc.dg/tree-ssa/copy-headers.c > index a5a82121ff2..f4f2b2aa70b 100644 > --- a/gcc/testsuite/gcc.dg/tree-ssa/copy-headers.c > +++ b/gcc/testsuite/gcc.dg/tree-ssa/copy-headers.c > @@ -12,4 +12,4 @@ void bla (void) > } > > /* There should be a header duplicated. */ > -/* { dg-final { scan-tree-dump-times "Duplicating header" 1 "ch2"} } */ > +/* { dg-final { scan-tree-dump-times "Duplicating header of the" 1 "ch2"} } > */ > diff --git a/gcc/tree-ssa-loop-ch.cc b/gcc/tree-ssa-loop-ch.cc > index a87ebc58e3d..6cdb87a762f 100644 > --- a/gcc/tree-ssa-loop-ch.cc > +++ b/gcc/tree-ssa-loop-ch.cc > @@ -168,11 +168,28 @@ loop_combined_static_and_iv_p (class loop *loop, > return gimple_uid (SSA_NAME_DEF_STMT (op)) & 4; > } > > +/* Decision about posibility of copying a given header. */ > + > +enum ch_decision > +{ > + /* We do not want to copy this header. */ > + ch_impossible, > + /* We can copy it if it enables wins. */ > + ch_possible, > + /* We can "copy" it if it enables wins and doing > + so will introduce no new code. */ > + ch_possible_zero_cost, > + /* We want to copy. */ > + ch_win, > + /* We want to copy and we will eliminate loop exit. */ > + ch_win_invariant_exit > +}; > + > /* Check whether we should duplicate HEADER of LOOP. At most *LIMIT > instructions should be duplicated, limit is decreased by the actual > amount. */ > > -static bool > +static ch_decision > should_duplicate_loop_header_p (basic_block header, class loop *loop, > gimple_ranger *ranger, > int *limit, > @@ -190,7 +207,7 @@ should_duplicate_loop_header_p (basic_block header, class > loop *loop, > fprintf (dump_file, > " Not duplicating bb %i: it is single succ.\n", > header->index); > - return false; > + return ch_impossible; > } > > if (flow_bb_inside_loop_p (loop, EDGE_SUCC (header, 0)->dest) > @@ -200,7 +217,7 @@ should_duplicate_loop_header_p (basic_block header, class > loop *loop, > fprintf (dump_file, > " Not duplicating bb %i: both successors are in loop.\n", > loop->num); > - return false; > + return ch_impossible; > } > > /* If this is not the original loop header, we want it to have just > @@ -211,7 +228,7 @@ should_duplicate_loop_header_p (basic_block header, class > loop *loop, > fprintf (dump_file, > " Not duplicating bb %i: it has mutiple predecestors.\n", > header->index); > - return false; > + return ch_impossible; > } > > gcond *last = safe_dyn_cast <gcond *> (*gsi_last_bb (header)); > @@ -221,7 +238,7 @@ should_duplicate_loop_header_p (basic_block header, class > loop *loop, > fprintf (dump_file, > " Not duplicating bb %i: it does not end by conditional.\n", > header->index); > - return false; > + return ch_impossible; > } > > path_range_query *query = NULL; > @@ -233,6 +250,7 @@ should_duplicate_loop_header_p (basic_block header, class > loop *loop, > query, psi.phi ()); > gimple_set_uid (psi.phi (), static_p ? 2 : 0); > } > + bool code_size_cost = false; > > /* Count number of instructions and punt on calls. > Populate stmts INV flag to later apply heuristics to the > @@ -268,7 +286,7 @@ should_duplicate_loop_header_p (basic_block header, class > loop *loop, > header->index); > if (query) > delete query; > - return false; > + return ch_impossible; > } > > /* Classify the stmt is invariant in the loop. */ > @@ -343,6 +361,8 @@ should_duplicate_loop_header_p (basic_block header, class > loop *loop, > > int insns = estimate_num_insns (last, &eni_size_weights); > *limit -= insns; > + if (insns) > + code_size_cost = true; > if (dump_file && (dump_flags & TDF_DETAILS)) > fprintf (dump_file, > " Acconting stmt as %i insns\n", insns); > @@ -354,7 +374,7 @@ should_duplicate_loop_header_p (basic_block header, class > loop *loop, > header->index); > if (query) > delete query; > - return false; > + return ch_impossible; > } > } > > @@ -403,7 +423,7 @@ should_duplicate_loop_header_p (basic_block header, class > loop *loop, > if (dump_file && (dump_flags & TDF_DETAILS)) > fprintf (dump_file, > " Conditional combines static and invariant op.\n"); > - return true; > + return ch_win; > } > > edge static_exit = static_loop_exit (loop, header, *ranger, query); > @@ -435,11 +455,16 @@ should_duplicate_loop_header_p (basic_block header, > class loop *loop, > invariant_exits->add (e); > } > } > - return true; > + return ch_win_invariant_exit; > } > > + /* If the static exit fully optimize out, it is win to "duplicate" > + it. > + > + TODO: Even if duplication costs some size we may opt to do so in case > + exit probability is significant enough (do partial peeling). */ > if (static_exit) > - return true; > + return code_size_cost ? ch_possible_zero_cost : ch_win; > > /* We was not able to prove that conditional will be eliminated. */ > int insns = estimate_num_insns (last, &eni_size_weights); > @@ -453,19 +478,10 @@ should_duplicate_loop_header_p (basic_block header, > class loop *loop, > fprintf (dump_file, > " Not duplicating bb %i contains too many insns.\n", > header->index); > - return false; > - } > - > - if (header != loop->header) > - { > - if (dump_file && (dump_flags & TDF_DETAILS)) > - fprintf (dump_file, > - " Not duplicating bb %i: condition based on non-IV loop" > - " variant.\n", header->index); > - return false; > + return ch_impossible; > } > > - return true; > + return ch_possible; > } > > /* Checks whether LOOP is a do-while style loop. */ > @@ -496,10 +512,11 @@ do_while_loop_p (class loop *loop) > "predecessors.\n", loop->num); > return false; > } > + basic_block pred = single_pred (loop->latch); > > /* If the latch predecessor doesn't exit the loop, it is not a > do-while loop. */ > - if (!loop_exits_from_bb_p (loop, single_pred (loop->latch))) > + if (!loop_exits_from_bb_p (loop, pred)) > { > if (dump_file && (dump_flags & TDF_DETAILS)) > fprintf (dump_file, > @@ -507,6 +524,18 @@ do_while_loop_p (class loop *loop) > "does not exit loop.\n", loop->num); > return false; > } > + gcond *last = safe_dyn_cast <gcond *> (*gsi_last_bb (pred)); > + if (last > + && (gimple_cond_lhs (last) == boolean_false_node > + || gimple_cond_lhs (last) == boolean_true_node) > + && gimple_cond_rhs (last) == boolean_false_node) > + { > + if (dump_file && (dump_flags & TDF_DETAILS)) > + fprintf (dump_file, > + "Loop %i is not do-while loop: latch predecessor " > + "contains exit we optimized out.\n", loop->num); > + return false; > + } > > if (dump_file && (dump_flags & TDF_DETAILS)) > fprintf (dump_file, "Loop %i is do-while loop\n", loop->num); > @@ -634,9 +663,9 @@ update_profile_after_ch (class loop *loop, > } > entry_count = e_copy->count (); > } > - /* Be sure that we seen all edges we are supposed to update. */ > - gcc_checking_assert (static_exits->is_empty () > - && invariant_exits->is_empty ()); > + /* Be sure that we seen all invariant exit edges we are supposed to update. > + We may have recorded some static exists we decided to not duplicate. */ > + gcc_checking_assert (invariant_exits->is_empty ()); > } > > namespace { > @@ -792,16 +821,43 @@ ch_base::copy_headers (function *fun) > the header to have just a single successor and copying up to > postdominator. */ > int nheaders = 0; > + int last_win_nheaders = 0; > + bool last_win_invariant_exit = false; > + ch_decision ret; > hash_set <edge> *invariant_exits = new hash_set <edge>; > hash_set <edge> *static_exits = new hash_set <edge>; > - while (should_duplicate_loop_header_p (header, loop, ranger, > - &remaining_limit, > - invariant_exits, > - static_exits)) > + while ((ret = should_duplicate_loop_header_p (header, loop, ranger, > + &remaining_limit, > + invariant_exits, > + static_exits)) > + != ch_impossible) > { > nheaders++; > - if (dump_file && (dump_flags & TDF_DETAILS)) > - fprintf (dump_file, " Will duplicate bb %i\n", header->index); > + if (ret >= ch_win) > + { > + last_win_nheaders = nheaders; > + last_win_invariant_exit = (ret == ch_win_invariant_exit); > + if (dump_file && (dump_flags & TDF_DETAILS)) > + fprintf (dump_file, " Duplicating bb %i is a win\n", > + header->index); > + } > + /* Duplicate BB if has zero cost but be sure it will not > + imply duplication of other BBs. */ > + else if (ret == ch_possible_zero_cost > + && (last_win_nheaders == nheaders - 1 > + || (last_win_nheaders == nheaders - 2 > + && last_win_invariant_exit))) > + { > + last_win_nheaders = nheaders; > + last_win_invariant_exit = false; > + if (dump_file && (dump_flags & TDF_DETAILS)) > + fprintf (dump_file, > + " Duplicating bb %i is a win; it has zero cost\n", > + header->index); > + } > + else > + if (dump_file && (dump_flags & TDF_DETAILS)) > + fprintf (dump_file, " May duplicate bb %i\n", header->index); > > /* Find a successor of header that is inside a loop; i.e. the new > header after the condition is copied. */ > @@ -811,8 +867,27 @@ ch_base::copy_headers (function *fun) > header = EDGE_SUCC (header, 1)->dest; > } > > - if (nheaders) > - candidates.safe_push ({loop, nheaders, invariant_exits, > static_exits}); > + /* Try to turn loop into do-while. This means ensuring that > + last duplicated header will not have loop invariant exit. */ > + if (last_win_nheaders && last_win_invariant_exit > + && nheaders > last_win_nheaders) > + { > + last_win_nheaders++; > + if (dump_file && (dump_flags & TDF_DETAILS)) > + fprintf (dump_file, > + " Duplicating additional BB to obtain do-while > loop\n"); > + } > + else if (!last_win_nheaders && nheaders && !do_while_loop_p (loop)) > + { > + last_win_nheaders = 1; > + if (dump_file && (dump_flags & TDF_DETAILS)) > + fprintf (dump_file, > + " Duplicating header BB to obtain do-while loop\n"); > + } > + > + if (last_win_nheaders) > + candidates.safe_push ({loop, last_win_nheaders, > + invariant_exits, static_exits}); > else > { > delete invariant_exits; > @@ -844,6 +919,8 @@ ch_base::copy_headers (function *fun) > entry_count += e->count (); > while (n_bbs < candidate.nheaders) > { > + if (dump_file && (dump_flags & TDF_DETAILS)) > + fprintf (dump_file, " Will duplicate bb %i\n", > header->index); > /* Find a successor of header that is inside a loop; i.e. the new > header after the condition is copied. */ > if (flow_bb_inside_loop_p (loop, EDGE_SUCC (header, 0)->dest)) > @@ -1111,9 +1188,9 @@ pass_ch_vect::execute (function *fun) > /* Apply header copying according to a very simple test of do-while shape. > */ > > bool > -pass_ch::process_loop_p (class loop *loop) > +pass_ch::process_loop_p (class loop *) > { > - return !do_while_loop_p (loop); > + return true; > } > > /* Apply header-copying to loops where we might enable vectorization. */