Hi! This is an attempt to extend RTL unroller to allow cases like mentioned in the PR - namely when loop has duplicated exit blocks and back edges.
Bootstrapped and regtested on x86_64, also checking wide range of benchmarks - spec2K, spec2006, EEMBC Is it ok for trunk in case if no testing issues? Thanks, Igor Changelog: Gcc: 2014-12-19 Igor Zamyatin <igor.zamya...@intel.com> PR rtl-optimization/64081 * loop-iv.c (def_pred_latch_p): New function. (latch_dominating_def): Allow specific cases with non-single definitions. (iv_get_reaching_def): Likewise. (check_complex_exit_p): New function. (check_simple_exit): Use check_complex_exit_p to allow certain cases with exits not executing on any iteration. Testsuite: 2014-12-19 Igor Zamyatin <igor.zamya...@intel.com> PR rtl-optimization/64081 * gcc.dg/pr64081.c: New test. diff --git a/gcc/loop-iv.c b/gcc/loop-iv.c index f55cea2..d5d48f1 100644 --- a/gcc/loop-iv.c +++ b/gcc/loop-iv.c @@ -314,34 +314,67 @@ iv_analysis_loop_init (struct loop *loop) check_iv_ref_table_size (); } +/* Checks that def is in an immediate predecessor of the latch block. */ + +static bool +def_pred_latch_p (df_ref d_ref) +{ + basic_block bb = DF_REF_BB (d_ref); + edge_iterator ei; + edge e; + + FOR_EACH_EDGE (e, ei, current_loop->latch->preds) + { + if (e->src == bb) + return true; + } + return false; +} + /* Finds the definition of REG that dominates loop latch and stores it to DEF. Returns false if there is not a single definition - dominating the latch. If REG has no definition in loop, DEF + dominating the latch or all defs are same and they are on different + predecessors of loop latch. If REG has no definition in loop, DEF is set to NULL and true is returned. */ static bool latch_dominating_def (rtx reg, df_ref *def) { df_ref single_rd = NULL, adef; - unsigned regno = REGNO (reg); + unsigned regno = REGNO (reg), def_num = 0; struct df_rd_bb_info *bb_info = DF_RD_BB_INFO (current_loop->latch); for (adef = DF_REG_DEF_CHAIN (regno); adef; adef = DF_REF_NEXT_REG (adef)) { + /* Initialize this to true for the very first iteration when + SINGLE_RD is NULL. */ + bool def_pred_latch = true; + if (!bitmap_bit_p (df->blocks_to_analyze, DF_REF_BBNO (adef)) || !bitmap_bit_p (&bb_info->out, DF_REF_ID (adef))) continue; - /* More than one reaching definition. */ + /* More than one reaching definition is ok in case definitions are + in predecessors of latch block and those definitions are the same. + Probably this could be relaxed and check for sub-dominance instead + predecessor. */ if (single_rd) - return false; - - if (!just_once_each_iteration_p (current_loop, DF_REF_BB (adef))) - return false; + { + def_num++; + if (!(def_pred_latch = def_pred_latch_p (adef)) + || !rtx_equal_p( PATTERN (DF_REF_INSN (single_rd)), + PATTERN (DF_REF_INSN (adef)))) + return false; + } single_rd = adef; } + /* If we have single definition it has to be excuted on each iteration. */ + if (!def_num && single_rd + && !just_once_each_iteration_p (current_loop, DF_REF_BB (single_rd))) + return false; + *def = single_rd; return true; } @@ -351,10 +384,10 @@ latch_dominating_def (rtx reg, df_ref *def) static enum iv_grd_result iv_get_reaching_def (rtx_insn *insn, rtx reg, df_ref *def) { - df_ref use, adef; + df_ref use, adef = NULL; basic_block def_bb, use_bb; rtx_insn *def_insn; - bool dom_p; + bool dom_p, dom_latch_p = false; *def = NULL; if (!simple_reg_p (reg)) @@ -369,11 +402,26 @@ iv_get_reaching_def (rtx_insn *insn, rtx reg, df_ref *def) if (!DF_REF_CHAIN (use)) return GRD_INVARIANT; - /* More than one reaching def. */ + adef = DF_REF_CHAIN (use)->ref; + /* Having more than one reaching def is ok once all defs are in blocks which + are latch's predecessors. */ if (DF_REF_CHAIN (use)->next) - return GRD_INVALID; + { + df_link* defs; + unsigned int def_num = 0; - adef = DF_REF_CHAIN (use)->ref; + for (defs = DF_REF_CHAIN (use); defs; defs = defs->next) + { + if (!def_pred_latch_p (defs->ref)) + return GRD_INVALID; + def_num++; + } + /* Make sure all preds contain definitions. */ + if (def_num != EDGE_COUNT (current_loop->latch->preds)) + return GRD_INVALID; + + dom_latch_p = true; + } /* We do not handle setting only part of the register. */ if (DF_REF_FLAGS (adef) & DF_REF_READ_WRITE) @@ -396,8 +444,8 @@ iv_get_reaching_def (rtx_insn *insn, rtx reg, df_ref *def) /* The definition does not dominate the use. This is still OK if this may be a use of a biv, i.e. if the def_bb dominates loop - latch. */ - if (just_once_each_iteration_p (current_loop, def_bb)) + latch or all defs are in latch's predecessors. */ + if (dom_latch_p || just_once_each_iteration_p (current_loop, def_bb)) return GRD_MAYBE_BIV; return GRD_INVALID; @@ -2910,6 +2958,49 @@ fail: return; } +/* Check whether exit is not simple but still good for further analysis. + Looks like such loops mostly are a result of jump threading. */ + +static bool +check_complex_exit_p (struct loop* loop, basic_block bb) +{ + edge e; + basic_block pred, exit; + + if (EDGE_COUNT (bb->preds) > 1) + return false; + + e = EDGE_PRED (bb, 0); + + pred = e->src; + if (EDGE_COUNT (pred->succs) != 2) + return false; + + /* Predecessor must be tested (at least) once during any iteration. */ + if (!dominated_by_p (CDI_DOMINATORS, loop->latch, pred)) + return false; + + if (EDGE_SUCC (pred, 0)->dest == bb) + exit = EDGE_SUCC (pred, 1)->dest; + else + exit = EDGE_SUCC (pred, 0)->dest; + + /* Check that EXIT is really loop exit. */ + if (flow_bb_inside_loop_p (loop, exit)) + { + edge_iterator eei; + edge ee; + + FOR_EACH_EDGE (ee, eei, exit->succs) + { + if (!flow_bb_inside_loop_p (loop, ee->dest)) + return true; + } + } + return false; + +} + /* Checks whether E is a simple exit from LOOP and stores its description into DESC. */ @@ -2929,7 +3020,8 @@ check_simple_exit (struct loop *loop, edge e, struct niter_desc *desc) return; /* It must be tested (at least) once during any iteration. */ - if (!dominated_by_p (CDI_DOMINATORS, loop->latch, exit_bb)) + if (!dominated_by_p (CDI_DOMINATORS, loop->latch, exit_bb) + && !check_complex_exit_p (loop, exit_bb)) return; /* It must end in a simple conditional jump. */ diff --git a/gcc/testsuite/gcc.dg/pr64081.c b/gcc/testsuite/gcc.dg/pr64081.c new file mode 100644 index 0000000..6151d00 --- /dev/null +++ b/gcc/testsuite/gcc.dg/pr64081.c @@ -0,0 +1,41 @@ +/* PR rtl-optimization/64081 */ +/* { dg-do compile } */ +/* { dg-options "-O2 -funroll-loops -fdump-rtl-loop2_unroll" } */ + +long token; +long *data; +long *arr1; +long *arr2; + +int main (int argc, const char* argv[]) +{ + unsigned long i; + static long pos, start, count; + static int dir; + + pos = 0; + dir = 1; + + for (i = 0; i < argc; i++) + { + start = pos; + for (count = 0; count <= 400; count++) + { + if (token == data[pos]) + break; + if (dir == 1) + pos = arr1[pos]; + else + pos = arr2[pos]; + if (pos == -1) + { + pos = start; + dir = !dir; + } + } + } + return pos + dir; +} + +/* { dg-final { scan-rtl-dump "loop unrolled 7 times" "loop2_unroll" } } */ +/* { dg-final { cleanup-rtl-dump "loop*" } } */