After reading the code in regcprop.c, I think I should reuse the copyprop_hardreg_forward_1. So rewrite the patch, which is much simple and should handle HAVE_cc0. But not sure we'd handle DEBUG_INSN or not.
2014-05-13 Zhenqiang Chen <zhenqiang.c...@linaro.org> * regcprop.c (skip_debug_insn_p): New decl. (replace_oldest_value_reg): Check skip_debug_insn_p. (copyprop_hardreg_forward_bb_without_debug_insn.): New function. * shrink-wrap.c (prepare_shrink_wrap): Call copyprop_hardreg_forward_bb_without_debug_insn. * function.h (copyprop_hardreg_forward_bb_without_debug_insn): New prototype. testsuite/ChangeLog: 2014-05-13 Zhenqiang Chen <zhenqiang.c...@linaro.org> * shrink-wrap-loop.c: New test case. diff --git a/gcc/regcprop.c b/gcc/regcprop.c index a710cc38..f76a944 100644 --- a/gcc/regcprop.c +++ b/gcc/regcprop.c @@ -77,6 +77,7 @@ struct value_data }; static alloc_pool debug_insn_changes_pool; +static bool skip_debug_insn_p = false; static void kill_value_one_regno (unsigned, struct value_data *); static void kill_value_regno (unsigned, unsigned, struct value_data *); @@ -485,7 +486,7 @@ replace_oldest_value_reg (rtx *loc, enum reg_class cl, rtx insn, struct value_data *vd) { rtx new_rtx = find_oldest_value_reg (cl, *loc, vd); - if (new_rtx) + if (new_rtx && (!DEBUG_INSN_P (insn) || !skip_debug_insn_p)) { if (DEBUG_INSN_P (insn)) { @@ -1112,6 +1113,26 @@ debug_value_data (struct value_data *vd) vd->e[i].next_regno); } +/* Do copyprop_hardreg_forward_1 for a single basic block BB. + DEBUG_INSN is skipped since we do not want to involve DF related + staff as how it is handled in function pass_cprop_hardreg::execute. + + NOTE: Currently it is only used for shrink-wrap. Maybe extend it + to handle DEBUG_INSN for other uses. */ + +void +copyprop_hardreg_forward_bb_without_debug_insn (basic_block bb) +{ + struct value_data *vd; + vd = XNEWVEC (struct value_data, 1); + init_value_data (vd); + + skip_debug_insn_p = true; + copyprop_hardreg_forward_1 (bb, vd); + free (vd); + skip_debug_insn_p = false; +} + #ifdef ENABLE_CHECKING static void validate_value_data (struct value_data *vd) diff --git a/gcc/shrink-wrap.c b/gcc/shrink-wrap.c index f11e920..ce49f16 100644 --- a/gcc/shrink-wrap.c +++ b/gcc/shrink-wrap.c @@ -320,6 +320,15 @@ prepare_shrink_wrap (basic_block entry_block) df_ref *ref; bool split_p = false; + if (JUMP_P (BB_END (entry_block))) + { + /* To have more shrink-wrapping opportunities, prepare_shrink_wrap tries + to sink the copies from parameter to callee saved register out of + entry block. copyprop_hardreg_forward_bb_without_debug_insn is called + to release some dependences. */ + copyprop_hardreg_forward_bb_without_debug_insn (entry_block); + } + CLEAR_HARD_REG_SET (uses); CLEAR_HARD_REG_SET (defs); FOR_BB_INSNS_REVERSE_SAFE (entry_block, insn, curr) diff --git a/gcc/shrink-wrap.h b/gcc/shrink-wrap.h index 22b1d5c..9058d34 100644 --- a/gcc/shrink-wrap.h +++ b/gcc/shrink-wrap.h @@ -45,6 +45,8 @@ extern edge get_unconverted_simple_return (edge, bitmap_head, extern void convert_to_simple_return (edge entry_edge, edge orig_entry_edge, bitmap_head bb_flags, rtx returnjump, vec<edge> unconverted_simple_returns); +/* In regcprop.c */ +extern void copyprop_hardreg_forward_bb_without_debug_insn (basic_block bb); #endif #endif /* GCC_SHRINK_WRAP_H */ diff --git a/gcc/testsuite/gcc.dg/shrink-wrap-loop.c b/gcc/testsuite/gcc.dg/shrink-wrap-loop.c new file mode 100644 index 0000000..17dca4e --- /dev/null +++ b/gcc/testsuite/gcc.dg/shrink-wrap-loop.c @@ -0,0 +1,20 @@ +/* { dg-do compile { target { { x86_64-*-* } || { arm_thumb2 } } } } */ +/* { dg-options "-O2 -fdump-rtl-pro_and_epilogue" } */ + +int foo (int *p1, int *p2); + +int +test (int *p1, int *p2) +{ + int *p; + + for (p = p2; p != 0; p++) + { + if (!foo (p, p1)) + return 0; + } + + return 1; +} +/* { dg-final { scan-rtl-dump "Performing shrink-wrapping" "pro_and_epilogue" } } */ +/* { dg-final { cleanup-rtl-dump "pro_and_epilogue" } } */ On 9 May 2014 14:00, Jeff Law <l...@redhat.com> wrote: > On 05/08/14 02:06, Zhenqiang Chen wrote: >> >> Hi, >> >> Similar issue was discussed in thread >> http://gcc.gnu.org/ml/gcc-patches/2013-04/msg01145.html. The patches >> are close to Jeff's suggestion: "sink just the moves out of the >> incoming argument registers". >> >> The patch and following one try to shrink wrap a function with a >> single loop, which can not be handled by >> split_live_ranges_for_shrink_wrap and prepare_shrink_wrap, since the >> induction variable has more than one definitions. Take the test case >> in the patch as example, the pseudo code before shrink-wrap is like: >> >> p = p2 >> if (!p) goto return >> L1: ... >> p = ... >> ... >> goto L1 >> ... >> return: >> >> Function prepare_shrink_wrap does PRE like optimization to sink some >> copies from entry block to the live block. The patches enhance >> prepare_shrink_wrap with: >> (1) Replace the reference of p to p2 in the entry block. (This patch) >> (2) Create a new basic block on the live edge to hold the copy "p = >> p2". (Next patch) >> >> After shrink-wrap, the pseudo code would like: >> >> if (!p2) goto return >> p = p2 >> L1: ... >> p = ... >> ... >> goto L1 >> return: > > Right. Seems like a reasonably useful transformation. Not the totally > general solution, but one that clearly has value. > > > >> 2014-05-08 Zhenqiang Chen <zhenqiang.c...@linaro.org> >> >> * function.c (last_or_compare_p, try_copy_prop): new functions. >> (move_insn_for_shrink_wrap): try copy propagation. >> (prepare_shrink_wrap): Separate last_uses from uses. >> >> testsuite/ChangeLog: >> 2014-05-08 Zhenqiang Chen <zhenqiang.c...@linaro.org> >> >> * shrink-wrap-loop.c: New test case. > > So first at a high level, Steven B.'s recommendation to pull the > shrink-wrapping bits out of function.c is a good one. I'd really like to > see that happen as well. > > >> +/* Check whether INSN is the last insn in BB or >> + a COMPARE for the last insn in BB. */ >> + >> +static bool >> +last_or_compare_p (basic_block bb, rtx insn) >> +{ >> + rtx x = single_set (insn); >> + >> + if ((insn == BB_END (bb)) >> + || ((insn == PREV_INSN (BB_END (bb))) >> + && x && REG_P (SET_DEST (x)) >> + && GET_MODE_CLASS (GET_MODE (SET_DEST (x))) == MODE_CC)) >> + return true; >> + >> + return false; > > So you don't actually check if the insn is a compare, just that it's > destination is MODE_CC. That's probably close enough, but please note it in > the comment. Folks are going to read the comment first, not the > implementation. > > Q. Do we have to do anything special for HAVE_cc0 targets here? > > >> +} >> + >> +/* Try to copy propagate INSN with SRC and DEST in BB to the last COMPARE >> + or JUMP insn, which use registers in LAST_USES. */ > > So why restrict this to just cases where we have to propagate into a COMPARE > at the end of a block? So in your example, assume the first block looks > like > > p = p2; > if (!q) return; > [ ... ] > > We ought to be able to turn that into > > if (!q) return > p = p2; > [ ... ] > > > >> + >> +static bool >> +try_copy_prop (basic_block bb, rtx insn, rtx src, rtx dest, >> + HARD_REG_SET *last_uses) > > My first thought here was that we must have some code which does 90% of what > you need. Did you look at any of the existing RTL optimization > infrastructure to see if there was code you could extend to handle this? > > Jeff