Hi, last Thursday I had to revert a previous version of this patch because it has caused a lot of trouble on various platforms I did not test it on. The patch is still very similar to its previous iteration (http://gcc.gnu.org/ml/gcc-patches/2013-10/msg02183.html) the only major difference is that I moved more after-the-fact re-analyzing from find_moveable_pseudos to a point when my transformation also finished.
The problem with it was that REG_N_REFS of the pseudos introduced by my code was not set and thus reload ignored and let it slip through instead of generating spills. This is fixed by moving the re-computation of regstat_n_sets_and_refs in regstat_init_n_sets_and_refs to after my transformation. In order to avoid similar surprises I have moved all other re-computations from find_moveable_pseudos to the caller in ira, namely: fix_reg_equiv_init (); expand_reg_info (); regstat_free_n_sets_and_refs (); regstat_free_ri (); regstat_init_n_sets_and_refs (); regstat_compute_ri (); I have also bootstrapped and tested the patch not only on x86_64-linux but also on i686-linux and ppc64-linux (without Ada though). I have made sure that the reported problem does not occur on cris-elf and sh-none-elf cross compilers. (I could not reproduce it on arm, and do not have access to sparc but it was also reported there.) Another minor change which I erroneously omitted the last time is that the testcases are run on x86_64 only because that is the only platform where I know the transformation currently takes place. The reason why I did not move them to a target-specific directory is that I believe the transformation can be beneficial on other platforms as well. For example, PR 10474 was actually filed against PPC but this patch does not work there because the initial move of the parameter is done in a parallel insn: (parallel [ (set (reg:CC 124) (compare:CC (reg:DI 3 3 [ i ]) (const_int 0 [0]))) (set (reg/v/f:DI 123 [ i ]) (reg:DI 3 3 [ i ])) ]) which fails my single_set test. However, the mechanism can be extended to handle these situations as well and afterwards we could run the test also on ppc64. So, Vlad, Steven, do you think that this time I have re-computed all that is necessary? Do you think the patch is OK? Thanks a lot and sorry for the breakage, Martin 2013-11-04 Martin Jambor <mjam...@suse.cz> PR rtl-optimization/10474 * ira.c (interesting_dest_for_shprep): New function. (split_live_ranges_for_shrink_wrap): Likewise. (find_moveable_pseudos): Move calculation of dominance info, df_analysios and the final anlyses to... (ira): ...here, call split_live_ranges_for_shrink_wrap. testsuite/ * gcc.dg/pr10474.c: New testcase. * gcc.dg/ira-shrinkwrap-prep-1.c: Likewise. * gcc.dg/ira-shrinkwrap-prep-2.c: Likewise. diff --git a/gcc/ira.c b/gcc/ira.c index 9e94704..70fe13b 100644 --- a/gcc/ira.c +++ b/gcc/ira.c @@ -4514,9 +4514,6 @@ find_moveable_pseudos (void) pseudo_replaced_reg.release (); pseudo_replaced_reg.safe_grow_cleared (max_regs); - df_analyze (); - calculate_dominance_info (CDI_DOMINATORS); - i = 0; bitmap_initialize (&live, 0); bitmap_initialize (&used, 0); @@ -4829,14 +4826,196 @@ find_moveable_pseudos (void) free (bb_moveable_reg_sets); last_moveable_pseudo = max_reg_num (); +} - fix_reg_equiv_init (); - expand_reg_info (); - regstat_free_n_sets_and_refs (); - regstat_free_ri (); - regstat_init_n_sets_and_refs (); - regstat_compute_ri (); - free_dominance_info (CDI_DOMINATORS); + +/* If insn is interesting for parameter range-splitting shring-wrapping + preparation, i.e. it is a single set from a hard register to a pseudo, which + is live at CALL_DOM, return the destination. Otherwise return NULL. */ + +static rtx +interesting_dest_for_shprep (rtx insn, basic_block call_dom) +{ + rtx set = single_set (insn); + if (!set) + return NULL; + rtx src = SET_SRC (set); + rtx dest = SET_DEST (set); + if (!REG_P (src) || !HARD_REGISTER_P (src) + || !REG_P (dest) || HARD_REGISTER_P (dest) + || (call_dom && !bitmap_bit_p (df_get_live_in (call_dom), REGNO (dest)))) + return NULL; + return dest; +} + +/* Split live ranges of pseudos that are loaded from hard registers in the + first BB in a BB that dominates all non-sibling call if such a BB can be + found and is not in a loop. Return true if the function has made any + changes. */ + +static bool +split_live_ranges_for_shrink_wrap (void) +{ + basic_block bb, call_dom = NULL; + basic_block first = single_succ (ENTRY_BLOCK_PTR); + rtx insn, last_interesting_insn = NULL; + bitmap_head need_new, reachable; + vec<basic_block> queue; + + if (!flag_shrink_wrap) + return false; + + bitmap_initialize (&need_new, 0); + bitmap_initialize (&reachable, 0); + queue.create (n_basic_blocks); + + FOR_EACH_BB (bb) + FOR_BB_INSNS (bb, insn) + if (CALL_P (insn) && !SIBLING_CALL_P (insn)) + { + if (bb == first) + { + bitmap_clear (&need_new); + bitmap_clear (&reachable); + queue.release (); + return false; + } + + bitmap_set_bit (&need_new, bb->index); + bitmap_set_bit (&reachable, bb->index); + queue.quick_push (bb); + break; + } + + if (queue.is_empty ()) + { + bitmap_clear (&need_new); + bitmap_clear (&reachable); + queue.release (); + return false; + } + + while (!queue.is_empty ()) + { + edge e; + edge_iterator ei; + + bb = queue.pop (); + FOR_EACH_EDGE (e, ei, bb->succs) + if (e->dest != EXIT_BLOCK_PTR + && bitmap_set_bit (&reachable, e->dest->index)) + queue.quick_push (e->dest); + } + queue.release (); + + FOR_BB_INSNS (first, insn) + { + rtx dest = interesting_dest_for_shprep (insn, NULL); + if (!dest) + continue; + + if (DF_REG_DEF_COUNT (REGNO (dest)) > 1) + { + bitmap_clear (&need_new); + bitmap_clear (&reachable); + return false; + } + + for (df_ref use = DF_REG_USE_CHAIN (REGNO(dest)); + use; + use = DF_REF_NEXT_REG (use)) + { + if (NONDEBUG_INSN_P (DF_REF_INSN (use)) + && GET_CODE (DF_REF_REG (use)) == SUBREG) + { + /* This is necessary to avoid hitting an assert at + postreload.c:2294 in libstc++ testcases on x86_64-linux. I'm + not really sure what the probblem actually is there. */ + bitmap_clear (&need_new); + bitmap_clear (&reachable); + return false; + } + + int ubbi = DF_REF_BB (use)->index; + if (bitmap_bit_p (&reachable, ubbi)) + bitmap_set_bit (&need_new, ubbi); + } + last_interesting_insn = insn; + } + + bitmap_clear (&reachable); + if (!last_interesting_insn) + { + bitmap_clear (&need_new); + return false; + } + + call_dom = nearest_common_dominator_for_set (CDI_DOMINATORS, &need_new); + bitmap_clear (&need_new); + if (call_dom == first) + return false; + + loop_optimizer_init (AVOID_CFG_MODIFICATIONS); + while (bb_loop_depth (call_dom) > 0) + call_dom = get_immediate_dominator (CDI_DOMINATORS, call_dom); + loop_optimizer_finalize (); + + if (call_dom == first) + return false; + + calculate_dominance_info (CDI_POST_DOMINATORS); + if (dominated_by_p (CDI_POST_DOMINATORS, first, call_dom)) + { + free_dominance_info (CDI_POST_DOMINATORS); + return false; + } + free_dominance_info (CDI_POST_DOMINATORS); + + if (dump_file) + fprintf (dump_file, "Will split live ranges of parameters at BB %i\n", + call_dom->index); + + bool ret = false; + FOR_BB_INSNS (first, insn) + { + rtx dest = interesting_dest_for_shprep (insn, call_dom); + if (!dest) + continue; + + rtx newreg = NULL_RTX; + df_ref use, next; + for (use = DF_REG_USE_CHAIN (REGNO(dest)); use; use = next) + { + rtx uin = DF_REF_INSN (use); + next = DF_REF_NEXT_REG (use); + + basic_block ubb = BLOCK_FOR_INSN (uin); + if (ubb == call_dom + || dominated_by_p (CDI_DOMINATORS, ubb, call_dom)) + { + if (!newreg) + newreg = ira_create_new_reg (dest); + validate_change (uin, DF_REF_LOC (use), newreg, true); + } + } + + if (newreg) + { + rtx new_move = gen_move_insn (newreg, dest); + emit_insn_after (new_move, bb_note (call_dom)); + if (dump_file) + { + fprintf (dump_file, "Split live-range of register "); + print_rtl_single (dump_file, dest); + } + ret = true; + } + + if (insn == last_interesting_insn) + break; + } + apply_change_group (); + return ret; } /* Perform the second half of the transformation started in @@ -5048,7 +5227,22 @@ ira (FILE *f) allocation because of -O0 usage or because the function is too big. */ if (ira_conflicts_p) - find_moveable_pseudos (); + { + df_analyze (); + calculate_dominance_info (CDI_DOMINATORS); + + find_moveable_pseudos (); + if (split_live_ranges_for_shrink_wrap ()) + df_analyze (); + + fix_reg_equiv_init (); + expand_reg_info (); + regstat_free_n_sets_and_refs (); + regstat_free_ri (); + regstat_init_n_sets_and_refs (); + regstat_compute_ri (); + free_dominance_info (CDI_DOMINATORS); + } max_regno_before_ira = max_reg_num (); ira_setup_eliminable_regset (true); diff --git a/gcc/testsuite/gcc.dg/ira-shrinkwrap-prep-1.c b/gcc/testsuite/gcc.dg/ira-shrinkwrap-prep-1.c new file mode 100644 index 0000000..16095e3 --- /dev/null +++ b/gcc/testsuite/gcc.dg/ira-shrinkwrap-prep-1.c @@ -0,0 +1,31 @@ +/* { dg-do compile { target x86_64-*-* } } */ +/* { dg-options "-O3 -fdump-rtl-ira -fdump-rtl-pro_and_epilogue" } */ + +int __attribute__((noinline, noclone)) +foo (int a) +{ + return a + 5; +} + +static int g; + +int __attribute__((noinline, noclone)) +bar (int a) +{ + int r; + + if (a) + { + r = foo (a); + g = r + a; + } + else + r = a+1; + return r; +} + +/* { dg-final { scan-rtl-dump "Will split live ranges of parameters" "ira" } } */ +/* { dg-final { scan-rtl-dump "Split live-range of register" "ira" } } */ +/* { dg-final { scan-rtl-dump "Performing shrink-wrapping" "pro_and_epilogue" } } */ +/* { dg-final { cleanup-rtl-dump "ira" } } */ +/* { dg-final { cleanup-rtl-dump "pro_and_epilogue" } } */ diff --git a/gcc/testsuite/gcc.dg/ira-shrinkwrap-prep-2.c b/gcc/testsuite/gcc.dg/ira-shrinkwrap-prep-2.c new file mode 100644 index 0000000..2b00c5b --- /dev/null +++ b/gcc/testsuite/gcc.dg/ira-shrinkwrap-prep-2.c @@ -0,0 +1,36 @@ +/* { dg-do compile { target x86_64-*-* } } */ +/* { dg-options "-O3 -fdump-rtl-ira -fdump-rtl-pro_and_epilogue" } */ + +int __attribute__((noinline, noclone)) +foo (int a) +{ + return a + 5; +} + +static int g; + +int __attribute__((noinline, noclone)) +bar (int a) +{ + int r; + + if (a) + { + r = a; + while (r < 500) + if (r % 2) + r = foo (r); + else + r = foo (r+1); + g = r + a; + } + else + r = g+1; + return r; +} + +/* { dg-final { scan-rtl-dump "Will split live ranges of parameters" "ira" } } */ +/* { dg-final { scan-rtl-dump "Split live-range of register" "ira" } } */ +/* { dg-final { scan-rtl-dump "Performing shrink-wrapping" "pro_and_epilogue" } } */ +/* { dg-final { cleanup-rtl-dump "ira" } } */ +/* { dg-final { cleanup-rtl-dump "pro_and_epilogue" } } */ diff --git a/gcc/testsuite/gcc.dg/pr10474.c b/gcc/testsuite/gcc.dg/pr10474.c new file mode 100644 index 0000000..4fcd75d --- /dev/null +++ b/gcc/testsuite/gcc.dg/pr10474.c @@ -0,0 +1,16 @@ +/* { dg-do compile { target x86_64-*-* } } */ +/* { dg-options "-O3 -fdump-rtl-pro_and_epilogue" } */ + +void f(int *i) +{ + if (!i) + return; + else + { + __builtin_printf("Hi"); + *i=0; + } +} + +/* { dg-final { scan-rtl-dump "Performing shrink-wrapping" "pro_and_epilogue" } } */ +/* { dg-final { cleanup-rtl-dump "pro_and_epilogue" } } */