Doing the indirect jump optimization turned out to be quite easy. Bootstrapped and regression tested on powerpc64le-linux (gcc-6, gcc-5 and gcc-4.9). The x86_64-linux bootstrap and regression test are still running. OK to apply?
gcc/ PR rtl-optimization/69195 PR rtl-optimization/47992 * ira.c (recorded_label_ref): Delete. (update_equiv_regs): Return void. (indirect_jump_optimize): New function. (ira): Call indirect_jump_optimize and delete_trivially_dead_insns before regstat_compute_ri. Don't rebuild_jump_labels here. Delete update_regstat. gcc/testsuite/ * gcc.dg/pr69195.c: New. * gcc.dg/pr69238.c: New. diff --git a/gcc/ira.c b/gcc/ira.c index 0973258..5e7a2ed 100644 --- a/gcc/ira.c +++ b/gcc/ira.c @@ -3319,9 +3319,6 @@ adjust_cleared_regs (rtx loc, const_rtx old_rtx ATTRIBUTE_UNUSED, void *data) return NULL_RTX; } -/* Nonzero if we recorded an equivalence for a LABEL_REF. */ -static int recorded_label_ref; - /* Find registers that are equivalent to a single value throughout the compilation (either because they can be referenced in memory or are set once from a single constant). Lower their priority for a @@ -3331,10 +3328,8 @@ static int recorded_label_ref; value into the using insn. If it succeeds, we can eliminate the register completely. - Initialize init_insns in ira_reg_equiv array. - - Return non-zero if jump label rebuilding should be done. */ -static int + Initialize init_insns in ira_reg_equiv array. */ +static void update_equiv_regs (void) { rtx_insn *insn; @@ -3343,10 +3338,6 @@ update_equiv_regs (void) bitmap cleared_regs; bool *pdx_subregs; - /* We need to keep track of whether or not we recorded a LABEL_REF so - that we know if the jump optimizer needs to be rerun. */ - recorded_label_ref = 0; - /* Use pdx_subregs to show whether a reg is used in a paradoxical subreg. */ pdx_subregs = XCNEWVEC (bool, max_regno); @@ -3578,17 +3569,6 @@ update_equiv_regs (void) = gen_rtx_INSN_LIST (VOIDmode, insn, ira_reg_equiv[regno].init_insns); - /* Record whether or not we created a REG_EQUIV note for a LABEL_REF. - We might end up substituting the LABEL_REF for uses of the - pseudo here or later. 
That kind of transformation may turn an - indirect jump into a direct jump, in which case we must rerun the - jump optimizer to ensure that the JUMP_LABEL fields are valid. */ - if (GET_CODE (x) == LABEL_REF - || (GET_CODE (x) == CONST - && GET_CODE (XEXP (x, 0)) == PLUS - && (GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF))) - recorded_label_ref = 1; - reg_equiv[regno].replacement = x; reg_equiv[regno].src_p = &SET_SRC (set); reg_equiv[regno].loop_depth = (short) loop_depth; @@ -3706,9 +3686,9 @@ update_equiv_regs (void) if (! INSN_P (insn)) continue; - /* Don't substitute into a non-local goto, this confuses CFG. */ - if (JUMP_P (insn) - && find_reg_note (insn, REG_NON_LOCAL_GOTO, NULL_RTX)) + /* Don't substitute into jumps. indirect_jump_optimize does + this for anything we are prepared to handle. */ + if (JUMP_P (insn)) continue; for (link = REG_NOTES (insn); link; link = XEXP (link, 1)) @@ -3860,11 +3840,50 @@ update_equiv_regs (void) end_alias_analysis (); free (reg_equiv); free (pdx_subregs); - return recorded_label_ref; } - +/* A pass over indirect jumps, converting simple cases to direct jumps. 
*/ +static void +indirect_jump_optimize (void) +{ + basic_block bb; + bool rebuild_p = false; + FOR_EACH_BB_REVERSE_FN (bb, cfun) + { + rtx_insn *insn = BB_END (bb); + if (!JUMP_P (insn)) + continue; + + rtx x = pc_set (insn); + if (!x || !REG_P (SET_SRC (x))) + continue; + + int regno = REGNO (SET_SRC (x)); + if (DF_REG_DEF_COUNT (regno) == 1) + { + rtx_insn *def_insn = DF_REF_INSN (DF_REG_DEF_CHAIN (regno)); + rtx note = find_reg_note (def_insn, REG_LABEL_OPERAND, NULL_RTX); + + if (note) + { + rtx lab = gen_rtx_LABEL_REF (Pmode, XEXP (note, 0)); + if (validate_replace_rtx (SET_SRC (x), lab, insn)) + rebuild_p = true; + } + } + } + + if (rebuild_p) + { + timevar_push (TV_JUMP); + rebuild_jump_labels (get_insns ()); + if (purge_all_dead_edges ()) + delete_unreachable_blocks (); + timevar_pop (TV_JUMP); + } +} + /* Set up fields memory, constant, and invariant from init_insns in the structures of array ira_reg_equiv. */ static void @@ -5090,7 +5109,6 @@ ira (FILE *f) { bool loops_p; int ira_max_point_before_emit; - int rebuild_p; bool saved_flag_caller_saves = flag_caller_saves; enum ira_region saved_flag_ira_region = flag_ira_region; @@ -5167,6 +5185,10 @@ ira (FILE *f) df_clear_flags (DF_NO_INSN_RESCAN); + indirect_jump_optimize (); + if (delete_trivially_dead_insns (get_insns (), max_reg_num ())) + df_analyze (); + regstat_init_n_sets_and_refs (); regstat_compute_ri (); @@ -5184,32 +5206,12 @@ ira (FILE *f) if (resize_reg_info () && flag_ira_loop_pressure) ira_set_pseudo_classes (true, ira_dump_file); - rebuild_p = update_equiv_regs (); + update_equiv_regs (); setup_reg_equiv (); setup_reg_equiv_init (); - bool update_regstat = false; - - if (optimize && rebuild_p) - { - timevar_push (TV_JUMP); - rebuild_jump_labels (get_insns ()); - if (purge_all_dead_edges ()) - { - delete_unreachable_blocks (); - update_regstat = true; - } - timevar_pop (TV_JUMP); - } - allocated_reg_info_size = max_reg_num (); - if (delete_trivially_dead_insns (get_insns (), max_reg_num ())) 
- { - df_analyze (); - update_regstat = true; - } - /* It is not worth to do such improvement when we use a simple allocation because of -O0 usage or because the function is too big. */ @@ -5319,7 +5321,7 @@ ira (FILE *f) check_allocation (); #endif - if (update_regstat || max_regno != max_regno_before_ira) + if (max_regno != max_regno_before_ira) { regstat_free_n_sets_and_refs (); regstat_free_ri (); diff --git a/gcc/testsuite/gcc.dg/pr69195.c b/gcc/testsuite/gcc.dg/pr69195.c new file mode 100644 index 0000000..af373a1 --- /dev/null +++ b/gcc/testsuite/gcc.dg/pr69195.c @@ -0,0 +1,27 @@ +/* { dg-do run } */ +/* { dg-options "-O3 -fno-dce -fno-forward-propagate" } */ + +void __attribute__ ((noinline, noclone)) +foo (int *a, int n) +{ + int *lasta = a + n; + for (; a != lasta; a++) + { + *a *= 2; + a[1] = a[-1] + a[-2]; + } +} + +int +main () +{ + int a[16] = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 }; + int r[16] = { 1, 2, 6, 6, 16, 24, 44, 80, + 136, 248, 432, 768, 1360, 2400, 4256, 3760 }; + unsigned i; + foo (&a[2], 13); + for (i = 0; i < 8; ++i) + if (a[i] != r[i]) + __builtin_abort (); + return 0; +} diff --git a/gcc/testsuite/gcc.dg/pr69238.c b/gcc/testsuite/gcc.dg/pr69238.c new file mode 100644 index 0000000..3538e63 --- /dev/null +++ b/gcc/testsuite/gcc.dg/pr69238.c @@ -0,0 +1,28 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -fno-dce -fno-forward-propagate -fno-rerun-cse-after-loop -funroll-loops" } */ + + +#define N 32 + +short sa[N]; +short sb[N]; +int ia[N]; +int ib[N]; + +int __attribute__ ((noinline, noclone)) +main1 (int n) +{ + int i; + for (i = 0; i < n; i++) + { + sa[i+7] = sb[i]; + ia[i+3] = ib[i+1]; + } + return 0; +} + +int +main (void) +{ + return main1 (N-7); +} -- Alan Modra Australia Development Lab, IBM