In an attempt to reduce compile time, rtl-ssa computes the cost of existing instructions lazily rather than eagerly. However, this means that it might need to calculate the cost of an existing instruction while a change group is already in progress for the instruction. rtl_ssa::insn_info::calculate_cost therefore temporarily undoes any in-progress changes in order to get back the original pattern and insn code.
rtl-ssa's main use of insn costs is in rtl_ssa::changes_are_worthwhile, which calculates the cost of a change involving an arbitrary number of instructions. Summing up the original cost of N instructions while those N instructions have in-progress changes could lead to O(N*N) rtl changes, since each lazy calculation might have to temporarily undo the changes to all N instructions. We can avoid that by converting the current temporarily_undo_changes/ redo_changes pair into an RAII class and extending it to allow nested uses. rtl_ssa::changes_are_worthwhile can then undo the in-progress changes once, before computing the original cost of all the instructions. Tested on aarch64-linux-gnu. Also tested against the testcase in the PR, where the old compile time was 3.7x greater than the new compile time (tested with a stage 3 --enable-checking=yes,extra,rtl compiler). late-combine went from being 73% of compile time to less than 1% (rounded to 0% by -fmem-report). The main time sinks now seem to be DOM and FRE. OK to install? Richard gcc/ PR rtl-optimization/117297 * recog.h (temporarily_undo_changes, redo_changes): Delete in favor of... (undo_recog_changes): ...this new RAII class. * fwprop.cc (should_replace_address): Update accordingly. (fwprop_propagation::check_mem): Likewise. (try_fwprop_subst_note): Likewise. (try_fwprop_subst_pattern): Likewise. * rtl-ssa/insns.cc (insn_info::calculate_cost): Likewise. * rtl-ssa/changes.cc (rtl_ssa::changes_are_worthwhile): Temporarily undo all in-progress changes while computing the cost of the original sequence. * recog.cc (temporarily_undone_changes): Replace with... (undo_recog_changes::s_num_changes): ...this static member variable. (validate_change_1): Update check accordingly. (confirm_change_group): Likewise. (num_validated_changes): Likewise. (temporarily_undo_changes): Replace with... (undo_recog_changes::undo_recog_changes): ...this constructor. (redo_changes): Replace with... (undo_recog_changes::~undo_recog_changes): ...this destructor. --- gcc/fwprop.cc | 30 ++++++++++++++++-------------- gcc/recog.cc | 39 +++++++++++++-------------------------- gcc/recog.h | 31 +++++++++++++++++++++++++++++-- gcc/rtl-ssa/changes.cc | 26 +++++++++++++++++--------- gcc/rtl-ssa/insns.cc | 3 +-- 5 files changed, 76 insertions(+), 53 deletions(-) diff --git a/gcc/fwprop.cc b/gcc/fwprop.cc index 8cba6b7ce9f..5ddefdf6d2f 100644 --- a/gcc/fwprop.cc +++ b/gcc/fwprop.cc @@ -146,10 +146,11 @@ should_replace_address (int old_num_changes, rtx mem, rtx_insn *insn) /* Prefer the new address if it is less expensive. */ bool speed = optimize_bb_for_speed_p (BLOCK_FOR_INSN (insn)); - temporarily_undo_changes (old_num_changes); - gain = address_cost (XEXP (mem, 0), GET_MODE (mem), - MEM_ADDR_SPACE (mem), speed); - redo_changes (old_num_changes); + { + undo_recog_changes undo (old_num_changes); + gain = address_cost (XEXP (mem, 0), GET_MODE (mem), + MEM_ADDR_SPACE (mem), speed); + } gain -= address_cost (XEXP (mem, 0), GET_MODE (mem), MEM_ADDR_SPACE (mem), speed); @@ -160,9 +161,8 @@ should_replace_address (int old_num_changes, rtx mem, rtx_insn *insn) if (gain == 0) { gain = set_src_cost (XEXP (mem, 0), VOIDmode, speed); - temporarily_undo_changes (old_num_changes); + undo_recog_changes undo (old_num_changes); gain -= set_src_cost (XEXP (mem, 0), VOIDmode, speed); - redo_changes (old_num_changes); } return (gain > 0); @@ -220,9 +220,11 @@ fwprop_propagation::check_mem (int old_num_changes, rtx mem) return false; } - temporarily_undo_changes (old_num_changes); - bool can_simplify = can_simplify_addr (XEXP (mem, 0)); - redo_changes (old_num_changes); + bool can_simplify = [&]() + { + undo_recog_changes undo (old_num_changes); + return can_simplify_addr (XEXP (mem, 0)); + } (); if (!can_simplify) { failure_reason = "would replace a frame address"; @@ -414,9 +416,10 @@ try_fwprop_subst_note (insn_info *use_insn, set_info *def, { fprintf (dump_file, "\nin notes of insn %d, replacing:\n ", INSN_UID (use_rtl)); - temporarily_undo_changes (0); - print_inline_rtx (dump_file, note, 2); - redo_changes (0); + { + undo_recog_changes undo (0); + print_inline_rtx (dump_file, note, 2); + } fprintf (dump_file, "\n with:\n "); print_inline_rtx (dump_file, note, 2); fprintf (dump_file, "\n"); @@ -468,9 +471,8 @@ try_fwprop_subst_pattern (obstack_watermark &attempt, insn_change &use_change, { fprintf (dump_file, "\npropagating insn %d into insn %d, replacing:\n", def_insn->uid (), use_insn->uid ()); - temporarily_undo_changes (0); + undo_recog_changes undo (0); print_rtl_single (dump_file, PATTERN (use_rtl)); - redo_changes (0); } bool ok = recog (attempt, use_change); diff --git a/gcc/recog.cc b/gcc/recog.cc index 4c63f9301c6..d1811f3618d 100644 --- a/gcc/recog.cc +++ b/gcc/recog.cc @@ -194,7 +194,7 @@ static change_t *changes; static int changes_allocated; static int num_changes = 0; -static int temporarily_undone_changes = 0; +int undo_recog_changes::s_num_changes = 0; /* Validate a proposed change to OBJECT. LOC is the location in the rtl at which NEW_RTX will be placed. If NEW_LEN is >= 0, XVECLEN (NEW_RTX, 0) @@ -220,7 +220,7 @@ static bool validate_change_1 (rtx object, rtx *loc, rtx new_rtx, bool in_group, bool unshare, int new_len = -1) { - gcc_assert (temporarily_undone_changes == 0); + gcc_assert (!undo_recog_changes::is_active ()); rtx old = *loc; /* Single-element parallels aren't valid and won't match anything. @@ -536,7 +536,7 @@ confirm_change_group (void) int i; rtx last_object = NULL; - gcc_assert (temporarily_undone_changes == 0); + gcc_assert (!undo_recog_changes::is_active ()); for (i = 0; i < num_changes; i++) { rtx object = changes[i].object; @@ -592,7 +592,7 @@ num_validated_changes (void) void cancel_changes (int num) { - gcc_assert (temporarily_undone_changes == 0); + gcc_assert (!undo_recog_changes::is_active ()); int i; /* Back out all the changes. Do this in the opposite order in which @@ -627,34 +627,21 @@ swap_change (int num) } } -/* Temporarily undo all the changes numbered NUM and up, with a view - to reapplying them later. The next call to the changes machinery - must be: - - redo_changes (NUM) - - otherwise things will end up in an invalid state. */ - -void -temporarily_undo_changes (int num) +undo_recog_changes::undo_recog_changes (int num) + : m_old_num_changes (s_num_changes) { - gcc_assert (temporarily_undone_changes == 0 && num <= num_changes); - for (int i = num_changes - 1; i >= num; i--) + gcc_assert (num <= num_changes - s_num_changes); + for (int i = num_changes - s_num_changes - 1; i >= num; i--) swap_change (i); - temporarily_undone_changes = num_changes - num; + s_num_changes = num_changes - num; } -/* Redo the changes that were temporarily undone by: - - temporarily_undo_changes (NUM). */ - -void -redo_changes (int num) +undo_recog_changes::~undo_recog_changes () { - gcc_assert (temporarily_undone_changes == num_changes - num); - for (int i = num; i < num_changes; ++i) + for (int i = num_changes - s_num_changes; + i < num_changes - m_old_num_changes; ++i) swap_change (i); - temporarily_undone_changes = 0; + s_num_changes = m_old_num_changes; } /* Reduce conditional compilation elsewhere. */ diff --git a/gcc/recog.h b/gcc/recog.h index 1dccce78ba4..17ccdb6296b 100644 --- a/gcc/recog.h +++ b/gcc/recog.h @@ -202,6 +202,35 @@ inline insn_propagation::insn_propagation (rtx_insn *insn) { } +/* An RAII class that temporarily undoes part of the current change group. + The sequence: + + { + ...; + undo_recog_changes undo (NUM); + STMTS; + } + + executes STMTS with all the changes numbered NUM and up temporarily + reverted. STMTS must not try to add to the current change group. + + Nested uses are supported, but each nested NUM must be no greater than + outer NUMs. */ + +class undo_recog_changes +{ +public: + undo_recog_changes (int); + ~undo_recog_changes (); + + static bool is_active () { return s_num_changes != 0; } + +private: + int m_old_num_changes; + + static int s_num_changes; +}; + extern void init_recog (void); extern void init_recog_no_volatile (void); extern bool check_asm_operands (rtx); @@ -216,8 +245,6 @@ extern void confirm_change_group (void); extern bool apply_change_group (void); extern int num_validated_changes (void); extern void cancel_changes (int); -extern void temporarily_undo_changes (int); -extern void redo_changes (int); extern bool constrain_operands (int, alternative_mask); extern bool constrain_operands_cached (rtx_insn *, int); extern bool memory_address_addr_space_p (machine_mode, rtx, addr_space_t, diff --git a/gcc/rtl-ssa/changes.cc b/gcc/rtl-ssa/changes.cc index f2be1195bc0..5f3a0f0f0ec 100644 --- a/gcc/rtl-ssa/changes.cc +++ b/gcc/rtl-ssa/changes.cc @@ -173,21 +173,29 @@ rtl_ssa::changes_are_worthwhile (array_slice<insn_change *const> changes, bool strict_p) { unsigned int old_cost = 0; - unsigned int new_cost = 0; sreal weighted_old_cost = 0; - sreal weighted_new_cost = 0; auto entry_count = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count; + { + undo_recog_changes undo (0); + for (insn_change *change : changes) + { + // Count zero for the old cost if the old instruction was a no-op + // move or had an unknown cost. This should reduce the chances of + // making an unprofitable change. + old_cost += change->old_cost (); + basic_block cfg_bb = change->bb ()->cfg_bb (); + if (optimize_bb_for_speed_p (cfg_bb)) + weighted_old_cost += (cfg_bb->count.to_sreal_scale (entry_count) + * change->old_cost ()); + + } + } + unsigned int new_cost = 0; + sreal weighted_new_cost = 0; for (insn_change *change : changes) { - // Count zero for the old cost if the old instruction was a no-op - // move or had an unknown cost. This should reduce the chances of - // making an unprofitable change. - old_cost += change->old_cost (); basic_block cfg_bb = change->bb ()->cfg_bb (); bool for_speed = optimize_bb_for_speed_p (cfg_bb); - if (for_speed) - weighted_old_cost += (cfg_bb->count.to_sreal_scale (entry_count) - * change->old_cost ()); if (!change->is_deletion () && INSN_CODE (change->rtl ()) != NOOP_MOVE_INSN_CODE) { diff --git a/gcc/rtl-ssa/insns.cc b/gcc/rtl-ssa/insns.cc index bfc6683cc45..4af79bee7b9 100644 --- a/gcc/rtl-ssa/insns.cc +++ b/gcc/rtl-ssa/insns.cc @@ -49,14 +49,13 @@ void insn_info::calculate_cost () const { basic_block cfg_bb = BLOCK_FOR_INSN (m_rtl); - temporarily_undo_changes (0); + undo_recog_changes (0); if (INSN_CODE (m_rtl) == NOOP_MOVE_INSN_CODE) // insn_cost also uses 0 to mean "don't know". Callers that // want to distinguish the cases will need to check INSN_CODE. m_cost_or_uid = 0; else m_cost_or_uid = insn_cost (m_rtl, optimize_bb_for_speed_p (cfg_bb)); - redo_changes (0); } // Add NOTE to the instruction's notes. -- 2.25.1