On Mon, Nov 18, 2024 at 9:13 PM Richard Sandiford <richard.sandif...@arm.com> wrote: > > In an attempt to reduce compile time, rtl-ssa computes the cost > of existing instructions lazily rather than eagerly. However, > this means that it might need to calculate the cost of an existing > instruction while a change group is already in progress for the > instruction. rtl_ssa::insn_info::calculate_cost therefore temporarily > undoes any in-progress changes in order to get back the original pattern > and insn code. > > rtl-ssa's main use of insn costs is in rtl_ssa::changes_are_worthwhile, > which calculates the cost of a change involving an arbitrary number > of instructions. Summing up the original cost of N instructions > while those N instructions have in-progress changes could lead to > O(N*N) rtl changes, since each lazy calculation might have to > temporarily undo the changes to all N instructions.
Wheee ... > > We can avoid that by converting the current temporarily_undo_changes/ > redo_changes pair into an RAII class and extending it to allow > nested uses. rtl_ssa::changes_are_worthwhile can then undo the > in-progress changes once, before computing the original cost of all > the instructions. > > Tested on aarch64-linux-gnu. Also tested against the testcase in > the PR, where the old compile time was 3.7x greater than the new compile > time (tested with a stage 3 --enable-checking=yes,extra,rtl compiler). > late-combine went from being 73% of compile time to less than 1% > (rounded to 0% by -fmem-report). The main time sinks now seem to be > DOM and FRE. > > OK to install? OK. Thanks, Richard. > Richard > > > gcc/ > PR rtl-optimization/117297 > * recog.h (temporarily_undo_changes, redo_changes): Delete in > favor of... > (undo_recog_changes): ...this new RAII class. > * fwprop.cc (should_replace_address): Update accordingly. > (fwprop_propagation::check_mem): Likewise. > (try_fwprop_subst_note): Likewise. > (try_fwprop_subst_pattern): Likewise. > * rtl-ssa/insns.cc (insn_info::calculate_cost): Likewise. > * rtl-ssa/changes.cc (rtl_ssa::changes_are_worthwhile): Temporarily > undo all in-progress changes while computing the cost of the original > sequence. > * recog.cc (temporarily_undone_changes): Replace with... > (undo_recog_changes::s_num_changes): ...this static member variable. > (validate_change_1): Update check accordingly. > (confirm_change_group): Likewise. > (num_validated_changes): Likewise. > (temporarily_undo_changes): Replace with... > (undo_recog_changes::undo_recog_changes): ...this constructor. > (redo_changes): Replace with... > (undo_recog_changes::~undo_recog_changes): ...this destructor. > --- > gcc/fwprop.cc | 30 ++++++++++++++++-------------- > gcc/recog.cc | 39 +++++++++++++-------------------------- > gcc/recog.h | 31 +++++++++++++++++++++++++++++-- > gcc/rtl-ssa/changes.cc | 26 +++++++++++++++++--------- > gcc/rtl-ssa/insns.cc | 3 +-- > 5 files changed, 76 insertions(+), 53 deletions(-) > > diff --git a/gcc/fwprop.cc b/gcc/fwprop.cc > index 8cba6b7ce9f..5ddefdf6d2f 100644 > --- a/gcc/fwprop.cc > +++ b/gcc/fwprop.cc > @@ -146,10 +146,11 @@ should_replace_address (int old_num_changes, rtx mem, > rtx_insn *insn) > > /* Prefer the new address if it is less expensive. */ > bool speed = optimize_bb_for_speed_p (BLOCK_FOR_INSN (insn)); > - temporarily_undo_changes (old_num_changes); > - gain = address_cost (XEXP (mem, 0), GET_MODE (mem), > - MEM_ADDR_SPACE (mem), speed); > - redo_changes (old_num_changes); > + { > + undo_recog_changes undo (old_num_changes); > + gain = address_cost (XEXP (mem, 0), GET_MODE (mem), > + MEM_ADDR_SPACE (mem), speed); > + } > gain -= address_cost (XEXP (mem, 0), GET_MODE (mem), > MEM_ADDR_SPACE (mem), speed); > > @@ -160,9 +161,8 @@ should_replace_address (int old_num_changes, rtx mem, > rtx_insn *insn) > if (gain == 0) > { > gain = set_src_cost (XEXP (mem, 0), VOIDmode, speed); > - temporarily_undo_changes (old_num_changes); > + undo_recog_changes undo (old_num_changes); > gain -= set_src_cost (XEXP (mem, 0), VOIDmode, speed); > - redo_changes (old_num_changes); > } > > return (gain > 0); > @@ -220,9 +220,11 @@ fwprop_propagation::check_mem (int old_num_changes, rtx > mem) > return false; > } > > - temporarily_undo_changes (old_num_changes); > - bool can_simplify = can_simplify_addr (XEXP (mem, 0)); > - redo_changes (old_num_changes); > + bool can_simplify = [&]() > + { > + undo_recog_changes undo (old_num_changes); > + return can_simplify_addr (XEXP (mem, 0)); > + } (); > if (!can_simplify) > { > failure_reason = "would replace a frame address"; > @@ -414,9 +416,10 @@ try_fwprop_subst_note (insn_info *use_insn, set_info > *def, > { > fprintf (dump_file, "\nin notes of insn %d, replacing:\n ", > INSN_UID (use_rtl)); > - temporarily_undo_changes (0); > - print_inline_rtx (dump_file, note, 2); > - redo_changes (0); > + { > + undo_recog_changes undo (0); > + print_inline_rtx (dump_file, note, 2); > + } > fprintf (dump_file, "\n with:\n "); > print_inline_rtx (dump_file, note, 2); > fprintf (dump_file, "\n"); > @@ -468,9 +471,8 @@ try_fwprop_subst_pattern (obstack_watermark &attempt, > insn_change &use_change, > { > fprintf (dump_file, "\npropagating insn %d into insn %d, replacing:\n", > def_insn->uid (), use_insn->uid ()); > - temporarily_undo_changes (0); > + undo_recog_changes undo (0); > print_rtl_single (dump_file, PATTERN (use_rtl)); > - redo_changes (0); > } > > bool ok = recog (attempt, use_change); > diff --git a/gcc/recog.cc b/gcc/recog.cc > index 4c63f9301c6..d1811f3618d 100644 > --- a/gcc/recog.cc > +++ b/gcc/recog.cc > @@ -194,7 +194,7 @@ static change_t *changes; > static int changes_allocated; > > static int num_changes = 0; > -static int temporarily_undone_changes = 0; > +int undo_recog_changes::s_num_changes = 0; > > /* Validate a proposed change to OBJECT. LOC is the location in the rtl > at which NEW_RTX will be placed. If NEW_LEN is >= 0, XVECLEN (NEW_RTX, 0) > @@ -220,7 +220,7 @@ static bool > validate_change_1 (rtx object, rtx *loc, rtx new_rtx, bool in_group, > bool unshare, int new_len = -1) > { > - gcc_assert (temporarily_undone_changes == 0); > + gcc_assert (!undo_recog_changes::is_active ()); > rtx old = *loc; > > /* Single-element parallels aren't valid and won't match anything. > @@ -536,7 +536,7 @@ confirm_change_group (void) > int i; > rtx last_object = NULL; > > - gcc_assert (temporarily_undone_changes == 0); > + gcc_assert (!undo_recog_changes::is_active ()); > for (i = 0; i < num_changes; i++) > { > rtx object = changes[i].object; > @@ -592,7 +592,7 @@ num_validated_changes (void) > void > cancel_changes (int num) > { > - gcc_assert (temporarily_undone_changes == 0); > + gcc_assert (!undo_recog_changes::is_active ()); > int i; > > /* Back out all the changes. Do this in the opposite order in which > @@ -627,34 +627,21 @@ swap_change (int num) > } > } > > -/* Temporarily undo all the changes numbered NUM and up, with a view > - to reapplying them later. The next call to the changes machinery > - must be: > - > - redo_changes (NUM) > - > - otherwise things will end up in an invalid state. */ > - > -void > -temporarily_undo_changes (int num) > +undo_recog_changes::undo_recog_changes (int num) > + : m_old_num_changes (s_num_changes) > { > - gcc_assert (temporarily_undone_changes == 0 && num <= num_changes); > - for (int i = num_changes - 1; i >= num; i--) > + gcc_assert (num <= num_changes - s_num_changes); > + for (int i = num_changes - s_num_changes - 1; i >= num; i--) > swap_change (i); > - temporarily_undone_changes = num_changes - num; > + s_num_changes = num_changes - num; > } > > -/* Redo the changes that were temporarily undone by: > - > - temporarily_undo_changes (NUM). */ > - > -void > -redo_changes (int num) > +undo_recog_changes::~undo_recog_changes () > { > - gcc_assert (temporarily_undone_changes == num_changes - num); > - for (int i = num; i < num_changes; ++i) > + for (int i = num_changes - s_num_changes; > + i < num_changes - m_old_num_changes; ++i) > swap_change (i); > - temporarily_undone_changes = 0; > + s_num_changes = m_old_num_changes; > } > > /* Reduce conditional compilation elsewhere. */ > diff --git a/gcc/recog.h b/gcc/recog.h > index 1dccce78ba4..17ccdb6296b 100644 > --- a/gcc/recog.h > +++ b/gcc/recog.h > @@ -202,6 +202,35 @@ inline insn_propagation::insn_propagation (rtx_insn > *insn) > { > } > > +/* An RAII class that temporarily undoes part of the current change group. > + The sequence: > + > + { > + ...; > + undo_recog_changes undo (NUM); > + STMTS; > + } > + > + executes STMTS with all the changes numbered NUM and up temporarily > + reverted. STMTS must not try to add to the current change group. > + > + Nested uses are supported, but each nested NUM must be no greater than > + outer NUMs. */ > + > +class undo_recog_changes > +{ > +public: > + undo_recog_changes (int); > + ~undo_recog_changes (); > + > + static bool is_active () { return s_num_changes != 0; } > + > +private: > + int m_old_num_changes; > + > + static int s_num_changes; > +}; > + > extern void init_recog (void); > extern void init_recog_no_volatile (void); > extern bool check_asm_operands (rtx); > @@ -216,8 +245,6 @@ extern void confirm_change_group (void); > extern bool apply_change_group (void); > extern int num_validated_changes (void); > extern void cancel_changes (int); > -extern void temporarily_undo_changes (int); > -extern void redo_changes (int); > extern bool constrain_operands (int, alternative_mask); > extern bool constrain_operands_cached (rtx_insn *, int); > extern bool memory_address_addr_space_p (machine_mode, rtx, addr_space_t, > diff --git a/gcc/rtl-ssa/changes.cc b/gcc/rtl-ssa/changes.cc > index f2be1195bc0..5f3a0f0f0ec 100644 > --- a/gcc/rtl-ssa/changes.cc > +++ b/gcc/rtl-ssa/changes.cc > @@ -173,21 +173,29 @@ rtl_ssa::changes_are_worthwhile > (array_slice<insn_change *const> changes, > bool strict_p) > { > unsigned int old_cost = 0; > - unsigned int new_cost = 0; > sreal weighted_old_cost = 0; > - sreal weighted_new_cost = 0; > auto entry_count = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count; > + { > + undo_recog_changes undo (0); > + for (insn_change *change : changes) > + { > + // Count zero for the old cost if the old instruction was a no-op > + // move or had an unknown cost. This should reduce the chances of > + // making an unprofitable change. > + old_cost += change->old_cost (); > + basic_block cfg_bb = change->bb ()->cfg_bb (); > + if (optimize_bb_for_speed_p (cfg_bb)) > + weighted_old_cost += (cfg_bb->count.to_sreal_scale (entry_count) > + * change->old_cost ()); > + > + } > + } > + unsigned int new_cost = 0; > + sreal weighted_new_cost = 0; > for (insn_change *change : changes) > { > - // Count zero for the old cost if the old instruction was a no-op > - // move or had an unknown cost. This should reduce the chances of > - // making an unprofitable change. > - old_cost += change->old_cost (); > basic_block cfg_bb = change->bb ()->cfg_bb (); > bool for_speed = optimize_bb_for_speed_p (cfg_bb); > - if (for_speed) > - weighted_old_cost += (cfg_bb->count.to_sreal_scale (entry_count) > - * change->old_cost ()); > if (!change->is_deletion () > && INSN_CODE (change->rtl ()) != NOOP_MOVE_INSN_CODE) > { > diff --git a/gcc/rtl-ssa/insns.cc b/gcc/rtl-ssa/insns.cc > index bfc6683cc45..4af79bee7b9 100644 > --- a/gcc/rtl-ssa/insns.cc > +++ b/gcc/rtl-ssa/insns.cc > @@ -49,14 +49,13 @@ void > insn_info::calculate_cost () const > { > basic_block cfg_bb = BLOCK_FOR_INSN (m_rtl); > - temporarily_undo_changes (0); > + undo_recog_changes (0); > if (INSN_CODE (m_rtl) == NOOP_MOVE_INSN_CODE) > // insn_cost also uses 0 to mean "don't know". Callers that > // want to distinguish the cases will need to check INSN_CODE. > m_cost_or_uid = 0; > else > m_cost_or_uid = insn_cost (m_rtl, optimize_bb_for_speed_p (cfg_bb)); > - redo_changes (0); > } > > // Add NOTE to the instruction's notes. > -- > 2.25.1 >