On Mon, Nov 18, 2024 at 9:13 PM Richard Sandiford
<richard.sandif...@arm.com> wrote:
>
> In an attempt to reduce compile time, rtl-ssa computes the cost
> of existing instructions lazily rather than eagerly.  However,
> this means that it might need to calculate the cost of an existing
> instruction while a change group is already in progress for the
> instruction.  rtl_ssa::insn_info::calculate_cost therefore temporarily
> undoes any in-progress changes in order to get back the original pattern
> and insn code.
>
> rtl-ssa's main use of insn costs is in rtl_ssa::changes_are_worthwhile,
> which calculates the cost of a change involving an arbitrary number
> of instructions.  Summing up the original cost of N instructions
> while those N instructions have in-progress changes could lead to
> O(N*N) rtl changes, since each lazy calculation might have to
> temporarily undo the changes to all N instructions.

Wheee ...

>
> We can avoid that by converting the current temporarily_undo_changes/
> redo_changes pair into an RAII class and extending it to allow
> nested uses.  rtl_ssa::changes_are_worthwhile can then undo the
> in-progress changes once, before computing the original cost of all
> the instructions.
>
> Tested on aarch64-linux-gnu.  Also tested against the testcase in
> the PR, where the old compile time was 3.7x greater than the new compile
> time (tested with a stage 3 --enable-checking=yes,extra,rtl compiler).
> late-combine went from being 73% of compile time to less than 1%
> (rounded to 0% by -fmem-report).  The main time sinks now seem to be
> DOM and FRE.
>
> OK to install?

OK.

Thanks,
Richard.

> Richard
>
>
> gcc/
>         PR rtl-optimization/117297
>         * recog.h (temporarily_undo_changes, redo_changes): Delete in
>         favor of...
>         (undo_recog_changes): ...this new RAII class.
>         * fwprop.cc (should_replace_address): Update accordingly.
>         (fwprop_propagation::check_mem): Likewise.
>         (try_fwprop_subst_note): Likewise.
>         (try_fwprop_subst_pattern): Likewise.
>         * rtl-ssa/insns.cc (insn_info::calculate_cost): Likewise.
>         * rtl-ssa/changes.cc (rtl_ssa::changes_are_worthwhile): Temporarily
>         undo all in-progress changes while computing the cost of the original
>         sequence.
>         * recog.cc (temporarily_undone_changes): Replace with...
>         (undo_recog_changes::s_num_changes): ...this static member variable.
>         (validate_change_1): Update check accordingly.
>         (confirm_change_group): Likewise.
>         (num_validated_changes): Likewise.
>         (temporarily_undo_changes): Replace with...
>         (undo_recog_changes::undo_recog_changes): ...this constructor.
>         (redo_changes): Replace with...
>         (undo_recog_changes::~undo_recog_changes): ...this destructor.
> ---
>  gcc/fwprop.cc          | 30 ++++++++++++++++--------------
>  gcc/recog.cc           | 39 +++++++++++++--------------------------
>  gcc/recog.h            | 31 +++++++++++++++++++++++++++++--
>  gcc/rtl-ssa/changes.cc | 26 +++++++++++++++++---------
>  gcc/rtl-ssa/insns.cc   |  3 +--
>  5 files changed, 76 insertions(+), 53 deletions(-)
>
> diff --git a/gcc/fwprop.cc b/gcc/fwprop.cc
> index 8cba6b7ce9f..5ddefdf6d2f 100644
> --- a/gcc/fwprop.cc
> +++ b/gcc/fwprop.cc
> @@ -146,10 +146,11 @@ should_replace_address (int old_num_changes, rtx mem, 
> rtx_insn *insn)
>
>    /* Prefer the new address if it is less expensive.  */
>    bool speed = optimize_bb_for_speed_p (BLOCK_FOR_INSN (insn));
> -  temporarily_undo_changes (old_num_changes);
> -  gain = address_cost (XEXP (mem, 0), GET_MODE (mem),
> -                      MEM_ADDR_SPACE (mem), speed);
> -  redo_changes (old_num_changes);
> +  {
> +    undo_recog_changes undo (old_num_changes);
> +    gain = address_cost (XEXP (mem, 0), GET_MODE (mem),
> +                        MEM_ADDR_SPACE (mem), speed);
> +  }
>    gain -= address_cost (XEXP (mem, 0), GET_MODE (mem),
>                         MEM_ADDR_SPACE (mem), speed);
>
> @@ -160,9 +161,8 @@ should_replace_address (int old_num_changes, rtx mem, 
> rtx_insn *insn)
>    if (gain == 0)
>      {
>        gain = set_src_cost (XEXP (mem, 0), VOIDmode, speed);
> -      temporarily_undo_changes (old_num_changes);
> +      undo_recog_changes undo (old_num_changes);
>        gain -= set_src_cost (XEXP (mem, 0), VOIDmode, speed);
> -      redo_changes (old_num_changes);
>      }
>
>    return (gain > 0);
> @@ -220,9 +220,11 @@ fwprop_propagation::check_mem (int old_num_changes, rtx 
> mem)
>        return false;
>      }
>
> -  temporarily_undo_changes (old_num_changes);
> -  bool can_simplify = can_simplify_addr (XEXP (mem, 0));
> -  redo_changes (old_num_changes);
> +  bool can_simplify = [&]()
> +    {
> +      undo_recog_changes undo (old_num_changes);
> +      return can_simplify_addr (XEXP (mem, 0));
> +    } ();
>    if (!can_simplify)
>      {
>        failure_reason = "would replace a frame address";
> @@ -414,9 +416,10 @@ try_fwprop_subst_note (insn_info *use_insn, set_info 
> *def,
>      {
>        fprintf (dump_file, "\nin notes of insn %d, replacing:\n  ",
>                INSN_UID (use_rtl));
> -      temporarily_undo_changes (0);
> -      print_inline_rtx (dump_file, note, 2);
> -      redo_changes (0);
> +      {
> +       undo_recog_changes undo (0);
> +       print_inline_rtx (dump_file, note, 2);
> +      }
>        fprintf (dump_file, "\n with:\n  ");
>        print_inline_rtx (dump_file, note, 2);
>        fprintf (dump_file, "\n");
> @@ -468,9 +471,8 @@ try_fwprop_subst_pattern (obstack_watermark &attempt, 
> insn_change &use_change,
>      {
>        fprintf (dump_file, "\npropagating insn %d into insn %d, replacing:\n",
>                def_insn->uid (), use_insn->uid ());
> -      temporarily_undo_changes (0);
> +      undo_recog_changes undo (0);
>        print_rtl_single (dump_file, PATTERN (use_rtl));
> -      redo_changes (0);
>      }
>
>    bool ok = recog (attempt, use_change);
> diff --git a/gcc/recog.cc b/gcc/recog.cc
> index 4c63f9301c6..d1811f3618d 100644
> --- a/gcc/recog.cc
> +++ b/gcc/recog.cc
> @@ -194,7 +194,7 @@ static change_t *changes;
>  static int changes_allocated;
>
>  static int num_changes = 0;
> -static int temporarily_undone_changes = 0;
> +int undo_recog_changes::s_num_changes = 0;
>
>  /* Validate a proposed change to OBJECT.  LOC is the location in the rtl
>     at which NEW_RTX will be placed.  If NEW_LEN is >= 0, XVECLEN (NEW_RTX, 0)
> @@ -220,7 +220,7 @@ static bool
>  validate_change_1 (rtx object, rtx *loc, rtx new_rtx, bool in_group,
>                    bool unshare, int new_len = -1)
>  {
> -  gcc_assert (temporarily_undone_changes == 0);
> +  gcc_assert (!undo_recog_changes::is_active ());
>    rtx old = *loc;
>
>    /* Single-element parallels aren't valid and won't match anything.
> @@ -536,7 +536,7 @@ confirm_change_group (void)
>    int i;
>    rtx last_object = NULL;
>
> -  gcc_assert (temporarily_undone_changes == 0);
> +  gcc_assert (!undo_recog_changes::is_active ());
>    for (i = 0; i < num_changes; i++)
>      {
>        rtx object = changes[i].object;
> @@ -592,7 +592,7 @@ num_validated_changes (void)
>  void
>  cancel_changes (int num)
>  {
> -  gcc_assert (temporarily_undone_changes == 0);
> +  gcc_assert (!undo_recog_changes::is_active ());
>    int i;
>
>    /* Back out all the changes.  Do this in the opposite order in which
> @@ -627,34 +627,21 @@ swap_change (int num)
>      }
>  }
>
> -/* Temporarily undo all the changes numbered NUM and up, with a view
> -   to reapplying them later.  The next call to the changes machinery
> -   must be:
> -
> -      redo_changes (NUM)
> -
> -   otherwise things will end up in an invalid state.  */
> -
> -void
> -temporarily_undo_changes (int num)
> +undo_recog_changes::undo_recog_changes (int num)
> +  : m_old_num_changes (s_num_changes)
>  {
> -  gcc_assert (temporarily_undone_changes == 0 && num <= num_changes);
> -  for (int i = num_changes - 1; i >= num; i--)
> +  gcc_assert (num <= num_changes - s_num_changes);
> +  for (int i = num_changes - s_num_changes - 1; i >= num; i--)
>      swap_change (i);
> -  temporarily_undone_changes = num_changes - num;
> +  s_num_changes = num_changes - num;
>  }
>
> -/* Redo the changes that were temporarily undone by:
> -
> -      temporarily_undo_changes (NUM).  */
> -
> -void
> -redo_changes (int num)
> +undo_recog_changes::~undo_recog_changes ()
>  {
> -  gcc_assert (temporarily_undone_changes == num_changes - num);
> -  for (int i = num; i < num_changes; ++i)
> +  for (int i = num_changes - s_num_changes;
> +       i < num_changes - m_old_num_changes; ++i)
>      swap_change (i);
> -  temporarily_undone_changes = 0;
> +  s_num_changes = m_old_num_changes;
>  }
>
>  /* Reduce conditional compilation elsewhere.  */
> diff --git a/gcc/recog.h b/gcc/recog.h
> index 1dccce78ba4..17ccdb6296b 100644
> --- a/gcc/recog.h
> +++ b/gcc/recog.h
> @@ -202,6 +202,35 @@ inline insn_propagation::insn_propagation (rtx_insn 
> *insn)
>  {
>  }
>
> +/* An RAII class that temporarily undoes part of the current change group.
> +   The sequence:
> +
> +     {
> +       ...;
> +       undo_recog_changes undo (NUM);
> +       STMTS;
> +     }
> +
> +   executes STMTS with all the changes numbered NUM and up temporarily
> +   reverted.  STMTS must not try to add to the current change group.
> +
> +   Nested uses are supported, but each nested NUM must be no greater than
> +   outer NUMs.  */
> +
> +class undo_recog_changes
> +{
> +public:
> +  undo_recog_changes (int);
> +  ~undo_recog_changes ();
> +
> +  static bool is_active () { return s_num_changes != 0; }
> +
> +private:
> +  int m_old_num_changes;
> +
> +  static int s_num_changes;
> +};
> +
>  extern void init_recog (void);
>  extern void init_recog_no_volatile (void);
>  extern bool check_asm_operands (rtx);
> @@ -216,8 +245,6 @@ extern void confirm_change_group (void);
>  extern bool apply_change_group (void);
>  extern int num_validated_changes (void);
>  extern void cancel_changes (int);
> -extern void temporarily_undo_changes (int);
> -extern void redo_changes (int);
>  extern bool constrain_operands (int, alternative_mask);
>  extern bool constrain_operands_cached (rtx_insn *, int);
>  extern bool memory_address_addr_space_p (machine_mode, rtx, addr_space_t,
> diff --git a/gcc/rtl-ssa/changes.cc b/gcc/rtl-ssa/changes.cc
> index f2be1195bc0..5f3a0f0f0ec 100644
> --- a/gcc/rtl-ssa/changes.cc
> +++ b/gcc/rtl-ssa/changes.cc
> @@ -173,21 +173,29 @@ rtl_ssa::changes_are_worthwhile 
> (array_slice<insn_change *const> changes,
>                                  bool strict_p)
>  {
>    unsigned int old_cost = 0;
> -  unsigned int new_cost = 0;
>    sreal weighted_old_cost = 0;
> -  sreal weighted_new_cost = 0;
>    auto entry_count = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count;
> +  {
> +    undo_recog_changes undo (0);
> +    for (insn_change *change : changes)
> +      {
> +       // Count zero for the old cost if the old instruction was a no-op
> +       // move or had an unknown cost.  This should reduce the chances of
> +       // making an unprofitable change.
> +       old_cost += change->old_cost ();
> +       basic_block cfg_bb = change->bb ()->cfg_bb ();
> +       if (optimize_bb_for_speed_p (cfg_bb))
> +         weighted_old_cost += (cfg_bb->count.to_sreal_scale (entry_count)
> +                               * change->old_cost ());
> +
> +      }
> +  }
> +  unsigned int new_cost = 0;
> +  sreal weighted_new_cost = 0;
>    for (insn_change *change : changes)
>      {
> -      // Count zero for the old cost if the old instruction was a no-op
> -      // move or had an unknown cost.  This should reduce the chances of
> -      // making an unprofitable change.
> -      old_cost += change->old_cost ();
>        basic_block cfg_bb = change->bb ()->cfg_bb ();
>        bool for_speed = optimize_bb_for_speed_p (cfg_bb);
> -      if (for_speed)
> -       weighted_old_cost += (cfg_bb->count.to_sreal_scale (entry_count)
> -                             * change->old_cost ());
>        if (!change->is_deletion ()
>           && INSN_CODE (change->rtl ()) != NOOP_MOVE_INSN_CODE)
>         {
> diff --git a/gcc/rtl-ssa/insns.cc b/gcc/rtl-ssa/insns.cc
> index bfc6683cc45..4af79bee7b9 100644
> --- a/gcc/rtl-ssa/insns.cc
> +++ b/gcc/rtl-ssa/insns.cc
> @@ -49,14 +49,13 @@ void
>  insn_info::calculate_cost () const
>  {
>    basic_block cfg_bb = BLOCK_FOR_INSN (m_rtl);
> -  temporarily_undo_changes (0);
> +  undo_recog_changes (0);
>    if (INSN_CODE (m_rtl) == NOOP_MOVE_INSN_CODE)
>      // insn_cost also uses 0 to mean "don't know".  Callers that
>      // want to distinguish the cases will need to check INSN_CODE.
>      m_cost_or_uid = 0;
>    else
>      m_cost_or_uid = insn_cost (m_rtl, optimize_bb_for_speed_p (cfg_bb));
> -  redo_changes (0);
>  }
>
>  // Add NOTE to the instruction's notes.
> --
> 2.25.1
>

Reply via email to