In an attempt to reduce compile time, rtl-ssa computes the cost
of existing instructions lazily rather than eagerly.  However,
this means that it might need to calculate the cost of an existing
instruction while a change group is already in progress for the
instruction.  rtl_ssa::insn_info::calculate_cost therefore temporarily
undoes any in-progress changes in order to get back the original pattern
and insn code.

rtl-ssa's main use of insn costs is in rtl_ssa::changes_are_worthwhile,
which calculates the cost of a change involving an arbitrary number
of instructions.  Summing up the original cost of N instructions
while those N instructions have in-progress changes could lead to
O(N*N) rtl changes, since each lazy calculation might have to
temporarily undo the changes to all N instructions.

We can avoid that by converting the current temporarily_undo_changes/
redo_changes pair into an RAII class and extending it to allow
nested uses.  rtl_ssa::changes_are_worthwhile can then undo the
in-progress changes once, before computing the original cost of all
the instructions.

Tested on aarch64-linux-gnu.  Also tested against the testcase in
the PR, where the old compile time was 3.7x greater than the new compile
time (tested with a stage 3 --enable-checking=yes,extra,rtl compiler).
late-combine went from being 73% of compile time to less than 1%
(rounded to 0% by -fmem-report).  The main time sinks now seem to be
DOM and FRE.

OK to install?

Richard


gcc/
        PR rtl-optimization/117297
        * recog.h (temporarily_undo_changes, redo_changes): Delete in
        favor of...
        (undo_recog_changes): ...this new RAII class.
        * fwprop.cc (should_replace_address): Update accordingly.
        (fwprop_propagation::check_mem): Likewise.
        (try_fwprop_subst_note): Likewise.
        (try_fwprop_subst_pattern): Likewise.
        * rtl-ssa/insns.cc (insn_info::calculate_cost): Likewise.
        * rtl-ssa/changes.cc (rtl_ssa::changes_are_worthwhile): Temporarily
        undo all in-progress changes while computing the cost of the original
        sequence.
        * recog.cc (temporarily_undone_changes): Replace with...
        (undo_recog_changes::s_num_changes): ...this static member variable.
        (validate_change_1): Update check accordingly.
        (confirm_change_group): Likewise.
        (num_validated_changes): Likewise.
        (temporarily_undo_changes): Replace with...
        (undo_recog_changes::undo_recog_changes): ...this constructor.
        (redo_changes): Replace with...
        (undo_recog_changes::~undo_recog_changes): ...this destructor.
---
 gcc/fwprop.cc          | 30 ++++++++++++++++--------------
 gcc/recog.cc           | 39 +++++++++++++--------------------------
 gcc/recog.h            | 31 +++++++++++++++++++++++++++++--
 gcc/rtl-ssa/changes.cc | 26 +++++++++++++++++---------
 gcc/rtl-ssa/insns.cc   |  3 +--
 5 files changed, 76 insertions(+), 53 deletions(-)

diff --git a/gcc/fwprop.cc b/gcc/fwprop.cc
index 8cba6b7ce9f..5ddefdf6d2f 100644
--- a/gcc/fwprop.cc
+++ b/gcc/fwprop.cc
@@ -146,10 +146,11 @@ should_replace_address (int old_num_changes, rtx mem, 
rtx_insn *insn)
 
   /* Prefer the new address if it is less expensive.  */
   bool speed = optimize_bb_for_speed_p (BLOCK_FOR_INSN (insn));
-  temporarily_undo_changes (old_num_changes);
-  gain = address_cost (XEXP (mem, 0), GET_MODE (mem),
-                      MEM_ADDR_SPACE (mem), speed);
-  redo_changes (old_num_changes);
+  {
+    undo_recog_changes undo (old_num_changes);
+    gain = address_cost (XEXP (mem, 0), GET_MODE (mem),
+                        MEM_ADDR_SPACE (mem), speed);
+  }
   gain -= address_cost (XEXP (mem, 0), GET_MODE (mem),
                        MEM_ADDR_SPACE (mem), speed);
 
@@ -160,9 +161,8 @@ should_replace_address (int old_num_changes, rtx mem, 
rtx_insn *insn)
   if (gain == 0)
     {
       gain = set_src_cost (XEXP (mem, 0), VOIDmode, speed);
-      temporarily_undo_changes (old_num_changes);
+      undo_recog_changes undo (old_num_changes);
       gain -= set_src_cost (XEXP (mem, 0), VOIDmode, speed);
-      redo_changes (old_num_changes);
     }
 
   return (gain > 0);
@@ -220,9 +220,11 @@ fwprop_propagation::check_mem (int old_num_changes, rtx 
mem)
       return false;
     }
 
-  temporarily_undo_changes (old_num_changes);
-  bool can_simplify = can_simplify_addr (XEXP (mem, 0));
-  redo_changes (old_num_changes);
+  bool can_simplify = [&]()
+    {
+      undo_recog_changes undo (old_num_changes);
+      return can_simplify_addr (XEXP (mem, 0));
+    } ();
   if (!can_simplify)
     {
       failure_reason = "would replace a frame address";
@@ -414,9 +416,10 @@ try_fwprop_subst_note (insn_info *use_insn, set_info *def,
     {
       fprintf (dump_file, "\nin notes of insn %d, replacing:\n  ",
               INSN_UID (use_rtl));
-      temporarily_undo_changes (0);
-      print_inline_rtx (dump_file, note, 2);
-      redo_changes (0);
+      {
+       undo_recog_changes undo (0);
+       print_inline_rtx (dump_file, note, 2);
+      }
       fprintf (dump_file, "\n with:\n  ");
       print_inline_rtx (dump_file, note, 2);
       fprintf (dump_file, "\n");
@@ -468,9 +471,8 @@ try_fwprop_subst_pattern (obstack_watermark &attempt, 
insn_change &use_change,
     {
       fprintf (dump_file, "\npropagating insn %d into insn %d, replacing:\n",
               def_insn->uid (), use_insn->uid ());
-      temporarily_undo_changes (0);
+      undo_recog_changes undo (0);
       print_rtl_single (dump_file, PATTERN (use_rtl));
-      redo_changes (0);
     }
 
   bool ok = recog (attempt, use_change);
diff --git a/gcc/recog.cc b/gcc/recog.cc
index 4c63f9301c6..d1811f3618d 100644
--- a/gcc/recog.cc
+++ b/gcc/recog.cc
@@ -194,7 +194,7 @@ static change_t *changes;
 static int changes_allocated;
 
 static int num_changes = 0;
-static int temporarily_undone_changes = 0;
+int undo_recog_changes::s_num_changes = 0;
 
 /* Validate a proposed change to OBJECT.  LOC is the location in the rtl
    at which NEW_RTX will be placed.  If NEW_LEN is >= 0, XVECLEN (NEW_RTX, 0)
@@ -220,7 +220,7 @@ static bool
 validate_change_1 (rtx object, rtx *loc, rtx new_rtx, bool in_group,
                   bool unshare, int new_len = -1)
 {
-  gcc_assert (temporarily_undone_changes == 0);
+  gcc_assert (!undo_recog_changes::is_active ());
   rtx old = *loc;
 
   /* Single-element parallels aren't valid and won't match anything.
@@ -536,7 +536,7 @@ confirm_change_group (void)
   int i;
   rtx last_object = NULL;
 
-  gcc_assert (temporarily_undone_changes == 0);
+  gcc_assert (!undo_recog_changes::is_active ());
   for (i = 0; i < num_changes; i++)
     {
       rtx object = changes[i].object;
@@ -592,7 +592,7 @@ num_validated_changes (void)
 void
 cancel_changes (int num)
 {
-  gcc_assert (temporarily_undone_changes == 0);
+  gcc_assert (!undo_recog_changes::is_active ());
   int i;
 
   /* Back out all the changes.  Do this in the opposite order in which
@@ -627,34 +627,21 @@ swap_change (int num)
     }
 }
 
-/* Temporarily undo all the changes numbered NUM and up, with a view
-   to reapplying them later.  The next call to the changes machinery
-   must be:
-
-      redo_changes (NUM)
-
-   otherwise things will end up in an invalid state.  */
-
-void
-temporarily_undo_changes (int num)
+undo_recog_changes::undo_recog_changes (int num)
+  : m_old_num_changes (s_num_changes)
 {
-  gcc_assert (temporarily_undone_changes == 0 && num <= num_changes);
-  for (int i = num_changes - 1; i >= num; i--)
+  gcc_assert (num <= num_changes - s_num_changes);
+  for (int i = num_changes - s_num_changes - 1; i >= num; i--)
     swap_change (i);
-  temporarily_undone_changes = num_changes - num;
+  s_num_changes = num_changes - num;
 }
 
-/* Redo the changes that were temporarily undone by:
-
-      temporarily_undo_changes (NUM).  */
-
-void
-redo_changes (int num)
+undo_recog_changes::~undo_recog_changes ()
 {
-  gcc_assert (temporarily_undone_changes == num_changes - num);
-  for (int i = num; i < num_changes; ++i)
+  for (int i = num_changes - s_num_changes;
+       i < num_changes - m_old_num_changes; ++i)
     swap_change (i);
-  temporarily_undone_changes = 0;
+  s_num_changes = m_old_num_changes;
 }
 
 /* Reduce conditional compilation elsewhere.  */
diff --git a/gcc/recog.h b/gcc/recog.h
index 1dccce78ba4..17ccdb6296b 100644
--- a/gcc/recog.h
+++ b/gcc/recog.h
@@ -202,6 +202,35 @@ inline insn_propagation::insn_propagation (rtx_insn *insn)
 {
 }
 
+/* An RAII class that temporarily undoes part of the current change group.
+   The sequence:
+
+     {
+       ...;
+       undo_recog_changes undo (NUM);
+       STMTS;
+     }
+
+   executes STMTS with all the changes numbered NUM and up temporarily
+   reverted.  STMTS must not try to add to the current change group.
+
+   Nested uses are supported, but each nested NUM must be no greater than
+   outer NUMs.  */
+
+class undo_recog_changes
+{
+public:
+  undo_recog_changes (int);
+  ~undo_recog_changes ();
+
+  static bool is_active () { return s_num_changes != 0; }
+
+private:
+  int m_old_num_changes;
+
+  static int s_num_changes;
+};
+
 extern void init_recog (void);
 extern void init_recog_no_volatile (void);
 extern bool check_asm_operands (rtx);
@@ -216,8 +245,6 @@ extern void confirm_change_group (void);
 extern bool apply_change_group (void);
 extern int num_validated_changes (void);
 extern void cancel_changes (int);
-extern void temporarily_undo_changes (int);
-extern void redo_changes (int);
 extern bool constrain_operands (int, alternative_mask);
 extern bool constrain_operands_cached (rtx_insn *, int);
 extern bool memory_address_addr_space_p (machine_mode, rtx, addr_space_t,
diff --git a/gcc/rtl-ssa/changes.cc b/gcc/rtl-ssa/changes.cc
index f2be1195bc0..5f3a0f0f0ec 100644
--- a/gcc/rtl-ssa/changes.cc
+++ b/gcc/rtl-ssa/changes.cc
@@ -173,21 +173,29 @@ rtl_ssa::changes_are_worthwhile (array_slice<insn_change 
*const> changes,
                                 bool strict_p)
 {
   unsigned int old_cost = 0;
-  unsigned int new_cost = 0;
   sreal weighted_old_cost = 0;
-  sreal weighted_new_cost = 0;
   auto entry_count = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count;
+  {
+    undo_recog_changes undo (0);
+    for (insn_change *change : changes)
+      {
+       // Count zero for the old cost if the old instruction was a no-op
+       // move or had an unknown cost.  This should reduce the chances of
+       // making an unprofitable change.
+       old_cost += change->old_cost ();
+       basic_block cfg_bb = change->bb ()->cfg_bb ();
+       if (optimize_bb_for_speed_p (cfg_bb))
+         weighted_old_cost += (cfg_bb->count.to_sreal_scale (entry_count)
+                               * change->old_cost ());
+
+      }
+  }
+  unsigned int new_cost = 0;
+  sreal weighted_new_cost = 0;
   for (insn_change *change : changes)
     {
-      // Count zero for the old cost if the old instruction was a no-op
-      // move or had an unknown cost.  This should reduce the chances of
-      // making an unprofitable change.
-      old_cost += change->old_cost ();
       basic_block cfg_bb = change->bb ()->cfg_bb ();
       bool for_speed = optimize_bb_for_speed_p (cfg_bb);
-      if (for_speed)
-       weighted_old_cost += (cfg_bb->count.to_sreal_scale (entry_count)
-                             * change->old_cost ());
       if (!change->is_deletion ()
          && INSN_CODE (change->rtl ()) != NOOP_MOVE_INSN_CODE)
        {
diff --git a/gcc/rtl-ssa/insns.cc b/gcc/rtl-ssa/insns.cc
index bfc6683cc45..4af79bee7b9 100644
--- a/gcc/rtl-ssa/insns.cc
+++ b/gcc/rtl-ssa/insns.cc
@@ -49,14 +49,13 @@ void
 insn_info::calculate_cost () const
 {
   basic_block cfg_bb = BLOCK_FOR_INSN (m_rtl);
-  temporarily_undo_changes (0);
+  undo_recog_changes (0);
   if (INSN_CODE (m_rtl) == NOOP_MOVE_INSN_CODE)
     // insn_cost also uses 0 to mean "don't know".  Callers that
     // want to distinguish the cases will need to check INSN_CODE.
     m_cost_or_uid = 0;
   else
     m_cost_or_uid = insn_cost (m_rtl, optimize_bb_for_speed_p (cfg_bb));
-  redo_changes (0);
 }
 
 // Add NOTE to the instruction's notes.
-- 
2.25.1

Reply via email to