On Mon, Jul 26, 2021 at 7:28 PM Richard Sandiford <richard.sandif...@arm.com> wrote: > > Aldy Hernandez <al...@redhat.com> writes: > > On Mon, Jul 26, 2021 at 4:18 PM Richard Sandiford > > <richard.sandif...@arm.com> wrote: > >> > >> Aldy Hernandez <al...@redhat.com> writes: > >> > This patch replaces the evrp_range_analyzer in the loop versioning code > >> > with an on-demand ranger. > >> > > >> > Everything was pretty straightforward, except that range_of_expr requires > >> > a gimple statement as context to provide context-aware ranges. I didn't > >> > see > >> > a convenient place where the statement was saved, so I made a vector > >> > indexed > >> > by SSA names. As an alternative, I tried to use the loop's first > >> > statement, > >> > but that proved to be insufficient. > >> > >> The mapping is one-to-many though: there can be multiple statements > >> for each SSA name. Maybe that doesn't matter in this context and > >> any of the statements can act as a representative. > >> > >> I'm surprised that the loop's first statement didn't work though, > >> since the SSA name is supposedly known to be loop-invariant. What went > >> wrong when you tried that? > > > > I was looking at the first statement of loop_info->block_list and one > > of the dg.exp=loop-versioning* tests failed. Perhaps I should have > > used the loop itself, as in the attached patch. With this patch all > > of the loop-versioning tests pass. > > > >> > >> > I am not familiar with loop versioning, but if the DOM walk was only > >> > necessary for the calls to record_ranges_from_stmt, this too could be > >> > removed as the ranger will work without it. > >> > >> Yeah, that was the only reason. If the information is available at > >> version_for_unity (I guess it is) then we should just avoid recording > >> the versioning there if so. > >> > >> How expensive is the check? 
If the result is worth caching, perhaps > >> we should have two bitmaps: the existing one, and one that records > >> whether we've checked a particular SSA name. > >> > >> If the check is relatively cheap then that won't be worth it though. > > > > If you're asking about the range_of_expr check, that's all cached, so > > it should be pretty cheap. Besides, we're no longer calculating > > ranges for each statement in the IL, as we were doing in lv_dom_walker > > with evrp's record_ranges_from_stmt. Only statements of interest are > > queried. > > Sounds good. If the results are already cached then another level > of caching (via the second bitmap I mentioned above) would obviously > be a waste of time.
My callgrind harness for performance testing wasn't able to pick up enough samples to measure the time spent in pass_loop_versioning::execute. I've seen this happen before with passes that run too fast. I'm afraid I don't have enough cycles to continue working on this. > > > How about this patch, pending tests? > > OK, thanks, as a strict improvement over the status quo. But it'd be > even better without the dom walk :-) I've removed the DOM walk, and re-tested. OK to push? Aldy
From 9b1cba95377e7b26b4f0495b1b5998d2f7f33a14 Mon Sep 17 00:00:00 2001 From: Aldy Hernandez <al...@redhat.com> Date: Sat, 24 Jul 2021 12:29:28 +0200 Subject: [PATCH] Replace evrp use in loop versioning with ranger. This patch replaces the evrp_range_analyzer in the loop versioning code with a ranger. Tested on x86-64 Linux. gcc/ChangeLog: * gimple-loop-versioning.cc (lv_dom_walker::lv_dom_walker): Remove. (loop_versioning::lv_dom_walker::before_dom_children): Remove. (loop_versioning::lv_dom_walker::after_dom_children): Remove. (loop_versioning::prune_loop_conditions): Replace vr_values use with range_query interface. (loop_versioning::prune_conditions): Replace dom walk with straight iteration. (pass_loop_versioning::execute): Use ranger. --- gcc/gimple-loop-versioning.cc | 78 ++++++++--------------------------- 1 file changed, 18 insertions(+), 60 deletions(-) diff --git a/gcc/gimple-loop-versioning.cc b/gcc/gimple-loop-versioning.cc index 4b70c5a4aab..52eb6429171 100644 --- a/gcc/gimple-loop-versioning.cc +++ b/gcc/gimple-loop-versioning.cc @@ -30,19 +30,17 @@ along with GCC; see the file COPYING3. If not see #include "tree-ssa-loop.h" #include "ssa.h" #include "tree-scalar-evolution.h" -#include "tree-chrec.h" #include "tree-ssa-loop-ivopts.h" #include "fold-const.h" #include "tree-ssa-propagate.h" #include "tree-inline.h" #include "domwalk.h" -#include "alloc-pool.h" -#include "vr-values.h" -#include "gimple-ssa-evrp-analyze.h" #include "tree-vectorizer.h" #include "omp-general.h" #include "predict.h" #include "tree-into-ssa.h" +#include "gimple-range.h" +#include "tree-cfg.h" namespace { @@ -253,24 +251,6 @@ public: unsigned int run (); private: - /* Used to walk the dominator tree to find loop versioning conditions - that are always false. 
*/ - class lv_dom_walker : public dom_walker - { - public: - lv_dom_walker (loop_versioning &); - - edge before_dom_children (basic_block) FINAL OVERRIDE; - void after_dom_children (basic_block) FINAL OVERRIDE; - - private: - /* The parent pass. */ - loop_versioning &m_lv; - - /* Used to build context-dependent range information. */ - evrp_range_analyzer m_range_analyzer; - }; - /* Used to simplify statements based on conditions that are established by the version checks. */ class name_prop : public substitute_and_fold_engine @@ -308,7 +288,7 @@ private: bool analyze_block (basic_block); bool analyze_blocks (); - void prune_loop_conditions (class loop *, vr_values *); + void prune_loop_conditions (class loop *); bool prune_conditions (); void merge_loop_info (class loop *, class loop *); @@ -499,36 +479,6 @@ loop_info::worth_versioning_p () const && (!bitmap_empty_p (&unity_names) || subloops_benefit_p)); } -loop_versioning::lv_dom_walker::lv_dom_walker (loop_versioning &lv) - : dom_walker (CDI_DOMINATORS), m_lv (lv), m_range_analyzer (false) -{ -} - -/* Process BB before processing the blocks it dominates. */ - -edge -loop_versioning::lv_dom_walker::before_dom_children (basic_block bb) -{ - m_range_analyzer.enter (bb); - - if (bb == bb->loop_father->header) - m_lv.prune_loop_conditions (bb->loop_father, &m_range_analyzer); - - for (gimple_stmt_iterator si = gsi_start_bb (bb); !gsi_end_p (si); - gsi_next (&si)) - m_range_analyzer.record_ranges_from_stmt (gsi_stmt (si), false); - - return NULL; -} - -/* Process BB after processing the blocks it dominates. */ - -void -loop_versioning::lv_dom_walker::after_dom_children (basic_block bb) -{ - m_range_analyzer.leave (bb); -} - /* Decide whether to replace VAL with a new value in a versioned loop. Return the new value if so, otherwise return null. */ @@ -1483,18 +1433,21 @@ loop_versioning::analyze_blocks () LOOP. 
*/ void -loop_versioning::prune_loop_conditions (class loop *loop, vr_values *vrs) +loop_versioning::prune_loop_conditions (class loop *loop) { loop_info &li = get_loop_info (loop); int to_remove = -1; bitmap_iterator bi; unsigned int i; + int_range_max r; EXECUTE_IF_SET_IN_BITMAP (&li.unity_names, 0, i, bi) { tree name = ssa_name (i); - const value_range_equiv *vr = vrs->get_value_range (name); - if (vr && !vr->may_contain_p (build_one_cst (TREE_TYPE (name)))) + gimple *stmt = first_stmt (loop->header); + + if (get_range_query (cfun)->range_of_expr (r, name, stmt) + && !r.contains_p (build_one_cst (TREE_TYPE (name)))) { if (dump_enabled_p ()) dump_printf_loc (MSG_NOTE, find_loop_location (loop), @@ -1519,9 +1472,11 @@ loop_versioning::prune_conditions () AUTO_DUMP_SCOPE ("prune_loop_conditions", dump_user_location_t::from_function_decl (m_fn->decl)); - calculate_dominance_info (CDI_DOMINATORS); - lv_dom_walker dom_walker (*this); - dom_walker.walk (ENTRY_BLOCK_PTR_FOR_FN (m_fn)); + basic_block bb; + FOR_EACH_BB_FN (bb, m_fn) + if (bb == bb->loop_father->header) + prune_loop_conditions (bb->loop_father); + return m_num_conditions != 0; } @@ -1810,7 +1765,10 @@ pass_loop_versioning::execute (function *fn) if (number_of_loops (fn) <= 1) return 0; - return loop_versioning (fn).run (); + enable_ranger (fn); + unsigned int ret = loop_versioning (fn).run (); + disable_ranger (fn); + return ret; } } // anon namespace -- 2.31.1