This patch changes the algorithm for fast VRP. When looking at PR
114855, although VRP is not the main culrpit, it is taking about 600
seconds out of the 10,000.. far more than it should. That test case has
about 400,000 basic blocks, and when we get into that size, some of the
tables and such are just too massive.
I experimented with using the fast_vrp pass I had created last year, and
although it was better (dropped to 100 seconds), it was clear that the
mechanism I had chosen for fast_vrp was still failing miserably in this
case. So I revamped it. This is the new fast-vrp algorithm. Its
actually simpler anyway.
It still reuses components of ranger, so there shouldn't be any
correctness issues (right? :-).
This is how the new mechanism works.
- Global range are simply stored in a global range cache.
- Upon entry to each basic block, we pick up any contextual ranges that
were active from the immediate dominator. (The first block would have none).
- If the block is a single predecessor block, we also pick up all
contextual ranges generated from that edge. gori_on_edge provides the
complete set.
- This is combined with the global range (or existing contextual range),
to procude the new set of contextual ranges for the block.
- The current basic block has a lazy_ssa_cache object which contains all
contextual ranges that are active, so picking up the range for an
ssa-name is now simple. Its either the contextual range if poresent, or
the global range.
- when post_bb is processed, we free the contextual range cache for the
block.
This runs that testcase in 7 seconds now, so its a big improvement, and
shoulduse a lot less memory as we don't build export and dependency tables.
This version also turns on the relation_oracle, which allows for a lot
of relation processing to also happen.
I have tested it with a patch which uses it for all 3 passes of VRP
always, and this bootstraps successfully. The original fast vrp ran
about 38% faster, on a bootstrap of GCC, this runs about 32% faster,
but also includes relation processing which the previous version did
not. It doesn't get everything normal VRP gets, but that is to be
expected... it still gets a lot. Eventually I'll spend another week and
see if we can add inferred ranges or get any other low hanging fruit
that is missed from the testsuite.
Bootstraps on x86_64-pc-linux-gnu with no regressions. (Of course, it
isn't actually called anywhere yet). Pushed.
Andrew
From 6bfd8f1b0ac116fb2cb1edb4a9dff7069257adb7 Mon Sep 17 00:00:00 2001
From: Andrew MacLeod <amacl...@redhat.com>
Date: Mon, 17 Jun 2024 11:32:51 -0400
Subject: [PATCH 2/5] Change fast VRP algorithm
Change the fast VRP algorithm to track contextual ranges active within
each basic block.
* gimple-range.cc (dom_ranger::dom_ranger): Create a block
vector.
(dom_ranger::~dom_ranger): Dispose of the block vector.
(dom_ranger::edge_range): Delete.
(dom_ranger::range_on_edge): Combine range in src BB with any
range gori_nme_on_edge returns.
(dom_ranger::range_in_bb): Combine global range with any active
contextual range for an ssa-name.
(dom_ranger::range_of_stmt): Fix non-ssa LHS case, use
fur_depend for folding so relations can be registered.
(dom_ranger::maybe_push_edge): Delete.
(dom_ranger::pre_bb): Create incoming contextual range vector.
(dom_ranger::post_bb): Free contextual range vector.
* gimple-range.h (dom_ranger::edge_range): Delete.
(dom_ranger::m_e0): Delete.
(dom_ranger::m_e1): Delete.
(dom_ranger::m_bb): New.
(dom_ranger::m_pop_list): Delete.
* tree-vrp.cc (execute_fast_vrp): Enable relation oracle.
---
gcc/gimple-range.cc | 232 ++++++++++++++++----------------------------
gcc/gimple-range.h | 8 +-
gcc/tree-vrp.cc | 2 +
3 files changed, 90 insertions(+), 152 deletions(-)
diff --git a/gcc/gimple-range.cc b/gcc/gimple-range.cc
index f3e4ec2d249..4e507485f5e 100644
--- a/gcc/gimple-range.cc
+++ b/gcc/gimple-range.cc
@@ -918,7 +918,15 @@ assume_query::dump (FILE *f)
}
// ---------------------------------------------------------------------------
-
+//
+// The DOM based ranger assumes a single DOM walk through the IL, and is
+// used by the fvrp_folder as a fast VRP.
+// During the dom walk, the current block has an ssa_lazy_cache pointer
+// m_bb[bb->index] which represents all the cumulative contextual ranges
+// active in the block.
+// These ranges are pure static ranges generated by branches, and must be
+// combined with the equivlaent global range to produce the final range.
+// A NULL pointer means there are no contextual ranges.
// Create a DOM based ranger for use by a DOM walk pass.
@@ -926,11 +934,8 @@ dom_ranger::dom_ranger () : m_global ()
{
m_freelist.create (0);
m_freelist.truncate (0);
- m_e0.create (0);
- m_e0.safe_grow_cleared (last_basic_block_for_fn (cfun));
- m_e1.create (0);
- m_e1.safe_grow_cleared (last_basic_block_for_fn (cfun));
- m_pop_list = BITMAP_ALLOC (NULL);
+ m_bb.create (0);
+ m_bb.safe_grow_cleared (last_basic_block_for_fn (cfun));
if (dump_file && (param_ranger_debug & RANGER_DEBUG_TRACE))
tracer.enable_trace ();
}
@@ -945,9 +950,7 @@ dom_ranger::~dom_ranger ()
fprintf (dump_file, "=========================:\n");
m_global.dump (dump_file);
}
- BITMAP_FREE (m_pop_list);
- m_e1.release ();
- m_e0.release ();
+ m_bb.release ();
m_freelist.release ();
}
@@ -973,6 +976,7 @@ dom_ranger::range_of_expr (vrange &r, tree expr, gimple *s)
fprintf (dump_file, "\n");
}
+ // If there is a statement, return the range in that statements block.
if (s)
range_in_bb (r, gimple_bb (s), expr);
else
@@ -983,37 +987,15 @@ dom_ranger::range_of_expr (vrange &r, tree expr, gimple *s)
return true;
}
-
-// Return TRUE and the range if edge E has a range set for NAME in
-// block E->src.
-
-bool
-dom_ranger::edge_range (vrange &r, edge e, tree name)
-{
- bool ret = false;
- basic_block bb = e->src;
-
- // Check if BB has any outgoing ranges on edge E.
- ssa_lazy_cache *out = NULL;
- if (EDGE_SUCC (bb, 0) == e)
- out = m_e0[bb->index];
- else if (EDGE_SUCC (bb, 1) == e)
- out = m_e1[bb->index];
-
- // If there is an edge vector and it has a range, pick it up.
- if (out && out->has_range (name))
- ret = out->get_range (r, name);
-
- return ret;
-}
-
-
// Return the range of EXPR on edge E in R.
// Return false if no range can be calculated.
bool
dom_ranger::range_on_edge (vrange &r, edge e, tree expr)
{
+ if (!gimple_range_ssa_p (expr))
+ return get_tree_range (r, expr, NULL);
+
basic_block bb = e->src;
unsigned idx;
if ((idx = tracer.header ("range_on_edge ")))
@@ -1023,11 +1005,10 @@ dom_ranger::range_on_edge (vrange &r, edge e, tree expr)
fputc ('\n',dump_file);
}
- if (!gimple_range_ssa_p (expr))
- return get_tree_range (r, expr, NULL);
-
- if (!edge_range (r, e, expr))
- range_in_bb (r, bb, expr);
+ range_in_bb (r, bb, expr);
+ value_range vr(TREE_TYPE (expr));
+ if (gori_name_on_edge (vr, expr, e, this))
+ r.intersect (vr);
if (idx)
tracer.trailer (idx, " ", true, expr, r);
@@ -1039,35 +1020,23 @@ dom_ranger::range_on_edge (vrange &r, edge e, tree expr)
void
dom_ranger::range_in_bb (vrange &r, basic_block bb, tree name)
{
- basic_block def_bb = gimple_bb (SSA_NAME_DEF_STMT (name));
- // Loop through dominators until we get to the entry block, or we find
- // either the defintion block for NAME, or a single pred edge with a range.
- while (bb != ENTRY_BLOCK_PTR_FOR_FN (cfun))
+ // Start with the global value.
+ m_global.range_of_expr (r, name);
+
+ // If there is a contextual range, intersect it with the global range
+ ssa_lazy_cache *context = m_bb[bb->index];
+ if (context && context->has_range (name))
{
- // If we hit the deifntion block, pick up the global value.
- if (bb == def_bb)
- {
- m_global.range_of_expr (r, name);
- return;
- }
- // If its a single pred, check the outgoing range of the edge.
- if (EDGE_COUNT (bb->preds) == 1
- && edge_range (r, EDGE_PRED (bb, 0), name))
- return;
- // Otherwise move up to the dominator, and check again.
- bb = get_immediate_dominator (CDI_DOMINATORS, bb);
+ value_range cr (TREE_TYPE (name));
+ context->get_range (cr, name);
+ r.intersect (cr);
}
- m_global.range_of_expr (r, name);
}
-
// Calculate the range of NAME, as the def of stmt S and return it in R.
-// Return FALSE if no range cqn be calculated.
+// Return FALSE if no range can be calculated.
// Also set the global range for NAME as this should only be called within
// the def block during a DOM walk.
-// Outgoing edges were pre-calculated, so when we establish a global defintion
-// check if any outgoing edges hav ranges that can be combined with the
-// global.
bool
dom_ranger::range_of_stmt (vrange &r, gimple *s, tree name)
@@ -1075,9 +1044,10 @@ dom_ranger::range_of_stmt (vrange &r, gimple *s, tree name)
unsigned idx;
bool ret;
if (!name)
- name = gimple_range_ssa_p (gimple_get_lhs (s));
+ name = gimple_get_lhs (s);
- gcc_checking_assert (!name || name == gimple_get_lhs (s));
+ if (name && !gimple_range_ssa_p (name))
+ return get_tree_range (r, name, NULL);
if ((idx = tracer.header ("range_of_stmt ")))
print_gimple_stmt (dump_file, s, 0, TDF_SLIM);
@@ -1091,9 +1061,13 @@ dom_ranger::range_of_stmt (vrange &r, gimple *s, tree name)
return ret;
}
+ // Fold using a fur_depend object so that relations are registered.
+ fold_using_range f;
+ fur_depend src (s, this);
+ ret = f.fold_stmt (r, s, src, name);
+
// If there is a new calculated range and it is not varying, set
// a global range.
- ret = fold_range (r, s, this);
if (ret && name && m_global.merge_range (name, r) && !r.varying_p ())
{
if (set_range_info (name, r) && dump_file)
@@ -1104,115 +1078,81 @@ dom_ranger::range_of_stmt (vrange &r, gimple *s, tree name)
r.dump (dump_file);
fputc ('\n', dump_file);
}
- basic_block bb = gimple_bb (s);
- unsigned bbi = bb->index;
- value_range vr (TREE_TYPE (name));
- // If there is a range on edge 0, update it.
- if (m_e0[bbi] && m_e0[bbi]->has_range (name))
- {
- if (m_e0[bbi]->merge_range (name, r) && dump_file
- && (dump_flags & TDF_DETAILS))
- {
- fprintf (dump_file, "Outgoing range for ");
- print_generic_expr (dump_file, name, TDF_SLIM);
- fprintf (dump_file, " updated on edge %d->%d : ", bbi,
- EDGE_SUCC (bb, 0)->dest->index);
- if (m_e0[bbi]->get_range (vr, name))
- vr.dump (dump_file);
- fputc ('\n', dump_file);
- }
- }
- // If there is a range on edge 1, update it.
- if (m_e1[bbi] && m_e1[bbi]->has_range (name))
- {
- if (m_e1[bbi]->merge_range (name, r) && dump_file
- && (dump_flags & TDF_DETAILS))
- {
- fprintf (dump_file, "Outgoing range for ");
- print_generic_expr (dump_file, name, TDF_SLIM);
- fprintf (dump_file, " updated on edge %d->%d : ", bbi,
- EDGE_SUCC (bb, 1)->dest->index);
- if (m_e1[bbi]->get_range (vr, name))
- vr.dump (dump_file);
- fputc ('\n', dump_file);
- }
- }
}
if (idx)
tracer.trailer (idx, " ", ret, name, r);
return ret;
}
-// Check if GORI has an ranges on edge E. If there re, store them in
-// either the E0 or E1 vector based on EDGE_0.
-// If there are no ranges, put the empty lazy_cache entry on the freelist
-// for use next time.
+// Preprocess block BB. If there is a single predecessor, start with any
+// contextual ranges on the incoming edge, otherwise the initial list
+// of ranges i empty for this block. Then Merge in any contextual ranges
+// from the dominator block. Tihs will become the contextual ranges
+// that apply to this block.
void
-dom_ranger::maybe_push_edge (edge e, bool edge_0)
+dom_ranger::pre_bb (basic_block bb)
{
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ fprintf (dump_file, "#FVRP entering BB %d\n", bb->index);
+
+ m_bb[bb->index] = NULL;
+ basic_block dom_bb = get_immediate_dominator (CDI_DOMINATORS, bb);
+
ssa_lazy_cache *e_cache;
if (!m_freelist.is_empty ())
e_cache = m_freelist.pop ();
else
e_cache = new ssa_lazy_cache;
- gori_on_edge (*e_cache, e, this);
- if (e_cache->empty_p ())
- m_freelist.safe_push (e_cache);
- else
+ gcc_checking_assert (e_cache->empty_p ());
+
+ // If there is a single pred, check if there are any ranges on
+ // the edge and start with those.
+ if (single_pred_p (bb))
{
- if (edge_0)
- m_e0[e->src->index] = e_cache;
- else
- m_e1[e->src->index] = e_cache;
+ gori_on_edge (*e_cache, EDGE_PRED (bb, 0), this);
+ if (!e_cache->empty_p () && dump_file && (dump_flags & TDF_DETAILS))
+ {
+ fprintf (dump_file, "\nEdge ranges BB %d->%d\n",
+ EDGE_PRED (bb, 0)->src->index, bb->index);
+ e_cache->dump(dump_file);
+ }
}
-}
+ // If the dominator had any ranges registered, integrate those.
+ if (dom_bb && m_bb [dom_bb->index])
+ e_cache->merge (*(m_bb[dom_bb->index]));
-// Preprocess block BB. If there are any outgoing edges, precalculate
-// the outgoing ranges and store them. Note these are done before
-// we process the block, so global values have not been set yet.
-// These are "pure" outgoing ranges inflicted by the condition.
+ // If there are no ranges, this block has no contextual ranges.
+ if (e_cache->empty_p ())
+ m_freelist.safe_push (e_cache);
+ else
+ m_bb[bb->index] = e_cache;
-void
-dom_ranger::pre_bb (basic_block bb)
-{
if (dump_file && (dump_flags & TDF_DETAILS))
- fprintf (dump_file, "#FVRP entering BB %d\n", bb->index);
-
- // Next, see if this block needs outgoing edges calculated.
- gimple_stmt_iterator gsi = gsi_last_nondebug_bb (bb);
- if (!gsi_end_p (gsi))
{
- gimple *s = gsi_stmt (gsi);
- if (is_a<gcond *> (s) && gimple_range_op_handler::supported_p (s))
+ if (m_bb[bb->index])
{
- maybe_push_edge (EDGE_SUCC (bb, 0), true);
- maybe_push_edge (EDGE_SUCC (bb, 1), false);
-
- if (dump_file && (dump_flags & TDF_DETAILS))
- {
- if (m_e0[bb->index])
- {
- fprintf (dump_file, "\nEdge ranges BB %d->%d\n",
- bb->index, EDGE_SUCC (bb, 0)->dest->index);
- m_e0[bb->index]->dump(dump_file);
- }
- if (m_e1[bb->index])
- {
- fprintf (dump_file, "\nEdge ranges BB %d->%d\n",
- bb->index, EDGE_SUCC (bb, 1)->dest->index);
- m_e1[bb->index]->dump(dump_file);
- }
- }
+ fprintf (dump_file, "all contextual ranges active:\n");
+ m_bb[bb->index]->dump (dump_file);
}
+ else
+ fprintf (dump_file, " NO contextual ranges active:\n");
}
- if (dump_file && (dump_flags & TDF_DETAILS))
- fprintf (dump_file, "#FVRP DONE entering BB %d\n", bb->index);
}
// Perform any post block processing.
void
-dom_ranger::post_bb (basic_block)
+dom_ranger::post_bb (basic_block bb)
{
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ fprintf (dump_file, "#FVRP POST BB %d\n", bb->index);
+ // If there were contextual ranges, clear them and put the
+ // object on the freelist.
+ if (m_bb[bb->index])
+ {
+ m_bb[bb->index]->clear ();
+ m_freelist.safe_push (m_bb[bb->index]);
+ m_bb[bb->index] = NULL;
+ }
}
diff --git a/gcc/gimple-range.h b/gcc/gimple-range.h
index 180090bed15..91177567947 100644
--- a/gcc/gimple-range.h
+++ b/gcc/gimple-range.h
@@ -112,19 +112,15 @@ public:
virtual bool range_on_edge (vrange &r, edge e, tree expr) override;
virtual bool range_of_stmt (vrange &r, gimple *s, tree name = NULL) override;
- bool edge_range (vrange &r, edge e, tree name);
- void range_in_bb (vrange &r, basic_block bb, tree name);
void pre_bb (basic_block bb);
void post_bb (basic_block bb);
protected:
+ void range_in_bb (vrange &r, basic_block bb, tree name);
DISABLE_COPY_AND_ASSIGN (dom_ranger);
- void maybe_push_edge (edge e, bool edge_0);
ssa_cache m_global;
vec<ssa_lazy_cache *> m_freelist;
- vec<ssa_lazy_cache *> m_e0;
- vec<ssa_lazy_cache *> m_e1;
- bitmap m_pop_list;
+ vec<ssa_lazy_cache *> m_bb;
range_tracer tracer;
};
#endif // GCC_GIMPLE_RANGE_H
diff --git a/gcc/tree-vrp.cc b/gcc/tree-vrp.cc
index a3b1a5cd337..6e96b639b70 100644
--- a/gcc/tree-vrp.cc
+++ b/gcc/tree-vrp.cc
@@ -1284,11 +1284,13 @@ execute_fast_vrp (struct function *fun, bool final_p)
gcc_checking_assert (!fun->x_range_query);
fun->x_range_query = &dr;
+ get_range_query (fun)->create_relation_oracle ();
folder.substitute_and_fold ();
if (folder.m_unreachable)
folder.m_unreachable->remove ();
+ get_range_query (fun)->destroy_relation_oracle ();
fun->x_range_query = NULL;
return 0;
}
--
2.45.0