Hi,
this patch fixes an ICE in the parloops pass.
The ICE (when compiling the test-case in attached patch) follows from
the fact that here in gen_parallel_loop the call to
canonicalize_loop_ivs fails to "base all the induction variables in LOOP
on a single control one":
...
/* Base all the induction variables in LOOP on a single control one.*/
canonicalize_loop_ivs (loop, &nit, true);
...
This is caused by the fact that for one of the induction variables,
simple_iv no longer returns true (as was the case at the start of
gen_parallel_loop). This seems to be triggered by the fact that during
loop_version we call scev_reset (although I'm not sure why when
recalculating scev info we're not reaching the same conclusion as before).
The patch fixes the ICE by caching the affine_ivs as calculated by
simple_iv before calling loop_version, and reusing those in
canonicalize_loop_ivs (instead of calling simple_iv again).
Bootstrapped and reg-tested on x86_64.
OK for stage1?
I'm not sure if this is minimal/low-impact enough for stage4. If not, I
can rework the bit of the patch that adds an assert after
canonicalize_loop_ivs into a patch that aborts the optimization instead
of ICE-ing.
Thanks,
- Tom
[parloops] Use cached affine_ivs canonicalize_loop_ivs
2018-02-22 Tom de Vries <t...@codesourcery.com>
PR tree-optimization/83126
* tree-ssa-loop-manip.c (rewrite_phi_with_iv): Add and handle piv
parameter.
(rewrite_all_phi_nodes_with_iv, canonicalize_loop_ivs): Add and handle
simple_ivs parameter.
* tree-ssa-loop-manip.h (canonicalize_loop_ivs): Declare simple_ivs
parameter and default to NULL.
* tree-parloops.c (num_phis): New function.
(gen_parallel_loop): Add simple_ivs argument to canonicalize_loop_ivs
call.
---
gcc/testsuite/gcc.dg/graphite/pr83126.c | 19 +++++++++++++++
gcc/tree-parloops.c | 43 +++++++++++++++++++++++++++++++--
gcc/tree-ssa-loop-manip.c | 41 ++++++++++++++++++++++++-------
gcc/tree-ssa-loop-manip.h | 6 ++++-
4 files changed, 97 insertions(+), 12 deletions(-)
diff --git a/gcc/testsuite/gcc.dg/graphite/pr83126.c b/gcc/testsuite/gcc.dg/graphite/pr83126.c
new file mode 100644
index 0000000..663d059
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/graphite/pr83126.c
@@ -0,0 +1,19 @@
+/* { dg-additional-options "-w -ftree-parallelize-loops=2 -floop-parallelize-all -O1" } */
+
+void
+ew (unsigned short int c9, int stuff)
+{
+ int e1;
+
+ for (;;)
+ {
+ unsigned int *by = &e1;
+ int *fd = &stuff;
+
+ *fd = c9;
+ fd = *fd;
+ if (*fd != 0)
+ for (*by = 0; *by < 2; ++*by)
+ c9 *= e1;
+ }
+}
diff --git a/gcc/tree-parloops.c b/gcc/tree-parloops.c
index e44ad5e..5971680 100644
--- a/gcc/tree-parloops.c
+++ b/gcc/tree-parloops.c
@@ -2235,6 +2235,25 @@ create_parallel_loop (struct loop *loop, tree loop_fn, tree data,
calculate_dominance_info (CDI_DOMINATORS);
}
+/* Return number of phis in bb. If COUNT_VIRTUAL_P is false, don't count the
+ virtual phi. */
+
+static unsigned int
+num_phis (basic_block bb, bool count_virtual_p)
+{
+ unsigned int nr_phis = 0;
+ gphi_iterator gsi;
+ for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
+ {
+ if (!count_virtual_p && virtual_operand_p (PHI_RESULT (gsi.phi ())))
+ continue;
+
+ nr_phis++;
+ }
+
+ return nr_phis;
+}
+
/* Generates code to execute the iterations of LOOP in N_THREADS
threads in parallel, which can be 0 if that number is to be determined
later.
@@ -2326,6 +2345,24 @@ gen_parallel_loop (struct loop *loop,
if (stmts)
gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
+ /* The loop_version call below calls gimple_duplicate_loop_to_header_edge,
+ which calls scev_reset. So a loop header phi node that classifies as
+ simple_iv here, may not classify as such anymore after loop_version, which
+ will make canonicalize_loop_ivs fail to "base all the induction variables
+ in LOOP on a single control one". We fix this by storing the affine_iv
+ result of the simple_iv call in a map from phi node result to affine_iv,
+ and passing that map as an argument to canonicalize_loop_ivs. */
+ std::map<tree,affine_iv> simple_ivs;
+ gphi_iterator psi;
+ for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
+ {
+ gphi *phi = psi.phi ();
+ affine_iv iv;
+ tree res = PHI_RESULT (phi);
+ if (simple_iv (loop, loop, res, &iv, true))
+ simple_ivs.insert (std::pair<tree,affine_iv> (res, iv));
+ }
+
if (!oacc_kernels_p)
{
if (loop->inner)
@@ -2364,12 +2401,14 @@ gen_parallel_loop (struct loop *loop,
profile_probability::unlikely (),
profile_probability::likely (),
profile_probability::unlikely (), true);
- update_ssa (TODO_update_ssa);
free_original_copy_tables ();
}
/* Base all the induction variables in LOOP on a single control one. */
- canonicalize_loop_ivs (loop, &nit, true);
+ canonicalize_loop_ivs (loop, &nit, true, &simple_ivs);
+ gcc_assert (num_phis (loop->header, false)
+ == reduction_list->elements () + 1);
+ update_ssa (TODO_update_ssa);
/* Ensure that the exit condition is the first statement in the loop.
The common case is that latch of the loop is empty (apart from the
diff --git a/gcc/tree-ssa-loop-manip.c b/gcc/tree-ssa-loop-manip.c
index bf425af..1054ef3 100644
--- a/gcc/tree-ssa-loop-manip.c
+++ b/gcc/tree-ssa-loop-manip.c
@@ -43,6 +43,7 @@ along with GCC; see the file COPYING3. If not see
#include "tree-scalar-evolution.h"
#include "params.h"
#include "tree-inline.h"
+#include <map>
/* All bitmaps for rewriting into loop-closed SSA go on this obstack,
so that we can free them all at once. */
@@ -1427,13 +1428,14 @@ tree_unroll_loop (struct loop *loop, unsigned factor,
}
/* Rewrite the phi node at position PSI in function of the main
- induction variable MAIN_IV and insert the generated code at GSI. */
+ induction variable MAIN_IV and insert the generated code at GSI. PIV
+ optionally contains the associcated affine_iv. */
static void
rewrite_phi_with_iv (loop_p loop,
gphi_iterator *psi,
gimple_stmt_iterator *gsi,
- tree main_iv)
+ tree main_iv, affine_iv *piv)
{
affine_iv iv;
gassign *stmt;
@@ -1446,7 +1448,9 @@ rewrite_phi_with_iv (loop_p loop,
return;
}
- if (!simple_iv (loop, loop, res, &iv, true))
+ if (piv)
+ iv = *piv;
+ else if (!simple_iv (loop, loop, res, &iv, true))
{
gsi_next (psi);
return;
@@ -1468,10 +1472,12 @@ rewrite_phi_with_iv (loop_p loop,
}
/* Rewrite all the phi nodes of LOOP in function of the main induction
- variable MAIN_IV. */
+ variable MAIN_IV. SIMPLE_IVS contains an optional map from phi node result
+ to affive_iv. */
static void
-rewrite_all_phi_nodes_with_iv (loop_p loop, tree main_iv)
+rewrite_all_phi_nodes_with_iv (loop_p loop, tree main_iv,
+ std::map<tree,affine_iv> *simple_ivs)
{
unsigned i;
basic_block *bbs = get_loop_body_in_dom_order (loop);
@@ -1486,7 +1492,22 @@ rewrite_all_phi_nodes_with_iv (loop_p loop, tree main_iv)
continue;
for (psi = gsi_start_phis (bb); !gsi_end_p (psi); )
- rewrite_phi_with_iv (loop, &psi, &gsi, main_iv);
+ {
+ affine_iv *piv = NULL;
+ affine_iv iv;
+ gphi *phi = psi.phi ();
+ if (simple_ivs)
+ {
+ std::map<tree,affine_iv>::iterator search
+ = simple_ivs->find (PHI_RESULT (phi));
+ if (search != simple_ivs->end ())
+ {
+ iv = search->second;
+ piv = &iv;
+ }
+ }
+ rewrite_phi_with_iv (loop, &psi, &gsi, main_iv, piv);
+ }
}
free (bbs);
@@ -1498,11 +1519,13 @@ rewrite_all_phi_nodes_with_iv (loop_p loop, tree main_iv)
converted to the larger type, the conversion code is inserted before the
loop, and *NIT is updated to the new definition. When BUMP_IN_LATCH is true,
the induction variable is incremented in the loop latch, otherwise it is
- incremented in the loop header. Return the induction variable that was
+ incremented in the loop header. SIMPLE_IVS contains an optional map from
+ phi node result to affive_iv. Return the induction variable that was
created. */
tree
-canonicalize_loop_ivs (struct loop *loop, tree *nit, bool bump_in_latch)
+canonicalize_loop_ivs (struct loop *loop, tree *nit, bool bump_in_latch,
+ std::map<tree,affine_iv> *simple_ivs)
{
unsigned precision = TYPE_PRECISION (TREE_TYPE (*nit));
unsigned original_precision = precision;
@@ -1557,7 +1580,7 @@ canonicalize_loop_ivs (struct loop *loop, tree *nit, bool bump_in_latch)
create_iv (build_int_cst_type (type, 0), build_int_cst (type, 1), NULL_TREE,
loop, &gsi, bump_in_latch, &var_before, NULL);
- rewrite_all_phi_nodes_with_iv (loop, var_before);
+ rewrite_all_phi_nodes_with_iv (loop, var_before, simple_ivs);
stmt = as_a <gcond *> (last_stmt (exit->src));
/* Make the loop exit if the control condition is not satisfied. */
diff --git a/gcc/tree-ssa-loop-manip.h b/gcc/tree-ssa-loop-manip.h
index 390ac6f..a96ccb7 100644
--- a/gcc/tree-ssa-loop-manip.h
+++ b/gcc/tree-ssa-loop-manip.h
@@ -20,6 +20,9 @@ along with GCC; see the file COPYING3. If not see
#ifndef GCC_TREE_SSA_LOOP_MANIP_H
#define GCC_TREE_SSA_LOOP_MANIP_H
+#include <map>
+#include "tree-ssa-loop.h"
+
typedef void (*transform_callback)(struct loop *, void *);
extern void create_iv (tree, tree, tree, struct loop *, gimple_stmt_iterator *,
@@ -54,7 +57,8 @@ extern void tree_transform_and_unroll_loop (struct loop *, unsigned,
transform_callback, void *);
extern void tree_unroll_loop (struct loop *, unsigned,
edge, struct tree_niter_desc *);
-extern tree canonicalize_loop_ivs (struct loop *, tree *, bool);
+extern tree canonicalize_loop_ivs (struct loop *, tree *, bool,
+ std::map<tree,affine_iv> *simple_ivs = NULL);