For gcc.dg/graphite/scop-4.c when we analyze data-refs of the fist two inner loops (with the scalar BB in between)
for (i = 1; i < 100; i++) /// loop 1 { -- scop start for (j = 1; j < 80; j++) /// loop 2 a[j][i] = a[j+1][2*i-1*j] + 12; b[i] = b[i-1] + 10; for (j = 1; j < 60; j++) /// loop 3 a[j][i] = a[j+1][i-1] + 8; -- scop end bar (); if (i == 23) b[i] = a[i-1][i] + 6; } we end up asking data-ref analysis to analyze b[i] and b[i-1] with respect to loop == nest == 2. That doesn't make much sense in itself and thus we get some weird answers. The one that confuses us is {0, +, 1}_1 because later when we try to extract_affine_chrec on that ISL aborts because we feed it -1 as the dimension for the loop. While we can with some effort suppress those CHRECs I think the better solution is to make GRAPHITE not ask SCEV to do this kind of analysis but consistently analyze DRs in the containing loop even if that loop is not within the current region. The idea is that we'd transform the above to for (i = 1; i < 100; i++) /// loop 1 { -- scop start do { i' = i; for (j = 1; j < 80; j++) /// loop 2 a[j][i'] = a[j+1][2*i'-1*j] + 12; b[i'] = b[i'-1] + 10; for (j = 1; j < 60; j++) /// loop 3 a[j][i'] = a[j+1][i'-1] + 8; } while (0); -- scop end bar (); if (i == 23) b[i] = a[i-1][i] + 6; } basically wrap each SCOP inside a loop that doesn't iterate. This should make analyzing the data-refs possible and enable dependence analysis for them. I needed to create an extra dimension for this "loop" and adjust some +- 1 stuff (ugh, this can need some better abstraction...). On the way the stmt_has_simple_data_refs_p/try_generate_gimple_bb changes fix PR82355 as well. The copy_bb_and_scalar_dependences change is to avoid extra code-gen errors on some existing graphite testcases where we now perform more elaborate transforms. I probably started with one of the more difficult code-gen errors here so bear with my lack of GRAPHITE knowledge. Does this look reasonable? Bootstrapped and tested on x86_64-unknown-linux-gnu (with & without -floop-nest-optimize -fgraphite-identity). I've verified that the graphite testsuite now has less ICEs when turning code-gen errors into ICEs. Results of throwing this onto SPEC CPU 2006 are somewhat hard to interpret. Before the patch we see loop nest optimized: 105 loop nest not optimized, code generation error: 42 loop nest not optimized, optimized schedule is identical to original schedule: 119 loop nest not optimized, optimization timed out: 36 loop nest not optimized, ISL signalled an error: 6 loop nest: 308 and after loop nest optimized: 87 loop nest not optimized, code generation error: 93 loop nest not optimized, optimized schedule is identical to original schedule: 216 loop nest not optimized, optimization timed out: 46 loop nest not optimized, ISL signalled an error: 8 loop nest: 450 first of all this means we've rejected fewer SCOPs initially (very likely because of alias-sets now matching and maybe because asking SCEV for sth sensible turns out to result in less scev_not_known stuff in affected DRs). I'm not sure why we end up optimizing less (maybe because I don't analyze DRs in loops contained in the region with respect to the containing loop as well -- I'd have to pre-compute that loop). Anyway. Ok for trunk? Thanks, Richard. 2017-09-29 Richard Biener <rguent...@suse.de> PR tree-optimization/82355 * graphite-isl-ast-to-gimple.c (build_iv_mapping): Also build a mapping for the enclosing loop but avoid generating one for the loop tree root. (copy_bb_and_scalar_dependences): Remove premature codegen error on PHIs in blocks duplicated into multiple places. * graphite-scop-detection.c (scop_detection::stmt_has_simple_data_refs_p): For a loop not in the region use it as loop and nest to analyze the DR in. (try_generate_gimple_bb): Likewise. * graphite-sese-to-poly.c (extract_affine_chrec): Adjust. (add_loop_constraints): For blocks in a loop not in the region create a dimension with a single iteration. * sese.h (gbb_loop_at_index): Remove assert. * gcc.dg/graphite/fuse-1.c: Adjust. * gcc.dg/graphite/fuse-2.c: Likewise. * gcc.dg/graphite/pr82355.c: New testcase. Index: gcc/graphite-isl-ast-to-gimple.c =================================================================== --- gcc/graphite-isl-ast-to-gimple.c (revision 253282) +++ gcc/graphite-isl-ast-to-gimple.c (working copy) @@ -774,8 +775,10 @@ build_iv_mapping (vec<tree> iv_map, gimp if (codegen_error_p ()) t = integer_zero_node; - loop_p old_loop = gbb_loop_at_index (gbb, region, i - 1); - iv_map[old_loop->num] = t; + loop_p old_loop = gbb_loop_at_index (gbb, region, i - 2); + /* Record sth only for real loops. */ + if (old_loop->num != 0) + iv_map[old_loop->num] = t; } } @@ -2587,18 +2590,6 @@ edge translate_isl_ast_to_gimple:: copy_bb_and_scalar_dependences (basic_block bb, edge next_e, vec<tree> iv_map) { int num_phis = number_of_phi_nodes (bb); - - if (region->copied_bb_map->get (bb)) - { - /* FIXME: we should be able to handle phi nodes with args coming from - outside the region. */ - if (num_phis) - { - set_codegen_error (); - return NULL; - } - } - basic_block new_bb = NULL; if (bb_contains_loop_close_phi_nodes (bb)) { Index: gcc/graphite-scop-detection.c =================================================================== --- gcc/graphite-scop-detection.c (revision 253282) +++ gcc/graphite-scop-detection.c (working copy) @@ -1055,10 +1055,12 @@ scop_detection::graphite_can_represent_e bool scop_detection::stmt_has_simple_data_refs_p (sese_l scop, gimple *stmt) { - loop_p nest = outermost_loop_in_sese (scop, gimple_bb (stmt)); + loop_p nest; loop_p loop = loop_containing_stmt (stmt); if (!loop_in_sese_p (loop, scop)) - loop = nest; + nest = loop; + else + nest = outermost_loop_in_sese (scop, gimple_bb (stmt)); auto_vec<data_reference_p> drs; if (! graphite_find_data_references_in_stmt (nest, loop, stmt, &drs)) @@ -1450,11 +1452,12 @@ try_generate_gimple_bb (scop_p scop, bas vec<scalar_use> reads = vNULL; sese_l region = scop->scop_info->region; - loop_p nest = outermost_loop_in_sese (region, bb); - + loop_p nest; loop_p loop = bb->loop_father; if (!loop_in_sese_p (loop, region)) - loop = nest; + nest = loop; + else + nest = outermost_loop_in_sese (region, bb); for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) Index: gcc/graphite-sese-to-poly.c =================================================================== --- gcc/graphite-sese-to-poly.c (revision 253282) +++ gcc/graphite-sese-to-poly.c (working copy) @@ -86,7 +86,7 @@ extract_affine_chrec (scop_p s, tree e, isl_pw_aff *lhs = extract_affine (s, CHREC_LEFT (e), isl_space_copy (space)); isl_pw_aff *rhs = extract_affine (s, CHREC_RIGHT (e), isl_space_copy (space)); isl_local_space *ls = isl_local_space_from_space (space); - unsigned pos = sese_loop_depth (s->scop_info->region, get_chrec_loop (e)) - 1; + unsigned pos = sese_loop_depth (s->scop_info->region, get_chrec_loop (e)); isl_aff *loop = isl_aff_set_coefficient_si (isl_aff_zero_on_domain (ls), isl_dim_in, pos, 1); isl_pw_aff *l = isl_pw_aff_from_aff (loop); @@ -756,10 +759,10 @@ add_loop_constraints (scop_p scop, __isl return domain; const sese_l ®ion = scop->scop_info->region; if (!loop_in_sese_p (loop, region)) - return domain; - - /* Recursion all the way up to the context loop. */ - domain = add_loop_constraints (scop, domain, loop_outer (loop), context); + ; + else + /* Recursion all the way up to the context loop. */ + domain = add_loop_constraints (scop, domain, loop_outer (loop), context); /* Then, build constraints over the loop in post-order: outer to inner. */ @@ -770,6 +773,21 @@ add_loop_constraints (scop_p scop, __isl domain = add_iter_domain_dimension (domain, loop, scop); isl_space *space = isl_set_get_space (domain); + if (!loop_in_sese_p (loop, region)) + { + /* 0 == loop_i */ + isl_local_space *ls = isl_local_space_from_space (space); + isl_constraint *c = isl_equality_alloc (ls); + c = isl_constraint_set_coefficient_si (c, isl_dim_set, loop_index, 1); + if (dump_file) + { + fprintf (dump_file, "[sese-to-poly] adding constraint to the domain: "); + print_isl_constraint (dump_file, c); + } + domain = isl_set_add_constraint (domain, c); + return domain; + } + /* 0 <= loop_i */ isl_local_space *ls = isl_local_space_from_space (isl_space_copy (space)); isl_constraint *c = isl_inequality_alloc (ls); Index: gcc/sese.h =================================================================== --- gcc/sese.h (revision 253282) +++ gcc/sese.h (working copy) @@ -326,8 +326,6 @@ gbb_loop_at_index (gimple_poly_bb_p gbb, while (--depth > index) loop = loop_outer (loop); - gcc_assert (loop_in_sese_p (loop, region)); - return loop; } Index: gcc/testsuite/gcc.dg/graphite/fuse-1.c =================================================================== --- gcc/testsuite/gcc.dg/graphite/fuse-1.c (revision 253282) +++ gcc/testsuite/gcc.dg/graphite/fuse-1.c (working copy) @@ -1,15 +1,15 @@ /* Check that the two loops are fused and that we manage to fold the two xor operations. */ -/* { dg-options "-O2 -floop-nest-optimize -fdump-tree-forwprop-all -fdump-tree-graphite-all" } */ +/* { dg-options "-O2 -floop-nest-optimize -fdump-tree-forwprop4-all -fdump-tree-graphite-all" } */ /* Make sure we fuse the loops like this: AST generated by isl: for (int c0 = 0; c0 <= 99; c0 += 1) { - S_3(c0); - S_6(c0); - S_9(c0); + S_3(0, c0); + S_6(0, c0); + S_9(0, c0); } */ -/* { dg-final { scan-tree-dump-times "AST generated by isl:.*for \\(int c0 = 0; c0 <= 99; c0 \\+= 1\\) \\{.*S_.*\\(c0\\);.*S_.*\\(c0\\);.*S_.*\\(c0\\);.*\\}" 1 "graphite" } } */ +/* { dg-final { scan-tree-dump-times "AST generated by isl:.*for \\(int c0 = 0; c0 <= 99; c0 \\+= 1\\) \\{.*S_.*\\(0, c0\\);.*S_.*\\(0, c0\\);.*S_.*\\(0, c0\\);.*\\}" 1 "graphite" } } */ /* Check that after fusing the loops, the scalar computation is also fused. */ /* { dg-final { scan-tree-dump-times "gimple_simplified to\[^\\n\]*\\^ 12" 1 "forwprop4" } } */ Index: gcc/testsuite/gcc.dg/graphite/fuse-2.c =================================================================== --- gcc/testsuite/gcc.dg/graphite/fuse-2.c (revision 253282) +++ gcc/testsuite/gcc.dg/graphite/fuse-2.c (working copy) @@ -3,13 +3,13 @@ /* Make sure we fuse the loops like this: AST generated by isl: for (int c0 = 0; c0 <= 99; c0 += 1) { - S_3(c0); - S_6(c0); - S_9(c0); + S_3(0, c0); + S_6(0, c0); + S_9(0, c0); } */ -/* { dg-final { scan-tree-dump-times "AST generated by isl:.*for \\(int c0 = 0; c0 <= 99; c0 \\+= 1\\) \\{.*S_.*\\(c0\\);.*S_.*\\(c0\\);.*S_.*\\(c0\\);.*\\}" 1 "graphite" } } */ +/* { dg-final { scan-tree-dump-times "AST generated by isl:.*for \\(int c0 = 0; c0 <= 99; c0 \\+= 1\\) \\{.*S_.*\\(0, c0\\);.*S_.*\\(0, c0\\);.*S_.*\\(0, c0\\);.*\\}" 1 "graphite" } } */ #define MAX 100 int A[MAX], B[MAX], C[MAX]; Index: gcc/testsuite/gcc.dg/graphite/pr82355.c =================================================================== --- gcc/testsuite/gcc.dg/graphite/pr82355.c (nonexistent) +++ gcc/testsuite/gcc.dg/graphite/pr82355.c (working copy) @@ -0,0 +1,23 @@ +/* { dg-do compile } */ +/* { dg-options "-O3 -floop-nest-optimize" } */ + +int dc, at; + +void +tv (int *ld, int jl) +{ + for (;;) + { + if (dc != 0) + for (;;) + { + *ld = !!(*ld) + 1; + for (dc = 0; dc < 3; ++dc) + at = (jl != 0) ? *ld : 0; + } + + while (at != 0) + { + } + } +}