The following patch makes loop-distribution able to distribute loops that have control flow. It does so by adding control dependence edges to the RDG (removing the need for special-casing the loop exit condition and its dependencies). With this we can now distribute the loop of the testcase
for (i = 0; i < 1024; ++i) { a[i] = 0; if (i > 100) b[i] = i; } and generate a memset for zeroing a. The patch does not in any way add or adjust a cost model, so it remains necessary that there is a write in each partition (a cost model could also be that if we were able to factor out a control dependence from at least one partition then that is worthwhile on its own). In theory this should expose a greater number of loops to vectorization (if you'd enable -ftree-loop-distribution, that is). Bootstrapped (with -O2 -g -ftree-loop-distribution) on x86_64-unknown-linux-gnu, testing in progress. I'll leave it for comments until Monday (if anyone cares). The next restriction to go is that of us only distributing innermost loops. Thanks, Richard. 2013-09-13 Richard Biener <rguent...@suse.de> * tree-loop-distribution.c (enum rdg_dep_type): Add control_dd. (dot_rdg_1): Handle control_dd. (create_edge_for_control_dependence): New function. (create_rdg_edges): Add control dependences if asked for. (build_rdg): Likewise. (generate_loops_for_partition): If there are not necessary control stmts remove all their dependencies. (collect_condition_stmts, rdg_flag_loop_exits): Remove. (distribute_loop): Pass on control dependences. (tree_loop_distribution): Compute control dependences and remove restriction on number of loop nodes. * gcc.dg/tree-ssa/ldist-22.c: New testcase. Index: gcc/tree-loop-distribution.c =================================================================== *** gcc/tree-loop-distribution.c.orig 2013-09-13 13:34:49.000000000 +0200 --- gcc/tree-loop-distribution.c 2013-09-13 13:44:09.771379182 +0200 *************** enum rdg_dep_type *** 92,98 **** output_dd = 'o', /* Read After Read (RAR). */ ! input_dd = 'i' }; /* Dependence information attached to an edge of the RDG. */ --- 92,101 ---- output_dd = 'o', /* Read After Read (RAR). */ ! input_dd = 'i', ! ! /* Control dependence (execute conditional on). */ ! control_dd = 'c' }; /* Dependence information attached to an edge of the RDG. */ *************** dot_rdg_1 (FILE *file, struct graph *rdg *** 218,223 **** --- 221,230 ---- fprintf (file, "%d -> %d [label=anti] \n", i, e->dest); break; + case control_dd: + fprintf (file, "%d -> %d [label=control] \n", i, e->dest); + break; + default: gcc_unreachable (); } *************** create_rdg_edges_for_scalar (struct grap *** 325,334 **** } } /* Creates the edges of the reduced dependence graph RDG. */ static void ! create_rdg_edges (struct graph *rdg, vec<ddr_p> ddrs) { int i; struct data_dependence_relation *ddr; --- 332,369 ---- } } + /* Creates an edge for the control dependences of BB to the vertex V. */ + + static void + create_edge_for_control_dependence (struct graph *rdg, basic_block bb, + int v, control_dependences *cd) + { + bitmap_iterator bi; + unsigned edge_n; + EXECUTE_IF_SET_IN_BITMAP (cd->get_edges_dependent_on (bb->index), + 0, edge_n, bi) + { + basic_block cond_bb = cd->get_edge (edge_n)->src; + gimple stmt = last_stmt (cond_bb); + if (stmt && is_ctrl_stmt (stmt)) + { + struct graph_edge *e; + int c = rdg_vertex_for_stmt (rdg, stmt); + if (c < 0) + continue; + + e = add_edge (rdg, c, v); + e->data = XNEW (struct rdg_edge); + RDGE_TYPE (e) = control_dd; + RDGE_RELATION (e) = NULL; + } + } + } + /* Creates the edges of the reduced dependence graph RDG. */ static void ! create_rdg_edges (struct graph *rdg, vec<ddr_p> ddrs, control_dependences *cd) { int i; struct data_dependence_relation *ddr; *************** create_rdg_edges (struct graph *rdg, vec *** 345,350 **** --- 380,400 ---- FOR_EACH_PHI_OR_STMT_DEF (def_p, RDG_STMT (rdg, i), iter, SSA_OP_DEF) create_rdg_edges_for_scalar (rdg, DEF_FROM_PTR (def_p), i); + + if (cd) + for (i = 0; i < rdg->n_vertices; i++) + { + gimple stmt = RDG_STMT (rdg, i); + if (gimple_code (stmt) == GIMPLE_PHI) + { + edge_iterator ei; + edge e; + FOR_EACH_EDGE (e, ei, gimple_bb (stmt)->preds) + create_edge_for_control_dependence (rdg, e->src, i, cd); + } + else + create_edge_for_control_dependence (rdg, gimple_bb (stmt), i, cd); + } } /* Build the vertices of the reduced dependence graph RDG. Return false *************** free_rdg (struct graph *rdg) *** 465,471 **** scalar dependence. */ static struct graph * ! build_rdg (vec<loop_p> loop_nest) { struct graph *rdg; vec<gimple> stmts; --- 515,521 ---- scalar dependence. */ static struct graph * ! build_rdg (vec<loop_p> loop_nest, control_dependences *cd) { struct graph *rdg; vec<gimple> stmts; *************** build_rdg (vec<loop_p> loop_nest) *** 497,503 **** free_rdg (rdg); return NULL; } ! create_rdg_edges (rdg, dependence_relations); dependence_relations.release (); datarefs.release (); --- 547,553 ---- free_rdg (rdg); return NULL; } ! create_rdg_edges (rdg, dependence_relations, cd); dependence_relations.release (); datarefs.release (); *************** generate_loops_for_partition (struct loo *** 699,710 **** && !is_gimple_debug (stmt) && !bitmap_bit_p (partition->stmts, gimple_uid (stmt))) { ! unlink_stmt_vdef (stmt); ! gsi_remove (&bsi, true); ! release_defs (stmt); } ! else ! gsi_next (&bsi); } } --- 749,776 ---- && !is_gimple_debug (stmt) && !bitmap_bit_p (partition->stmts, gimple_uid (stmt))) { ! /* Choose an arbitrary path through the empty CFG part ! that this unnecessary control stmt controls. */ ! if (gimple_code (stmt) == GIMPLE_COND) ! { ! gimple_cond_make_false (stmt); ! update_stmt (stmt); ! } ! else if (gimple_code (stmt) == GIMPLE_SWITCH) ! { ! gimple_switch_set_index ! (stmt, CASE_LOW (gimple_switch_label (stmt, 1))); ! update_stmt (stmt); ! } ! else ! { ! unlink_stmt_vdef (stmt); ! gsi_remove (&bsi, true); ! release_defs (stmt); ! continue; ! } } ! gsi_next (&bsi); } } *************** rdg_flag_vertex_and_dependent (struct gr *** 1031,1092 **** nodes.release (); } - /* Initialize CONDS with all the condition statements from the basic - blocks of LOOP. */ - - static void - collect_condition_stmts (struct loop *loop, vec<gimple> *conds) - { - unsigned i; - edge e; - vec<edge> exits = get_loop_exit_edges (loop); - - FOR_EACH_VEC_ELT (exits, i, e) - { - gimple cond = last_stmt (e->src); - - if (cond) - conds->safe_push (cond); - } - - exits.release (); - } - - /* Add to PARTITION all the exit condition statements for LOOPS - together with all their dependent statements determined from - RDG. */ - - static void - rdg_flag_loop_exits (struct graph *rdg, partition_t partition, - bitmap processed) - { - unsigned i; - bitmap_iterator bi; - vec<gimple> conds; - conds.create (3); - - EXECUTE_IF_SET_IN_BITMAP (partition->loops, 0, i, bi) - collect_condition_stmts (get_loop (cfun, i), &conds); - - while (!conds.is_empty ()) - { - gimple cond = conds.pop (); - int v = rdg_vertex_for_stmt (rdg, cond); - if (!already_processed_vertex_p (processed, v)) - { - bitmap saved_loops = BITMAP_ALLOC (NULL); - bitmap_copy (saved_loops, partition->loops); - rdg_flag_vertex_and_dependent (rdg, v, partition, processed); - EXECUTE_IF_AND_COMPL_IN_BITMAP (partition->loops, saved_loops, - 0, i, bi) - collect_condition_stmts (get_loop (cfun, i), &conds); - BITMAP_FREE (saved_loops); - } - } - - conds.release (); - } - /* Returns a partition with all the statements needed for computing the vertex V of the RDG, also including the loop exit conditions. */ --- 1097,1102 ---- *************** build_rdg_partition_for_vertex (struct g *** 1096,1102 **** partition_t partition = partition_alloc (NULL, NULL); bitmap processed = BITMAP_ALLOC (NULL); rdg_flag_vertex_and_dependent (rdg, v, partition, processed); - rdg_flag_loop_exits (rdg, partition, processed); BITMAP_FREE (processed); return partition; } --- 1106,1111 ---- *************** partition_contains_all_rw (struct graph *** 1463,1469 **** Returns the number of distributed loops. */ static int ! distribute_loop (struct loop *loop, vec<gimple> stmts) { struct graph *rdg; vec<loop_p> loop_nest; --- 1472,1479 ---- Returns the number of distributed loops. */ static int ! distribute_loop (struct loop *loop, vec<gimple> stmts, ! control_dependences *cd) { struct graph *rdg; vec<loop_p> loop_nest; *************** distribute_loop (struct loop *loop, vec< *** 1479,1485 **** return 0; } ! rdg = build_rdg (loop_nest); if (!rdg) { if (dump_file && (dump_flags & TDF_DETAILS)) --- 1489,1495 ---- return 0; } ! rdg = build_rdg (loop_nest, cd); if (!rdg) { if (dump_file && (dump_flags & TDF_DETAILS)) *************** tree_loop_distribution (void) *** 1631,1636 **** --- 1641,1647 ---- loop_iterator li; bool changed = false; basic_block bb; + control_dependences *cd = NULL; FOR_ALL_BB (bb) { *************** tree_loop_distribution (void) *** 1660,1669 **** if (!optimize_loop_for_speed_p (loop)) continue; - /* Only distribute loops with a header and latch for now. */ - if (loop->num_nodes > 2) - continue; - /* Initialize the worklist with stmts we seed the partitions with. */ bbs = get_loop_body_in_dom_order (loop); for (i = 0; i < loop->num_nodes; ++i) --- 1671,1676 ---- *************** out: *** 1697,1703 **** free (bbs); if (work_list.length () > 0) ! nb_generated_loops = distribute_loop (loop, work_list); if (nb_generated_loops > 0) changed = true; --- 1704,1718 ---- free (bbs); if (work_list.length () > 0) ! { ! if (!cd) ! { ! calculate_dominance_info (CDI_POST_DOMINATORS); ! cd = new control_dependences (create_edge_list ()); ! free_dominance_info (CDI_POST_DOMINATORS); ! } ! nb_generated_loops = distribute_loop (loop, work_list, cd); ! } if (nb_generated_loops > 0) changed = true; *************** out: *** 1714,1719 **** --- 1729,1737 ---- work_list.release (); } + if (cd) + delete cd; + if (changed) { mark_virtual_operands_for_renaming (cfun); Index: gcc/testsuite/gcc.dg/tree-ssa/ldist-22.c =================================================================== *** /dev/null 1970-01-01 00:00:00.000000000 +0000 --- gcc/testsuite/gcc.dg/tree-ssa/ldist-22.c 2013-09-13 14:06:48.135500474 +0200 *************** *** 0 **** --- 1,32 ---- + /* { dg-do run } */ + /* { dg-options "-O3 -fdump-tree-ldist-details" } */ + + extern void abort (void); + + int a[1024], b[1024]; + + void __attribute__((noinline,noclone)) + foo (void) + { + int i; + for (i = 0; i < 1024; ++i) + { + a[i] = 0; + if (i > 100) + b[i] = i; + } + } + + int main() + { + b[100] = 1; + foo (); + if (b[100] != 1 || b[101] != 101) + abort (); + if (a[0] != 0 || a[101] != 0) + abort (); + return; + } + + /* { dg-final { scan-tree-dump "generated memset zero" "ldist" } } */ + /* { dg-final { cleanup-tree-dump "ldist" } } */