The following patch makes loop-distribution able to distribute loops
that have control flow.  It does so by adding control dependence
edges to the RDG (removing the need for special-casing the loop
exit condition and its dependencies).  With this we can now
distribute the loop of the testcase

  for (i = 0; i < 1024; ++i)
    {
      a[i] = 0;
      if (i > 100)
        b[i] = i;
    }

and generate a memset for zeroing a.  The patch does not in any
way add or adjust a cost model, so it remains necessary that
there is a write in each partition (a cost model could also be
that if we were able to factor out a control dependence from
at least one partition then that is worthwhile on its own).

In theory this should expose a greater number of loops to
vectorization (if you'd enable -ftree-loop-distribution, that is).

Bootstrapped (with -O2 -g -ftree-loop-distribution) on 
x86_64-unknown-linux-gnu, testing in progress.

I'll leave it for comments until Monday (if anyone cares).

The next restriction to go is that of us only distributing innermost
loops.

Thanks,
Richard.


2013-09-13  Richard Biener  <rguent...@suse.de>

        * tree-loop-distribution.c (enum rdg_dep_type): Add control_dd.
        (dot_rdg_1): Handle control_dd.
        (create_edge_for_control_dependence): New function.
        (create_rdg_edges): Add control dependences if asked for.
        (build_rdg): Likewise.
        (generate_loops_for_partition): If there are not necessary
        control stmts remove all their dependencies.
        (collect_condition_stmts, rdg_flag_loop_exits): Remove.
        (distribute_loop): Pass on control dependences.
        (tree_loop_distribution): Compute control dependences and remove
        restriction on number of loop nodes.

        * gcc.dg/tree-ssa/ldist-22.c: New testcase.

Index: gcc/tree-loop-distribution.c
===================================================================
*** gcc/tree-loop-distribution.c.orig   2013-09-13 13:34:49.000000000 +0200
--- gcc/tree-loop-distribution.c        2013-09-13 13:44:09.771379182 +0200
*************** enum rdg_dep_type
*** 92,98 ****
    output_dd = 'o',
  
    /* Read After Read (RAR).  */
!   input_dd = 'i'
  };
  
  /* Dependence information attached to an edge of the RDG.  */
--- 92,101 ----
    output_dd = 'o',
  
    /* Read After Read (RAR).  */
!   input_dd = 'i',
! 
!   /* Control dependence (execute conditional on).  */
!   control_dd = 'c'
  };
  
  /* Dependence information attached to an edge of the RDG.  */
*************** dot_rdg_1 (FILE *file, struct graph *rdg
*** 218,223 ****
--- 221,230 ----
               fprintf (file, "%d -> %d [label=anti] \n", i, e->dest);
               break;
  
+          case control_dd:
+              fprintf (file, "%d -> %d [label=control] \n", i, e->dest);
+              break;
+ 
             default:
               gcc_unreachable ();
             }
*************** create_rdg_edges_for_scalar (struct grap
*** 325,334 ****
      }
  }
  
  /* Creates the edges of the reduced dependence graph RDG.  */
  
  static void
! create_rdg_edges (struct graph *rdg, vec<ddr_p> ddrs)
  {
    int i;
    struct data_dependence_relation *ddr;
--- 332,369 ----
      }
  }
  
+ /* Creates an edge for the control dependences of BB to the vertex V.  */
+ 
+ static void
+ create_edge_for_control_dependence (struct graph *rdg, basic_block bb,
+                                   int v, control_dependences *cd)
+ {
+   bitmap_iterator bi;
+   unsigned edge_n;
+   EXECUTE_IF_SET_IN_BITMAP (cd->get_edges_dependent_on (bb->index),
+                           0, edge_n, bi)
+     {
+       basic_block cond_bb = cd->get_edge (edge_n)->src;
+       gimple stmt = last_stmt (cond_bb);
+       if (stmt && is_ctrl_stmt (stmt))
+       {
+         struct graph_edge *e;
+         int c = rdg_vertex_for_stmt (rdg, stmt);
+         if (c < 0)
+           continue;
+ 
+         e = add_edge (rdg, c, v);
+         e->data = XNEW (struct rdg_edge);
+         RDGE_TYPE (e) = control_dd;
+         RDGE_RELATION (e) = NULL;
+       }
+     }
+ }
+ 
  /* Creates the edges of the reduced dependence graph RDG.  */
  
  static void
! create_rdg_edges (struct graph *rdg, vec<ddr_p> ddrs, control_dependences *cd)
  {
    int i;
    struct data_dependence_relation *ddr;
*************** create_rdg_edges (struct graph *rdg, vec
*** 345,350 ****
--- 380,400 ----
      FOR_EACH_PHI_OR_STMT_DEF (def_p, RDG_STMT (rdg, i),
                              iter, SSA_OP_DEF)
        create_rdg_edges_for_scalar (rdg, DEF_FROM_PTR (def_p), i);
+ 
+   if (cd)
+     for (i = 0; i < rdg->n_vertices; i++)
+       {
+       gimple stmt = RDG_STMT (rdg, i);
+       if (gimple_code (stmt) == GIMPLE_PHI)
+         {
+           edge_iterator ei;
+           edge e;
+           FOR_EACH_EDGE (e, ei, gimple_bb (stmt)->preds)
+             create_edge_for_control_dependence (rdg, e->src, i, cd);
+         }
+       else
+         create_edge_for_control_dependence (rdg, gimple_bb (stmt), i, cd);
+       }
  }
  
  /* Build the vertices of the reduced dependence graph RDG.  Return false
*************** free_rdg (struct graph *rdg)
*** 465,471 ****
     scalar dependence.  */
  
  static struct graph *
! build_rdg (vec<loop_p> loop_nest)
  {
    struct graph *rdg;
    vec<gimple> stmts;
--- 515,521 ----
     scalar dependence.  */
  
  static struct graph *
! build_rdg (vec<loop_p> loop_nest, control_dependences *cd)
  {
    struct graph *rdg;
    vec<gimple> stmts;
*************** build_rdg (vec<loop_p> loop_nest)
*** 497,503 ****
        free_rdg (rdg);
        return NULL;
      }
!   create_rdg_edges (rdg, dependence_relations);
    dependence_relations.release ();
    datarefs.release ();
  
--- 547,553 ----
        free_rdg (rdg);
        return NULL;
      }
!   create_rdg_edges (rdg, dependence_relations, cd);
    dependence_relations.release ();
    datarefs.release ();
  
*************** generate_loops_for_partition (struct loo
*** 699,710 ****
              && !is_gimple_debug (stmt)
              && !bitmap_bit_p (partition->stmts, gimple_uid (stmt)))
            {
!             unlink_stmt_vdef (stmt);
!             gsi_remove (&bsi, true);
!             release_defs (stmt);
            }
!         else
!           gsi_next (&bsi);
        }
      }
  
--- 749,776 ----
              && !is_gimple_debug (stmt)
              && !bitmap_bit_p (partition->stmts, gimple_uid (stmt)))
            {
!             /* Choose an arbitrary path through the empty CFG part
!                that this unnecessary control stmt controls.  */
!             if (gimple_code (stmt) == GIMPLE_COND)
!               {
!                 gimple_cond_make_false (stmt);
!                 update_stmt (stmt);
!               }
!             else if (gimple_code (stmt) == GIMPLE_SWITCH)
!               {
!                 gimple_switch_set_index
!                     (stmt, CASE_LOW (gimple_switch_label (stmt, 1)));
!                 update_stmt (stmt);
!               }
!             else
!               {
!                 unlink_stmt_vdef (stmt);
!                 gsi_remove (&bsi, true);
!                 release_defs (stmt);
!                 continue;
!               }
            }
!         gsi_next (&bsi);
        }
      }
  
*************** rdg_flag_vertex_and_dependent (struct gr
*** 1031,1092 ****
    nodes.release ();
  }
  
- /* Initialize CONDS with all the condition statements from the basic
-    blocks of LOOP.  */
- 
- static void
- collect_condition_stmts (struct loop *loop, vec<gimple> *conds)
- {
-   unsigned i;
-   edge e;
-   vec<edge> exits = get_loop_exit_edges (loop);
- 
-   FOR_EACH_VEC_ELT (exits, i, e)
-     {
-       gimple cond = last_stmt (e->src);
- 
-       if (cond)
-       conds->safe_push (cond);
-     }
- 
-   exits.release ();
- }
- 
- /* Add to PARTITION all the exit condition statements for LOOPS
-    together with all their dependent statements determined from
-    RDG.  */
- 
- static void
- rdg_flag_loop_exits (struct graph *rdg, partition_t partition,
-                    bitmap processed)
- {
-   unsigned i;
-   bitmap_iterator bi;
-   vec<gimple> conds;
-   conds.create (3);
- 
-   EXECUTE_IF_SET_IN_BITMAP (partition->loops, 0, i, bi)
-     collect_condition_stmts (get_loop (cfun, i), &conds);
- 
-   while (!conds.is_empty ())
-     {
-       gimple cond = conds.pop ();
-       int v = rdg_vertex_for_stmt (rdg, cond);
-       if (!already_processed_vertex_p (processed, v))
-       {
-         bitmap saved_loops = BITMAP_ALLOC (NULL);
-         bitmap_copy (saved_loops, partition->loops);
-         rdg_flag_vertex_and_dependent (rdg, v, partition, processed);
-         EXECUTE_IF_AND_COMPL_IN_BITMAP (partition->loops, saved_loops,
-                                         0, i, bi)
-           collect_condition_stmts (get_loop (cfun, i), &conds);
-         BITMAP_FREE (saved_loops);
-       }
-     }
- 
-   conds.release ();
- }
- 
  /* Returns a partition with all the statements needed for computing
     the vertex V of the RDG, also including the loop exit conditions.  */
  
--- 1097,1102 ----
*************** build_rdg_partition_for_vertex (struct g
*** 1096,1102 ****
    partition_t partition = partition_alloc (NULL, NULL);
    bitmap processed = BITMAP_ALLOC (NULL);
    rdg_flag_vertex_and_dependent (rdg, v, partition, processed);
-   rdg_flag_loop_exits (rdg, partition, processed);
    BITMAP_FREE (processed);
    return partition;
  }
--- 1106,1111 ----
*************** partition_contains_all_rw (struct graph
*** 1463,1469 ****
     Returns the number of distributed loops.  */
  
  static int
! distribute_loop (struct loop *loop, vec<gimple> stmts)
  {
    struct graph *rdg;
    vec<loop_p> loop_nest;
--- 1472,1479 ----
     Returns the number of distributed loops.  */
  
  static int
! distribute_loop (struct loop *loop, vec<gimple> stmts,
!                control_dependences *cd)
  {
    struct graph *rdg;
    vec<loop_p> loop_nest;
*************** distribute_loop (struct loop *loop, vec<
*** 1479,1485 ****
        return 0;
      }
  
!   rdg = build_rdg (loop_nest);
    if (!rdg)
      {
        if (dump_file && (dump_flags & TDF_DETAILS))
--- 1489,1495 ----
        return 0;
      }
  
!   rdg = build_rdg (loop_nest, cd);
    if (!rdg)
      {
        if (dump_file && (dump_flags & TDF_DETAILS))
*************** tree_loop_distribution (void)
*** 1631,1636 ****
--- 1641,1647 ----
    loop_iterator li;
    bool changed = false;
    basic_block bb;
+   control_dependences *cd = NULL;
  
    FOR_ALL_BB (bb)
      {
*************** tree_loop_distribution (void)
*** 1660,1669 ****
        if (!optimize_loop_for_speed_p (loop))
        continue;
  
-       /* Only distribute loops with a header and latch for now.  */
-       if (loop->num_nodes > 2)
-       continue;
- 
        /* Initialize the worklist with stmts we seed the partitions with.  */
        bbs = get_loop_body_in_dom_order (loop);
        for (i = 0; i < loop->num_nodes; ++i)
--- 1671,1676 ----
*************** out:
*** 1697,1703 ****
        free (bbs);
  
        if (work_list.length () > 0)
!       nb_generated_loops = distribute_loop (loop, work_list);
  
        if (nb_generated_loops > 0)
        changed = true;
--- 1704,1718 ----
        free (bbs);
  
        if (work_list.length () > 0)
!       {
!         if (!cd)
!           {
!             calculate_dominance_info (CDI_POST_DOMINATORS);
!             cd = new control_dependences (create_edge_list ());
!             free_dominance_info (CDI_POST_DOMINATORS);
!           }
!         nb_generated_loops = distribute_loop (loop, work_list, cd);
!       }
  
        if (nb_generated_loops > 0)
        changed = true;
*************** out:
*** 1714,1719 ****
--- 1729,1737 ----
        work_list.release ();
      }
  
+   if (cd)
+     delete cd;
+ 
    if (changed)
      {
        mark_virtual_operands_for_renaming (cfun);
Index: gcc/testsuite/gcc.dg/tree-ssa/ldist-22.c
===================================================================
*** /dev/null   1970-01-01 00:00:00.000000000 +0000
--- gcc/testsuite/gcc.dg/tree-ssa/ldist-22.c    2013-09-13 14:06:48.135500474 
+0200
***************
*** 0 ****
--- 1,32 ----
+ /* { dg-do run } */
+ /* { dg-options "-O3 -fdump-tree-ldist-details" } */
+ 
+ extern void abort (void);
+ 
+ int a[1024], b[1024];
+ 
+ void __attribute__((noinline,noclone))
+ foo (void)
+ {
+   int i;
+   for (i = 0; i < 1024; ++i)
+     {
+       a[i] = 0;
+       if (i > 100)
+       b[i] = i;
+     }
+ }
+ 
+ int main()
+ {
+   b[100] = 1;
+   foo ();
+   if (b[100] != 1 || b[101] != 101)
+     abort ();
+   if (a[0] != 0 || a[101] != 0)
+     abort ();
+   return;
+ }
+ 
+ /* { dg-final { scan-tree-dump "generated memset zero" "ldist" } } */
+ /* { dg-final { cleanup-tree-dump "ldist" } } */

Reply via email to