Loop distribution fails to honor output dependences; the following
patch handles them the same way as anti dependences.  All the code that
gathers up dependences is a little weird and probably needs some TLC.

Bootstrapped and tested on x86_64-unknown-linux-gnu, applied.

Richard.

2013-08-30  Richard Biener  <rguent...@suse.de>

        PR tree-optimization/58223
        * tree-loop-distribution.c (has_anti_dependence): Rename to ...
        (has_anti_or_output_dependence): ... this and adjust to also
        look for output dependences.
        (mark_nodes_having_upstream_mem_writes): Adjust.
        (rdg_flag_uses): Likewise.

        * gcc.dg/torture/pr58223.c: New testcase.
        * gcc.dg/tree-ssa/ldist-16.c: Flip expected behavior.

Index: gcc/tree-loop-distribution.c
===================================================================
*** gcc/tree-loop-distribution.c        (revision 202068)
--- gcc/tree-loop-distribution.c        (working copy)
*************** already_processed_vertex_p (bitmap proce
*** 542,558 ****
          || !bitmap_bit_p (remaining_stmts, v));
  }
  
! /* Returns NULL when there is no anti-dependence among the successors
!    of vertex V, otherwise returns the edge with the anti-dep.  */
  
  static struct graph_edge *
! has_anti_dependence (struct vertex *v)
  {
    struct graph_edge *e;
  
    if (v->succ)
      for (e = v->succ; e; e = e->succ_next)
!       if (RDGE_TYPE (e) == anti_dd)
        return e;
  
    return NULL;
--- 542,560 ----
          || !bitmap_bit_p (remaining_stmts, v));
  }
  
! /* Returns NULL when there is no anti-dependence or output-dependence
!    among the successors of vertex V, otherwise returns the edge with the
!    dependency.  */
  
  static struct graph_edge *
! has_anti_or_output_dependence (struct vertex *v)
  {
    struct graph_edge *e;
  
    if (v->succ)
      for (e = v->succ; e; e = e->succ_next)
!       if (RDGE_TYPE (e) == anti_dd
!         || RDGE_TYPE (e) == output_dd)
        return e;
  
    return NULL;
*************** mark_nodes_having_upstream_mem_writes (s
*** 604,614 ****
                || predecessor_has_mem_write (rdg, &(rdg->vertices[x]))
                /* In anti dependences the read should occur before
                   the write, this is why both the read and the write
!                  should be placed in the same partition.  */
!               || has_anti_dependence (&(rdg->vertices[x])))
!             {
!               bitmap_set_bit (upstream_mem_writes, x);
!             }
          }
  
        nodes.release ();
--- 606,615 ----
                || predecessor_has_mem_write (rdg, &(rdg->vertices[x]))
                /* In anti dependences the read should occur before
                   the write, this is why both the read and the write
!                  should be placed in the same partition.  In output
!                  dependences the order of the writes needs to be preserved.  */
!               || has_anti_or_output_dependence (&(rdg->vertices[x])))
!             bitmap_set_bit (upstream_mem_writes, x);
          }
  
        nodes.release ();
*************** rdg_flag_uses (struct graph *rdg, int u,
*** 637,643 ****
    use_operand_p use_p;
    struct vertex *x = &(rdg->vertices[u]);
    gimple stmt = RDGV_STMT (x);
!   struct graph_edge *anti_dep = has_anti_dependence (x);
  
    /* Keep in the same partition the destination of an antidependence,
       because this is a store to the exact same location.  Putting this
--- 638,644 ----
    use_operand_p use_p;
    struct vertex *x = &(rdg->vertices[u]);
    gimple stmt = RDGV_STMT (x);
!   struct graph_edge *anti_dep = has_anti_or_output_dependence (x);
  
    /* Keep in the same partition the destination of an antidependence,
       because this is a store to the exact same location.  Putting this
Index: gcc/testsuite/gcc.dg/tree-ssa/ldist-16.c
===================================================================
*** gcc/testsuite/gcc.dg/tree-ssa/ldist-16.c    (revision 202068)
--- gcc/testsuite/gcc.dg/tree-ssa/ldist-16.c    (working copy)
*************** void foo (int n)
*** 14,21 ****
      }
  }
  
! /* We should apply loop distribution and generate a memset (0).  */
  
! /* { dg-final { scan-tree-dump "distributed: split to 2" "ldist" } } */
! /* { dg-final { scan-tree-dump-times "generated memset zero" 1 "ldist" } } */
  /* { dg-final { cleanup-tree-dump "ldist" } } */
--- 14,21 ----
      }
  }
  
! /* We should not apply loop distribution and not generate a memset (0).  */
  
! /* { dg-final { scan-tree-dump "Loop 1 is the same" "ldist" } } */
! /* { dg-final { scan-tree-dump-times "generated memset zero" 0 "ldist" } } */
  /* { dg-final { cleanup-tree-dump "ldist" } } */
Index: gcc/testsuite/gcc.dg/torture/pr58223.c
===================================================================
*** gcc/testsuite/gcc.dg/torture/pr58223.c      (revision 0)
--- gcc/testsuite/gcc.dg/torture/pr58223.c      (working copy)
***************
*** 0 ****
--- 1,16 ----
+ /* { dg-do run } */
+ 
+ extern void abort (void);
+ int a[2], b;
+ 
+ int main ()
+ {
+   for (b = 0; b < 2; b++)
+     {
+       a[0] = 1;
+       a[b] = 0;
+     }
+   if (a[0] != 1)
+     abort ();
+   return 0;
+ }

Reply via email to