On Fri, 18 May 2012, Richard Guenther wrote:

> 
> In PR53346 we vectorize a simple memset loop very inefficiently.
> But of course we should have detected this and transformed the
> loop into a memset!  Seems like we only do that if the original
> loop does something other than memset as well.
> 
> Fixed as follows.
> 
> Bootstrap and regtest on x86_64-unknown-linux-gnu ongoing
> (I suppose that will really stress loop distribution now ;))

Committed as follows, with extra testsuite adjustments to avoid
memset loops.

Richard.

2012-05-18  Richard Guenther  <rguent...@suse.de>

        PR tree-optimization/53346
        * tree-loop-distribution.c (ldist_gen): Make sure to apply
        builtin transform even when only a single partition with
        all reads/writes exists.

        * gcc.dg/tree-ssa/ldist-18.c: New testcase.
        * gcc.target/i386/incoming-10.c: Adjust.
        * gcc.target/i386/incoming-11.c: Likewise.
        * gcc.target/i386/pr46295.c: Likewise.

Index: gcc/tree-loop-distribution.c
===================================================================
*** gcc/tree-loop-distribution.c        (revision 187650)
--- gcc/tree-loop-distribution.c        (working copy)
*************** ldist_gen (struct loop *loop, struct gra
*** 1131,1138 ****
    BITMAP_FREE (processed);
    nbp = VEC_length (bitmap, partitions);
  
!   if (nbp <= 1
!       || partition_contains_all_rw (rdg, partitions))
      goto ldist_done;
  
    if (dump_file && (dump_flags & TDF_DETAILS))
--- 1131,1141 ----
    BITMAP_FREE (processed);
    nbp = VEC_length (bitmap, partitions);
  
!   if (nbp == 0
!       || (nbp == 1
!         && !can_generate_builtin (rdg, VEC_index (bitmap, partitions, 0)))
!       || (nbp > 1
!         && partition_contains_all_rw (rdg, partitions)))
      goto ldist_done;
  
    if (dump_file && (dump_flags & TDF_DETAILS))
Index: gcc/testsuite/gcc.dg/tree-ssa/ldist-18.c
===================================================================
*** gcc/testsuite/gcc.dg/tree-ssa/ldist-18.c    (revision 0)
--- gcc/testsuite/gcc.dg/tree-ssa/ldist-18.c    (revision 0)
***************
*** 0 ****
--- 1,12 ----
+ /* { dg-do compile } */
+ /* { dg-options "-O2 -ftree-loop-distribute-patterns -fdump-tree-ldist-details" } */
+ 
+ void foo (int *p, int n)
+ {
+   int i;
+   for (i = 0; i < n; ++i)
+     p[i] = 0;
+ }
+ 
+ /* { dg-final { scan-tree-dump "generated memset zero" "ldist" } } */
+ /* { dg-final { cleanup-tree-dump "ldist" } } */
Index: gcc/testsuite/gcc.target/i386/incoming-10.c
===================================================================
--- gcc/testsuite/gcc.target/i386/incoming-10.c (revision 187653)
+++ gcc/testsuite/gcc.target/i386/incoming-10.c (working copy)
@@ -12,7 +12,7 @@ void f()
 {
        int i;
        struct s s;
-       for (i = 0; i < sizeof(s.x) / sizeof(*s.x); i++) s.x[i] = 0;
+       for (i = 0; i < sizeof(s.x) / sizeof(*s.x); i++) s.x[i] = 1;
        g(&s);
 }
 
Index: gcc/testsuite/gcc.target/i386/incoming-11.c
===================================================================
--- gcc/testsuite/gcc.target/i386/incoming-11.c (revision 187653)
+++ gcc/testsuite/gcc.target/i386/incoming-11.c (working copy)
@@ -10,9 +10,9 @@ int q[100];
 void f()
 {
        int i;
-       for (i = 0; i < 100; i++) p[i] = 0;
+       for (i = 0; i < 100; i++) p[i] = 1;
        g();
-       for (i = 0; i < 100; i++) q[i] = 0;
+       for (i = 0; i < 100; i++) q[i] = 1;
 }
 
 /* { dg-final { scan-assembler "andl\[\\t \]*\\$-16,\[\\t \]*%esp" } } */
Index: gcc/testsuite/gcc.target/i386/pr46295.c
===================================================================
--- gcc/testsuite/gcc.target/i386/pr46295.c     (revision 187653)
+++ gcc/testsuite/gcc.target/i386/pr46295.c     (working copy)
@@ -9,7 +9,7 @@ void Parse_Vector ()
    EXPRESS Express;
    int Terms;
    for (Terms = 0; Terms < 5; Terms++)
-     Express[Terms] = 0.0;
+     Express[Terms] = 1.0;
    Parse_Rel_Factor(Express,&Terms);
 }
 

Reply via email to