On Wed, 3 Aug 2011, Richard Guenther wrote:

> 
> This fixes PR49957 by keeping the array index into a multi-dimensional
> array in optimal associated form which is ((off + outermost) + ...) + 
> innermost) + constant) so that dependence analysis can properly
> handle it.  It doesn't work right now because we build the
> expression in reverse order, fold thinks it can do some fancy and
> the expression is of signed type and thus we know it doesn't overflow
> but we also won't re-associate it to a more optimal form.
> 
> I tried reversing the loop in gfc_conv_array_ref but that doesn't
> work (for example aliasing_dummy_4.f90 ICEs), thus the funny way
> of chaining the pluses.
> 
> I also don't know if there is maybe another place we build similar
> expressions that should be adjusted, too - this one is where
> we build the expression for the testcase I looked at.
> 
> The patch doesn't make 410.bwaves measurably faster, but at least
> it also doesn't get slower.
> 
> Bootstrap and regtest is currently running on x86_64-unknown-linux-gnu,
> the reversed loop one was ok (well, apart from those 2-3 fails).
> 
> Comments?  Any idea why reversing the loop would break?
> The ICE I got is
> 
> /space/rguenther/src/svn/trunk/gcc/testsuite/gfortran.dg/aliasing_dummy_4.f90:
>  
> In function 'test_f90':
> /space/rguenther/src/svn/trunk/gcc/testsuite/gfortran.dg/aliasing_dummy_4.f90:21:0:
>  
> internal compiler error: in gfc_conv_constant, at 
> fortran/trans-const.c:387
> Please submit a full bug report,
> with preprocessed source if appropriate.
> See <http://gcc.gnu.org/bugs.html> for instructions.

Tobias noticed the testcase is optimized away, fixed.  Also fixed
is another oversight for another loop in 410.bwaves, with constant
first index.

Re-bootstrapping currently.  The previous patch tested ok.

Richard.

2011-08-03  Richard Guenther  <rguent...@suse.de>

        PR fortran/49957
        * trans-array.c (gfc_conv_array_ref): Build the array index
        expression in optimally associated order.

        * gfortran.dg/vect/O3-pr49957.f: New testcase.

Index: gcc/fortran/trans-array.c
===================================================================
--- gcc/fortran/trans-array.c   (revision 177094)
+++ gcc/fortran/trans-array.c   (working copy)
@@ -2634,7 +2634,7 @@ gfc_conv_array_ref (gfc_se * se, gfc_arr
                    locus * where)
 {
   int n;
-  tree index;
+  tree index, offset, *indexp;
   tree tmp;
   tree stride;
   gfc_se indexse;
@@ -2669,9 +2669,16 @@ gfc_conv_array_ref (gfc_se * se, gfc_arr
       return;
     }
 
-  index = gfc_index_zero_node;
+  offset = gfc_conv_array_offset (se->expr);
+  if (TREE_CODE (offset) != INTEGER_CST)
+    index = offset;
+  else
+    index = gfc_index_zero_node;
 
-  /* Calculate the offsets from all the dimensions.  */
+  indexp = &index;
+
+  /* Calculate the offsets from all the dimensions.  Make sure to associate
+     the final offset so that we form a chain of loop invariant summands.  */
   for (n = 0; n < ar->dimen; n++)
     {
       /* Calculate the index for this dimension.  */
@@ -2740,15 +2747,43 @@ gfc_conv_array_ref (gfc_se * se, gfc_arr
       tmp = fold_build2_loc (input_location, MULT_EXPR, gfc_array_index_type,
                             indexse.expr, stride);
 
-      /* And add it to the total.  */
-      index = fold_build2_loc (input_location, PLUS_EXPR,
-                              gfc_array_index_type, index, tmp);
+      /* And add it to the total.  Avoid folding as that re-associates
+         in a non-optimal way.  We want to have constant offsets as
+        the outermost addition and the rest of the additions in order
+        of the loop depth.  */
+      if (!integer_zerop (index))
+       {
+         if (TREE_CODE (tmp) == INTEGER_CST)
+           {
+             bool reset = indexp == &index;
+             index = fold_build2_loc (input_location, PLUS_EXPR,
+                                      gfc_array_index_type, index, tmp);
+             if (reset)
+               {
+                 if (TREE_CODE (index) == PLUS_EXPR)
+                   indexp = &TREE_OPERAND (index, 0);
+                 else
+                   indexp = &index;
+               }
+           }
+         else
+           {
+             *indexp
+               = build2_loc (input_location, PLUS_EXPR,
+                             gfc_array_index_type, *indexp, tmp);
+             indexp = &TREE_OPERAND (*indexp, 0);
+           }
+       }
+      else
+       {
+         index = tmp;
+         indexp = &index;
+       }
     }
 
-  tmp = gfc_conv_array_offset (se->expr);
-  if (!integer_zerop (tmp))
+  if (TREE_CODE (offset) == INTEGER_CST)
     index = fold_build2_loc (input_location, PLUS_EXPR,
-                            gfc_array_index_type, index, tmp);
+                            gfc_array_index_type, index, offset);
 
   /* Access the calculated element.  */
   tmp = gfc_conv_array_data (se->expr);
Index: gcc/testsuite/gfortran.dg/vect/O3-pr49957.f
===================================================================
--- gcc/testsuite/gfortran.dg/vect/O3-pr49957.f (revision 0)
+++ gcc/testsuite/gfortran.dg/vect/O3-pr49957.f (revision 0)
@@ -0,0 +1,16 @@
+! { dg-do compile }
+! { dg-require-effective-target vect_double }
+      subroutine shell(nx,ny,nz,q,dq)
+      implicit none
+      integer i,j,k,l,nx,ny,nz
+      real*8 q(5,nx,ny),dq(5,nx,ny)
+         do j=1,ny
+            do i=1,nx
+               do l=1,5
+                  q(l,i,j)=q(l,i,j)+dq(l,i,j)
+               enddo
+            enddo
+         enddo
+      return
+      end
+! { dg-final { scan-tree-dump "vectorized 1 loops" "vect" { xfail 
vect_no_align } } }

Reply via email to