http://gcc.gnu.org/bugzilla/show_bug.cgi?id=38318

Dominique d'Humieres <dominiq at lps dot ens.fr> changed:

           What    |Removed                     |Added
----------------------------------------------------------------------------
                 CC|                            |jh at suse dot cz,
                   |                            |rguenther at suse dot de

--- Comment #5 from Dominique d'Humieres <dominiq at lps dot ens.fr> 2010-10-07 15:04:34 UTC ---
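Another case of interest is "automatic arrays". An interesting example is the
Polyhedron benchmark test nf.f90.
On a Core2 Duo running Darwin, the following patch (which turns the automatic
work arrays of NF2DPrecon and NF3DPrecon into dummy arguments that the caller
allocates once)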


--- nf.f90    2005-10-11 22:53:32.000000000 +0200
+++ nf_v2.f90    2010-10-07 16:49:38.000000000 +0200
@@ -153,7 +153,7 @@ integer :: nx , nxy , nxyz , maxiter
 real(dpkind),dimension(nxyz):: ad,au1,au2,au3,x,b
 real(dpkind)::targrms

-real(dpkind),allocatable,dimension(:) :: r,q,p,z,g,gi
+real(dpkind),allocatable,dimension(:) :: r,q,p,z,g,gi,t,u
 real(dpkind):: alpha,beta,qr,qrp,rmserr
 integer :: iter , tbase , tgi , tcg , tickspersec , maxticks

@@ -163,7 +163,7 @@ call GetGI3D(1,nxyz)                 ! c
 call system_clock(tgi,tickspersec,maxticks)
 deallocate(g)

-allocate (r(nxyz),q(nxyz),p(nxyz),z(nxyz))
+allocate (r(nxyz),q(nxyz),p(nxyz),z(nxyz),t(nxyz),u(nxyz))
 CALL SPMMULT(x,r) ; r = b - r        ! compute initial residual vector

 write(*,'(A)') ' Iter      Alpha        Beta     RMS Residual   Sum of
Residuals'
@@ -171,12 +171,12 @@ write(*,'(I4,24X,2G18.7)') 0,sqrt(DOT_PR

                                      !  Do a single iteration with alpha =1 
                                      !  to reduce sum of residuals to 0
-p = r ; CALL NF3DPrecon(p,1,nxyz) ; CALL SPMMULT(p,z)
+p = r ; CALL NF3DPrecon(p,t,u,1,nxyz) ; CALL SPMMULT(p,z)
 x = x + p ; r = r - z
 write(*,'(I4,F12.5,12X,2G18.7)') 0,1.0,sqrt(DOT_PRODUCT(r,r)/nxyz),sum(r)

 do iter = 1 , maxiter
-   q = r ; CALL NF3DPrecon(q,1,nxyz)
+   q = r ; CALL NF3DPrecon(q,t,u,1,nxyz)
    qr = DOT_PRODUCT(q,r)
    if ( iter==1 ) then
       beta = 0.0
@@ -197,7 +197,7 @@ call system_clock(tcg,tickspersec,maxtic
 write(*,'(/A,F10.3/A,F10.3/A,F10.3)') ' Time for setup    
',REAL(tgi-tbase)/REAL(tickspersec) , &
                                       ' Time per iteration
',REAL(tcg-tgi)/REAL(tickspersec*min(iter,maxiter)) , &
                                       ' Total Time        
',REAL(tcg-tbase)/REAL(tickspersec)
-deallocate(r,q,p,z,gi)
+deallocate(r,q,p,z,gi,t,u)
 contains
                                     
!=========================================
                                      ! Banded matrix multiply b = A.x
=========                                     
@@ -253,7 +253,7 @@ end subroutine GetGI2D               !==

                                     
!=========================================
                                      ! solve for a plane of cells using 
======
-subroutine NF2DPrecon(x,i1,i2)       ! 2D NF Preconditioning matrix
+subroutine NF2DPrecon(x,t,i1,i2)       ! 2D NF Preconditioning matrix
 integer :: i1 , i2
 real(dpkind),dimension(i2)::x,t
 integer :: i
@@ -272,11 +272,12 @@ end subroutine NF2DPrecon            !==
 subroutine GetGI3D(i1,i2)            ! compute gi for a 3D block of cells
=====
 integer :: i1 , i2
 integer :: i
+real(dpkind),dimension(nxyz)::t
 g = ad
 do i = i1 , i2 , nxy                 ! advance one plane at a time
    if ( i>i1 ) then                  ! get contribution from previous plane 
       g(i-nxy:i-1) = au3(i-nxy:i-1)
-      call NF2DPrecon(g,i-nxy,i-1)
+      call NF2DPrecon(g,t,i-nxy,i-1)
       g(i:i+nxy-1) = g(i:i+nxy-1) - au3(i-nxy:i-1)*g(i-nxy:i-1)
    endif
    call GetGI2D(i,i+nxy-1)           ! get contribution from this plane
@@ -285,17 +286,17 @@ end subroutine GetGI3D               !==

                                     
!=========================================
                                      ! solve for a 3D block of cells using 
-subroutine NF3DPrecon(x,i1,i2)       ! 3D Preconditioning matrix
+subroutine NF3DPrecon(x,t,u,i1,i2)       ! 3D Preconditioning matrix
 integer :: i1 , i2
-real(dpkind),dimension(i2)::x,t
+real(dpkind),dimension(i2)::x,t,u
 integer :: i
 do i = i1 , i2 , nxy
    if ( i>i1 ) x(i:i+nxy-1) = x(i:i+nxy-1) - au3(i-nxy:i-1)*x(i-nxy:i-1)
-   call NF2DPrecon(x,i,i+nxy-1)
+   call NF2DPrecon(x,u,i,i+nxy-1)
 enddo   
 do i = i2-2*nxy+1 , i1 , -nxy
    t(i:i+nxy-1) = au3(i:i+nxy-1)*x(i+nxy:i+2*nxy-1)
-   call NF2DPrecon(t,i,i+nxy-1)
+   call NF2DPrecon(t,u,i,i+nxy-1)
    x(i:i+nxy-1) = x(i:i+nxy-1) - t(i:i+nxy-1)
 enddo
 end subroutine NF3DPrecon           
!=========================================

cuts the execution time from ~28s to ~20s (note that, with the options I use,
all the procedures are inlined).

Reply via email to