http://gcc.gnu.org/bugzilla/show_bug.cgi?id=38318
Dominique d'Humieres <dominiq at lps dot ens.fr> changed: What |Removed |Added ---------------------------------------------------------------------------- CC| |jh at suse dot cz, | |rguenther at suse dot de --- Comment #5 from Dominique d'Humieres <dominiq at lps dot ens.fr> 2010-10-07 15:04:34 UTC --- Another case of interest is "automatic arrays". An interesting example is the polyhedron test nf.f90. On Core2 Duo and Darwin the following patch --- nf.f90 2005-10-11 22:53:32.000000000 +0200 +++ nf_v2.f90 2010-10-07 16:49:38.000000000 +0200 @@ -153,7 +153,7 @@ integer :: nx , nxy , nxyz , maxiter real(dpkind),dimension(nxyz):: ad,au1,au2,au3,x,b real(dpkind)::targrms -real(dpkind),allocatable,dimension(:) :: r,q,p,z,g,gi +real(dpkind),allocatable,dimension(:) :: r,q,p,z,g,gi,t,u real(dpkind):: alpha,beta,qr,qrp,rmserr integer :: iter , tbase , tgi , tcg , tickspersec , maxticks @@ -163,7 +163,7 @@ call GetGI3D(1,nxyz) ! c call system_clock(tgi,tickspersec,maxticks) deallocate(g) -allocate (r(nxyz),q(nxyz),p(nxyz),z(nxyz)) +allocate (r(nxyz),q(nxyz),p(nxyz),z(nxyz),t(nxyz),u(nxyz)) CALL SPMMULT(x,r) ; r = b - r ! compute initial residual vector write(*,'(A)') ' Iter Alpha Beta RMS Residual Sum of Residuals' @@ -171,12 +171,12 @@ write(*,'(I4,24X,2G18.7)') 0,sqrt(DOT_PR ! Do a single iteration with alpha =1 ! 
to reduce sum of residuals to 0 -p = r ; CALL NF3DPrecon(p,1,nxyz) ; CALL SPMMULT(p,z) +p = r ; CALL NF3DPrecon(p,t,u,1,nxyz) ; CALL SPMMULT(p,z) x = x + p ; r = r - z write(*,'(I4,F12.5,12X,2G18.7)') 0,1.0,sqrt(DOT_PRODUCT(r,r)/nxyz),sum(r) do iter = 1 , maxiter - q = r ; CALL NF3DPrecon(q,1,nxyz) + q = r ; CALL NF3DPrecon(q,t,u,1,nxyz) qr = DOT_PRODUCT(q,r) if ( iter==1 ) then beta = 0.0 @@ -197,7 +197,7 @@ call system_clock(tcg,tickspersec,maxtic write(*,'(/A,F10.3/A,F10.3/A,F10.3)') ' Time for setup ',REAL(tgi-tbase)/REAL(tickspersec) , & ' Time per iteration ',REAL(tcg-tgi)/REAL(tickspersec*min(iter,maxiter)) , & ' Total Time ',REAL(tcg-tbase)/REAL(tickspersec) -deallocate(r,q,p,z,gi) +deallocate(r,q,p,z,gi,t,u) contains !========================================= ! Banded matrix multiply b = A.x ========= @@ -253,7 +253,7 @@ end subroutine GetGI2D !== !========================================= ! solve for a plane of cells using ====== -subroutine NF2DPrecon(x,i1,i2) ! 2D NF Preconditioning matrix +subroutine NF2DPrecon(x,t,i1,i2) ! 2D NF Preconditioning matrix integer :: i1 , i2 real(dpkind),dimension(i2)::x,t integer :: i @@ -272,11 +272,12 @@ end subroutine NF2DPrecon !== subroutine GetGI3D(i1,i2) ! compute gi for a 3D block of cells ===== integer :: i1 , i2 integer :: i +real(dpkind),dimension(nxyz)::t g = ad do i = i1 , i2 , nxy ! advance one plane at a time if ( i>i1 ) then ! get contribution from previous plane g(i-nxy:i-1) = au3(i-nxy:i-1) - call NF2DPrecon(g,i-nxy,i-1) + call NF2DPrecon(g,t,i-nxy,i-1) g(i:i+nxy-1) = g(i:i+nxy-1) - au3(i-nxy:i-1)*g(i-nxy:i-1) endif call GetGI2D(i,i+nxy-1) ! get contribution from this plane @@ -285,17 +286,17 @@ end subroutine GetGI3D !== !========================================= ! solve for a 3D block of cells using -subroutine NF3DPrecon(x,i1,i2) ! 3D Preconditioning matrix +subroutine NF3DPrecon(x,t,u,i1,i2) ! 
3D Preconditioning matrix integer :: i1 , i2 -real(dpkind),dimension(i2)::x,t +real(dpkind),dimension(i2)::x,t,u integer :: i do i = i1 , i2 , nxy if ( i>i1 ) x(i:i+nxy-1) = x(i:i+nxy-1) - au3(i-nxy:i-1)*x(i-nxy:i-1) - call NF2DPrecon(x,i,i+nxy-1) + call NF2DPrecon(x,u,i,i+nxy-1) enddo do i = i2-2*nxy+1 , i1 , -nxy t(i:i+nxy-1) = au3(i:i+nxy-1)*x(i+nxy:i+2*nxy-1) - call NF2DPrecon(t,i,i+nxy-1) + call NF2DPrecon(t,u,i,i+nxy-1) x(i:i+nxy-1) = x(i:i+nxy-1) - t(i:i+nxy-1) enddo end subroutine NF3DPrecon !========================================= cuts the execution time from ~28s to ~20s. (Note that, with the options I use, all the procedures are inlined.)