Hello,
I am still trying to understand the parallelized version of the 2D heat equation solver that we saw at school. In order to explain my problem, I need to list the main code:

  9      program heat
 10 !**************************************************************************
 11 !
 12 !   This program solves the heat equation on the unit square [0,1]x[0,1]
 13 !      | du/dt - Delta(u) = 0
 14 !      | u/gamma = cste
 15 !   by implementing an explicit scheme.
 16 !   The discretization is done using a 5 point finite difference scheme
 17 !   and the domain is decomposed into sub-domains.
 18 !   The PDE is discretized using a 5 point finite difference scheme
 19 !   over a (x_dim+2)*(x_dim+2) grid, including the end points
 20 !   that correspond to the boundary points, which are stored.
 21 !
 22 !   The data on the whole domain are stored in the following way:
 23 !
 24 !
 25 !      y
 26 !          ------------------------------------
 27 !     d    |                                  |
 28 !     i    |                                  |
 29 !     r    |                                  |
 30 !     e    |                                  |
 31 !     c    |                                  |
 32 !     t    |                                  |
 33 !     i    | x20                              |
 34 !     o /\ |                                  |
 35 !     n |  | x10                              |
 36 !       |  |                                  |
 37 !       |  | x00  x01  x02 ...                |
 38 !       |  ------------------------------------
 39 !          -------> x direction   x(*,j)
 40 !
 41 !   The boundary conditions are stored in the following submatrices
 42 !
 43 !
 44 !      x(1:x_dim, 0)          ---> left   temperature
 45 !      x(1:x_dim, x_dim+1)    ---> right  temperature
 46 !      x(0, 1:x_dim)          ---> top    temperature
 47 !      x(x_dim+1, 1:x_dim)    ---> bottom temperature
 48 !
 49 !**************************************************************************
 50      implicit none
 51      include 'mpif.h'
 52 ! size of the discretization
 53      integer :: x_dim, nb_iter
 54      double precision, allocatable :: x(:,:), b(:,:), x0(:,:)
 55      double precision :: dt, h, epsilon
 56      double precision :: resLoc, result, t, tstart, tend
 57 !
 58      integer :: i, j
 59      integer :: step, maxStep
 60      integer :: size_x, size_y, me, x_domains, y_domains
 61      integer :: iconf(5), size_x_glo
 62      double precision conf(2)
 63 !
 64 ! MPI variables
 65      integer :: nproc, infompi, comm, comm2d, lda, ndims
 66      INTEGER, DIMENSION(2) :: dims
 67      LOGICAL, DIMENSION(2) :: periods
 68      LOGICAL, PARAMETER    :: reorganisation = .false.
 69      integer :: row_type
 70      integer, parameter :: nbvi = 4
 71      integer, parameter :: S = 1, E = 2, N = 3, W = 4
 72      integer, dimension(4) :: neighBor
 73
 74 !
 75      intrinsic abs
 76 !
 77 !
 78      call MPI_INIT(infompi)
 79      comm = MPI_COMM_WORLD
 80      call MPI_COMM_SIZE(comm, nproc, infompi)
 81      call MPI_COMM_RANK(comm, me, infompi)
 82 !
 83 !
 84      if (me.eq.0) then
 85          call readparam(iconf, conf)
 86      endif
 87      call MPI_BCAST(iconf, 5, MPI_INTEGER, 0, comm, infompi)
 88      call MPI_BCAST(conf, 2, MPI_DOUBLE_PRECISION, 0, comm, infompi)
 89 !
 90      size_x    = iconf(1)
 91      size_y    = iconf(1)
 92      x_domains = iconf(3)
 93      y_domains = iconf(4)
 94      maxStep   = iconf(5)
 95      dt        = conf(1)
 96      epsilon   = conf(2)
 97 !
 98      size_x_glo = x_domains*size_x + 2
 99      h  = 1.0d0/dble(size_x_glo)
100      dt = 0.25*h*h
101 !
102 !
103      lda = size_y + 2
104      allocate(x(0:size_y+1, 0:size_x+1))
105      allocate(x0(0:size_y+1, 0:size_x+1))
106      allocate(b(0:size_y+1, 0:size_x+1))
107 !
108 ! Create 2D cartesian grid
109      periods(:) = .false.
110
111      ndims = 2
112      dims(1) = x_domains
113      dims(2) = y_domains
114      CALL MPI_CART_CREATE(MPI_COMM_WORLD, ndims, dims, periods, &
115                           reorganisation, comm2d, infompi)
116 !
117 ! Identify neighbors
118 !
119      NeighBor(:) = MPI_PROC_NULL
120 ! Left/West and Right/East neighbors
121      CALL MPI_CART_SHIFT(comm2d, 0, 1, NeighBor(W), NeighBor(E), infompi)
122
123      print *, 'mpi_proc_null=', MPI_PROC_NULL
124      print *, 'rank=', me
125      print *, 'first mpi_cart_shift:  neighbor(w)=', NeighBor(W)
126      print *, 'first mpi_cart_shift:  neighbor(e)=', NeighBor(E)
127
128 ! Bottom/South and Upper/North neighbors
129      CALL MPI_CART_SHIFT(comm2d, 1, 1, NeighBor(S), NeighBor(N), infompi)
130
131
132      print *, 'second mpi_cart_shift: neighbor(s)=', NeighBor(S)
133      print *, 'second mpi_cart_shift: neighbor(n)=', NeighBor(N)
134
135
136
137 !
138 ! Create row data type to communicate with South and North neighbors
139 !
140      CALL MPI_TYPE_VECTOR(size_x, 1, size_y+2, MPI_DOUBLE_PRECISION, row_type, infompi)
141      CALL MPI_TYPE_COMMIT(row_type, infompi)
142 !
143 ! Initialization
144 !
145      call initvalues(x0, b, size_x+1, size_x)
146 !
147 ! Update the boundaries
148 !
149      call updateBound(x0, size_x, size_x, NeighBor, comm2d, row_type)
150
151      step = 0
152      t    = 0.0
153 !
154      tstart = MPI_Wtime()
155 ! REPEAT
156 10   continue
157 !
158      step = step + 1
159      t    = t + dt
160 ! Perform one step of the explicit scheme
161      call Explicit(x0, x, b, size_x+1, size_x, size_x, dt, h, resLoc)
162 ! Update the partial solution along the interface
163      call updateBound(x0, size_x, size_x, NeighBor, comm2d, row_type)
164 ! Check the distance between two iterates
165      call MPI_ALLREDUCE(resLoc, result, 1, MPI_DOUBLE_PRECISION, MPI_SUM, comm, infompi)
166      result = sqrt(result)
167 !
168      if (me.eq.0) write(*,1002) t, result
169 !
170      if ((result.gt.epsilon).and.(step.lt.maxStep)) goto 10
171 !
172 ! UNTIL "Convergence"
173 !
174      tend = MPI_Wtime()
175      if (me.eq.0) then
176          write(*,*)
177          write(*,*) ' Convergence after ', step, ' steps '
178          write(*,*) ' Problem size      ', size_x*x_domains*size_y*y_domains
179          write(*,*) ' Wall Clock        ', tend - tstart
180
181 !
182 ! Print the solution at each point of the grid
183 !
184          write(*,*)
185          write(*,*) ' Computed solution '
186          write(*,*)
187          do 30, j = size_x+1, 0, -1
188              write(*,1000) (x0(j,i), i = 0, size_x+1)
189 30       continue
190      endif
191 !
192      call MPI_FINALIZE(infompi)
193 !
194      deallocate(x)
195      deallocate(x0)
196      deallocate(b)
197 !
198 ! Formats available to display the computed values on the grid
199 !
200 1000 format(100(1x, f7.3))
201 1001 format(100(1x, e7.3))
202 1002 format(' At time ', E8.2, ' Norm ', E8.2)
203
204 !
205      stop
206      end
207 !
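As an aside, to check what the two MPI_CART_SHIFT calls at lines 121 and 129 return, I ran the stripped-down test below on 4 processes. It is my own sketch (the program name and variable names are mine, not from the course code); it just builds the same non-periodic 2x2 Cartesian grid and prints each rank's neighbours, which shows that ranks sitting on the edge of the grid get MPI_PROC_NULL:

      program cart_shift_test
!     My own minimal test: same kind of non-periodic 2D Cartesian grid
!     as in the main code, just to print what MPI_CART_SHIFT gives
!     each rank.
      implicit none
      include 'mpif.h'
      integer :: infompi, me, comm2d
      integer, dimension(2) :: dims
      logical, dimension(2) :: periods
      integer :: west, east, south, north

      call MPI_INIT(infompi)
      dims(1) = 2                   ! assumes exactly 4 processes (2 x 2)
      dims(2) = 2
      periods(:) = .false.
      call MPI_CART_CREATE(MPI_COMM_WORLD, 2, dims, periods, .false., &
                           comm2d, infompi)
      call MPI_COMM_RANK(comm2d, me, infompi)
!     direction 0 gives the West/East pair, direction 1 the South/North pair
      call MPI_CART_SHIFT(comm2d, 0, 1, west,  east,  infompi)
      call MPI_CART_SHIFT(comm2d, 1, 1, south, north, infompi)
      print *, 'rank', me, ': W,E,S,N =', west, east, south, north
      call MPI_FINALIZE(infompi)
      end program cart_shift_test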

----------------------------------------------------------------------------------------------------

The routine Explicit, called at line 161, computes the update over the 2D domain (the temperature values are stored in the array x, in x(1:size_x,1:size_y)) with the following scheme:

! The stencil of the explicit operator for the heat equation
! on a regular rectangular grid, using a five point finite
! difference scheme in space, is
!
!                  |       +1            |
!                  |                     |
!   dt/(h*h) *     | +1  -4 + h*h/dt  +1 |
!                  |                     |
!                  |       +1            |
!
  diag   = -4.0 + h*h/dt
  weight = dt/(h*h)
!
! Perform an explicit update on the points within the domain
  do 20, j = 1, size_x
     do 30, i = 1, size_y
        x(i,j) = weight*( x0(i-1,j) + x0(i+1,j) &
                        + x0(i,j-1) + x0(i,j+1) + x0(i,j)*diag )
30   continue
20 continue
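If I expand the weights, a single update seems to be equivalent to

    x(i,j) = x0(i,j) + dt/(h*h) * ( x0(i-1,j) + x0(i+1,j) + x0(i,j-1) + x0(i,j+1) - 4*x0(i,j) )

i.e. (if I am not mistaken) forward Euler in time with the standard 5-point Laplacian in space, which would also explain why dt is set to 0.25*h*h, the stability limit of the explicit scheme.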

----------------------------------------------------------------------------------------------------

The routine updateBound updates the boundary values like this:

!*******************************************************************
SUBROUTINE updateBound(x, size_x, size_y, NeighBor, comm2d, row_type)
!*******************************************************************
  include 'mpif.h'
!
  INTEGER :: size_x, size_y
! Iterate
  double precision, dimension(0:size_y+1, 0:size_x+1) :: x
! Row type
  INTEGER :: row_type
  integer, parameter :: nbvi = 4
  integer, parameter :: S = 1, E = 2, N = 3, W = 4
  integer, dimension(4) :: neighBor
!
  INTEGER :: infompi, comm2d
  INTEGER :: flag
  INTEGER, DIMENSION(MPI_STATUS_SIZE) :: status

!********* North/South communication **********************************
  flag = 1
! Send my boundary to North and receive from South
  CALL MPI_SENDRECV(x(size_y, 1), 1, row_type, neighBor(N), flag, &
                    x(0, 1),      1, row_type, neighBor(S), flag, &
                    comm2d, status, infompi)

! Send my boundary to South and receive from North
  CALL MPI_SENDRECV(x(1, 1),        1, row_type, neighBor(S), flag, &
                    x(size_y+1, 1), 1, row_type, neighBor(N), flag, &
                    comm2d, status, infompi)

!********* East/West communication ************************************
  flag = 2
! Send my boundary to West and receive from East
  CALL MPI_SENDRECV(x(1, 1),        size_y, MPI_DOUBLE_PRECISION, neighBor(W), flag, &
                    x(1, size_x+1), size_y, MPI_DOUBLE_PRECISION, neighBor(E), flag, &
                    comm2d, status, infompi)

! Send my boundary to East and receive from West
  CALL MPI_SENDRECV(x(1, size_x), size_y, MPI_DOUBLE_PRECISION, neighBor(E), flag, &
                    x(1, 0),      size_y, MPI_DOUBLE_PRECISION, neighBor(W), flag, &
                    comm2d, status, infompi)

END SUBROUTINE updateBound

----------------------------------------------------------------------------------------------------

I am confused about how the exchange of the values near the boundaries done by the updateBound routine fits together with the main loop (at line 161 of the main code) that calls the Explicit routine. For a given process, say rank 1 (I run with 4 processes), I send the penultimate left-hand column to the East process (3), the penultimate top row to the North process (0), and the penultimate right-hand column and the bottom row to the other neighbours (mpi_proc_null = -2). But how are the 4 processes kept synchronous? I also don't understand why every process goes through the solving piece of code that calls the Explicit routine.
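To try to see the synchronisation for myself, I wrote the little 1D toy below (again my own sketch, not from the course code): every rank exchanges one value with its left and right neighbours through MPI_SENDRECV, in the same spirit as updateBound, and the ranks at the two ends talk to MPI_PROC_NULL.

      program sendrecv_toy
!     My own 1D toy: each rank exchanges one value with its left and
!     right neighbours via MPI_SENDRECV; the end ranks use
!     MPI_PROC_NULL, so that part of the exchange is simply a no-op.
      implicit none
      include 'mpif.h'
      integer :: infompi, me, nproc, left, right
      integer, dimension(MPI_STATUS_SIZE) :: status
      double precision :: mine, fromLeft, fromRight

      call MPI_INIT(infompi)
      call MPI_COMM_RANK(MPI_COMM_WORLD, me, infompi)
      call MPI_COMM_SIZE(MPI_COMM_WORLD, nproc, infompi)

      left  = me - 1
      right = me + 1
      if (left  .lt. 0)     left  = MPI_PROC_NULL
      if (right .ge. nproc) right = MPI_PROC_NULL

      mine      = dble(me)
      fromLeft  = -1.0d0
      fromRight = -1.0d0

!     send to the right, receive from the left ...
      call MPI_SENDRECV(mine,     1, MPI_DOUBLE_PRECISION, right, 0, &
                        fromLeft, 1, MPI_DOUBLE_PRECISION, left,  0, &
                        MPI_COMM_WORLD, status, infompi)
!     ... then send to the left, receive from the right
      call MPI_SENDRECV(mine,      1, MPI_DOUBLE_PRECISION, left,  1, &
                        fromRight, 1, MPI_DOUBLE_PRECISION, right, 1, &
                        MPI_COMM_WORLD, status, infompi)

      print *, 'rank', me, ': from left =', fromLeft, ', from right =', fromRight
      call MPI_FINALIZE(infompi)
      end program sendrecv_toy

Each MPI_SENDRECV only returns once its receive has completed, so my guess is that the exchange itself is what keeps neighbouring processes in step, but I am not sure this is the right way to think about it.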
Here is the sequential version of this simulation:

program heat
  implicit none
! size of the discretization
  integer size_x, maxStep
  integer lda
  double precision, allocatable :: x(:,:), b(:,:), x0(:,:)
  double precision dt, h, result, epsilon

! loop indices
  integer i, j, step
  double precision t
!
  intrinsic abs
!
!
  print *, ' Size of the square '
  read(*,*) size_x
  h  = 1.0/(size_x+1)
  dt = 0.25*h*h
  maxStep = 100
  print *, 'Max. number of steps '
  read(*,*) maxStep
  epsilon = 1.d-8
!
  allocate(x(0:size_x+1, 0:size_x+1))
  allocate(x0(0:size_x+1, 0:size_x+1))
  allocate(b(0:size_x+1, 0:size_x+1))
!
!
! initialization
!
  call initvalues(x0, b, size_x+1, size_x)
!
  step = 0
  t    = 0.0
! REPEAT
10 continue
!
  step = step + 1
  t    = t + dt
!
  call Explicit(x0, x, b, size_x+1, size_x, size_x, dt, &
                h, result)
  result = sqrt(result)
  if ((result.gt.epsilon).and.(step.lt.maxStep)) goto 10
!
  write(*,*)
  write(*,*) ' Convergence after ', step, ' steps '
  write(*,*) ' Problem size ', size_x*size_x
!

----------------------------------------------------------------------------------------------------

Sorry for the long post, but I wanted to state my problem as clearly as possible.

Any explanation would help me.

Thanks in advance