Hello,
>
> I am still trying to understand the parallelized version of the 2D heat
> equation solver that we saw at school. In order to explain my problem, I
> need to list the main code:
>
> 9 program heat
> 10 !**************************************************************************
> 11 !
> 12 ! This program solves the heat equation on the unit square [0,1]x[0,1]
> 13 ! | du/dt - Delta(u) = 0
> 14 ! | u|gamma = constant
> 15 ! by implementing an explicit scheme.
> 16 ! The PDE is discretized using a 5 point finite difference scheme
> 17 ! in space and the domain is decomposed into sub-domains.
> 18 ! Each sub-domain holds a (x_dim+2)*(x_dim+2) grid including the
> 19 ! end points, which correspond to the boundary points that are
> 20 ! stored.
> 21 !
> 22 ! The data on the whole domain are stored in
> 23 ! the following way :
> 24 !
> 25 ! y
> 26 ! ------------------------------------
> 27 ! d | |
> 28 ! i | |
> 29 ! r | |
> 30 ! e | |
> 31 ! c | |
> 32 ! t | |
> 33 ! i | x20 |
> 34 ! o /\ | |
> 35 ! n | | x10 |
> 36 ! | | |
> 37 ! | | x00 x01 x02 ... |
> 38 ! | ------------------------------------
> 39 ! -------> x direction x(*,j)
> 40 !
> 41 ! The boundary conditions are stored in the following submatrices
> 42 !
> 43 !
> 44 ! x(1:x_dim, 0) ---> left temperature
> 45 ! x(1:x_dim, x_dim+1) ---> right temperature
> 46 ! x(0, 1:x_dim) ---> bottom temperature
> 47 ! x(x_dim+1, 1:x_dim) ---> top temperature
> 48 !
> 49 !**************************************************************************
> 50 implicit none
> 51 include 'mpif.h'
> 52 ! size of the discretization
> 53 integer :: x_dim, nb_iter
> 54 double precision, allocatable :: x(:,:),b(:,:),x0(:,:)
> 55 double precision :: dt, h, epsilon
> 56 double precision :: resLoc, result, t, tstart, tend
> 57 !
> 58 integer :: i,j
> 59 integer :: step, maxStep
> 60 integer :: size_x, size_y, me, x_domains,y_domains
> 61 integer :: iconf(5), size_x_glo
> 62 double precision conf(2)
> 63 !
> 64 ! MPI variables
> 65 integer :: nproc, infompi, comm, comm2d, lda, ndims
> 66 INTEGER, DIMENSION(2) :: dims
> 67 LOGICAL, DIMENSION(2) :: periods
> 68 LOGICAL, PARAMETER :: reorganisation = .false.
> 69 integer :: row_type
> 70 integer, parameter :: nbvi=4
> 71 integer, parameter :: S=1, E=2, N=3, W=4
> 72 integer, dimension(4) :: neighBor
> 73
> 74 !
> 75 intrinsic abs
> 76 !
> 77 !
> 78 call MPI_INIT(infompi)
> 79 comm = MPI_COMM_WORLD
> 80 call MPI_COMM_SIZE(comm,nproc,infompi)
> 81 call MPI_COMM_RANK(comm,me,infompi)
> 82 !
> 83 !
> 84 if (me.eq.0) then
> 85 call readparam(iconf, conf)
> 86 endif
> 87 call MPI_BCAST(iconf,5,MPI_INTEGER,0,comm,infompi)
> 88 call MPI_BCAST(conf,2,MPI_DOUBLE_PRECISION,0,comm,infompi)
> 89 !
> 90 size_x = iconf(1)
> 91 size_y = iconf(1)
> 92 x_domains = iconf(3)
> 93 y_domains = iconf(4)
> 94 maxStep = iconf(5)
> 95 dt = conf(1)
> 96 epsilon = conf(2)
> 97 !
> 98 size_x_glo = x_domains*size_x+2
> 99 h = 1.0d0/dble(size_x_glo)
> 100 dt = 0.25*h*h
> 101 !
> 102 !
> 103 lda = size_y+2
> 104 allocate(x(0:size_y+1,0:size_x+1))
> 105 allocate(x0(0:size_y+1,0:size_x+1))
> 106 allocate(b(0:size_y+1,0:size_x+1))
> 107 !
> 108 ! Create 2D cartesian grid
> 109 periods(:) = .false.
> 110
> 111 ndims = 2
> 112 dims(1)=x_domains
> 113 dims(2)=y_domains
> 114 CALL MPI_CART_CREATE(MPI_COMM_WORLD, ndims, dims, periods, &
> 115 reorganisation,comm2d,infompi)
> 116 !
> 117 ! Identify neighbors
> 118 !
> 119 NeighBor(:) = MPI_PROC_NULL
> 120 ! Left/West and Right/East neighbors
> 121 CALL MPI_CART_SHIFT(comm2d,0,1,NeighBor(W),NeighBor(E),infompi)
> 122
> 123 print *,'mpi_proc_null=', MPI_PROC_NULL
> 124 print *,'rank=', me
> 125 print *, 'first mpi_cart_shift: neighbor(w)=',NeighBor(W)
> 126 print *, 'first mpi_cart_shift: neighbor(e)=',NeighBor(E)
> 127
> 128 ! Bottom/South and Top/North neighbors
> 129 CALL MPI_CART_SHIFT(comm2d,1,1,NeighBor(S),NeighBor(N),infompi)
> 130
> 131
> 132 print *, 'second mpi_cart_shift: neighbor(s)=',NeighBor(S)
> 133 print *, 'second mpi_cart_shift: neighbor(n)=',NeighBor(N)
> 134
> 135
> 136
> 137 !
> 138 ! Create row data type to communicate with South and North neighbors
> 139 !
> 140 CALL MPI_TYPE_VECTOR(size_x, 1, size_y+2, MPI_DOUBLE_PRECISION, row_type, infompi)
> 141 CALL MPI_TYPE_COMMIT(row_type, infompi)
> 142 !
> 143 ! initialization
> 144 !
> 145 call initvalues(x0, b, size_x+1, size_x )
> 146 !
> 147 ! Update the boundaries
> 148 !
> 149 call updateBound(x0,size_x,size_x, NeighBor, comm2d, row_type)
> 150
> 151 step = 0
> 152 t = 0.0
> 153 !
> 154 tstart = MPI_Wtime()
> 155 ! REPEAT
> 156 10 continue
> 157 !
> 158 step = step + 1
> 159 t = t + dt
> 160 ! perform one step of the explicit scheme
> 161 call Explicit(x0,x,b, size_x+1, size_x, size_x, dt, h, resLoc)
> 162 ! update the partial solution along the interface
> 163 call updateBound(x0,size_x,size_x, NeighBor, comm2d, row_type)
> 164 ! Check the distance between two iterates
> 165 call MPI_ALLREDUCE(resLoc, result, 1, MPI_DOUBLE_PRECISION, MPI_SUM, comm, infompi)
> 166 result= sqrt(result)
> 167 !
> 168 if (me.eq.0) write(*,1002) t,result
> 169 !
> 170 if ((result.gt.epsilon).and.(step.lt.maxStep)) goto 10
> 171 !
> 172 ! UNTIL "Convergence"
> 173 !
> 174 tend = MPI_Wtime()
> 175 if (me.eq.0) then
> 176 write(*,*)
> 177 write(*,*) ' Convergence after ', step,' steps '
> 178 write(*,*) ' Problem size ', size_x*x_domains*size_y*y_domains
> 179 write(*,*) ' Wall Clock ', tend-tstart
> 180
> 181 !
> 182 ! Print the solution at each point of the grid
> 183 !
> 184 write(*,*)
> 185 write(*,*) ' Computed solution '
> 186 write(*,*)
> 187 do 30, j=size_x+1,0,-1
> 188 write(*,1000)(x0(j,i),i=0,size_x+1)
> 189 30 continue
> 190 endif
> 191 !
> 192 call MPI_FINALIZE(infompi)
> 193 !
> 194 deallocate(x)
> 195 deallocate(x0)
> 196 deallocate(b)
> 197 !
> 198 ! Formats available to display the computed values on the grid
> 199 !
> 200 1000 format(100(1x, f7.3))
> 201 1001 format(100(1x, e11.3))
> 202 1002 format(' At time ',E8.2,' Norm ', E8.2)
> 203
> 204 !
> 205 stop
> 206 end
> 207 !
>
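>
> As an aside: to convince myself of what the row_type created at line 140
> actually selects, I wrote this small standalone test (my own sketch, not
> part of the course code). It builds the same vector type over a tiny array
> and sends one row from a process to itself:
>
>       program rowtype_demo
>       implicit none
>       include 'mpif.h'
>       integer, parameter :: size_x = 4, size_y = 3
>       double precision :: a(0:size_y+1, 0:size_x+1)
>       double precision :: row(size_x)
>       integer :: i, j, me, infompi, row_type
>       integer, dimension(MPI_STATUS_SIZE) :: status
>
>       call MPI_INIT(infompi)
>       call MPI_COMM_RANK(MPI_COMM_WORLD, me, infompi)
>
> ! Fill a(i,j) with 10*i + j so every entry encodes its own indices.
>       do j = 0, size_x+1
>          do i = 0, size_y+1
>             a(i,j) = 10.0d0*dble(i) + dble(j)
>          end do
>       end do
>
> ! Same constructor as line 140 of the main code: size_x blocks of one
> ! element each, separated by the column stride size_y+2 of the array.
>       call MPI_TYPE_VECTOR(size_x, 1, size_y+2, MPI_DOUBLE_PRECISION, &
>                            row_type, infompi)
>       call MPI_TYPE_COMMIT(row_type, infompi)
>
> ! Send row i=2 (columns 1..size_x) to myself and receive it back as
> ! size_x contiguous doubles.
>       call MPI_SENDRECV(a(2,1), 1, row_type, me, 0, &
>                         row, size_x, MPI_DOUBLE_PRECISION, me, 0, &
>                         MPI_COMM_WORLD, status, infompi)
>
> ! Should print 21 22 23 24: one element per column of the column-major
> ! Fortran array, i.e. row i=2.
>       print *, 'row i=2, columns 1..size_x:', row
>
>       call MPI_TYPE_FREE(row_type, infompi)
>       call MPI_FINALIZE(infompi)
>       end program rowtype_demo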
> ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
>
> The routine "Explicit" at line 161 allows to compute the domain 2D ( the
> values of Temperature are stocked in vector "x" : x(1:size_x,1:size_y)) with
> the following scheme :
>
> ! The stencil of the explicit operator for the heat equation
> ! on a regular rectangular grid using a five point finite difference
> ! scheme in space is
> !
> !              |        +1           |
> !              |                     |
> ! dt/(h*h) *   | +1  -4 + h*h/dt  +1 |
> !              |                     |
> !              |        +1           |
> !
>       diag   = -4.0 + h*h/dt
>       weight = dt/(h*h)
> !
> ! perform an explicit update on the points within the domain
>       do 20, j=1,size_x
>          do 30, i=1,size_y
>             x(i,j) = weight*( x0(i-1,j) + x0(i+1,j) &
>                             + x0(i,j-1) + x0(i,j+1) + x0(i,j)*diag )
> 30       continue
> 20    continue
>
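>
> If I expand the stencil, I see that one step is just forward Euler in time:
> with weight = dt/(h*h) and diag = -4 + h*h/dt, the update is equivalent to
>
>    x(i,j) = x0(i,j) + dt/(h*h) * ( x0(i-1,j) + x0(i+1,j)
>                                  + x0(i,j-1) + x0(i,j+1) - 4*x0(i,j) )
>
> i.e. x_new = x_old + dt * (discrete Laplacian of x_old), so each interior
> point only needs its four direct neighbours from the previous iterate.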
>
> ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
>
> The routine "updateBound" updates the bound values like this :
>
> !*******************************************************************
>       SUBROUTINE updateBound(x, size_x, size_y, NeighBor, comm2d, row_type)
> !*******************************************************************
>       include 'mpif.h'
> !
>       INTEGER :: size_x, size_y
> ! Iterate
>       double precision, dimension(0:size_y+1,0:size_x+1) :: x
> ! Row datatype
>       INTEGER :: row_type
>       integer, parameter :: nbvi=4
>       integer, parameter :: S=1, E=2, N=3, W=4
>       integer, dimension(4) :: NeighBor
> !
>       INTEGER :: infompi, comm2d
>       INTEGER :: flag
>       INTEGER, DIMENSION(MPI_STATUS_SIZE) :: status
>
> !********* North/South communication ***********************************
>       flag = 1
> ! Send my boundary to North and receive from South
>       CALL MPI_SENDRECV(x(size_y,1), 1, row_type, NeighBor(N), flag, &
>                         x(0,1),      1, row_type, NeighBor(S), flag, &
>                         comm2d, status, infompi)
>
> ! Send my boundary to South and receive from North
>       CALL MPI_SENDRECV(x(1,1),        1, row_type, NeighBor(S), flag, &
>                         x(size_y+1,1), 1, row_type, NeighBor(N), flag, &
>                         comm2d, status, infompi)
>
> !********* East/West communication *************************************
>       flag = 2
> ! Send my boundary to West and receive from East
>       CALL MPI_SENDRECV(x(1,1),        size_y, MPI_DOUBLE_PRECISION, &
>                         NeighBor(W), flag, &
>                         x(1,size_x+1), size_y, MPI_DOUBLE_PRECISION, &
>                         NeighBor(E), flag, &
>                         comm2d, status, infompi)
>
> ! Send my boundary to East and receive from West
>       CALL MPI_SENDRECV(x(1,size_x), size_y, MPI_DOUBLE_PRECISION, &
>                         NeighBor(E), flag, &
>                         x(1,0),      size_y, MPI_DOUBLE_PRECISION, &
>                         NeighBor(W), flag, &
>                         comm2d, status, infompi)
>
>       END SUBROUTINE updateBound
>
>
> --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
>
> I am confused about the interplay between the shifting of the values near
> the boundaries done by the "updateBound" routine and the main loop (at
> line 161 in the main code) which calls the routine "Explicit".
> For a given process (say number 1; I use 4 processes for execution), I
> send to the east process (3) the penultimate left column, to the north
> process (0) the penultimate top row, and to the others (mpi_proc_null=-2)
> the penultimate right column and the bottom row. But how are the 4
> processes kept synchronous? I also don't understand why all the processes
> go through the solving piece of code that calls the "Explicit" routine.
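>
> To check my understanding of MPI_PROC_NULL and of the blocking behaviour
> of MPI_SENDRECV, I wrote this small standalone test (my own sketch, not
> part of the course code); it mimics a 1D version of the halo exchange:
>
>       program sendrecv_demo
>       implicit none
>       include 'mpif.h'
>       integer :: me, nproc, infompi, left, right
>       integer, dimension(MPI_STATUS_SIZE) :: status
>       double precision :: sendval, recvval
>
>       call MPI_INIT(infompi)
>       call MPI_COMM_RANK(MPI_COMM_WORLD, me, infompi)
>       call MPI_COMM_SIZE(MPI_COMM_WORLD, nproc, infompi)
>
> ! Non-periodic 1D line of processes: the two ends get MPI_PROC_NULL,
> ! just like MPI_CART_SHIFT returns for a process on the domain boundary.
>       left  = me - 1
>       right = me + 1
>       if (me == 0)         left  = MPI_PROC_NULL
>       if (me == nproc - 1) right = MPI_PROC_NULL
>
>       sendval = dble(me)
>       recvval = -1.0d0
>
> ! Send my value to the right neighbour, receive from the left one.
> ! A transfer to/from MPI_PROC_NULL completes immediately and leaves
> ! the receive buffer untouched, so the ends need no special casing.
>       call MPI_SENDRECV(sendval, 1, MPI_DOUBLE_PRECISION, right, 0, &
>                         recvval, 1, MPI_DOUBLE_PRECISION, left,  0, &
>                         MPI_COMM_WORLD, status, infompi)
>
>       print *, 'rank', me, 'received', recvval
>
>       call MPI_FINALIZE(infompi)
>       end program sendrecv_demo
>
> If I run this with 4 processes, is it correct that each MPI_SENDRECV only
> returns once its receive has completed, and that this pairwise blocking is
> what keeps neighbouring processes in step at every iteration, even though
> there is no global barrier?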
>
> Here is the sequential version of this simulation:
>
>       program heat
>       implicit none
> ! size of the discretization
>       integer size_x, maxStep
>       integer lda
>       double precision, allocatable :: x(:,:), b(:,:), x0(:,:)
>       double precision dt, h, result, epsilon
>
> ! loop indices
>       integer i, j, step
>       double precision t
> !
>       intrinsic abs
> !
> !
>       print *,' Size of the square '
>       read(*,*) size_x
>       h = 1.0/(size_x+1)
>       dt = 0.25*h*h
>       maxStep = 100
>       print *, 'Max. number of steps '
>       read(*,*) maxStep
>       epsilon = 1.d-8
> !
>       allocate(x(0:size_x+1,0:size_x+1))
>       allocate(x0(0:size_x+1,0:size_x+1))
>       allocate(b(0:size_x+1,0:size_x+1))
> !
> !
> ! initialization
> !
>       call initvalues(x0, b, size_x+1, size_x)
> !
>       step = 0
>       t = 0.0
> ! REPEAT
>  10   continue
> !
>       step = step + 1
>       t = t + dt
> !
>       call Explicit(x0, x, b, size_x+1, size_x, size_x, dt, &
>                     h, result)
>       result = sqrt(result)
>       if ((result.gt.epsilon).and.(step.lt.maxStep)) goto 10
> !
>       write(*,*)
>       write(*,*) ' Convergence after ', step, ' steps '
>       write(*,*) ' Problem size ', size_x*size_x
> !
>
>
> ------------------------------------------------------------------------------------------------------------------------------------------------------------------------
>
> Sorry for this long post, but I wanted to state my problem as clearly as
> possible.
>
> Any explanation would help me.
>
> Thanks in advance
>
>