Hello,

>
> I am still trying to understand the parallelized version of the 2D heat
> equation solver that we saw at school. In order to explain my problem, I
> need to list the main code:
>
>   9       program heat
>  10 !**************************************************************************
>  11 !
>  12 !   This program solves the heat equation on the unit square [0,1]x[0,1]
>  13 !        | du/dt - Delta(u) = 0
>  14 !        |  u/gamma = cste
>  15 !   by implementing an explicit scheme.
>  16 !   The discretization is done using a 5 point finite difference scheme
>  17 !   and the domain is decomposed into sub-domains.
>  18 !   The PDE is discretized using a 5 point finite difference scheme
>  19 !   over a (x_dim+2)*(x_dim+2) grid including the end points,
>  20 !   which correspond to the boundary points that are stored.
>  21 !
>  22 !   The data on the whole domain are stored in
>  23 !   the following way :
>  24 !
>  25 !    y
>  26 !           ------------------------------------
>  27 !    d      |                                  |
>  28 !    i      |                                  |
>  29 !    r      |                                  |
>  30 !    e      |                                  |
>  31 !    c      |                                  |
>  32 !    t      |                                  |
>  33 !    i      | x20                              |
>  34 !    o /\   |                                  |
>  35 !    n  |   | x10                              |
>  36 !       |   |                                  |
>  37 !       |   | x00  x01 x02 ...                 |
>  38 !       |   ------------------------------------
>  39 !        -------> x direction  x(*,j)
>  40 !
>  41 !   The boundary conditions are stored in the following submatrices
>  42 !
>  43 !
>  44 !        x(1:x_dim, 0)          ---> left   temperature
>  45 !        x(1:x_dim, x_dim+1)    ---> right  temperature
>  46 !        x(0, 1:x_dim)          ---> top    temperature
>  47 !        x(x_dim+1, 1:x_dim)    ---> bottom temperature
>  48 !
>  49 !**************************************************************************
>  50       implicit none
>  51       include 'mpif.h'
>  52 ! size of the discretization
>  53       integer :: x_dim, nb_iter
>  54       double precision, allocatable :: x(:,:),b(:,:),x0(:,:)
>  55       double precision  :: dt, h, epsilon
>  56       double precision  :: resLoc, result, t, tstart, tend
>  57 !
>  58       integer :: i,j
>  59       integer :: step, maxStep
>  60       integer :: size_x, size_y, me, x_domains,y_domains
>  61       integer :: iconf(5), size_x_glo
>  62       double precision conf(2)
>  63 !
>  64 ! MPI variables
>  65       integer :: nproc, infompi, comm, comm2d, lda, ndims
>  66       INTEGER, DIMENSION(2)  :: dims
>  67       LOGICAL, DIMENSION(2)  :: periods
>  68       LOGICAL, PARAMETER     :: reorganisation = .false.
>  69       integer :: row_type
>  70       integer, parameter :: nbvi=4
>  71       integer, parameter :: S=1, E=2, N=3, W=4
>  72       integer, dimension(4) :: neighBor
>  73
>  74 !
>  75       intrinsic abs
>  76 !
>  77 !
>  78       call MPI_INIT(infompi)
>  79       comm = MPI_COMM_WORLD
>  80       call MPI_COMM_SIZE(comm,nproc,infompi)
>  81       call MPI_COMM_RANK(comm,me,infompi)
>  82 !
>  83 !
>  84       if (me.eq.0) then
>  85           call readparam(iconf, conf)
>  86       endif
>  87       call MPI_BCAST(iconf,5,MPI_INTEGER,0,comm,infompi)
>  88       call MPI_BCAST(conf,2,MPI_DOUBLE_PRECISION,0,comm,infompi)
>  89 !
>  90       size_x    = iconf(1)
>  91       size_y    = iconf(1)
>  92       x_domains = iconf(3)
>  93       y_domains = iconf(4)
>  94       maxStep   = iconf(5)
>  95       dt        = conf(1)
>  96       epsilon   = conf(2)
>  97 !
>  98       size_x_glo = x_domains*size_x+2
>  99       h      = 1.0d0/dble(size_x_glo)
> 100       dt     = 0.25*h*h
> 101 !
> 102 !
> 103       lda = size_y+2
> 104       allocate(x(0:size_y+1,0:size_x+1))
> 105       allocate(x0(0:size_y+1,0:size_x+1))
> 106       allocate(b(0:size_y+1,0:size_x+1))
> 107 !
> 108 ! Create 2D cartesian grid
> 109       periods(:) = .false.
> 110
> 111       ndims = 2
> 112       dims(1)=x_domains
> 113       dims(2)=y_domains
> 114       CALL MPI_CART_CREATE(MPI_COMM_WORLD, ndims, dims, periods, &
> 115         reorganisation,comm2d,infompi)
> 116 !
> 117 ! Identify neighbors
> 118 !
> 119       NeighBor(:) = MPI_PROC_NULL
> 120 ! Left/West and right/East neighbors
> 121       CALL MPI_CART_SHIFT(comm2d,0,1,NeighBor(W),NeighBor(E),infompi)
> 122
> 123       print *,'mpi_proc_null=', MPI_PROC_NULL
> 124       print *,'rank=', me
> 125       print *, 'first mpi_cart_shift: neighbor(w)=',NeighBor(W)
> 126       print *, 'first mpi_cart_shift: neighbor(e)=',NeighBor(E)
> 127
> 128 ! Bottom/South and Upper/North neighbors
> 129       CALL MPI_CART_SHIFT(comm2d,1,1,NeighBor(S),NeighBor(N),infompi)
> 130
> 131
> 132       print *, 'second mpi_cart_shift: neighbor(s)=',NeighBor(S)
> 133       print *, 'second mpi_cart_shift: neighbor(n)=',NeighBor(N)
> 134
> 135
> 136
> 137 !
> 138 ! Create row data type to communicate with South and North neighbors
> 139 !
> 140       CALL MPI_TYPE_VECTOR(size_x, 1, size_y+2, MPI_DOUBLE_PRECISION, row_type, infompi)
> 141       CALL MPI_TYPE_COMMIT(row_type, infompi)
> 142 !
> 143 ! initialization
> 144 !
> 145       call initvalues(x0, b, size_x+1, size_x )
> 146 !
> 147 ! Update the boundaries
> 148 !
> 149       call updateBound(x0,size_x,size_x, NeighBor, comm2d, row_type)
> 150
> 151       step = 0
> 152       t    = 0.0
> 153 !
> 154       tstart = MPI_Wtime()
> 155 ! REPEAT
> 156  10   continue
> 157 !
> 158          step = step + 1
> 159          t    = t + dt
> 160 ! perform one step of the explicit scheme
> 161          call Explicit(x0,x,b, size_x+1, size_x, size_x, dt, h, resLoc)
> 162 ! update the partial solution along the interface
> 163          call updateBound(x0,size_x,size_x, NeighBor, comm2d, row_type)
> 164 ! Check the distance between two iterates
> 165          call MPI_ALLREDUCE(resLoc, result, 1, MPI_DOUBLE_PRECISION, MPI_SUM, comm, infompi)
> 166          result= sqrt(result)
> 167 !
> 168          if (me.eq.0) write(*,1002) t,result
> 169 !
> 170        if ((result.gt.epsilon).and.(step.lt.maxStep)) goto 10
> 171 !
> 172 ! UNTIL "Convergence"
> 173 !
> 174        tend = MPI_Wtime()
> 175        if (me.eq.0) then
> 176          write(*,*)
> 177          write(*,*) ' Convergence after ', step,' steps '
> 178          write(*,*) '      Problem size              ', size_x*x_domains*size_y*y_domains
> 179          write(*,*) ' Wall Clock                     ', tend-tstart
> 180
> 181 !
> 182 ! Print the solution at each point of the grid
> 183 !
> 184          write(*,*)
> 185          write(*,*) ' Computed solution '
> 186          write(*,*)
> 187          do 30, j=size_x+1,0,-1
> 188             write(*,1000)(x0(j,i),i=0,size_x+1)
> 189  30      continue
> 190        endif
> 191 !
> 192       call MPI_FINALIZE(infompi)
> 193 !
> 194       deallocate(x)
> 195       deallocate(x0)
> 196       deallocate(b)
> 197 !
> 198 ! Formats available to display the computed values on the grid
> 199 !
> 200 1000  format(100(1x, f7.3))
> 201 1001  format(100(1x, e7.3))
> 202 1002   format(' At time ',E8.2,' Norm ', E8.2)
> 203
> 204 !
> 205       stop
> 206       end
> 207 !
>
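> To check my understanding of how the neighbours are obtained, I wrote this
> small standalone test outside of the course code (the program name and the
> 2x2 decomposition are just my own choices for a 4-process run):
>
>       program test_neighbors
>       implicit none
>       include 'mpif.h'
>       integer, parameter :: S=1, E=2, N=3, W=4
>       integer :: infompi, me, nproc, comm2d, ndims
>       integer, dimension(2) :: dims
>       logical, dimension(2) :: periods
>       integer, dimension(4) :: neighBor
> !
>       call MPI_INIT(infompi)
>       call MPI_COMM_SIZE(MPI_COMM_WORLD, nproc, infompi)
>       call MPI_COMM_RANK(MPI_COMM_WORLD, me, infompi)
> ! same decomposition as the main code, assuming x_domains = y_domains = 2
>       ndims = 2
>       dims(1) = 2
>       dims(2) = 2
>       periods(:) = .false.
>       call MPI_CART_CREATE(MPI_COMM_WORLD, ndims, dims, periods, &
>                            .false., comm2d, infompi)
> ! ranks outside the grid come back as MPI_PROC_NULL
>       neighBor(:) = MPI_PROC_NULL
>       call MPI_CART_SHIFT(comm2d, 0, 1, neighBor(W), neighBor(E), infompi)
>       call MPI_CART_SHIFT(comm2d, 1, 1, neighBor(S), neighBor(N), infompi)
>       print *, 'rank ', me, ' W,E,S,N = ', neighBor(W), neighBor(E), &
>                neighBor(S), neighBor(N)
>       call MPI_FINALIZE(infompi)
>       end program test_neighbors
>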
> ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
>
> The routine "Explicit" at line 161 computes the 2D domain (the temperature
> values are stored in the array "x": x(1:size_x,1:size_y)) with the
> following scheme:
>
> ! The stencil of the explicit operator for the heat equation
> ! on a regular rectangular grid using a five point finite difference
> ! scheme in space is
> !
> !                  |               +1                |
> !                  |                                 |
> ! dt/(h*h) *       |   +1      -4 + h*h/dt      +1   |
> !                  |                                 |
> !                  |               +1                |
> !
>       diag = - 4.0 + h*h/dt
>       weight =  dt/(h*h)
> !
> ! perform an explicit update on the points within the domain
>         do 20, j=1,size_x
>           do 30, i=1,size_y
>              x(i,j) = weight *( x0(i-1,j) + x0(i+1,j)  &
>                         + x0(i,j-1) + x0(i,j+1) +x0(i,j)*diag)
>  30       continue
>  20     continue
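>
> If I expand the weights (diag and weight above), one interior update is just:
>
>    x(i,j) = x0(i,j) + dt/(h*h) * ( x0(i-1,j) + x0(i+1,j)
>                                  + x0(i,j-1) + x0(i,j+1) - 4*x0(i,j) )
>
> i.e. the usual explicit Euler step for du/dt = Delta(u) with the 5 point
> Laplacian; this is only a rewriting of the loop body, nothing new.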
>
>
> ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
>
> The routine "updateBound" updates the boundary values like this:
>
>   1 !*******************************************************************
>   2 SUBROUTINE updateBound(x, size_x, size_y, NeighBor, comm2d, row_type)
>   3
>   4 !*****************************************************************
>   5    include 'mpif.h'
>   6 !
>   7   INTEGER size_x, size_y
>   8 !Iterate
>   9   double precision,  dimension(0:size_y+1,0:size_x+1) :: x
>  10 !Type row_type
>  11   INTEGER :: row_type
>  12   integer, parameter :: nbvi=4
>  13   integer, parameter :: S=1, E=2, N=3, W=4
>  14   integer, dimension(4) :: neighBor
>  15 !
>  16   INTEGER  :: infompi, comm2d
>  17   INTEGER :: flag
>  18   INTEGER, DIMENSION(MPI_STATUS_SIZE)    :: status
>  19
>  20 !********* North/South communication ************************************
>  21   flag = 1
>  22   !Send my boundary to North and receive from South
>  23   CALL MPI_SENDRECV(x(size_y, 1), 1, row_type ,neighBor(N),flag, &
>  24                      x(0,1), 1, row_type,neighbor(S),flag, &
>  25                      comm2d, status, infompi)
>  26
>  27   !Send my boundary to South and receive from North
>  28   CALL MPI_SENDRECV(x(1,1), 1, row_type ,neighbor(S),flag, &
>  29                      x(size_y+1,1), 1, row_type ,neighbor(N),flag, &
>  30                      comm2d, status, infompi)
>  31
>  32 !********* East/West communication ************************************
>  33   flag = 2
>  34   !Send my boundary to West and receive from East
>  35   CALL MPI_SENDRECV(x(1,1), size_y, MPI_DOUBLE_PRECISION, neighbor(W), flag, &
>  36                      x(1,size_x+1), size_y, MPI_DOUBLE_PRECISION, neighbor(E), flag, &
>  37                      comm2d, status, infompi)
>  38
>  39   !Send my boundary to East and receive from West
>  40   CALL MPI_SENDRECV( x(1,size_x), size_y, MPI_DOUBLE_PRECISION, neighbor(E), flag, &
>  41                      x(1,0), size_y, MPI_DOUBLE_PRECISION, neighbor(W), flag, &
>  42                      comm2d, status, infompi)
>  43
>  44 END SUBROUTINE updateBound
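>
> The way I read MPI_TYPE_VECTOR(size_x, 1, size_y+2, ...), row_type starting
> at x(size_y,1) picks x(size_y,1), x(size_y,2), ..., x(size_y,size_x), i.e.
> one interior row, because two elements of the same row are size_y+2 doubles
> apart in memory (column-major storage). I checked the type with this little
> program (my own test, not part of the course code; size_x = size_y = 4 is an
> arbitrary choice):
>
>       program test_row_type
>       implicit none
>       include 'mpif.h'
>       integer, parameter :: size_x = 4, size_y = 4
>       integer :: infompi, row_type, typesize
>       integer(kind=MPI_ADDRESS_KIND) :: lb, extent
> !
>       call MPI_INIT(infompi)
> ! same constructor as line 140 of the main code: size_x blocks of one
> ! double, separated by a stride of size_y+2 elements (one column of x)
>       call MPI_TYPE_VECTOR(size_x, 1, size_y+2, MPI_DOUBLE_PRECISION, &
>                            row_type, infompi)
>       call MPI_TYPE_COMMIT(row_type, infompi)
>       call MPI_TYPE_SIZE(row_type, typesize, infompi)
>       call MPI_TYPE_GET_EXTENT(row_type, lb, extent, infompi)
>       print *, 'size (bytes) =', typesize, ' extent (bytes) =', extent
>       call MPI_TYPE_FREE(row_type, infompi)
>       call MPI_FINALIZE(infompi)
>       end program test_row_type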
>
>
> --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
>
> I am confused about the link between the exchange of the values near the
> boundaries done by the "updateBound" routine and the main loop (at line 161
> in the main code) which calls the routine "Explicit".
> For a given process (say number 1; I use 4 processes for execution), I send
> the penultimate left column to the east process (3), the penultimate top row
> to the north process (0), and the penultimate right column and bottom row to
> the others (mpi_proc_null=-2). But how are the 4 processes kept synchronous?
> I also don't understand why all the processes go through the solving piece
> of code that calls the "Explicit" routine.
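>
> To convince myself of what MPI_SENDRECV does when the partner is
> MPI_PROC_NULL (and whether it blocks), I also tried this small check on the
> side (my own test, not part of the course code):
>
>       program test_proc_null
>       implicit none
>       include 'mpif.h'
>       integer :: infompi, me
>       integer, dimension(MPI_STATUS_SIZE) :: status
>       double precision :: sendbuf, recvbuf
> !
>       call MPI_INIT(infompi)
>       call MPI_COMM_RANK(MPI_COMM_WORLD, me, infompi)
>       sendbuf = dble(me)
>       recvbuf = -1.0d0
> ! a send/receive involving MPI_PROC_NULL completes immediately and the
> ! receive buffer is left unchanged
>       call MPI_SENDRECV(sendbuf, 1, MPI_DOUBLE_PRECISION, MPI_PROC_NULL, 1, &
>                         recvbuf, 1, MPI_DOUBLE_PRECISION, MPI_PROC_NULL, 1, &
>                         MPI_COMM_WORLD, status, infompi)
>       print *, 'rank ', me, ' recvbuf = ', recvbuf
>       call MPI_FINALIZE(infompi)
>       end program test_proc_null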
>
> Here is the sequential version of this simulation:
>
>  49       program heat
>  50       implicit none
>  51 ! size of the discretization
>  52       integer size_x, maxStep
>  53       integer lda
>  54       double precision, allocatable:: x(:,:),b(:,:), x0(:,:)
>  55       double precision dt, h, result, epsilon
>  56
>  57 ! index loop
>  58       integer i,j, step
>  59       double precision t
>  60 !
>  61       intrinsic abs
>  62 !
>  63 !
>  64       print *,' Size of the square '
>  65       read(*,*) size_x
>  66       h       = 1.0/(size_x+1)
>  67       dt      = 0.25*h*h
>  68       maxStep = 100
>  69       print *, 'Max. number of steps '
>  70       read(*,*) maxStep
>  71       epsilon = 1.d-8
>  72 !
>  73       allocate(x(0:size_x+1,0:size_x+1))
>  74       allocate(x0(0:size_x+1,0:size_x+1))
>  75       allocate(b(0:size_x+1,0:size_x+1))
>  76 !
>  77 !
>  78 ! initialization
>  79 !
>  80       call initvalues(x0, b, size_x+1, size_x )
>  81 !
>  82       step = 0
>  83       t    = 0.0
>  84 ! REPEAT
>  85  10   continue
>  86 !
>  87          step = step + 1
>  88          t    = t + dt
>  89 !
>  90          call Explicit(x0, x, b, size_x+1, size_x, size_x, dt, &
>  91             h, result)
>  92          result = sqrt(result)
>  93        if ((result.gt.epsilon).and.(step.lt.maxStep)) goto 10
>  94 !
>  95         write(*,*)
>  96         write(*,*) ' Convergence after ', step,' steps '
>  97         write(*,*) '      Problem size              ',  size_x*size_x
>  98 !
>
>
> ------------------------------------------------------------------------------------------------------------------------------------------------------------------------
>
> Sorry for this long post, but I wanted to describe my problem as clearly as
> possible.
>
> Any explanation would help me.
>
> Thanks in advance
>
>
