Dear All,
I was parallelising the serial molecular dynamic simulation code as given below: I have only two processors. My system is a duacore system. c------------------------------------------------------------------ SERIAL CODE c................................................................... DO m=1,nmol !! nmol is total number of molecules DO i=1,2 ax(i,m)=0.0d0 !acceleration ay(i,m)=0.0d0 !acceleration az(i,m)=0.0d0 !acceleration ENDDO DO j=1,nmol Ngmol(j,m)=0 ENDDO ENDDO c--------------------------------------------- c force calculations c--------------------------------------------- DO m=1,nmol-1 DO i=1,2 natom = natom +1 ibeg = inbl(natom) iend = inbl(natom+1)-1 DO ilist=ibeg,iend !! no. of neighbors j=inblst1(ilist) !! neighbor molecular label k=inblst2(ilist) !! neighbor atomic label c j,k are molecular and atomic label of neighbour list of each molecule c on each processor. C ____________________ C C Interatomic distance C ____________________ xij = x1(i,m) - x1(k,j) yij = y1(i,m) - y1(k,j) zij = z1(i,m) - z1(k,j) C __________________________________ C C Apply periodic boundary conditions C __________________________________ dpbcx = - boxx*dnint(xij/boxx) dpbcy = - boxy*dnint(yij/boxy) dpbcz = - boxz*dnint(zij/boxz) xij = xij + dpbcx yij = yij + dpbcy zij = zij + dpbcz rij2 = xij*xij + yij*yij + zij*zij C ________________ C C Calculate forces C ________________ IF (rij2.le.rcutsq) then rij = dsqrt(rij2) r_2 = sig1sq/rij2 r_6 = r_2*r_2*r_2 r_12 = r_6*r_6 pot_lj = pot_lj+((r_12-r_6) + rij*vfc-vc) !! need 4*eps1 fij = 24.0d0*eps1*((2*r_12-r_6)/rij2 - fc/rij) fxlj = fij*xij fylj = fij*yij fzlj = fij*zij ax(i,m) = ax(i,m) + fxlj ay(i,m) = ay(i,m) + fylj az(i,m) = az(i,m) + fzlj ax(k,j) = ax(k,j) - fxlj ay(k,j) = ay(k,j) - fylj az(k,j) = az(k,j) - fzlj pconf = pconf+(xij*fxlj + yij*fylj + zij*fzlj) IF (ngmol(j,m).eq.0) then xmolij = xmol(m) - xmol(j) + dpbcx ymolij = ymol(m) - ymol(j) + dpbcy zmolij = zmol(m) - zmol(j) + dpbcz rmolij = dsqrt(xmolij*xmolij+ymolij*ymolij & +zmolij*zmolij) nr = dnint(rmolij/dgr) ng12(nr) = ng12(nr)+2 ngmol(j,m) = 1 ENDIF ENDIF ENDDO ENDDO ENDDO DO m=1,nmol DO i=1,2 write(*,100)ax(i,m),ay(i,m),az(i,m) ENDDO ENDDO and below is the parallelised part c------------------------------------------------------------------------ c PARALLEL CODE: c-------------------------------------------------------------------------- DO m=1,nmol DO i=1,2 ax(i,m)=0.0d0 ay(i,m)=0.0d0 az(i,m)=0.0d0 ENDDO DO j=1,nmol ngmol(j,m)=0 ENDDO ENDDO CALL para_range(1, nmol, nprocs, myrank, nmolsta, nmolend) DO m=nmolsta,nmolend-1 !!nmol is diveded into two parts !!and nmolsta and nmolend are starting and ending !!index for each processor DO i=1,2 ibeg = inbl(natom) iend = inbl(natom+1)-1 DO ilist=ibeg,iend !! no. of neighbors j=inblst1(ilist) !! neighbor molecular label k=inblst2(ilist) !! neighbor atomic label c ____________________ C C Interatomic distance C ____________________ xij = x1(i,m) - x1(k,j) yij = y1(i,m) - y1(k,j) zij = z1(i,m) - z1(k,j) dpbcx = - boxx*dnint(xij/boxx) dpbcy = - boxy*dnint(yij/boxy) dpbcz = - boxz*dnint(zij/boxz) xij = xij + dpbcx yij = yij + dpbcy zij = zij + dpbcz rij2 = xij*xij + yij*yij + zij*zij IF (rij2.le.rcutsq) then rij = dsqrt(rij2) r_2 = sig1sq/rij2 r_6 = r_2*r_2*r_2 r_12 = r_6*r_6 pot_lj = pot_lj+((r_12-r_6) + rij*vfc-vc) !! need 4*eps1 fij = 24.0d0*eps1*((2*r_12-r_6)/rij2 - fc/rij) fxlj = fij*xij fylj = fij*yij fzlj = fij*zij ax(i,m) = ax(i,m) + fxlj ay(i,m) = ay(i,m) + fylj az(i,m) = az(i,m) + fzlj ax(k,j) = ax(k,j) - fxlj ay(k,j) = ay(k,j) - fylj az(k,j) = az(k,j) - fzlj pconf = pconf+(xij*fxlj + yij*fylj + zij*fzlj) IF (ngmol(j,m).eq.0) then xmolij = xmol(m) - xmol(j) + dpbcx ymolij = ymol(m) - ymol(j) + dpbcy zmolij = zmol(m) - zmol(j) + dpbcz rmolij = dsqrt(xmolij*xmolij+ymolij*ymolij & +zmolij*zmolij) nr = dnint(rmolij/dgr) ng12(nr) = ng12(nr)+2 ngmol(j,m) = 1 ENDIF ENDIF ENDDO natom = natom +1 ENDDO ENDDO if(myrank.ne.(nprocs-1))then !! this is for nmolend molecule on each processor DO i=1,2 m=nmolend natom = (2*nmolend)-1 ibeg = inbl(natom) iend = inbl(natom+1)-1 DO ilist=ibeg,iend !! no. of neighbors j=inblst1(ilist) !! neighbor molecular label k=inblst2(ilist) !! neighbor atomic label xij = x1(i,m) - x1(k,j) yij = y1(i,m) - y1(k,j) zij = z1(i,m) - z1(k,j) dpbcx = - boxx*dnint(xij/boxx) dpbcy = - boxy*dnint(yij/boxy) dpbcz = - boxz*dnint(zij/boxz) xij = xij + dpbcx yij = yij + dpbcy zij = zij + dpbcz rij2 = xij*xij + yij*yij + zij*zij IF (rij2.le.rcutsq) then rij = dsqrt(rij2) r_2 = sig1sq/rij2 r_6 = r_2*r_2*r_2 r_12 = r_6*r_6 pot_lj = pot_lj+((r_12-r_6) + rij*vfc-vc) !! need 4*eps1 fij = 24.0d0*eps1*((2*r_12-r_6)/rij2 - fc/rij) fxlj = fij*xij fylj = fij*yij fzlj = fij*zij ax(i,m) = ax(i,m) + fxlj ay(i,m) = ay(i,m) + fylj az(i,m) = az(i,m) + fzlj ax(k,j) = ax(k,j) - fxlj ay(k,j) = ay(k,j) - fylj az(k,j) = az(k,j) - fzlj pconf = pconf+(xij*fxlj + yij*fylj + zij*fzlj)!!............doubt IF (ngmol(j,m).eq.0) then xmolij = xmol(m) - xmol(j) + dpbcx ymolij = ymol(m) - ymol(j) + dpbcy zmolij = zmol(m) - zmol(j) + dpbcz rmolij = dsqrt(xmolij*xmolij+ymolij*ymolij & +zmolij*zmolij) nr = dnint(rmolij/dgr) ng12(nr) = ng12(nr)+2 ngmol(j,m) = 1 ENDIF ENDIF ENDDO natom = natom +1 ENDDO endif c-------------------------------------------------------------------------------------------- An all to all communication to gather all acceleration information on both the processors c---------------------------------------------------------------------------------------------- DO irank = 0, nprocs - 1 CALL para_range(1, nmol, nprocs, irank, nmolsta, nmolend) jjasta(irank) = nmolsta jjalen(irank) = nua * (nmolend - nmolsta + 1) ENDDO CALL para_range(1, nmol, nprocs, myrank, nmolsta, nmolend) DO irank = 0, nprocs - 1 CALL MPI_BCAST(ax(1,jjasta(irank)), jjalen(irank), MPI_REAL8, & irank, MPI_COMM_WORLD, ierr) CALL MPI_BCAST(ay(1,jjasta(irank)), jjalen(irank), MPI_REAL8, & irank, MPI_COMM_WORLD, ierr) CALL MPI_BCAST(az(1,jjasta(irank)), jjalen(irank), MPI_REAL8, & irank, MPI_COMM_WORLD, ierr) ENDDO c---------------------------------------------------------------------------------- If(myrank.eq.0)then !! or if(myrank.eq.1) DO m=1,nmol DO i=1,2 write(*,100)ax(i,m),ay(i,m),az(i,m) ENDDO ENDDO endif SUBROUTINE para_range(n1, n2, nprocs, irank, ista, iend) iwork1 = (n2 - n1 + 1) / nprocs iwork2 = MOD(n2 - n1 + 1, nprocs) ista = irank * iwork1 + n1 + MIN(irank, iwork2) iend = ista + iwork1 - 1 IF (iwork2 > irank) iend = iend + 1 END MY question is : when I printed the results,accelerations on processor 0( ie from 1 to nmol/2) are same as the results for serial code, where as they arent same for processor 1(nmol/2+1 to nmol). As I am learning MPI I couldnt find where it went wrong in doing an all to all operation for accleration part ax(i,m),ay(i,m),az(i,m). Please give me some solution for this part. thank you all, regards, Ramesh