Cezary Sliwa wrote:
Jeff Squyres wrote:
Please note that I replied to your original post:

     http://www.open-mpi.org/community/lists/users/2006/02/0712.php

Was that not sufficient? If not, please provide more details on what you are attempting to do and what is occurring. Thanks.

I have a simple program in which the rank 0 task dispatches compute tasks to the other processes. It works fine on a single 4-way SMP machine, but when I try to run it on two nodes, the processes on the other machine seem to spin in a loop inside MPI_SEND (the message is never delivered).

And this despite the fact that a matching MPI_IRECV has already been posted by the rank 0 task.
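
For what it's worth, the exchange reduces to something like the
following stripped-down sketch (illustrative only, not code from the
program below):

      program sketch

      implicit none

      include 'mpif.h'

      integer ierr, rank, request, status(MPI_STATUS_SIZE)
      double precision y

      call mpi_init(ierr)
      call mpi_comm_rank(MPI_COMM_WORLD, rank, ierr)

      if(rank .eq. 0) then
c        post the receive first, as the real program does
         call mpi_irecv(y, 1, MPI_DOUBLE_PRECISION, 1, 3,
     $        MPI_COMM_WORLD, request, ierr)
         call mpi_wait(request, status, ierr)
         print *, 'rank 0 received ', y
      else if(rank .eq. 1) then
         y = 1d0
c        this blocking send should complete once the receive
c        posted by rank 0 matches it
         call mpi_send(y, 1, MPI_DOUBLE_PRECISION, 0, 3,
     $        MPI_COMM_WORLD, ierr)
      end if

      call mpi_finalize(ierr)

      end program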

Cezary Sliwa

      program ng

      implicit none

      external Nsum, Gsum

      double precision Nsum, Gsum, integrate, ee, resn, resg, n, g, s1

      double precision H, theta, phi, BG, thetaM, phiM, kBT, EF
      common / intparms / H, theta, phi, BG, thetaM, phiM, kBT, EF

      double precision ialpha, hbar, c, e, kB, hartree_eV, hartree_J,
     $     au_T, au_angstrom, T

      parameter ( ialpha = 137.0359991d0, hbar = 1, c = ialpha, e = 1,
     $     kB = 3.1668153d-6,
     $     hartree_eV = 27.2113845d0, hartree_J = 4.35974417d-18,
     $     au_T = 2.35051742d5, au_angstrom = 0.5291772108d0,
     $     T = 10d0 )

      double precision a, b

      double precision pi

      parameter ( pi = 3.141592653589793d0 )


      include 'mpif.h'

      integer ierr, rank, status(MPI_STATUS_SIZE)

      integer cmd(2)
      double precision buf(1), bufx, bufy
      integer nd

      integer size, whatfun
      common / commparms / size, whatfun


      call mpi_init(ierr)

      kBT = kB*T

      H = 2.0d0*c/au_T
      theta = 0d0
      phi = 0d0

      BG = -0.03d0/hartree_eV
      thetaM = 0d0
      phiM = 0d0

      call mpi_comm_rank(MPI_COMM_WORLD, rank, ierr)
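
c     the workers loop on commands from rank 0:
c       cmd(1) = 1  terminate
c       cmd(1) = 2  receive a new value of EF (tag 2)
c       cmd(1) = 3  evaluate Nsum (cmd(2)=1) or Gsum (cmd(2)=2)
c                   at a point received with tag 3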

      if(rank .ne. 0) then

         do while(.true.)
            call mpi_recv(cmd, 2, MPI_INTEGER, 0, 1, MPI_COMM_WORLD,
     $           status, ierr)

            print *, rank, ' received ', cmd(1)

            select case(cmd(1))

            case(1)
               goto 300

            case(2)
               call mpi_recv(buf, 1, MPI_DOUBLE_PRECISION, 0, 2,
     $              MPI_COMM_WORLD, status, ierr)
               EF = buf(1)

            case(3)
               call mpi_recv(bufx, 1, MPI_DOUBLE_PRECISION, 0, 3,
     $              MPI_COMM_WORLD, status, ierr)

               select case(cmd(2))
               case(1)
                  bufy = Nsum(bufx)
               case(2)
                  bufy = Gsum(bufx)
               case default
                  write(*, *) '***   unknown cmd(2)   ***'
                  stop
               end select

               call mpi_send(bufy, 1, MPI_DOUBLE_PRECISION, 0, 3,
     $              MPI_COMM_WORLD, ierr)

            case default
               write(*, *) '***   unknown cmd(1)   ***'
               stop

            end select

         end do
300      continue

      else

         call mpi_comm_size(MPI_COMM_WORLD, size, ierr)

         open(12, status='OLD', FILE='in_ef.txt')
         open(13, status='UNKNOWN', FILE='out.txt')

         do while(.true.)

            read(12, *, end=100) ee

            EF = ee/hartree_eV

            do nd = 1, size-1
               cmd(1) = 2
               buf(1) = EF
               call mpi_send(cmd, 2, MPI_INTEGER, nd, 1, MPI_COMM_WORLD,
     $              ierr)
               call mpi_send(buf, 1, MPI_DOUBLE_PRECISION, nd, 2,
     $              MPI_COMM_WORLD, ierr)
            end do


            a = -0.25d0
            b = 0.25d0

            whatfun = 1
            resn = integrate(Nsum, a, b)

            whatfun = 2
            resg = integrate(Gsum, a, b)

            s1 = e*H/(hbar*c) / (2*pi)**2 / (au_angstrom * 1d-8)**3

            n = s1 * resn
            g = (hartree_J * 1d7) * kBT * s1 * resg

            write(13,*) EF, ee, resn, n, resg, g

         end do

 100     close(12)
         close(13)

         cmd(1) = 1
         do nd = 1, size-1
            call mpi_send(cmd, 2, MPI_INTEGER, nd, 1, MPI_COMM_WORLD,
     $           ierr)
         end do

      end if

      call mpi_finalize(ierr)

      end program


      subroutine int_fun(n, x, y)
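c     evaluate the integrand at the n points x(1:n) by farming them
c     out to the worker ranks and collecting the replies in y(1:n)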

      implicit none

      integer n
      double precision x(n), y(n)

      include 'mpif.h'

      integer ierr, status(MPI_STATUS_SIZE)

      integer cmd(2)
      logical flag

      integer size, whatfun
      common / commparms / size, whatfun

c     per-worker state: node_status(nd) = 0 idle, 1 reply pending
      integer node_status(size-1), requests(size-1), i, nd, pending


      do nd = 1, size-1
         node_status(nd) = 0
      end do

      pending = 0

      i = 0
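
c        while points remain (i .lt. n) or replies are outstanding:
c        hand the next point to an idle worker (label 500), otherwise
c        sleep briefly and poll the pending receives (label 600)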
      do while(i .lt. n .or. pending .ne. 0)

         if(i .ge. n) goto 600

         do nd = 1, size-1
            if(node_status(nd) .eq. 0) goto 500
         end do
         goto 600

 500     i = i + 1

         print *, 'sending task ', i, ' to ', nd

         cmd(1) = 3
         cmd(2) = whatfun
         call mpi_send(cmd, 2, MPI_INTEGER, nd, 1, MPI_COMM_WORLD, ierr)
         call mpi_send(x(i), 1, MPI_DOUBLE_PRECISION, nd, 3,
     $        MPI_COMM_WORLD, ierr)

         call mpi_irecv(y(i), 1, MPI_DOUBLE_PRECISION, nd, 3,
     $        MPI_COMM_WORLD, requests(nd), ierr)

         node_status(nd) = 1
         pending = pending + 1

         goto 700

 600     call usleep(1000)

         do nd = 1, size-1
            if(node_status(nd) .ne. 0) then
               call mpi_test(requests(nd), flag, status, ierr)
               if(flag) then
                  node_status(nd) = 0
                  pending = pending - 1
               end if
            end if
         end do

 700     continue
      end do

      end subroutine
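
P.S. As an aside, the usleep()/mpi_test polling in int_fun (usleep is
a non-standard extension) could be replaced by a blocking MPI_WAITANY.
A rough sketch of just that step (the name wait_any_reply is made up,
and it assumes the requests() entries of idle workers are kept at
MPI_REQUEST_NULL):

      subroutine wait_any_reply(nw, requests, node_status, pending)

      implicit none

      include 'mpif.h'

      integer nw, requests(nw), node_status(nw), pending
      integer idx, ierr, status(MPI_STATUS_SIZE)

c     block until one outstanding receive completes; MPI ignores
c     entries equal to MPI_REQUEST_NULL and resets the completed
c     entry to MPI_REQUEST_NULL itself
      call mpi_waitany(nw, requests, idx, status, ierr)

      if(idx .ne. MPI_UNDEFINED) then
         node_status(idx) = 0
         pending = pending - 1
      end if

      end subroutine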
