The problem with both the f77 and f90 programs is that you forgot to put "ierr" as the last argument to MPI_PACK. This causes a segv because neither of them are correct MPI programs.

But it's always good to hear that we can deliver a smaller corefile in v1.2! :-)


On Mar 16, 2007, at 7:25 PM, Erik Deumens wrote:


I have a small program in F77 that makes a SEGV crash with
a 130MB core file. It is true that the crash is much cleaner
in OpenMPI 1.2; nice improvement! The core file is 500 MB with
OpenMPI 1.1.

I am running on CentOS 4.4 with the latest patches.

mpif77 -g -o bug bug.f
mpirun -np 2 ./bug

I also have a bug.f90 (which I made first) and it crashes
too with the Intel ifort compiler 9.1.039.

--
Dr. Erik Deumens
Interim Director
Quantum Theory Project
New Physics Building 2334                    deum...@qtp.ufl.edu
University of Florida            http://www.qtp.ufl.edu/~deumens
Gainesville, Florida 32611-8435                    (352)392-6980

      program mainf
c     mpif77 -g -o bug bug.f
c     mpirun -np 2 ./bug
      implicit none
      include 'mpif.h'
      character*80 inpfile
      integer l
      integer i
      integer stat
      integer cmdbuf(4)
      integer lcmdbuf
      integer ierr
      integer ntasks
      integer taskid
      integer bufpos
      integer cmd
      integer ldata
      character*(mpi_max_processor_name) hostnm
      integer iuinp
      integer iuout
      integer lnam
      real*8 bcaststart
      iuinp = 5
      iuout = 6
      lcmdbuf = 16
      i = 0
      call mpi_init(ierr)
      call mpi_comm_size (mpi_comm_world, ntasks, ierr)
      call mpi_comm_rank (mpi_comm_world, taskid, ierr)
      hostnm = ' '
      call mpi_get_processor_name (hostnm, lnam, ierr)
      write (iuout,*) 'task',taskid,'of',ntasks,'on ',hostnm(1:lnam)
      if (taskid == 0) then
        inpfile = ' '
        do
          write (iuout,*) 'Enter .inp filename:'
          read (iuinp,*) inpfile
          if (inpfile /= ' ') exit
        end do
        l = len_trim(inpfile)
        write (iuout,*) 'task',taskid,inpfile(1:l)
        bufpos = 0
        cmd = 1099
        ldata = 7
        write (iuout,*) 'task',taskid,cmdbuf,bufpos
        write (iuout,*) 'task',taskid,cmd,lcmdbuf
        call mpi_pack (cmd, 1, MPI_INTEGER,
     *       cmdbuf, lcmdbuf, bufpos, MPI_COMM_WORLD)
        write (iuout,*) 'task',taskid,cmdbuf,bufpos
        write (iuout,*) 'task',taskid,ldata,lcmdbuf
        call mpi_pack (ldata, 1, MPI_INTEGER,
     *       cmdbuf, lcmdbuf, bufpos, MPI_COMM_WORLD)
        bcaststart = mpi_wtime()
        write (iuout,*) 'task',taskid,cmdbuf,bufpos
        write (iuout,*) 'task',taskid,bcaststart,lcmdbuf
        call mpi_pack (bcaststart, 1, MPI_DOUBLE_PRECISION,
     *       cmdbuf, lcmdbuf, bufpos, MPI_COMM_WORLD)
        write (iuout,*) 'task',taskid,cmdbuf,bufpos
      end if
      call mpi_bcast (cmdbuf, lcmdbuf, MPI_PACKED,
     *     0, MPI_COMM_WORLD, ierr)
      call mpi_finalize(ierr)
      stop
      end program mainf

program mainf
  ! ifort -g -I /share/local/lib/ompi/include -o bug bug.f90
  !       -L /share/local/lib/ompi/lib -lmpi_f77 -lmpi
  ! mpirun -np 2 ./bug
  implicit none
  include 'mpif.h'
  character(len=80) :: inpfile
  character(len=1), dimension(80) :: cinpfile
  integer :: l
  integer :: i
  integer :: stat
  integer, dimension(4) :: cmdbuf
  integer :: lcmdbuf
  integer :: ierr
  integer :: ntasks
  integer :: taskid
  integer :: bufpos
  integer :: cmd
  integer :: ldata
  character(len=mpi_max_processor_name) :: hostnm
  integer :: iuinp = 5
  integer :: iuout = 6
  integer :: lnam
  real(8) :: bcaststart
  lcmdbuf = 16
  i = 0
  call mpi_init(ierr)
  call mpi_comm_size (mpi_comm_world, ntasks, ierr)
  call mpi_comm_rank (mpi_comm_world, taskid, ierr)
  hostnm = ' '
  call mpi_get_processor_name (hostnm, lnam, ierr)
  write (iuout,*) 'task',taskid,'of',ntasks,'on ',hostnm(1:lnam)
  if (taskid == 0) then
     inpfile = ' '
     do
        write (iuout,*) 'Enter .inp filename:'
        read (iuinp,*) inpfile
        if (inpfile /= ' ') exit
     end do
     l = len_trim(inpfile)
     do i=1,l
        cinpfile(i) = inpfile(i:i)
     end do
     cinpfile(l+1) = char(0)
     write (iuout,*) 'task',taskid,inpfile(1:l)
     bufpos = 0
     cmd = 1099
     ldata = 7
     write (iuout,*) 'task',taskid,cmdbuf,bufpos
     ! The next two lines exhibit the bug
     ! Uncomment the first and the program works
     ! Uncomment the second and the program dies in mpi_pack
     ! and produces a 137 MB core file.
     write (iuout,*) 'task',taskid,cmd,lcmdbuf
!     write (iuout,*) 'task',taskid,cmd
     call mpi_pack (cmd, 1, MPI_INTEGER, &
          cmdbuf, lcmdbuf, bufpos, MPI_COMM_WORLD)
     write (iuout,*) 'task',taskid,cmdbuf,bufpos
     write (iuout,*) 'task',taskid,ldata,lcmdbuf
     call mpi_pack (ldata, 1, MPI_INTEGER, &
          cmdbuf, lcmdbuf, bufpos, MPI_COMM_WORLD)
     bcaststart = mpi_wtime()
     write (iuout,*) 'task',taskid,cmdbuf,bufpos
     write (iuout,*) 'task',taskid,bcaststart,lcmdbuf
     call mpi_pack (bcaststart, 1, MPI_DOUBLE_PRECISION, &
          cmdbuf, lcmdbuf, bufpos, MPI_COMM_WORLD)
     write (iuout,*) 'task',taskid,cmdbuf,bufpos
  end if
  call mpi_bcast (cmdbuf, lcmdbuf, MPI_PACKED, &
       0, MPI_COMM_WORLD, ierr)
  call mpi_finalize(ierr)
  stop
end program mainf

_______________________________________________
users mailing list
us...@open-mpi.org
http://www.open-mpi.org/mailman/listinfo.cgi/users


--
Jeff Squyres
Cisco Systems

Reply via email to