Hi, one of our cluster users reported a problem with Open MPI. He created a short sample (just a few lines) which starts and then crashes after a short time. We only see "Fatal error in PMPI_Gather: Other MPI error" - no further details. He is using an Intel Fortran compiler with a self-compiled Open MPI (just tested 1.8.1).
I know next to nothing about MPI (Open MPI), so I'm asking in this forum. Does anybody have an idea? Thanks, Peer
-----------------------makefile----------
OPTIONS=-assume byterecl -fpp -allow nofpp_comments -free
DEBUG=-g -d-lines -check -debug -debug-parameters -fpe0 -traceback
all:
	rm -f JeDi globe_mod.mod JeDi.out jedi_restart $(SOURCE) ; mpif90 $(OPTIONS) $(DEBUG) -o JeDi globe.f90
--------------------------
----------------globe.f90---------------------
program globe
use mpi
implicit none
integer :: mpinfo = 0
integer :: myworld = 0
integer :: mypid = 0
integer :: npro = 1
! * The comments give some conditions required to reproduce the problem.
! * If the program runs at two hosts, the error message is shown two times
integer, parameter :: vv_g_d1 = 2432
integer, parameter :: vv_p_d1 = vv_g_d1 / 16 ! requires 16 CPUs
integer, parameter :: out_d1 = 2418 ! requires >=2416 (vv_g_d1 - 16)
integer, parameter :: d2 = 5001 ! requires >=4282 @ ii=30 / >=6682 @ ii=20 (depends on number of loops, but this limit can change for unknown reason)
integer :: ii, jj
real :: vv_p(vv_p_d1,d2)
real,allocatable :: vv_g(:,:)
! * requires the definition of the variable for write to be defined below vv_g(:,:)
real :: out(out_d1,d2)
vv_p(:,:) = 0.0
out(:,:) = 0.0
call mpi_init(mpinfo)
myworld = MPI_COMM_WORLD
call mpi_comm_size(myworld, npro, mpinfo)
! * The problem requires 16 CPUs
if (npro .ne. 16) then; write(*,*) "Works only with 16 CPUs"; stop; endif
call mpi_comm_rank(myworld, mypid, mpinfo)
if (mypid == 0) then
  open(11, FILE='jedi_restart', STATUS='replace', FORM='unformatted')
endif
write(6,*) "test1",mypid ; flush(6)
do ii = 1, 25 ! number of loops depends on field size
  allocate(vv_g(vv_g_d1,d2))
  do jj = 1, d2
    call mpi_gather(vv_p(1,jj), vv_p_d1, MPI_REAL, vv_g(1,jj), vv_p_d1, MPI_REAL, 0, myworld, mpinfo)
  enddo
  if (mypid == 0) then; write(11) out; flush(11); endif
  deallocate(vv_g)
enddo
write(6,*) "test2",mypid ; flush(6)
if (mypid == 0) close(11)
call mpi_barrier(myworld, mpinfo)
call mpi_finalize(mpinfo)
end
---------------------------------------------end globe.f90----------------------
-- Mit freundlichem Gruß Peer-Joachim Koch _________________________________________________________ Max-Planck-Institut für Biogeochemie Dr. Peer-Joachim Koch Hans-Knöll Str.10 Telefon: ++49 3641 57-6705 D-07745 Jena Telefax: ++49 3641 57-7705
<<attachment: pkoch.vcf>>
smime.p7s
Description: S/MIME Cryptographic Signature