Hi all,
I have run into a problem with the routine MPI_ACCUMULATE when trying to sum up MPI_REAL8 values into a large memory window at a large offset. My program, running on a single processor (x86_64 architecture), crashes with an error message like:

[node14:16236] *** Process received signal ***
[node14:16236] Signal: Segmentation fault (11)
[node14:16236] Signal code: Address not mapped (1)
[node14:16236] Failing at address: 0x2aaa32b16000
[node14:16236] [ 0] /lib64/libpthread.so.0 [0x32e080de00]
[node14:16236] [ 1] /home/stefan/bin/openmpi-1.2.5/lib/libmpi.so.0(ompi_mpi_op_sum_double+0x10) [0x2aaaaaf15530]
[node14:16236] [ 2] /home/stefan/bin/openmpi-1.2.5/lib/openmpi/mca_osc_pt2pt.so(ompi_osc_pt2pt_process_op+0x2d7) [0x2aaab1a47257]
[node14:16236] [ 3] /home/stefan/bin/openmpi-1.2.5/lib/openmpi/mca_osc_pt2pt.so [0x2aaab1a45432]
[node14:16236] [ 4] /home/stefan/bin/openmpi-1.2.5/lib/openmpi/mca_osc_pt2pt.so(ompi_osc_pt2pt_passive_unlock+0x93) [0x2aaab1a48243]
[node14:16236] [ 5] /home/stefan/bin/openmpi-1.2.5/lib/openmpi/mca_osc_pt2pt.so [0x2aaab1a43436]
[node14:16236] [ 6] /home/stefan/bin/openmpi-1.2.5/lib/openmpi/mca_osc_pt2pt.so(ompi_osc_pt2pt_progress+0xff) [0x2aaab1a42e0f]
[node14:16236] [ 7] /home/stefan/bin/openmpi-1.2.5/lib/libopen-pal.so.0(opal_progress+0x4a) [0x2aaaab3dfa0a]
[node14:16236] [ 8] /home/stefan/bin/openmpi-1.2.5/lib/openmpi/mca_osc_pt2pt.so(ompi_osc_pt2pt_module_unlock+0x2a9) [0x2aaab1a48629]
[node14:16236] [ 9] /home/stefan/bin/openmpi-1.2.5/lib/libmpi.so.0(PMPI_Win_unlock+0xe1) [0x2aaaaaf4a291]
[node14:16236] [10] /home/stefan/bin/openmpi-1.2.5/lib/libmpi_f77.so.0(mpi_win_unlock_+0x25) [0x2aaaaacdd8c5]
[node14:16236] [11] /home/stefan/calc/mpi2_test/a.out(MAIN__+0x809) [0x401851]
[node14:16236] [12] /home/stefan/calc/mpi2_test/a.out(main+0xe) [0x401bbe]
[node14:16236] [13] /lib64/libc.so.6(__libc_start_main+0xf4) [0x32dfc1dab4]
[node14:16236] [14] /home/stefan/calc/mpi2_test/a.out [0x400f99]
[node14:16236] *** End of error message ***
mpirun noticed that job rank 0 with PID 16236 on node node14 exited on signal 11 (Segmentation fault).
The relevant part of my FORTRAN source code reads as follows:

      program accumulate_test
      IMPLICIT REAL*8 (A-H,O-Z)
      include 'mpif.h'
      INTEGER(KIND=MPI_OFFSET_KIND) MX_SIZE_M
C     dummy size parameter
      PARAMETER (MX_SIZE_M = 1000000)
      INTEGER MPIerr, MYID, NPROC
      INTEGER ITARGET, MY_X_WIN, JCOUNT, JCOUNT_T
      INTEGER(KIND=MPI_ADDRESS_KIND) MEM_X, MEM_Y
      INTEGER(KIND=MPI_ADDRESS_KIND) IDISPL_WIN
      INTEGER(KIND=MPI_ADDRESS_KIND) PTR1, PTR2
      INTEGER(KIND=MPI_INTEGER_KIND) ISIZE_REAL8
      INTEGER*8 NELEMENT_X, NELEMENT_Y
      POINTER (PTR1, XMAT(MX_SIZE_M))
      POINTER (PTR2, YMAT(MX_SIZE_M))
C
      CALL MPI_INIT( MPIerr )
      CALL MPI_COMM_RANK( MPI_COMM_WORLD, MYID, MPIerr)
      CALL MPI_COMM_SIZE( MPI_COMM_WORLD, NPROC, MPIerr)
C
      NELEMENT_X = 400000000
      NELEMENT_Y = 10000
C
      CALL MPI_TYPE_EXTENT(MPI_REAL8, ISIZE_REAL8, MPIerr)
      MEM_X = NELEMENT_X * ISIZE_REAL8
      MEM_Y = NELEMENT_Y * ISIZE_REAL8
C
C     allocate memory
C
      CALL MPI_ALLOC_MEM( MEM_X, MPI_INFO_NULL, PTR1, MPIerr)
      CALL MPI_ALLOC_MEM( MEM_Y, MPI_INFO_NULL, PTR2, MPIerr)
C
C     fill vectors with 0.0D0 and 1.0D0
C
      CALL DZERO(XMAT,NELEMENT_X)
      CALL DONE(YMAT,NELEMENT_Y)
C
C     open memory window
C
      CALL MPI_WIN_CREATE( XMAT, MEM_X, ISIZE_REAL8,
     &                     MPI_INFO_NULL, MPI_COMM_WORLD,
     &                     MY_X_WIN, MPIerr )
C
C     lock window (MPI_LOCK_SHARED mode)
C     select target ==> if itarget == myid: no 1-sided communication
C
      ITARGET = MYID
      CALL MPI_WIN_LOCK( MPI_LOCK_SHARED, ITARGET, MPI_MODE_NOCHECK,
     &                   MY_X_WIN, MPIerr)
C
C     transfer data to target ITARGET
C
      JCOUNT_T = 10000
      JCOUNT   = JCOUNT_T
C     set displacement in memory window
      IDISPL_WIN = 300000000
C
      CALL MPI_ACCUMULATE( YMAT, JCOUNT, MPI_REAL8, ITARGET, IDISPL_WIN,
     &        JCOUNT_T, MPI_REAL8, MPI_SUM, MY_X_WIN, MPIerr)
C
C     unlock
C
      CALL MPI_WIN_UNLOCK( ITARGET, MY_X_WIN, MPIerr)
      ...

The complete source code (accumulate_test.F) is attached to this e-mail, as well as the config.log of my Open MPI installation. The program fails only(!) for values of IDISPL_WIN > 268 435 455; for all lower offsets it finishes normally. I therefore suspect that the internal multiplication (inside MPI_ACCUMULATE) of IDISPL_WIN by the window scaling factor ISIZE_REAL8 (= 8 bytes) overflows a 32-bit INTEGER, even though IDISPL_WIN itself is declared with KIND=MPI_ADDRESS_KIND (INTEGER*8): 268 435 455 * 8 = 2 147 483 640 still fits into a signed 32-bit integer, whereas 268 435 456 * 8 = 2 147 483 648 = 2**31 no longer does. Might that be the reason? Running the same program with MPI_GET instead of MPI_ACCUMULATE at the same offsets works without any problem.

Thanks in advance for any help,
stefan

--
-------------------------------------------
Dipl. Chem. Stefan Knecht
Institute for Theoretical and Computational Chemistry
Heinrich-Heine University Düsseldorf
Universitätsstraße 1, Building 26.32, Room 03.33
40225 Düsseldorf
phone: +49-(0)211-81-11439
e-mail: ste...@theochem.uni-duesseldorf.de
http://www.theochem.uni-duesseldorf.de/users/stefan
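P.S. To make the suspected overflow explicit, here is a minimal, self-contained check of the arithmetic only. It is just a sketch of my hypothesis, not of the Open MPI internals where such a truncation would actually have to happen, and it assumes the default INTEGER is 32 bits wide (as it is on my x86_64 build):

C     overflow_check.F -- sketch only: shows that the byte offset
C     IDISPL_WIN * 8 exceeds the range of a 32-bit INTEGER exactly
C     when IDISPL_WIN > 268 435 455 (the last working displacement).
      program overflow_check
      INTEGER*8 IDISPL_WIN, IBYTE_OFFSET
      INTEGER*4 ISIZE_REAL8
      ISIZE_REAL8 = 8
C     last displacement that still works for me
      IDISPL_WIN   = 268435455
      IBYTE_OFFSET = IDISPL_WIN * ISIZE_REAL8
      WRITE(*,*) 'byte offset          :', IBYTE_OFFSET
      WRITE(*,*) 'largest 32-bit value :', HUGE(1)
C     first displacement that crashes for me
      IDISPL_WIN   = 268435456
      IBYTE_OFFSET = IDISPL_WIN * ISIZE_REAL8
      WRITE(*,*) 'next byte offset     :', IBYTE_OFFSET
      end program

With a 32-bit default INTEGER this prints 2 147 483 640 and 2 147 483 647 for the first two lines and 2 147 483 648 for the last one, i.e. the failing displacements are exactly those whose byte offset no longer fits into a signed 32-bit integer.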
Attachment: ompi-output.tar.gz (GNU Zip compressed data)
Attachment: accumulate_test.F

      program accumulate_test
      IMPLICIT REAL*8 (A-H,O-Z)
      include 'mpif.h'
      INTEGER(KIND=MPI_OFFSET_KIND) MX_SIZE_M
C     dummy size parameter
      PARAMETER (MX_SIZE_M = 1000000)
      INTEGER MPIerr, MYID, NPROC
      INTEGER ITARGET, MY_X_WIN, JCOUNT, JCOUNT_T
      INTEGER(KIND=MPI_ADDRESS_KIND) MEM_X, MEM_Y
      INTEGER(KIND=MPI_ADDRESS_KIND) IDISPL_WIN
      INTEGER(KIND=MPI_ADDRESS_KIND) PTR1, PTR2
      INTEGER(KIND=MPI_INTEGER_KIND) ISIZE_REAL8
      INTEGER*8 NELEMENT_X, NELEMENT_Y
      POINTER (PTR1, XMAT(MX_SIZE_M))
      POINTER (PTR2, YMAT(MX_SIZE_M))
C
      CALL MPI_INIT( MPIerr )
      CALL MPI_COMM_RANK( MPI_COMM_WORLD, MYID, MPIerr)
      CALL MPI_COMM_SIZE( MPI_COMM_WORLD, NPROC, MPIerr)
C
      NELEMENT_X = 400000000
      NELEMENT_Y = 10000
C
      CALL MPI_TYPE_EXTENT(MPI_REAL8, ISIZE_REAL8, MPIerr)
C
      MEM_X = NELEMENT_X * ISIZE_REAL8
      MEM_Y = NELEMENT_Y * ISIZE_REAL8
C
C     allocate memory
C
      CALL MPI_ALLOC_MEM( MEM_X, MPI_INFO_NULL, PTR1, MPIerr)
      CALL MPI_ALLOC_MEM( MEM_Y, MPI_INFO_NULL, PTR2, MPIerr)
C
C     fill vectors with 0.0D0 and 1.0D0
C
      CALL DZERO(XMAT,NELEMENT_X)
      CALL DONE(YMAT,NELEMENT_Y)
C
C     open memory window
C
      CALL MPI_WIN_CREATE( XMAT, MEM_X, ISIZE_REAL8,
     &                     MPI_INFO_NULL, MPI_COMM_WORLD,
     &                     MY_X_WIN, MPIerr )
C
C     lock window (MPI_LOCK_SHARED mode)
C
C     select target ==> if itarget == myid: no 1-sided communication
C
      ITARGET = MYID
C
      CALL MPI_WIN_LOCK( MPI_LOCK_SHARED, ITARGET, MPI_MODE_NOCHECK,
     &                   MY_X_WIN, MPIerr)
C
C     transfer data to target ITARGET
C
      JCOUNT_T = 10000
      JCOUNT   = JCOUNT_T
C
      IDISPL_WIN = 300000000
C
      CALL MPI_ACCUMULATE( YMAT, JCOUNT, MPI_REAL8, ITARGET, IDISPL_WIN,
     &        JCOUNT_T, MPI_REAL8, MPI_SUM, MY_X_WIN, MPIerr)
C
C     unlock
C
      CALL MPI_WIN_UNLOCK( ITARGET, MY_X_WIN, MPIerr)
C
C     free memory window
C
      CALL MPI_WIN_FREE( MY_X_WIN, MPIerr)
C
C     de-allocate
C
      CALL MPI_FREE_MEM( XMAT, MPIerr)
      CALL MPI_FREE_MEM( YMAT, MPIerr)
C
      CALL MPI_FINALIZE( MPIerr)
      end program
C...................................................................
C...................................................................
C     /* Deck dzero */
      SUBROUTINE DZERO(DX,LENGTH)
      implicit real*8 (A-H,O-Z)
C...................................................................
      INTEGER*8 LENGTH
      DIMENSION DX(LENGTH)
C
      IF (LENGTH.LE.0) RETURN
C
      DO 100 I = 1,LENGTH
  100 DX(I) = 0.0D00
C
      END
C     /* Deck done */
      SUBROUTINE DONE(DX,LENGTH)
      implicit real*8 (A-H,O-Z)
C...................................................................
      INTEGER*8 LENGTH
      DIMENSION DX(LENGTH)
C
      IF (LENGTH.LE.0) RETURN
C
      DO 100 I = 1,LENGTH
  100 DX(I) = 1.0D00
C
      END
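For comparison, the MPI_GET variant mentioned above, which runs fine with the same displacement, differs from the attached program only in the one-sided call itself. Roughly sketched (fetching into YMAT is just an assumption of a convenient buffer; any REAL*8 array of JCOUNT elements would do):

C     drop-in replacement for the MPI_ACCUMULATE call above;
C     note that MPI_GET takes no reduction-operation argument
      CALL MPI_GET( YMAT, JCOUNT, MPI_REAL8, ITARGET, IDISPL_WIN,
     &              JCOUNT_T, MPI_REAL8, MY_X_WIN, MPIerr)

With this call in place of MPI_ACCUMULATE, IDISPL_WIN = 300 000 000 works for me, which is why I suspect the problem is specific to the accumulate path.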