Hi, I reinstalled Open MPI from an SVN snapshot to fix the error where MPI_File_open returned 16, but the same code
#include <stdio.h>
#include "mpi.h"
int main(int argc, char *argv[]) {
    MPI_File fh;
    MPI_Init( &argc, &argv );
    int ret = MPI_File_open( MPI_COMM_WORLD, "temp", MPI_MODE_RDWR | MPI_MODE_CREATE, MPI_INFO_NULL, &fh);
    if (ret != MPI_SUCCESS) {
        fprintf(stderr, "open file failed, code=%d\n", ret);
    } else {
        MPI_File_close(&fh);
    }
    MPI_Finalize();
    return 0;
}
now hangs in what looks like an endless loop on both servers. Attaching gdb shows:
(gdb) backtra
#0 0x00002b09384df253 in epoll_wait () from /lib/libc.so.6
#1 0x00002b0937716abf in epoll_dispatch () from /home/changsheng/local/lib/libopen-pal.so.0
#2 0x00002b09377159fb in opal_event_base_loop () from /home/changsheng/local/lib/libopen-pal.so.0
#3 0x00002b0937700f21 in opal_progress () from /home/changsheng/local/lib/libopen-pal.so.0
#4 0x00002b093721d015 in ompi_request_default_wait_all () from /home/changsheng/local/lib/libmpi.so.0
#5 0x00002b093c8d5ac4 in ompi_coll_tuned_allreduce_intra_recursivedoubling () from /home/changsheng/local//lib/openmpi/mca_coll_tuned.so
#6 0x00002b0937210aa9 in ompi_comm_nextcid () from /home/changsheng/local/lib/libmpi.so.0
#7 0x00002b093720ebde in ompi_comm_dup () from /home/changsheng/local/lib/libmpi.so.0
#8 0x00002b093722cd35 in PMPI_Comm_dup () from /home/changsheng/local/lib/libmpi.so.0
#9 0x00002b093cf29db7 in mca_io_romio_dist_MPI_File_open () from /home/changsheng/local//lib/openmpi/mca_io_romio.so
#10 0x00002b09372501e7 in mca_io_base_file_select () from /home/changsheng/local/lib/libmpi.so.0
#11 0x00002b093721717e in ompi_file_open () from /home/changsheng/local/lib/libmpi.so.0
#12 0x00002b0937247405 in PMPI_File_open () from /home/changsheng/local/lib/libmpi.so.0
#13 0x0000000000400a78 in main ()
How can I fix this problem, or is there any reference I can consult?
Thanks.
Changsheng Jiang