While trying to debug an MPI_Waitall hang on a remote node, I wrote a simple test program.
If we run the simple code below on 2 nodes on a local machine, we send the number 1 and receive number 1 back. If we run the same code on a local node and a remote node, we send number 1 but get 32767 back. Any ideas ??? #include <string.h> #include <stdio.h> #include <stdlib.h> #include "mpi.h" #define PCPU 8 int rank,nproc; main(argc, argv) int argc; char *argv[]; { int i,j,k,i1; MPI_Init(&argc, &argv); MPI_Comm_rank(MPI_COMM_WORLD, &rank); MPI_Comm_size(MPI_COMM_WORLD, &nproc); if (rank==0) { i1 = 1; mpisend(i1); }else{ k=mpirecv(); printf("R%d: recvd %d\n",rank,k); } MPI_Finalize(); } mpisend(ok) int ok; { int i,j,k,m; int tag=201; MPI_Request request[PCPU]; MPI_Status status[PCPU]; for (m=1;m<nproc;m++) { printf("R%d->%d\n",rank,m); MPI_Isend(&ok, 1, MPI_INT, m, tag+m, MPI_COMM_WORLD,&request[m-1]); } } mpirecv() { int i,j,k,m; int hrecv; int tag=201; MPI_Request request[PCPU]; MPI_Status status[PCPU]; MPI_Irecv(&hrecv, 1, MPI_INT, 0, tag+rank, MPI_COMM_WORLD, &request[rank-1]); MPI_Waitall(1,&request[rank-1],&status[rank-1]); return(hrecv); }