In trying to debug an MPI_Waitall hang on a remote
node, I created a simple code to test.

If we run the simple code below on 2 nodes on a local
machine, we send the number 1 and receive number 1 back.

If we run the same code on a local node and a remote node,
we send number 1 but get 32767 back. Any ideas ???


#include <string.h>
#include <stdio.h>
#include <stdlib.h>
#include "mpi.h"

#define PCPU 8
int rank,nproc;

main(argc, argv)
int argc;
char *argv[];
{
  int i,j,k,i1;

  MPI_Init(&argc, &argv);
  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
  MPI_Comm_size(MPI_COMM_WORLD, &nproc);

  if (rank==0) {
   i1 = 1;
   mpisend(i1);
  }else{
   k=mpirecv();
   printf("R%d: recvd %d\n",rank,k);
  }
  MPI_Finalize();
}

mpisend(ok)

int ok;

{
  int i,j,k,m;
  int tag=201;
  MPI_Request request[PCPU];
  MPI_Status status[PCPU];

  for (m=1;m<nproc;m++) {
   printf("R%d->%d\n",rank,m);
   MPI_Isend(&ok, 1, MPI_INT, m, tag+m, MPI_COMM_WORLD,&request[m-1]);
  }

}

mpirecv()

{
  int i,j,k,m;
  int hrecv;
  int tag=201;
  MPI_Request request[PCPU];
  MPI_Status status[PCPU];

  MPI_Irecv(&hrecv, 1, MPI_INT, 0, tag+rank, MPI_COMM_WORLD, &request[rank-1]);
  MPI_Waitall(1,&request[rank-1],&status[rank-1]);
  return(hrecv);
}


Reply via email to