Hello,

I have run into an issue that appears to be related to sending messages to
multiple processes on a single remote host before those remote processes have
sent any messages back to the origin. I have boiled the issue down to the
following:


*Test Environment of 3 Identical Hosts:*

·         Intel i7-2600K, 12 GB RAM, Intel GB Ethernet, D-Link switch

·         Windows 2008 R2 x64 with all current updates

·         Open MPI 1.5.4 (all three hosts report the same ompi_info and were
          installed from the same binary)
          http://www.open-mpi.org/software/ompi/v1.5/downloads/OpenMPI_v1.5.4-1_win64.exe

C:\GDX>ompi_info -v ompi full --parsable
package:Open MPI hpcfan@VISCLUSTER26 Distribution
ompi:version:full:1.5.4
ompi:version:svn:r25060
ompi:version:release_date:Aug 18, 2011
orte:version:full:1.5.4
orte:version:svn:r25060
orte:version:release_date:Aug 18, 2011
opal:version:full:1.5.4
opal:version:svn:r25060
opal:version:release_date:Aug 18, 2011
ident:1.5.4



*Test Program:*

#include <stdio.h>
#define OMPI_IMPORTS
#include "C:\Program Files (x86)\OpenMPI_v1.5.4-x64\include\mpi.h"

int main(int argc, char *argv[])
{
   int rank, size, i, msg;

   MPI_Init(&argc, &argv);
   MPI_Comm_rank(MPI_COMM_WORLD, &rank);
   MPI_Comm_size(MPI_COMM_WORLD, &size);
   printf("Process %i of %i initialized\n", rank, size);

   if (0 == rank) {
      /* Rank 0 first sends one message to every other rank... */
      for (i = 1; i < size; i++) {
         printf("Process %i sending %i to %i\n", rank, i, i);
         MPI_Send(&rank, 1, MPI_INT, i, 0, MPI_COMM_WORLD);
      }
      /* ...and then collects one reply from each of them. */
      for (i = 1; i < size; i++) {
         MPI_Recv(&msg, 1, MPI_INT, MPI_ANY_SOURCE, MPI_ANY_TAG,
                  MPI_COMM_WORLD, MPI_STATUS_IGNORE);
         printf("Process %i received %i\n", rank, msg);
      }
   }
   else {
      /* Every other rank receives from rank 0, then sends its rank back. */
      MPI_Recv(&msg, 1, MPI_INT, MPI_ANY_SOURCE, MPI_ANY_TAG,
               MPI_COMM_WORLD, MPI_STATUS_IGNORE);
      printf("Process %i received %i\n", rank, msg);
      MPI_Send(&rank, 1, MPI_INT, 0, 0, MPI_COMM_WORLD);
      printf("Process %i sent %i to %i\n", rank, rank, 0);
   }

   printf("Process %i exiting\n", rank);
   MPI_Finalize();
   return 0;
}



*Test Cases:*

·         X procs on the originating node: works

·         X procs on the originating node and one proc on one or more remote
          nodes: works

·         X procs on the originating node and more than one proc on any
          remote node: fails
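
All of these cases were launched with "-hostfile mpihosts". The exact contents
of mpihosts are not reproduced here, and the slot counts were changed between
some of the runs (which would explain the two different np 4 job maps below),
so the following is only an illustration of the file's format rather than the
file used for any particular run:

# mpihosts -- illustrative contents only
Yap     slots=2
chuuk   slots=2
kosrae  slots=1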

A test with two procs on the origin and one proc on each of two remote nodes
runs to completion; however, the same test with the two remote procs on the
same machine hangs on the second remote send. Here are some test runs (a ^C
indicates a hang):

C:\GDX>mpirun -v -display-map -hostfile mpihosts -np 2 c:\gdx\distmsg.exe

 ========================   JOB MAP   ========================

 Data for node: Yap     Num procs: 2
        Process OMPI jobid: [42094,1] Process rank: 0
        Process OMPI jobid: [42094,1] Process rank: 1

 =============================================================

Process 0 of 2 initialized
Process 1 of 2 initialized
Process 0 sending 1 to 1
Process 1 received 0
Process 1 sent 1 to 0
Process 1 exiting
Process 0 received 1
Process 0 exiting



C:\GDX>mpirun -v -display-map -hostfile mpihosts -np 3 c:\gdx\distmsg.exe

 ========================   JOB MAP   ========================

 Data for node: Yap     Num procs: 2
        Process OMPI jobid: [42014,1] Process rank: 0
        Process OMPI jobid: [42014,1] Process rank: 1

 Data for node: chuuk   Num procs: 1
        Process OMPI jobid: [42014,1] Process rank: 2

 =============================================================

connecting to chuuk
username:administrator
password:********
Save Credential?(Y/N) n
Process 0 of 3 initialized
Process 1 of 3 initialized
Process 0 sending 1 to 1
Process 0 sending 2 to 2
Process 1 received 0
Process 1 sent 1 to 0
Process 1 exiting
Process 0 received 1
Process 0 received 2
Process 0 exiting



C:\GDX>mpirun -v -display-map -hostfile mpihosts -np 4 c:\gdx\distmsg.exe

 ========================   JOB MAP   ========================

 Data for node: Yap     Num procs: 2
        Process OMPI jobid: [43894,1] Process rank: 0
        Process OMPI jobid: [43894,1] Process rank: 1

 Data for node: chuuk   Num procs: 2
        Process OMPI jobid: [43894,1] Process rank: 2
        Process OMPI jobid: [43894,1] Process rank: 3

 =============================================================

connecting to chuuk
username:administrator
password:********
Save Credential?(Y/N) n
Process 0 of 4 initialized
Process 1 of 4 initialized
Process 0 sending 1 to 1
Process 0 sending 2 to 2
Process 1 received 0
Process 1 sent 1 to 0
Process 1 exiting
Process 0 sending 3 to 3
^C

C:\GDX>mpirun -v -display-map -hostfile mpihosts -np 4 c:\gdx\distmsg.exe

 ========================   JOB MAP   ========================

 Data for node: Yap     Num procs: 2
        Process OMPI jobid: [43310,1] Process rank: 0
        Process OMPI jobid: [43310,1] Process rank: 1

 Data for node: chuuk   Num procs: 1
        Process OMPI jobid: [43310,1] Process rank: 2

 Data for node: kosrae  Num procs: 1
        Process OMPI jobid: [43310,1] Process rank: 3

 =============================================================

connecting to chuuk
username:administrator
password:********
Save Credential?(Y/N) n
connecting to kosrae
username:administrator
password:********
Save Credential?(Y/N) n
Process 0 of 4 initialized
Process 1 of 4 initialized
Process 0 sending 1 to 1
Process 0 sending 2 to 2
Process 1 received 0
Process 1 sent 1 to 0
Process 1 exiting
Process 0 sending 3 to 3
Process 0 received 1
Process 0 received 2
Process 0 received 3
Process 0 exiting



C:\GDX>mpirun -v -display-map -hostfile mpihosts -np 5 c:\gdx\distmsg.exe

 ========================   JOB MAP   ========================

 Data for node: Yap     Num procs: 2
        Process OMPI jobid: [43590,1] Process rank: 0
        Process OMPI jobid: [43590,1] Process rank: 1

 Data for node: chuuk   Num procs: 2
        Process OMPI jobid: [43590,1] Process rank: 2
        Process OMPI jobid: [43590,1] Process rank: 3

 Data for node: kosrae  Num procs: 1
        Process OMPI jobid: [43590,1] Process rank: 4

 =============================================================

connecting to chuuk
username:administrator
password:********
Save Credential?(Y/N) n
connecting to kosrae
username:administrator
password:********
Save Credential?(Y/N) n
Process 0 of 5 initialized
Process 1 of 5 initialized
Process 0 sending 1 to 1
Process 0 sending 2 to 2
Process 1 received 0
Process 1 sent 1 to 0
Process 1 exiting
Process 0 sending 3 to 3
^C



The remote process that is the target of the hung send appears to generate
significant ongoing CPU activity and "Other" I/O.


*Workaround:*

Curiously, swapping the send/receive order works around the problem: rank 0
posts all of its receives before any sends, and the other ranks send before
they receive.

#include <stdio.h>
#define OMPI_IMPORTS
#include "C:\Program Files (x86)\OpenMPI_v1.5.4-x64\include\mpi.h"

int main(int argc, char *argv[])
{
   int rank, size, i, msg;

   MPI_Init(&argc, &argv);
   MPI_Comm_rank(MPI_COMM_WORLD, &rank);
   MPI_Comm_size(MPI_COMM_WORLD, &size);
   printf("Process %i of %i initialized\n", rank, size);

   if (0 == rank) {
      /* Rank 0 now collects one message from every other rank first... */
      for (i = 1; i < size; i++) {
         MPI_Recv(&msg, 1, MPI_INT, MPI_ANY_SOURCE, MPI_ANY_TAG,
                  MPI_COMM_WORLD, MPI_STATUS_IGNORE);
         printf("Process %i received %i\n", rank, msg);
      }
      /* ...and only then sends one message back to each of them. */
      for (i = 1; i < size; i++) {
         printf("Process %i sending %i to %i\n", rank, i, i);
         MPI_Send(&rank, 1, MPI_INT, i, 0, MPI_COMM_WORLD);
      }
   }
   else {
      /* Every other rank sends its rank to rank 0, then waits for the reply. */
      MPI_Send(&rank, 1, MPI_INT, 0, 0, MPI_COMM_WORLD);
      printf("Process %i sent %i to %i\n", rank, rank, 0);
      MPI_Recv(&msg, 1, MPI_INT, MPI_ANY_SOURCE, MPI_ANY_TAG,
               MPI_COMM_WORLD, MPI_STATUS_IGNORE);
      printf("Process %i received %i\n", rank, msg);
   }

   printf("Process %i exiting\n", rank);
   MPI_Finalize();
   return 0;
}



C:\GDX>mpirun -v -display-map -hostfile mpihosts -np 5 c:\gdx\distmsgb.exe

 ========================   JOB MAP   ========================

 Data for node: Yap     Num procs: 2
        Process OMPI jobid: [43126,1] Process rank: 0
        Process OMPI jobid: [43126,1] Process rank: 1

 Data for node: chuuk   Num procs: 2
        Process OMPI jobid: [43126,1] Process rank: 2
        Process OMPI jobid: [43126,1] Process rank: 3

 Data for node: kosrae  Num procs: 1
        Process OMPI jobid: [43126,1] Process rank: 4

 =============================================================

connecting to chuuk
username:administrator
password:********
Save Credential?(Y/N) n
connecting to kosrae
username:administrator
password:********
Save Credential?(Y/N) n
Process 0 of 5 initialized
Process 1 of 5 initialized
Process 1 sent 1 to 0
Process 0 received 4
Process 0 received 1
Process 0 received 2
Process 0 received 3
Process 0 sending 1 to 1
Process 0 sending 2 to 2
Process 0 sending 3 to 3
Process 0 sending 4 to 4
Process 0 exiting
Process 1 received 0
Process 1 exiting
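
For completeness: I have not tested the following against the failing
configuration, so whether it avoids the hang is only an assumption on my part,
but I would expect a variant in which rank 0 pre-posts non-blocking receives
before doing its sends to sidestep the same ordering dependency. A minimal
sketch of that rank-0 branch, dropped into the original test program above
(assumes size <= 64 for the fixed-size arrays):

   if (0 == rank) {
      int replies[64];
      MPI_Request reqs[64];
      /* Pre-post one receive per peer so the replies can be matched as
         soon as they arrive. */
      for (i = 1; i < size; i++) {
         MPI_Irecv(&replies[i], 1, MPI_INT, MPI_ANY_SOURCE, MPI_ANY_TAG,
                   MPI_COMM_WORLD, &reqs[i]);
      }
      for (i = 1; i < size; i++) {
         printf("Process %i sending %i to %i\n", rank, i, i);
         MPI_Send(&rank, 1, MPI_INT, i, 0, MPI_COMM_WORLD);
      }
      /* Wait for all pre-posted receives to complete, then report them. */
      MPI_Waitall(size - 1, &reqs[1], MPI_STATUSES_IGNORE);
      for (i = 1; i < size; i++) {
         printf("Process %i received %i\n", rank, replies[i]);
      }
   }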
