Hi,
I am trying to write a simple code which does the following -
A master process running on 'n' processors spawns 4 processes using the
MPI_Comm_spawn_multiple command. After spawning, the intercommunicator
between the master and the spawned processes is merged using
MPI_Intercomm_merge to create a new common intracommunicator for the
expanded set of processes. These steps are repeated over a loop - execute
the master processes, spawn new processes, and merge the communicators to
get a global communicator.
In this example the new processes are always spawned on the same 4 nodes.
After the loop is completed, when I call MPI_Finalize, I get a segmentation
fault. I do not get a segmentation fault if I run the loop only once,
i.e. call MPI_Intercomm_merge only once.
Is there something wrong with my program or is it a known issue with
MPI_Intercomm_merge when called multiple times?
I have pasted the sample code below. It has 3 files - master.c, spawn.c,
hello.c. I will be glad to clarify if anything looks confusing. Any help
will be appreciated.
/****************************************************************************************/
Master function. This function calls MPI_Comm_spawn_multiple the first time.
(master.c)
/*****************************************************************************************/
/* Global variables shared between master.c and spawn.c. */

/* Communicator spanning all current processes. main() starts it at
   MPI_COMM_WORLD; mpicomm_spawn() overwrites it with the result of
   MPI_Intercomm_merge. */
MPI_Comm grid_comm;
/* Iteration counter of the spawn loop in main(); its value is broadcast
   from rank 0 of grid_comm on every iteration. */
int loop=0;
/* Distinguishes an old process from a freshly spawned one. Starts at -1;
   main() sets it to 0, and mpicomm_spawn() takes the spawning branch when
   it is 0 and the merge-with-parent branch otherwise. */
int newprocess = -1;
/* Defined in spawn.c; declared here so the calls below have a prototype. */
int mpicomm_spawn(void);

/*
 * Master entry point (master.c).
 *
 * Starts with grid_comm = MPI_COMM_WORLD and runs two iterations. Each
 * iteration calls mpicomm_spawn(), which replaces grid_comm with the merged
 * communicator, and then broadcasts the current loop value from rank 0 of
 * grid_comm so the other members of grid_comm receive it.
 *
 * Returns 0 after MPI_Finalize.
 */
int main (int argc, char ** argv)
{
    int size = 0;   /* queried each iteration; not otherwise used here */
    int rank = 0;

    MPI_Init(&argc, &argv);
    grid_comm = MPI_COMM_WORLD;
    newprocess = 0;   /* makes mpicomm_spawn() take its spawning branch */

    for ( ; loop < 2; loop++) {
        fprintf(stdout, "\n\nLOOP in main =%d\n", loop);
        fflush(stdout);
        mpicomm_spawn();
        /* Broadcasting the loop value to the spawned processes so that the
           new processes join the next iteration with the correct loop
           value. */
        MPI_Bcast(&loop, 1, MPI_INT, 0, grid_comm);
        MPI_Comm_size(grid_comm, &size);
        MPI_Comm_rank(grid_comm, &rank);
    }

    fprintf(stdout, "Exiting...main..rank=%d\n", rank);
    fflush(stdout);
    MPI_Barrier(grid_comm);
    /* Predefined communicators must not be freed; only release grid_comm
       when it has been replaced by a merged communicator. */
    if (grid_comm != MPI_COMM_WORLD) {
        MPI_Comm_free(&grid_comm);
    }
    MPI_Finalize();
    return 0;
}
/**********************/
Spawning function (spawn.c)
/********************/
extern MPI_Comm grid_comm;
int mpicomm_spawn()
{
MPI_Comm parent, intercomm;
int rank, nprocs=4, size,nspawned;
MPI_Info info[4];
char *host = (char *) "host"; /*String to be stored as a key in MPI_Info*/
char *commands[4]; /*Stores the array of executable names to
be spawned*/
int maxprocs[4]; /*maximum number of processes that can be
spawned on each process.*/
char ***args=NULL; /*array of arguments for each executable*/
int i; /*loop counter*/
char nodenames[4][50];
MPI_Comm_get_parent(&parent);
if(newprocess==0)
{
/*Master processes*/
strcpy(nodenames[0],"n1009");
strcpy(nodenames[1],"n1010");
strcpy(nodenames[2],"n1011");
strcpy(nodenames[3],"n1012");
for(i=0;i<4;i++) {
commands[i]=(char*)malloc(sizeof(char)*50);
strcpy(commands[i],"./hello");
maxprocs[i]=1;
MPI_Info_create (&info[i]);
MPI_Info_set (info[i], host, nodenames[i]);
}
nspawned = MPI_Comm_spawn_multiple(nprocs, commands, args,
maxprocs, info, 0, grid_comm, &intercomm, MPI_ERRCODES_IGNORE);
MPI_Intercomm_merge(intercomm, 0, &grid_comm);
}
else
{
/* This part of the code is executed by the newly spawned process*/
newprocess=0;
MPI_Intercomm_merge(parent, 1, &grid_comm);
}
}
/*******************************************************/
Function that needs to be called while spawning (hello.c)
/********************************************************/
/* Global variables shared between hello.c and spawn.c. */

/* Communicator spanning all current processes; assigned by mpicomm_spawn()
   from the result of MPI_Intercomm_merge. */
MPI_Comm grid_comm;
/* Iteration counter of the loop in main(); overwritten by the value
   broadcast from rank 0 of grid_comm, then incremented. */
int loop=0;
/* Distinguishes an old process from a freshly spawned one. Starts at -1;
   main() sets it to 1 before the first mpicomm_spawn() call, and
   mpicomm_spawn() resets it to 0 after merging with the parent. */
int newprocess = -1;
/* Defined in spawn.c; declared here so the calls below have a prototype. */
int mpicomm_spawn(void);

/*
 * Worker entry point (hello.c), started via MPI_Comm_spawn_multiple.
 *
 * On its first pass the process is marked as newly spawned, so
 * mpicomm_spawn() merges it with its parent's group. On later passes
 * newprocess is 0 and it takes the spawning branch of mpicomm_spawn().
 * After each call it receives the loop value broadcast by rank 0 of
 * grid_comm and advances it, leaving the loop once the value reaches 2.
 *
 * Returns 0 after MPI_Finalize.
 */
int main (int argc, char **argv)
{
    int myrank = 0;
    int size = 0;   /* queried each iteration; not otherwise used here */

    MPI_Init(&argc, &argv);

    while (loop < 2) {
        /* First pass only: newprocess is still -1, so flag this process as
           spawned. mpicomm_spawn() resets the flag to 0 after the merge. */
        if (newprocess != 0) {
            newprocess = 1;
        }
        mpicomm_spawn();

        MPI_Comm_rank(grid_comm, &myrank);
        MPI_Bcast(&loop, 1, MPI_INT, 0, grid_comm);
        fprintf(stdout, "\n\n<<<<<<<<<<LOOP in hello = %d..Myrank=%d\n",
                loop, myrank);
        MPI_Comm_size(grid_comm, &size);
        loop++;
    }

    MPI_Comm_rank(grid_comm, &myrank);
    fprintf(stdout, "Exiting worker..rank=%d\n", myrank);
    fflush(stdout);
    MPI_Barrier(grid_comm);
    MPI_Comm_free(&grid_comm);
    MPI_Finalize();
    return 0;
}
Thanks.
Regards,
Rajesh