One thing immediately leaps out at me - you are using a very old version of Open MPI. I suspect Tim is testing on a much newer version, most likely the 1.2 version that is about to be released in the next day or two.
If it's at all possible, I would urge you to upgrade to 1.2 - if you would rather not wait for the official release, the web site's latest beta is virtually identical. I believe you will find the code much improved and worth the change. If you truly want to stick with the 1.1 family, then I would suggest you at least update to the latest release there (we are currently at 1.1.4, and 1.1.5 - which is planned to be the last in that series - is also coming out in the next day or two). Hope that helps. Ralph On 3/1/07 4:44 AM, "rozzen.vinc...@fr.thalesgroup.com" <rozzen.vinc...@fr.thalesgroup.com> wrote: > > Thanks for your help. > Here is attached the output of ompi_info in the file ompi_info.txt. > > -----Message d'origine----- > De : users-boun...@open-mpi.org [mailto:users-boun...@open-mpi.org]De la > part de Tim Prins > Envoyé : jeudi 1 mars 2007 05:45 > À : Open MPI Users > Objet : Re: [OMPI users] MPI_Comm_Spawn > > > I have tried to reproduce this but cannot. I have been able to run your test > program to over 100 spawns. So I can track this further, please send the > output of ompi_info. > > Thanks, > > Tim > > On Tuesday 27 February 2007 10:15 am, rozzen.vinc...@fr.thalesgroup.com wrote: >> Do you know if there is a limit to the number of MPI_Comm_spawn we can use >> in order to launch a program? I want to start and stop a program several >> times (with the function MPI_Comm_spawn) but every time after 31 >> MPI_Comm_spawn, I get a "segmentation fault". Could you give me your point >> of view to solve this problem? 
>> Thanks >> >> /*file .c : spawned the file Exe*/ >> #include <stdio.h> >> #include <malloc.h> >> #include <unistd.h> >> #include "mpi.h" >> #include <pthread.h> >> #include <signal.h> >> #include <sys/time.h> >> #include <errno.h> >> #define EXE_TEST "/home/workspace/test_spaw1/src/Exe" >> >> >> >> int main( int argc, char **argv ) { >> >> long *lpBufferMpi; >> MPI_Comm lIntercom; >> int lErrcode; >> MPI_Comm lCommunicateur; >> int lRangMain,lRangExe,lMessageEnvoi,lIter,NiveauThreadVoulu, >> NiveauThreadObtenu,lTailleBuffer; int *lpMessageEnvoi=&lMessageEnvoi; >> MPI_Status lStatus; /*status de reception*/ >> >> lIter=0; >> >> >> /* MPI environnement */ >> >> printf("main*******************************\n"); >> printf("main : Lancement MPI*\n"); >> >> NiveauThreadVoulu = MPI_THREAD_MULTIPLE; >> MPI_Init_thread( &argc, &argv, NiveauThreadVoulu, &NiveauThreadObtenu >> ); lpBufferMpi = calloc( 10000, sizeof(long)); >> MPI_Buffer_attach( (void*)lpBufferMpi, 10000 * sizeof(long) ); >> >> while (lIter<1000){ >> lIter ++; >> lIntercom=(MPI_Comm)-1 ; >> >> MPI_Comm_spawn( EXE_TEST, NULL, 1, MPI_INFO_NULL, >> 0, MPI_COMM_WORLD, &lIntercom, &lErrcode ); >> printf( "%i main***MPI_Comm_spawn return : %d\n",lIter, lErrcode ); >> >> if(lIntercom == (MPI_Comm)-1 ){ >> printf("%i Intercom null\n",lIter); >> return 0; >> } >> MPI_Intercomm_merge(lIntercom, 0,&lCommunicateur ); >> MPI_Comm_rank( lCommunicateur, &lRangMain); >> lRangExe=1-lRangMain; >> >> printf("%i main***Rang main : %i Rang exe : %i >> \n",lIter,(int)lRangMain,(int)lRangExe); sleep(2); >> >> } >> >> >> /* Arret de l'environnement MPI */ >> lTailleBuffer=10000* sizeof(long); >> MPI_Buffer_detach( (void*)lpBufferMpi, &lTailleBuffer ); >> MPI_Comm_free( &lCommunicateur ); >> MPI_Finalize( ); >> free( lpBufferMpi ); >> >> printf( "Main = End .\n" ); >> return 0; >> >> } >> /************************************************************************** >> **********************/ Exe: >> #include <string.h> >> #include 
<stdlib.h> >> #include <stdio.h> >> #include <malloc.h> >> #include <unistd.h> /* pour sleep() */ >> #include <pthread.h> >> #include <semaphore.h> >> #include "mpi.h" >> >> int main( int argc, char **argv ) { >> /*1)pour communiaction MPI*/ >> MPI_Comm lCommunicateur; /*communicateur du process*/ >> MPI_Comm CommParent; /*Communiacteur parent à récupérer*/ >> int lRank; /*rang du communicateur du process*/ >> int lRangMain; /*rang du séquenceur si lancé en mode >> normal*/ int lTailleCommunicateur; /*taille du communicateur;*/ >> long *lpBufferMpi; /*buffer pour message*/ >> int lBufferSize; /*taille du buffer*/ >> >> /*2) pour les thread*/ >> int NiveauThreadVoulu, NiveauThreadObtenu; >> >> >> lCommunicateur = (MPI_Comm)-1; >> NiveauThreadVoulu = MPI_THREAD_MULTIPLE; >> int erreur = MPI_Init_thread( &argc, &argv, NiveauThreadVoulu, >> &NiveauThreadObtenu ); >> >> if (erreur!=0){ >> printf("erreur\n"); >> free( lpBufferMpi ); >> return -1; >> } >> >> /*2) Attachement à un buffer pour le message*/ >> lBufferSize=10000 * sizeof(long); >> lpBufferMpi = calloc( 10000, sizeof(long)); >> erreur = MPI_Buffer_attach( (void*)lpBufferMpi, lBufferSize ); >> >> if (erreur!=0){ >> printf("erreur\n"); >> free( lpBufferMpi ); >> return -1; >> } >> >> printf( "Exe : Lance \n" ); >> MPI_Comm_get_parent(&CommParent); >> MPI_Intercomm_merge( CommParent, 1, &lCommunicateur ); >> MPI_Comm_rank( lCommunicateur, &lRank ); >> MPI_Comm_size( lCommunicateur, &lTailleCommunicateur ); >> lRangMain =1-lRank; >> printf( "Exe: lRankExe = %d lRankMain = %d\n", lRank , lRangMain, >> lTailleCommunicateur); >> >> sleep(1); >> MPI_Buffer_detach( (void*)lpBufferMpi, &lBufferSize ); >> MPI_Comm_free( &lCommunicateur ); >> MPI_Finalize( ); >> free( lpBufferMpi ); >> printf( "Exe: Fin.\n\n\n" ); >> } >> >> >> /************************************************************************** >> **********************/ result : >> main******************************* >> main : Lancement MPI* >> 1 
main***MPI_Comm_spawn return : 0 >> Exe : Lance >> 1 main***Rang main : 0 Rang exe : 1 >> Exe: lRankExe = 1 lRankMain = 0 >> Exe: Fin. >> >> >> 2 main***MPI_Comm_spawn return : 0 >> Exe : Lance >> 2 main***Rang main : 0 Rang exe : 1 >> Exe: lRankExe = 1 lRankMain = 0 >> Exe: Fin. >> >> >> 3 main***MPI_Comm_spawn return : 0 >> Exe : Lance >> 3 main***Rang main : 0 Rang exe : 1 >> Exe: lRankExe = 1 lRankMain = 0 >> Exe: Fin. >> >> .... >> >> 30 main***MPI_Comm_spawn return : 0 >> Exe : Lance >> 30 main***Rang main : 0 Rang exe : 1 >> Exe: lRankExe = 1 lRankMain = 0 >> Exe: Fin. >> >> >> 31 main***MPI_Comm_spawn return : 0 >> Exe : Lance >> 31 main***Rang main : 0 Rang exe : 1 >> Exe: lRankExe = 1 lRankMain = 0 >> Erreur de segmentation >> >> >> >> _______________________________________________ >> users mailing list >> us...@open-mpi.org >> http://www.open-mpi.org/mailman/listinfo.cgi/users > > _______________________________________________ > users mailing list > us...@open-mpi.org > http://www.open-mpi.org/mailman/listinfo.cgi/users > > _______________________________________________ > users mailing list > us...@open-mpi.org > http://www.open-mpi.org/mailman/listinfo.cgi/users