One thing immediately leaps out at me - you are using a very old version of
Open MPI. I suspect Tim is testing on a much newer version, most likely the
1.2 version that is about to be released in the next day or two.

If it's at all possible, I would urge you to upgrade to 1.2 - if you would
rather not wait for the official release, the web site's latest beta is
virtually identical. I believe you will find the code much improved and
worth the change.

If you truly want to stick with the 1.1 family, then I would suggest you at
least update to the latest release there (we are currently at 1.1.4, and
1.1.5 - which is planned to be the last in that series - is also coming out
in the next day or two).

Hope that helps

Ralph



On 3/1/07 4:44 AM, "rozzen.vinc...@fr.thalesgroup.com"
<rozzen.vinc...@fr.thalesgroup.com> wrote:

> 
> Thanks for your help.
> Attached is the output of ompi_info, in the file ompi_info.txt.
> 
> -----Original Message-----
> From: users-boun...@open-mpi.org [mailto:users-boun...@open-mpi.org] On
> behalf of Tim Prins
> Sent: Thursday, 1 March 2007 05:45
> To: Open MPI Users
> Subject: Re: [OMPI users] MPI_Comm_Spawn
> 
> 
> I have tried to reproduce this but cannot; I have been able to run your test
> program through more than 100 spawns. So that I can track this down further,
> please send the output of ompi_info.
> 
> Thanks,
> 
> Tim
> 
> On Tuesday 27 February 2007 10:15 am, rozzen.vinc...@fr.thalesgroup.com wrote:
>> Do you know if there is a limit to the number of MPI_Comm_spawn calls we can
>> use to launch a program? I want to start and stop a program several times
>> (with the function MPI_Comm_spawn), but every time, after 31 calls to
>> MPI_Comm_spawn, I get a "segmentation fault". Could you give me your point
>> of view on how to solve this problem?
>> Thanks
>> 
>> /* .c file: spawns the executable Exe */
>> #include <stdio.h>
>> #include <malloc.h>
>> #include <unistd.h>
>> #include "mpi.h"
>> #include <pthread.h>
>> #include <signal.h>
>> #include <sys/time.h>
>> #include <errno.h>
>> #define     EXE_TEST             "/home/workspace/test_spaw1/src/Exe"
>> 
>> 
>> int main( int argc, char **argv ) {
>> 
>>     long *lpBufferMpi;
>>     MPI_Comm lIntercom;
>>     int lErrcode;
>>     MPI_Comm lCommunicateur;
>>     int lRangMain, lRangExe, lMessageEnvoi, lIter, NiveauThreadVoulu,
>>         NiveauThreadObtenu, lTailleBuffer;
>>     int *lpMessageEnvoi = &lMessageEnvoi;
>>     MPI_Status lStatus;             /* receive status */
>> 
>>     lIter = 0;
>> 
>>     /* MPI environment */
>>     printf("main*******************************\n");
>>     printf("main : Lancement MPI*\n");
>> 
>>     NiveauThreadVoulu = MPI_THREAD_MULTIPLE;
>>     MPI_Init_thread( &argc, &argv, NiveauThreadVoulu, &NiveauThreadObtenu );
>>     lpBufferMpi = calloc( 10000, sizeof(long) );
>>     MPI_Buffer_attach( (void*)lpBufferMpi, 10000 * sizeof(long) );
>> 
>>     while (lIter < 1000){
>>         lIter++;
>>         lIntercom = (MPI_Comm)-1;
>> 
>>         MPI_Comm_spawn( EXE_TEST, NULL, 1, MPI_INFO_NULL,
>>                         0, MPI_COMM_WORLD, &lIntercom, &lErrcode );
>>         printf( "%i main***MPI_Comm_spawn return : %d\n", lIter, lErrcode );
>> 
>>         if (lIntercom == (MPI_Comm)-1 ){
>>             printf("%i Intercom null\n", lIter);
>>             return 0;
>>         }
>>         MPI_Intercomm_merge( lIntercom, 0, &lCommunicateur );
>>         MPI_Comm_rank( lCommunicateur, &lRangMain );
>>         lRangExe = 1 - lRangMain;
>> 
>>         printf("%i main***Rang main : %i   Rang exe : %i \n",
>>                lIter, (int)lRangMain, (int)lRangExe);
>>         sleep(2);
>> 
>>     }
>> 
>> 
>>     /* Shut down the MPI environment */
>>     lTailleBuffer = 10000 * sizeof(long);
>>     MPI_Buffer_detach( (void*)lpBufferMpi, &lTailleBuffer );
>>     MPI_Comm_free( &lCommunicateur );
>>     MPI_Finalize( );
>>     free( lpBufferMpi );
>> 
>>     printf( "Main = End .\n" );
>>     return 0;
>> 
>> }
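
One side note on the loop above, separate from the version question: each
iteration overwrites lCommunicateur with a freshly merged communicator and
never releases lIntercom, so the communicators from every previous spawn stay
allocated until MPI_Finalize. A minimal cleanup sketch, reusing the variable
names from the listing and assuming the child side also disconnects (a matching
sketch follows the Exe listing below), would end each pass through the while
loop like this:

        /* once this iteration's merged communicator is no longer needed,
           release both handles so they do not accumulate across the
           1000 iterations */
        MPI_Comm_free( &lCommunicateur );      /* merged intracommunicator  */
        MPI_Comm_disconnect( &lIntercom );     /* intercomm from this spawn */
        sleep(2);

With the handles released inside the loop, the single MPI_Comm_free after the
loop would be dropped, since lCommunicateur is already MPI_COMM_NULL at that
point. This is only a sketch of balanced cleanup; it may or may not be related
to the crash at the 31st spawn, which is what the upgrade is meant to address.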
>> /************************************************************************/
>> Exe:
>> #include <string.h>
>> #include <stdlib.h>
>> #include <stdio.h>
>> #include <malloc.h>
>> #include <unistd.h>     /* for sleep() */
>> #include <pthread.h>
>> #include <semaphore.h>
>> #include "mpi.h"
>> 
>> int main( int argc, char **argv ) {
>>     /* 1) MPI communication */
>>     MPI_Comm lCommunicateur;        /* this process's communicator */
>>     MPI_Comm CommParent;            /* parent communicator to retrieve */
>>     int lRank;                      /* rank within the merged communicator */
>>     int lRangMain;                  /* rank of the sequencer when started in normal mode */
>>     int lTailleCommunicateur;       /* size of the communicator */
>>     long *lpBufferMpi = NULL;       /* message buffer (NULL so the error paths can free it safely) */
>>     int lBufferSize;                /* buffer size */
>> 
>>     /* 2) thread support */
>>     int NiveauThreadVoulu, NiveauThreadObtenu;
>> 
>> 
>>     lCommunicateur    = (MPI_Comm)-1;
>>     NiveauThreadVoulu = MPI_THREAD_MULTIPLE;
>>     int erreur = MPI_Init_thread( &argc, &argv, NiveauThreadVoulu, &NiveauThreadObtenu );
>> 
>>     if (erreur != 0){
>>         printf("erreur\n");
>>         free( lpBufferMpi );
>>         return -1;
>>     }
>> 
>>     /* 3) Attach a buffer for messages */
>>     lBufferSize = 10000 * sizeof(long);
>>     lpBufferMpi = calloc( 10000, sizeof(long) );
>>     erreur = MPI_Buffer_attach( (void*)lpBufferMpi, lBufferSize );
>> 
>>     if (erreur != 0){
>>         printf("erreur\n");
>>         free( lpBufferMpi );
>>         return -1;
>>     }
>> 
>>     printf( "Exe : Lance \n" );
>>     MPI_Comm_get_parent( &CommParent );
>>     MPI_Intercomm_merge( CommParent, 1, &lCommunicateur );
>>     MPI_Comm_rank( lCommunicateur, &lRank );
>>     MPI_Comm_size( lCommunicateur, &lTailleCommunicateur );
>>     lRangMain = 1 - lRank;
>>     printf( "Exe: lRankExe  = %d   lRankMain  = %d\n",
>>             lRank, lRangMain, lTailleCommunicateur );
>> 
>>     sleep(1);
>>     MPI_Buffer_detach( (void*)lpBufferMpi, &lBufferSize );
>>     MPI_Comm_free( &lCommunicateur );
>>     MPI_Finalize( );
>>     free( lpBufferMpi );
>>     printf( "Exe: Fin.\n\n\n" );
>>     return 0;
>> }
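
As a companion to the cleanup sketch on the parent side, the child could
release its side of both communicators before finalizing; it already frees its
merged communicator, so the only addition is disconnecting the intercommunicator
returned by MPI_Comm_get_parent (names as in the Exe listing above):

        /* child side, just before MPI_Finalize: free the merged communicator
           (already done above) and disconnect from the parent intercomm so
           the parent's MPI_Comm_disconnect has a matching call */
        MPI_Comm_free( &lCommunicateur );
        MPI_Comm_disconnect( &CommParent );
        MPI_Finalize( );

Again, this is only a sketch of balanced cleanup, not a claim about the root
cause of the segmentation fault.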
>> 
>> 
>> /************************************************************************/
>> Result:
>> main*******************************
>> main : Lancement MPI*
>> 1 main***MPI_Comm_spawn return : 0
>> Exe : Lance
>> 1 main***Rang main : 0   Rang exe : 1
>> Exe: lRankExe  = 1   lRankMain  = 0
>> Exe: Fin.
>> 
>> 
>> 2 main***MPI_Comm_spawn return : 0
>> Exe : Lance
>> 2 main***Rang main : 0   Rang exe : 1
>> Exe: lRankExe  = 1   lRankMain  = 0
>> Exe: Fin.
>> 
>> 
>> 3 main***MPI_Comm_spawn return : 0
>> Exe : Lance
>> 3 main***Rang main : 0   Rang exe : 1
>> Exe: lRankExe  = 1   lRankMain  = 0
>> Exe: Fin.
>> 
>> ....
>> 
>> 30 main***MPI_Comm_spawn return : 0
>> Exe : Lance
>> 30 main***Rang main : 0   Rang exe : 1
>> Exe: lRankExe  = 1   lRankMain  = 0
>> Exe: Fin.
>> 
>> 
>> 31 main***MPI_Comm_spawn return : 0
>> Exe : Lance
>> 31 main***Rang main : 0   Rang exe : 1
>> Exe: lRankExe  = 1   lRankMain  = 0
>> Erreur de segmentation
>> 
>> 
>> 
> 
> _______________________________________________
> users mailing list
> us...@open-mpi.org
> http://www.open-mpi.org/mailman/listinfo.cgi/users


