Actually, I have also tried with the same version you are using and cannot reproduce the behavior. Can you get a backtrace from the segmentation fault?

Also, as Ralph suggested, you might want to upgrade and see if the problem persists.

Tim

On Mar 1, 2007, at 8:52 AM, Ralph Castain wrote:

One thing immediately leaps out at me - you are using a very old version of Open MPI. I suspect Tim is testing on a much newer version, most likely the
1.2 version that is about to be released in the next day or two.

If it's at all possible, I would urge you to upgrade to 1.2 - if you would rather not wait for the official release, the web site's latest beta is virtually identical. I believe you will find the code much improved and
worth the change.

If you truly want to stick with the 1.1 family, then I would suggest you at least update to the latest release there (we are currently at 1.1.4, and 1.1.5 - which is planned to be the last in that series - is also coming out
in the next day or two).

Hope that helps

Ralph



On 3/1/07 4:44 AM, "rozzen.vinc...@fr.thalesgroup.com"
<rozzen.vinc...@fr.thalesgroup.com> wrote:


Thanks for your help.
Here is attached the output of ompi_info in the file ompi_info.txt.

-----Message d'origine-----
De : users-boun...@open-mpi.org [mailto:users-boun...@open-mpi.org] De la
part de Tim Prins
Envoyé : jeudi 1 mars 2007 05:45
À : Open MPI Users
Objet : Re: [OMPI users] MPI_Comm_Spawn


I have tried to reproduce this but cannot. I have been able to run your test program to over 100 spawns. So I can track this further, please send the
output of ompi_info.

Thanks,

Tim

On Tuesday 27 February 2007 10:15 am, rozzen.vinc...@fr.thalesgroup.com wrote:
Do you know if there is a limit to the number of MPI_Comm_spawn calls we can use in order to launch a program? I want to start and stop a program several
times (with the function MPI_Comm_spawn), but every time, after 31 calls to
MPI_Comm_spawn, I get a "segmentation fault". Could you give me your point
of view on how to solve this problem?
Thanks

/* File .c: spawns the executable Exe */
#include <stdio.h>
#include <malloc.h>
#include <unistd.h>
#include "mpi.h"
#include <pthread.h>
#include <signal.h>
#include <sys/time.h>
#include <errno.h>
#define EXE_TEST "/home/workspace/test_spaw1/src/ Exe"



int main( int argc, char **argv ) {

    long *lpBufferMpi;
    MPI_Comm lIntercom;
    int lErrcode;
    MPI_Comm lCommunicateur;
    int lRangMain,lRangExe,lMessageEnvoi,lIter,NiveauThreadVoulu,
NiveauThreadObtenu,lTailleBuffer; int *lpMessageEnvoi=&lMessageEnvoi;
    MPI_Status lStatus;             /*status de reception*/

     lIter=0;


    /* MPI environnement */

    printf("main*******************************\n");
    printf("main : Lancement MPI*\n");

    NiveauThreadVoulu = MPI_THREAD_MULTIPLE;
MPI_Init_thread( &argc, &argv, NiveauThreadVoulu, &NiveauThreadObtenu
); lpBufferMpi = calloc( 10000, sizeof(long));
    MPI_Buffer_attach( (void*)lpBufferMpi, 10000 * sizeof(long) );

    while (lIter<1000){
        lIter ++;
        lIntercom=(MPI_Comm)-1 ;

        MPI_Comm_spawn( EXE_TEST, NULL, 1, MPI_INFO_NULL,
                      0, MPI_COMM_WORLD, &lIntercom, &lErrcode );
printf( "%i main***MPI_Comm_spawn return : %d\n",lIter, lErrcode );

        if(lIntercom == (MPI_Comm)-1 ){
            printf("%i Intercom null\n",lIter);
            return 0;
        }
        MPI_Intercomm_merge(lIntercom, 0,&lCommunicateur );
        MPI_Comm_rank( lCommunicateur, &lRangMain);
        lRangExe=1-lRangMain;

        printf("%i main***Rang main : %i   Rang exe : %i
\n",lIter,(int)lRangMain,(int)lRangExe); sleep(2);

    }


    /* Arret de l'environnement MPI */
    lTailleBuffer=10000* sizeof(long);
    MPI_Buffer_detach( (void*)lpBufferMpi, &lTailleBuffer );
    MPI_Comm_free( &lCommunicateur );
    MPI_Finalize( );
    free( lpBufferMpi );

    printf( "Main = End .\n" );
    return 0;

}
/**************************************************************************/
Exe:
#include <string.h>
#include <stdlib.h>
#include <stdio.h>
#include <malloc.h>
#include <unistd.h>     /* pour sleep() */
#include <pthread.h>
#include <semaphore.h>
#include "mpi.h"

int main( int argc, char **argv ) {
/*1)pour communiaction MPI*/
    MPI_Comm lCommunicateur;        /*communicateur du process*/
MPI_Comm CommParent; /*Communiacteur parent à récupérer*/ int lRank; /*rang du communicateur du process*/
    int lRangMain;            /*rang du séquenceur si lancé en mode
normal*/ int lTailleCommunicateur; /*taille du communicateur;*/
    long *lpBufferMpi;              /*buffer pour message*/
    int lBufferSize;                /*taille du buffer*/

    /*2) pour les thread*/
    int NiveauThreadVoulu, NiveauThreadObtenu;


    lCommunicateur   = (MPI_Comm)-1;
    NiveauThreadVoulu = MPI_THREAD_MULTIPLE;
    int erreur = MPI_Init_thread( &argc, &argv, NiveauThreadVoulu,
&NiveauThreadObtenu );

    if (erreur!=0){
        printf("erreur\n");
        free( lpBufferMpi );
        return -1;
    }

   /*2) Attachement à un buffer pour le message*/
    lBufferSize=10000 * sizeof(long);
    lpBufferMpi = calloc( 10000, sizeof(long));
    erreur = MPI_Buffer_attach( (void*)lpBufferMpi, lBufferSize );

    if (erreur!=0){
        printf("erreur\n");
        free( lpBufferMpi );
        return -1;
    }

    printf( "Exe : Lance \n" );
    MPI_Comm_get_parent(&CommParent);
    MPI_Intercomm_merge( CommParent, 1, &lCommunicateur );
    MPI_Comm_rank( lCommunicateur, &lRank );
    MPI_Comm_size( lCommunicateur, &lTailleCommunicateur );
    lRangMain   =1-lRank;
printf( "Exe: lRankExe = %d lRankMain = %d\n", lRank , lRangMain,
lTailleCommunicateur);

    sleep(1);
    MPI_Buffer_detach( (void*)lpBufferMpi, &lBufferSize );
    MPI_Comm_free( &lCommunicateur );
    MPI_Finalize( );
    free( lpBufferMpi );
    printf( "Exe: Fin.\n\n\n" );
}


/**************************************************************************/
result :
main*******************************
main : Lancement MPI*
1 main***MPI_Comm_spawn return : 0
Exe : Lance
1 main***Rang main : 0   Rang exe : 1
Exe: lRankExe  = 1   lRankMain  = 0
Exe: Fin.


2 main***MPI_Comm_spawn return : 0
Exe : Lance
2 main***Rang main : 0   Rang exe : 1
Exe: lRankExe  = 1   lRankMain  = 0
Exe: Fin.


3 main***MPI_Comm_spawn return : 0
Exe : Lance
3 main***Rang main : 0   Rang exe : 1
Exe: lRankExe  = 1   lRankMain  = 0
Exe: Fin.

....

30 main***MPI_Comm_spawn return : 0
Exe : Lance
30 main***Rang main : 0   Rang exe : 1
Exe: lRankExe  = 1   lRankMain  = 0
Exe: Fin.


31 main***MPI_Comm_spawn return : 0
Exe : Lance
31 main***Rang main : 0   Rang exe : 1
Exe: lRankExe  = 1   lRankMain  = 0
Erreur de segmentation



_______________________________________________
users mailing list
us...@open-mpi.org
http://www.open-mpi.org/mailman/listinfo.cgi/users

_______________________________________________
users mailing list
us...@open-mpi.org
http://www.open-mpi.org/mailman/listinfo.cgi/users

_______________________________________________
users mailing list
us...@open-mpi.org
http://www.open-mpi.org/mailman/listinfo.cgi/users



_______________________________________________
users mailing list
us...@open-mpi.org
http://www.open-mpi.org/mailman/listinfo.cgi/users


Reply via email to