Kernighan and Ritchie's "The C Programming Language" manual - the guarantee goes 
all the way back to the original C definition, and it is also explicit in the ISO 
C standard (C99 5.1.2.2.1: "argv[argc] shall be a null pointer").
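
A quick way to see the guarantee for yourself - a minimal standalone example, 
not taken from the thread below:

   #include <stdio.h>

   int main(int argc, char **argv)
   {
       /* C99 5.1.2.2.1: "argv[argc] shall be a null pointer" */
       printf("argv[%d] = %p\n", argc, (void *) argv[argc]);
       return 0;
   }

With glibc this prints "(nil)" for argv[argc] no matter how many arguments are 
passed; that trailing NULL is the sentinel an argv-walking routine such as 
opal_argv_join can rely on instead of a length.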


On Nov 12, 2013, at 9:15 AM, Alex A. Granovsky <g...@classic.chem.msu.su> wrote:

> Hello,
> 
>> It seems that argv[argc] should always be NULL according to the
>> standard. So the OMPI failure is not actually a bug!
> 
> could you please point to the exact document where this is explicitly stated?
> Otherwise, I'd assume this is a bug.
> 
> Kind regards,
> Alex Granovsky
> 
> 
> -----Original Message-----
> From: Matthieu Brucher
> Sent: Tuesday, November 12, 2013 8:56 PM
> To: Open MPI Users
> Subject: Re: [OMPI users] Segmentation fault in MPI_Init when passing pointers allocated in main()
> 
> It seems that argv[argc] should always be NULL according to the
> standard. So the OMPI failure is not actually a bug!
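> 
> A join routine that takes no explicit length has to walk the array
> until it hits NULL, along these lines (an illustrative sketch, not
> the actual opal_argv_join source):
> 
>    size_t total = 0;
>    for ( char **p = argv ; *p != NULL ; ++p )  /* reads argv[argc], argv[argc+1], ... */
>        total += strlen( *p ) + 1;              /* ... until it finds NULL or faults   */
> 
> Without the terminating NULL the loop runs off the end of the array
> and dereferences whatever happens to follow, which matches the
> "Address not mapped" fault inside opal_argv_join in the trace below.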
> 
> Cheers,
> 
> 2013/11/12 Matthieu Brucher <matthieu.bruc...@gmail.com>:
>> Interestingly enough, in ompi_mpi_init, opal_argv_join is called
>> without the array length, so I suppose that in the usual argc/argv
>> pair, argv carries an additional trailing entry which should be NULL. So try
>> allocating 3 additional values, the last being NULL, and it may work.
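>> 
>> Concretely, something like this - an untested sketch based on the
>> code below, with the NULL sentinel added:
>> 
>>    char **argv_new = new char*[ argc + 3 ];  // 2 extra args + 1 NULL sentinel
>>    for( int i = 0 ; i < argc ; i++ )
>>    {
>>        argv_new[i] = new char[ strlen( argv[i] ) + 1 ];
>>        strcpy( argv_new[i], argv[i] );
>>    }
>>    argv_new[ argc   ] = new char[ 32 ];
>>    argv_new[ argc+1 ] = new char[ 32 ];
>>    argv_new[ argc+2 ] = NULL;                // terminator that argv walkers expect
>>    strcpy( argv_new[argc],   "-device" );
>>    sprintf( argv_new[argc+1], "%d", 0 );
>>    argc += 2;
>>    argv = argv_new;
>> 
>> Note that argc still counts only the real arguments, so after the
>> increment the NULL sits exactly at argv[argc].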
>> 
>> Cheers,
>> 
>> Matthieu
>> 
>> 2013/11/12 Tang, Yu-Hang <yuhang_t...@brown.edu>:
>>> I tried the following code without CUDA; the error is still there:
>>> 
>>> #include "mpi.h"
>>> 
>>> #include <cstdlib>
>>> #include <cstring>
>>> #include <cmath>
>>> 
>>> int main(int argc, char **argv)
>>> {
>>>    // override command line arguments to make sure cudaengine gets the correct one
>>>    char **argv_new = new char*[ argc + 2 ];
>>>    for( int i = 0 ; i < argc ; i++ )
>>>    {
>>>        argv_new[i] = new char[ strlen( argv[i] ) + 1 ];
>>>        strcpy( argv_new[i], argv[i] );
>>>    }
>>>    argv_new[ argc   ] = new char[ 32 ];
>>>    argv_new[ argc+1 ] = new char[ 32 ];
>>>    strcpy( argv_new[argc],   "-device" );
>>>    sprintf( argv_new[argc+1], "%d", 0 );
>>> 
>>>    argc += 2;
>>>    argv = argv_new;
>>> 
>>>    MPI_Init(&argc,&argv);
>>> 
>>>    // do something...
>>> 
>>>    MPI_Finalize();
>>> 
>>>    for( int i = 0 ; i < argc ; i++ ) delete [] argv[i];
>>>    delete [] argv;
>>> }
>>> 
>>> At the end of the program the pointer stored in argv is exactly that of
>>> argv_new, so this should not be a problem. Manually inserting printf calls
>>> tells me that the fault occurred at MPI_Init. The code works fine if I use
>>> MPI_Init(NULL,NULL) instead. The same code also compiles and runs without a
>>> problem on my laptop with mpich2-1.4.
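>>> 
>>> (A quick way to check that invariant before the call - a hypothetical
>>> debugging aid, not part of the code above:
>>> 
>>>    #include <cassert>
>>> 
>>>    // inside main(), right before the call:
>>>    assert( argv[argc] == NULL );  // aborts here: argv_new has no NULL at index argc
>>>    MPI_Init(&argc,&argv);
>>> 
>>> The argv the runtime hands to main() satisfies the assertion, and
>>> MPI_Init(NULL,NULL) presumably never walks an argv at all, which would
>>> explain both observations.)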
>>> 
>>> Best,
>>> Yu-Hang
>>> 
>>> 
>>> 
>>> On Tue, Nov 12, 2013 at 11:18 AM, Matthieu Brucher
>>> <matthieu.bruc...@gmail.com> wrote:
>>>> 
>>>> Hi,
>>>> 
>>>> Are you sure this is the correct code? This seems strange and not a good
>>>> idea:
>>>> 
>>>>   MPI_Init(&argc,&argv);
>>>> 
>>>>    // do something...
>>>> 
>>>>    for( int i = 0 ; i < argc ; i++ ) delete [] argv[i];
>>>>    delete [] argv;
>>>> 
>>>> Did you mean argc_new and argv_new instead?
>>>> Do you have the same error without CUDA?
>>>> 
>>>> Cheers,
>>>> 
>>>> Matthieu
>>>> 
>>>> 
>>>> 2013/11/12 Tang, Yu-Hang <yuhang_t...@brown.edu>:
>>>> > Hi,
>>>> >
>>>> > I tried to augment the command line argument list by allocating my own
>>>> > list of strings and passing them to MPI_Init, yet I got a segmentation
>>>> > fault for both OpenMPI 1.6.3 and 1.7.2, while the code works fine with
>>>> > MPICH2. The code is:
>>>> >
>>>> > #include "mpi.h"
>>>> > #include "cuda_runtime.h"
>>>> > #include <cstdlib>
>>>> > #include <cstring>
>>>> > #include <cmath>
>>>> >
>>>> > int main(int argc, char **argv)
>>>> > {
>>>> >     int device = 0;
>>>> >     int skip = 0;
>>>> >     bool skipmode = false;
>>>> >     bool specified = false;
>>>> >     for( int i = 0 ; i < argc ; i++ )
>>>> >     {
>>>> >         if ( strcmp( argv[i], "-device" ) == 0 )
>>>> >         {
>>>> >             i++;
>>>> >             if ( argv[i][0] == '-' )
>>>> >             {
>>>> >                 skipmode = true;
>>>> >                 skip = fabs( atoi( argv[i] ) );
>>>> >             }
>>>> >             else
>>>> >             {
>>>> >                 skipmode = false;
>>>> >                 device = atoi( argv[i] );
>>>> >             }
>>>> >             specified = true;
>>>> >         }
>>>> >     }
>>>> >
>>>> >     if ( !specified || skipmode )
>>>> >     {
>>>> >         char* var;
>>>> >         int dev_count, local_rank = 0;
>>>> >         if ( (var = getenv("SLURM_LOCALID")) != NULL) local_rank = atoi(var);
>>>> >         else if( (var = getenv("MV2_COMM_WORLD_LOCAL_RANK"))  != NULL) local_rank = atoi(var);
>>>> >         else if( (var = getenv("OMPI_COMM_WORLD_LOCAL_RANK")) != NULL) local_rank = atoi(var);
>>>> >         cudaGetDeviceCount( &dev_count );
>>>> >         if ( skipmode )
>>>> >         {
>>>> >             device = 0;
>>>> >             if ( device == skip ) local_rank++;
>>>> >             while( local_rank-- > 0 )
>>>> >             {
>>>> >                 device = ( device + 1 ) % dev_count;
>>>> >                 if ( device == skip ) local_rank++;
>>>> >             }
>>>> >         }
>>>> >         else device = local_rank % dev_count;
>>>> >     }
>>>> >
>>>> >     // override command line arguments to make sure cudaengine gets the correct one
>>>> >     char **argv_new = new char*[ argc + 2 ];
>>>> >     for( int i = 0 ; i < argc ; i++ )
>>>> >     {
>>>> >         argv_new[i] = new char[ strlen( argv[i] ) + 1 ];
>>>> >         strcpy( argv_new[i], argv[i] );
>>>> >     }
>>>> >     argv_new[ argc   ] = new char[ 32 ];
>>>> >     argv_new[ argc+1 ] = new char[ 32 ];
>>>> >     strcpy( argv_new[argc],   "-device" );
>>>> >     sprintf( argv_new[argc+1], "%d", device );
>>>> >     argc += 2;
>>>> >     argv = argv_new;
>>>> >
>>>> >     cudaSetDevice( device );
>>>> >
>>>> >     MPI_Init(&argc,&argv);
>>>> >
>>>> >     // do something...
>>>> >
>>>> >     MPI_Finalize();
>>>> >
>>>> >     cudaDeviceReset();
>>>> >     for( int i = 0 ; i < argc ; i++ ) delete [] argv[i];
>>>> >     delete [] argv;
>>>> > }
>>>> >
>>>> > When compiled using nvcc -ccbin mpic++, the error I got was:
>>>> >
>>>> > [jueying:16317] *** Process received signal ***
>>>> > [jueying:16317] Signal: Segmentation fault (11)
>>>> > [jueying:16317] Signal code: Address not mapped (1)
>>>> > [jueying:16317] Failing at address: 0x21
>>>> > [jueying:16317] [ 0] /usr/lib64/libpthread.so.0() [0x39e5e0f000]
>>>> > [jueying:16317] [ 1] /usr/lib64/libc.so.6() [0x39e5760551]
>>>> > [jueying:16317] [ 2] /opt/openmpi/1.7.2/lib/libopen-pal.so.5(opal_argv_join+0x39) [0x7f460b993079]
>>>> > [jueying:16317] [ 3] /opt/openmpi/1.7.2/lib/libmpi.so.1(ompi_mpi_init+0x347) [0x7f460c106a57]
>>>> > [jueying:16317] [ 4] /opt/openmpi/1.7.2/lib/libmpi.so.1(MPI_Init+0x16b) [0x7f460c12523b]
>>>> > [jueying:16317] [ 5] ./lmp_jueying() [0x40c035]
>>>> > [jueying:16317] [ 6] /usr/lib64/libc.so.6(__libc_start_main+0xf5) [0x39e5621a05]
>>>> > [jueying:16317] [ 7] ./lmp_jueying() [0x40dd21]
>>>> > [jueying:16317] *** End of error message ***
>>>> >
>>>> > Thanks for the help.
>>>> >
>>>> > Best regards,
>>>> > Yu-Hang Tang
>>>> >
>>>> 
>>>> 
>>>> 
>>>> --
>>>> Information System Engineer, Ph.D.
>>>> Blog: http://matt.eifelle.com
>>>> LinkedIn: http://www.linkedin.com/in/matthieubrucher
>>>> Music band: http://liliejay.com/
>>> 
>>> 
>>> 
>>> 
>>> --
>>> Yu-Hang Tang
>>> Room 105, 37 Manning St
>>> Division of Applied Mathematics, Brown University
>>> Providence, RI 02912
>>> 
>> 
>> 
>> 
>> --
>> Information System Engineer, Ph.D.
>> Blog: http://matt.eifelle.com
>> LinkedIn: http://www.linkedin.com/in/matthieubrucher
>> Music band: http://liliejay.com/
> 
> 
> 
> -- 
> Information System Engineer, Ph.D.
> Blog: http://matt.eifelle.com
> LinkedIn: http://www.linkedin.com/in/matthieubrucher
> Music band: http://liliejay.com/
