That is strange; I am not sure why that is happening. I will try to reproduce the problem with your program on my system. In the meantime, perhaps you could rerun with --mca mpi_common_cuda_verbose 100 and send me that output.
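For example, assuming the same two-rank invocation you used below:

    mpirun -np 2 --mca mpi_common_cuda_verbose 100 ./s1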
Thanks

From: users [mailto:users-boun...@open-mpi.org] On Behalf Of Xun Gong
Sent: Sunday, January 11, 2015 11:41 PM
To: us...@open-mpi.org
Subject: [OMPI users] Segmentation fault when using CUDA Aware feature

Hi,

The Open MPI version I am using is 1.8.4. I tried to run a test program to see whether the CUDA-aware feature works, but I got the following errors:

ss@ss-Inspiron-5439:~/cuda-workspace/cuda_mpi_ex1$ mpirun -np 2 s1
[ss-Inspiron-5439:32514] *** Process received signal ***
[ss-Inspiron-5439:32514] Signal: Segmentation fault (11)
[ss-Inspiron-5439:32514] Signal code: Address not mapped (1)
[ss-Inspiron-5439:32514] Failing at address: 0x3
[ss-Inspiron-5439:32514] [ 0] /lib/x86_64-linux-gnu/libc.so.6(+0x36c30)[0x7f74d7048c30]
[ss-Inspiron-5439:32514] [ 1] /lib/x86_64-linux-gnu/libc.so.6(+0x98a70)[0x7f74d70aaa70]
[ss-Inspiron-5439:32514] [ 2] /usr/local/openmpi-1.8.4/lib/libopen-pal.so.6(opal_convertor_pack+0x187)[0x7f74d673f097]
[ss-Inspiron-5439:32514] [ 3] /usr/local/openmpi-1.8.4/lib/openmpi/mca_btl_self.so(mca_btl_self_prepare_src+0xb8)[0x7f74ce196888]
[ss-Inspiron-5439:32514] [ 4] /usr/local/openmpi-1.8.4/lib/openmpi/mca_pml_ob1.so(mca_pml_ob1_send_request_start_prepare+0x4c)[0x7f74cd2c183c]
[ss-Inspiron-5439:32514] [ 5] /usr/local/openmpi-1.8.4/lib/openmpi/mca_pml_ob1.so(mca_pml_ob1_send+0x5ba)[0x7f74cd2b78aa]
[ss-Inspiron-5439:32514] [ 6] /usr/local/openmpi-1.8.4/lib/libmpi.so.1(PMPI_Send+0xf2)[0x7f74d79602a2]
[ss-Inspiron-5439:32514] [ 7] s1[0x408b1e]
[ss-Inspiron-5439:32514] [ 8] /lib/x86_64-linux-gnu/libc.so.6(__libc_start_main+0xf5)[0x7f74d7033ec5]
[ss-Inspiron-5439:32514] [ 9] s1[0x4088e9]
[ss-Inspiron-5439:32514] *** End of error message ***
--------------------------------------------------------------------------
mpirun noticed that process rank 0 with PID 32514 on node ss-Inspiron-5439
exited on signal 11 (Segmentation fault).
--------------------------------------------------------------------------

It looks like MPI_Send cannot send a CUDA buffer, but I already configured Open MPI with ./configure --with-cuda.
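As a sanity check on the build, I believe ompi_info can confirm whether CUDA support was actually compiled in; if I understand the FAQ correctly, this should print a line ending in value:true for a --with-cuda build:

    ompi_info --parsable --all | grep mpi_built_with_cuda_support:value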
The commands I used are:

ss@ss-Inspiron-5439:~/cuda-workspace/cuda_mpi_ex1$ nvcc -c k1.cu
ss@ss-Inspiron-5439:~/cuda-workspace/cuda_mpi_ex1$ mpic++ -c main.cc
ss@ss-Inspiron-5439:~/cuda-workspace/cuda_mpi_ex1$ mpic++ -o s1 main.o k1.o -L/usr/local/cuda/lib64/ -lcudart
ss@ss-Inspiron-5439:~/cuda-workspace/cuda_mpi_ex1$ mpirun -np 2 ./s1

The code I am running:

main.cc:

#include <iostream>
#include <mpi.h>
#include "k1.h"

using namespace std;

#define vect_len 16
const int blocksize = 16;

int main(int argv, char *argc[])
{
    int numprocs, myid;
    MPI_Status status;
    const int vect_size = vect_len * sizeof(int);
    int *vect1 = new int[vect_size];
    int *vect2 = new int[vect_size];
    int *result = new int[vect_size];
    bool flag;
    int *ad;
    int *bd;

    MPI_Init(&argv, &argc);
    MPI_Comm_rank(MPI_COMM_WORLD, &myid);
    MPI_Comm_size(MPI_COMM_WORLD, &numprocs);

    if (myid == 0) {
        for (int i = 0; i < vect_len; i++) {
            vect1[i] = i;
            vect2[i] = 2 * i;
        }
    } else {
        for (int i = 0; i < vect_len; i++) {
            vect1[i] = 2 * i;
            vect2[i] = i;
        }
    }

    initializeGPU(vect1, vect2, ad, bd, vect_size);

    if (myid == 0) {
        for (int i = 0; i < numprocs; i++) {
            MPI_Send(ad, vect_len, MPI_INT, i, 99, MPI_COMM_WORLD);
            MPI_Send(bd, vect_len, MPI_INT, i, 99, MPI_COMM_WORLD);
        }
    } else {
        MPI_Recv(ad, vect_len, MPI_INT, 0, 99, MPI_COMM_WORLD, &status);
        MPI_Recv(bd, vect_len, MPI_INT, 0, 99, MPI_COMM_WORLD, &status);
    }

    computeGPU(blocksize, vect_len, ad, bd, result, vect_size);

    // Verify
    flag = true;
    for (int i = 0; i < vect_len; i++) {
        if (i < 8)
            vect1[i] += vect2[i];
        else
            vect1[i] -= vect2[i];
    }
    for (int i = 0; i < vect_len; i++) {
        if (result[i] != vect1[i]) {
            cout << "the result [" << i << "] by m2s is " << result[i] << endl;
            cout << "the result [" << i << "] of vector is " << vect1[i] << endl;
            cout << "Verification fail at " << i << endl;
            flag = false;
            break;
        }
    }
    if (flag)
        cout << "Verification passes." << endl;

    // free device memory
    /*
    cudaFree(ad);
    cudaFree(bd);
    free(vect1);
    free(vect2);
    free(result);
    */
    MPI_Finalize();
}

k1.h:

void initializeGPU(int *hostptr1, int *hostptr2, int *ad, int *bd,
                   int vect_size);
void computeGPU(int block_size, int vect_len, int *ptr1, int *ptr2,
                int *result, int vect_size);

k1.cu:

#include "k1.h"

__global__ void vect_add(int *a, int *b, int n)
{
    int id = threadIdx.x;
    if (id < n)
        a[id] = a[id] + b[id];
    else
        a[id] = a[id] - b[id];
}

void initializeGPU(int *hostptr1, int *hostptr2, int *ad, int *bd,
                   int vect_size)
{
    // initialize device memory
    cudaMalloc((void**)&ad, vect_size);
    cudaMalloc((void**)&bd, vect_size);
    // copy data to device
    cudaMemcpy(ad, hostptr1, vect_size, cudaMemcpyHostToDevice);
    cudaMemcpy(bd, hostptr2, vect_size, cudaMemcpyHostToDevice);
}

void computeGPU(int block_size, int vect_len, int *ptr1, int *ptr2,
                int *result, int vect_size)
{
    // setup block and grid size
    dim3 dimBlock(block_size, 1, 1);
    dim3 dimGrid(vect_len / block_size, 1, 1);
    vect_add<<<dimGrid, dimBlock>>>(ptr1, ptr2, 8);
    cudaMemcpy(result, ptr1, vect_size, cudaMemcpyDeviceToHost);
}

Many thanks for help,
Xun
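One detail in the code above that would be consistent with the trace, independent of whether the build is CUDA-aware: initializeGPU receives ad and bd by value, so the addresses written by cudaMalloc are lost when the function returns, and main passes an uninitialized pointer to MPI_Send (which matches "Failing at address: 0x3" inside opal_convertor_pack). A minimal sketch of the change, passing the device pointers by reference and leaving everything else unchanged:

    /* k1.h: take the device pointers by reference so the caller
       sees the addresses that cudaMalloc returns */
    void initializeGPU(int *hostptr1, int *hostptr2,
                       int *&ad, int *&bd, int vect_size);

    /* k1.cu */
    void initializeGPU(int *hostptr1, int *hostptr2,
                       int *&ad, int *&bd, int vect_size)
    {
        // these now update the ad/bd declared in main()
        cudaMalloc((void**)&ad, vect_size);
        cudaMalloc((void**)&bd, vect_size);
        cudaMemcpy(ad, hostptr1, vect_size, cudaMemcpyHostToDevice);
        cudaMemcpy(bd, hostptr2, vect_size, cudaMemcpyHostToDevice);
    }

An int** out-parameter would work equally well; the point is only that main must see the allocated device addresses before calling MPI_Send.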