Hi, I am trying to distribute a large amount of data using MPI.
When I exceed a certain data size, a segmentation fault occurs. Here is my code:

#include "mpi.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define ARRAYSIZE 2000000
#define MASTER 0

int data[ARRAYSIZE];

int main(int argc, char* argv[])
{
    int numtasks, taskid, rc, dest, offset, i, j, tag1, tag2, source, chunksize, namelen;
    int mysum, sum;
    int update(int myoffset, int chunk, int myid);
    char myname[MPI_MAX_PROCESSOR_NAME];
    MPI_Status status;
    double start, stop, time;
    double totaltime;
    FILE *fp;
    char line[128];
    char element;
    int n;
    int k = 0;

    /***** Initializations *****/
    MPI_Init(&argc, &argv);
    MPI_Comm_size(MPI_COMM_WORLD, &numtasks);
    MPI_Comm_rank(MPI_COMM_WORLD, &taskid);
    MPI_Get_processor_name(myname, &namelen);
    printf("MPI task %d has started on host %s...\n", taskid, myname);

    chunksize = (ARRAYSIZE / numtasks);
    tag2 = 1;
    tag1 = 2;

    /***** Master task only *****/
    if (taskid == MASTER) {

        /* Initialize the array */
        sum = 0;
        for (i = 0; i < ARRAYSIZE; i++) {
            data[i] = i * 1;
            sum = sum + data[i];
        }
        printf("Initialized array sum = %d\n", sum);

        /* Send each task its portion of the array - master keeps 1st part */
        offset = chunksize;
        for (dest = 1; dest < numtasks; dest++) {
            MPI_Send(&offset, 1, MPI_INT, dest, tag1, MPI_COMM_WORLD);
            MPI_Send(&data[offset], chunksize, MPI_INT, dest, tag2, MPI_COMM_WORLD);
            printf("Sent %d elements to task %d offset= %d\n", chunksize, dest, offset);
            offset = offset + chunksize;
        }

        /* Master does its part of the work */
        offset = 0;
        mysum = update(offset, chunksize, taskid);

        /* Wait to receive results from each task */
        for (i = 1; i < numtasks; i++) {
            source = i;
            MPI_Recv(&offset, 1, MPI_INT, source, tag1, MPI_COMM_WORLD, &status);
            MPI_Recv(&data[offset], chunksize, MPI_INT, source, tag2, MPI_COMM_WORLD, &status);
        }

        /* Get final sum and print sample results */
        MPI_Reduce(&mysum, &sum, 1, MPI_INT, MPI_SUM, MASTER, MPI_COMM_WORLD);

        /* printf("Sample results: \n");
        offset = 0;
        for (i = 0; i < numtasks; i++) {
            for (j = 0; j < 5; j++)
                printf(" %d", data[offset+j]);
            printf("\n");
            offset = offset + chunksize;
        } */

        printf("\n*** Final sum= %d ***\n", sum);

    } /* end of master section */

    /***** Non-master tasks only *****/
    if (taskid > MASTER) {

        /* Receive my portion of array from the master task */
        start = MPI_Wtime();
        source = MASTER;
        MPI_Recv(&offset, 1, MPI_INT, source, tag1, MPI_COMM_WORLD, &status);
        MPI_Recv(&data[offset], chunksize, MPI_INT, source, tag2, MPI_COMM_WORLD, &status);

        mysum = update(offset, chunksize, taskid);

        stop = MPI_Wtime();
        time = stop - start;
        printf("time taken by process %d to recieve elements and caluclate own sum is = %lf seconds \n", taskid, time);
        totaltime = totaltime + time;

        /* Send my results back to the master task */
        dest = MASTER;
        MPI_Send(&offset, 1, MPI_INT, dest, tag1, MPI_COMM_WORLD);
        MPI_Send(&data[offset], chunksize, MPI_INT, MASTER, tag2, MPI_COMM_WORLD);

        MPI_Reduce(&mysum, &sum, 1, MPI_INT, MPI_SUM, MASTER, MPI_COMM_WORLD);

    } /* end of non-master */

    // printf("Total time taken for distribution is - %lf seconds", totaltime);

    MPI_Finalize();

} /* end of main */

int update(int myoffset, int chunk, int myid)
{
    int i, j;
    int mysum;
    int mydata[myoffset + chunk];

    /* Perform addition to each of my array elements and keep my sum */
    mysum = 0;

    /* printf("task %d has elements:", myid);
    for (j = myoffset; j < myoffset + chunk; j++) {
        printf("\t%d", data[j]);
    }
    printf("\n"); */

    for (i = myoffset; i < myoffset + chunk; i++) {
        //data[i] = data[i] + i;
        mysum = mysum + data[i];
    }
    printf("Task %d has sum = %d\n", myid, mysum);
    return(mysum);
}
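In case it matters, this is roughly how I build and launch it; the executable and hostfile names below are just placeholders, and the exact flags on my cluster may differ slightly:

    mpicc -O2 -o array_dist array_dist.c           # build with the MPI compiler wrapper
    mpirun -np 20 --hostfile myhosts ./array_dist  # 20 processes spread over machine4/5/6

With np = 20 each task works on chunksize = ARRAYSIZE / numtasks elements, e.g. 1000000 ints per task when ARRAYSIZE is 20000000 (roughly 4 MB per chunk, assuming a 4-byte int), which matches the "Sent 1000000 elements" lines in the output below.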
When I run it with ARRAYSIZE = 2000000, the program works fine. But when I increase the size to ARRAYSIZE = 20000000, the program ends with a segmentation fault. I am running it on a cluster (machine4 is the master; machine5 and machine6 are slaves) with np = 20:

MPI task 0 has started on host machine4
MPI task 2 has started on host machine4
MPI task 3 has started on host machine4
MPI task 14 has started on host machine4
MPI task 8 has started on host machine6
MPI task 10 has started on host machine6
MPI task 13 has started on host machine4
MPI task 4 has started on host machine5
MPI task 6 has started on host machine5
MPI task 7 has started on host machine5
MPI task 16 has started on host machine5
MPI task 11 has started on host machine6
MPI task 12 has started on host machine4
MPI task 5 has started on host machine5
MPI task 17 has started on host machine5
MPI task 18 has started on host machine5
MPI task 15 has started on host machine4
MPI task 19 has started on host machine5
MPI task 1 has started on host machine4
MPI task 9 has started on host machine6
Initialized array sum = 542894464
Sent 1000000 elements to task 1 offset= 1000000
Task 1 has sum = 1055913696
time taken by process 1 to recieve elements and caluclate own sum is = 0.249345 seconds
Sent 1000000 elements to task 2 offset= 2000000
Sent 1000000 elements to task 3 offset= 3000000
Task 2 has sum = 328533728
time taken by process 2 to recieve elements and caluclate own sum is = 0.274285 seconds
Sent 1000000 elements to task 4 offset= 4000000
--------------------------------------------------------------------------
mpirun noticed that process rank 3 with PID 5695 on node machine4 exited on signal 11 (Segmentation fault).

Any idea what could be wrong here?

--
Best Regards,
ROHAN DESHPANDE