Hello list, when I run the attached example, which spawns a "slave" process with MPI_Comm_spawn(), I see the following:
nbock 19911  0.0 0.0  53980 2288 pts/0 S+ 07:42 0:00 /usr/local/openmpi-1.3.4-gcc-4.4.2/bin/mpirun -np 3 ./master
nbock 19912 92.1 0.0 158964 3868 pts/0 R+ 07:42 0:23 ./master
nbock 19913  0.0 0.0 158960 3812 pts/0 S+ 07:42 0:00 ./master
nbock 19914  0.0 0.0 158960 3800 pts/0 S+ 07:42 0:00 ./master
nbock 19929 91.1 0.0 158964 3896 pts/0 R+ 07:42 0:20 ./slave arg1 arg2
nbock 19930 95.8 0.0 158964 3900 pts/0 R+ 07:42 0:22 ./slave arg1 arg2
nbock 19931 94.7 0.0 158964 3896 pts/0 R+ 07:42 0:21 ./slave arg1 arg2

The third column is the CPU usage according to top. I notice 3 master processes, which I attribute to the fact that MPI_Comm_spawn really fork()s and then spawns, but that's my uneducated guess. What I don't understand is why PID 19912 is using any CPU resources at all. It's supposed to be waiting at the MPI_Barrier() for the slaves to finish. What is PID 19912 doing?

Some more information:

$ uname -a
Linux mujo 2.6.31-gentoo-r6 #2 SMP PREEMPT Fri Dec 4 07:08:07 MST 2009 x86_64 Intel(R) Core(TM)2 Quad CPU Q8200 @ 2.33GHz GenuineIntel GNU/Linux

openmpi version 1.3.4
gcc version 4.4.2

nick
#include <stdio.h> #include <stdlib.h> #include <mpi.h> int main (int argc, char **argv) { int rank; int size; int *error_codes; int spawn_counter = 0; char *slave_argv[] = { "arg1", "arg2", 0 }; MPI_Comm spawn; MPI_Init(&argc, &argv); MPI_Comm_rank(MPI_COMM_WORLD, &rank); MPI_Comm_size(MPI_COMM_WORLD, &size); if (rank == 0) { printf("[master] running on %i processors\n", size); while (1) { printf("[master] (%i) forking processes\n", spawn_counter++); error_codes = (int*) malloc(sizeof(int)*size); MPI_Comm_spawn("./slave", slave_argv, size, MPI_INFO_NULL, 0, MPI_COMM_SELF, &spawn, error_codes); printf("[master] waiting at barrier\n"); MPI_Barrier(spawn); free(error_codes); } } MPI_Finalize(); }
#include <stdio.h> #include <stdlib.h> #include <unistd.h> #include <mpi.h> int main (int argc, char **argv) { int rank; int size; int i, j; double temp; MPI_Comm spawn; MPI_Init(&argc, &argv); MPI_Comm_rank(MPI_COMM_WORLD, &rank); MPI_Comm_size(MPI_COMM_WORLD, &size); printf("[slave %i] working\n", rank); for (i = 0; i < 10000; ++i) { for (j = 0; j < 500000; ++j) { temp = rand(); } } printf("[slave %i] waiting at barrier\n", rank); MPI_Comm_get_parent(&spawn); MPI_Barrier(spawn); MPI_Finalize(); }