Hello, I'm posting here another problem with my installation.
I wanted to benchmark the difference between the tcp and openib transports.
If I run a simple non-MPI application I get:
randori ~ # mpirun --mca btl tcp,self -np 2 -host randori -host
tatami hostname
randori
tatami
But as soon as I switch to my benchmark program I get:
mpirun --mca btl tcp,self -np 2 -host randori -host tatami graph
Master thread reporting
matrix size 33554432 kB, time is in [us]
and instead of starting the send/receive phase it just hangs
there. I also checked the transmitted packets with Wireshark, and
after the handshake no more packets are exchanged.
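For what it's worth, the next thing I plan to try is a minimal ping-pong along
these lines (just a sketch, a single int instead of the big static matrix), to
see whether plain MPI_Send/MPI_Recv work at all over the tcp btl:

#include "mpi.h"
#include <stdio.h>

/* minimal ping-pong sketch: rank 0 sends one int to rank 1 and
   waits for it to come back; no timing, no big static array */
int main (int argc, char *argv[])
{
    int rank, value = 42, tag = 1;
    MPI_Status stat;

    MPI_Init (&argc, &argv);
    MPI_Comm_rank (MPI_COMM_WORLD, &rank);

    if (rank == 0) {
        MPI_Send (&value, 1, MPI_INT, 1, tag, MPI_COMM_WORLD);
        MPI_Recv (&value, 1, MPI_INT, 1, tag, MPI_COMM_WORLD, &stat);
        fprintf (stderr, "ping-pong completed, value %d\n", value);
    } else if (rank == 1) {
        MPI_Recv (&value, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &stat);
        MPI_Send (&value, 1, MPI_INT, 0, tag, MPI_COMM_WORLD);
    }

    MPI_Finalize ();
    return 0;
}

If even this hangs with --mca btl tcp,self, I guess the problem is in my setup
rather than in the benchmark itself.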
I read in the archives that there were some problems in this area,
so I tried what was suggested in previous emails:
mpirun --mca btl ^openib -np 2 -host randori -host tatami graph
mpirun --mca pml ob1 --mca btl tcp,self -np 2 -host randori -host
tatami graph
Both give exactly the same output as before (the hang, no MPI_Send/MPI_Recv),
while the next command gives something more interesting:
mpirun --mca pml cm --mca btl tcp,self -np 2 -host randori -host
tatami graph
--------------------------------------------------------------------------
No available pml components were found!
This means that there are no components of this type installed on your
system or all the components reported that they could not be used.
This is a fatal error; your MPI process is likely to abort. Check the
output of the "ompi_info" command and ensure that components of this
type are available on your system. You may also wish to check the
value of the "component_path" MCA parameter and ensure that it has at
least one directory that contains valid MCA components.
--------------------------------------------------------------------------
[tatami:06619] PML cm cannot be selected
mpirun noticed that job rank 0 with PID 6710 on node randori exited
on signal 15 (Terminated).
This should not be possible, because if I run ompi_info --param all the
cm PML component is listed:
MCA pml: cm (MCA v1.0, API v1.0, Component v1.2.8)
MCA pml: ob1 (MCA v1.0, API v1.0, Component v1.2.8)
My test program is quite simple, just a couple of MPI_Send and
MPI_Recv calls (attached just after the signature).
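If it helps, I can also rerun with more btl verbosity and post the output; I
assume something like

mpirun --mca btl tcp,self --mca btl_base_verbose 30 -np 2 -host randori -host
tatami graph

is the right way to do that.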
Do you have any ideas that might help me?
Thanks a lot,
Vittorio
========================
#include "mpi.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <math.h>
#define M_COL 4096
#define M_ROW 524288
#define NUM_MSG 25
unsigned long int gigamatrix[M_ROW][M_COL];
int main (int argc, char *argv[]) {
int numtasks, rank, dest, source, rc, tmp, count, tag=1;
unsigned long int exp, exchanged;
unsigned long int i, j, e;
unsigned long matsize;
MPI_Status Stat;
struct timeval timing_start, timing_end;
double inittime = 0;
long int totaltime = 0;
MPI_Init (&argc, &argv);
MPI_Comm_size (MPI_COMM_WORLD, &numtasks);
MPI_Comm_rank (MPI_COMM_WORLD, &rank);
if (rank == 0) {
fprintf (stderr, "Master thread reporting\n", numtasks - 1);
matsize = (long) M_COL * M_ROW / 64;
fprintf (stderr, "matrix size %d kB, time is in [us]\n",
matsize);
source = 1;
dest = 1;
/*warm up phase*/
rc = MPI_Send (&tmp, 1, MPI_INT, dest, tag, MPI_COMM_WORLD);
rc = MPI_Recv (&tmp, 1, MPI_INT, source, tag,
MPI_COMM_WORLD, &Stat);
rc = MPI_Send (&tmp, 1, MPI_INT, dest, tag, MPI_COMM_WORLD);
rc = MPI_Send (&tmp, 1, MPI_INT, dest, tag, MPI_COMM_WORLD);
rc = MPI_Recv (&tmp, 1, MPI_INT, source, tag,
MPI_COMM_WORLD, &Stat);
rc = MPI_Send (&tmp, 1, MPI_INT, dest, tag, MPI_COMM_WORLD);
for (e = 0; e < NUM_MSG; e++) {
exp = pow (2, e);
exchanged = 64 * exp;
/*timing of ops*/
gettimeofday (&timing_start, NULL);
rc = MPI_Send (&gigamatrix[0], exchanged,
MPI_UNSIGNED_LONG, dest, tag, MPI_COMM_WORLD);
rc = MPI_Recv (&gigamatrix[0], exchanged,
MPI_UNSIGNED_LONG, source, tag, MPI_COMM_WORLD, &Stat);
gettimeofday (&timing_end, NULL);
totaltime = (timing_end.tv_sec - timing_start.tv_sec) *
1000000 + (timing_end.tv_usec - timing_start.tv_usec);
memset (&timing_start, 0, sizeof(struct timeval));
memset (&timing_end, 0, sizeof(struct timeval));
fprintf (stdout, "%d kB\t%d\n", exp, totaltime);
}
fprintf(stderr, "task complete\n");
} else {
if (rank >= 1) {
dest = 0;
source = 0;
rc = MPI_Recv (&tmp, 1, MPI_INT, source, tag,
MPI_COMM_WORLD, &Stat);
rc = MPI_Send (&tmp, 1, MPI_INT, dest, tag,
MPI_COMM_WORLD);
rc = MPI_Recv (&tmp, 1, MPI_INT, source, tag,
MPI_COMM_WORLD, &Stat);
rc = MPI_Recv (&tmp, 1, MPI_INT, source, tag,
MPI_COMM_WORLD, &Stat);
rc = MPI_Send (&tmp, 1, MPI_INT, dest, tag,
MPI_COMM_WORLD);
rc = MPI_Recv (&tmp, 1, MPI_INT, source, tag,
MPI_COMM_WORLD, &Stat);
for (e = 0; e < NUM_MSG; e++) {
exp = pow (2, e);
exchanged = 64 * exp;
rc = MPI_Recv (&gigamatrix[0], (unsigned)
exchanged, MPI_UNSIGNED_LONG, source, tag, MPI_COMM_WORLD, &Stat);
rc = MPI_Send (&gigamatrix[0], (unsigned)
exchanged, MPI_UNSIGNED_LONG, dest, tag, MPI_COMM_WORLD);
}
}
}
MPI_Finalize ();
return 0;
}