Hi!

The attached code shows a problem when using an mmap'ed buffer with MPI_Send and the vader BTL.

With OMPI_MCA_btl='^vader' it works in all cases I have tested.


Intel MPI also has problems with this: it fails to receive the complete data, getting a NUL byte at position 6116 when the receiver is on another node.

(Haven't had time to build 1.10 yet...)

--
Ake Sandgren, HPC2N, Umea University, S-90187 Umea, Sweden
Internet: a...@hpc2n.umu.se   Phone: +46 90 7866134 Fax: +46 90-580 14
Mobile: +46 70 7716134 WWW: http://www.hpc2n.umu.se
#include <fcntl.h>
#include <mpi.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <sys/types.h>

#define SIZE 1
#define DATA 2
#define ACK 3

#define MMAP 1
#define MALLOC 2

void master(int nodes, int me, int argc, char **argv);
void slave(int nodes, int me, int argc, char **argv);

/* Calls MPI_Abort on MPI_COMM_WORLD with error code 1. */
void terminate(void)
{
    const int abort_code = 1;

    MPI_Abort(MPI_COMM_WORLD, abort_code);
}

/*
 * Program entry point.
 *
 * Initializes MPI, prints this rank's host name to stderr, then runs
 * master() on rank 0 and slave() on every other rank before finalizing MPI.
 *
 * Returns 0 on normal completion.
 */
int main(int argc, char **argv)
{
    int me, nodes;
    char hn[1024];

    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &me);
    MPI_Comm_size(MPI_COMM_WORLD, &nodes);

    /*
     * gethostname() may leave the buffer unterminated when the name is
     * truncated, and its contents are unspecified on failure, so fall back
     * to a placeholder and always terminate explicitly before printing.
     */
    if (gethostname(hn, sizeof hn - 1) != 0) {
        snprintf(hn, sizeof hn, "%s", "unknown");
    }
    hn[sizeof hn - 1] = '\0';
    fprintf(stderr, "Me %d: @ %s\n", me, hn);

    if (me == 0) {
        master(nodes, me, argc, argv);
    } else {
        slave(nodes, me, argc, argv);
    }

    MPI_Finalize();
    return 0;
}

/*
 * Rank-0 side of the reproducer.
 *
 * Prepares a send buffer, either a read-only private mmap of the file
 * "somelargefile" (default, or argv[1] == "mmap") or a malloc'ed buffer
 * filled with 'N' (argv[1] == "malloc"). Then, for every other rank:
 *   1. sends the payload length (tag SIZE, one MPI_LONG),
 *   2. sends that many bytes from the buffer (tag DATA, MPI_BYTE),
 *   3. waits for an int acknowledgement (tag ACK).
 * While the slave answers 0 (it saw a '\0' byte), the length is reduced by
 * 32 bytes and the exchange is repeated.
 *
 * nodes: number of ranks in MPI_COMM_WORLD.
 * me:    this rank (unused here).
 * argc/argv: command line; argv[1] optionally selects the buffer mode.
 *
 * Setup failures are reported on stderr and end in terminate().
 */
void master(int nodes, int me, int argc, char **argv)
{
    const long initial_len = 34986;	/* first payload size tried for each slave */
    const long step = 32;		/* size reduction after a failed attempt */
    const size_t mmaplength = 1024*1024, malloclength = 1024*1024;
    int fd = -1, ret, n, ack;
    struct stat st;
    char *buff = NULL;
    long l;	/* sent as MPI_LONG, so it must really be a long */
    MPI_Status status;
    int mode = MMAP;

    (void)me;

    if (argc > 1) {
        if (strcmp(argv[1], "mmap") == 0) {
            mode = MMAP;
        } else if (strcmp(argv[1], "malloc") == 0) {
            mode = MALLOC;
        }
    }

    if (mode == MMAP) {
        fd = open("somelargefile", O_RDONLY);
        if (fd < 0) {
            perror("Could not open input file");
            terminate();
            return;
        }
        ret = fstat(fd, &st);
        if (ret != 0) {
            perror("Could not stat input file");
            close(fd);
            terminate();
            return;
        }
        /*
         * Bytes of a mapping that lie beyond end-of-file read as zero (or
         * fault), which the slave would report as a corrupted transfer.
         * Require the file to cover at least the largest payload sent.
         */
        if (st.st_size < (off_t)initial_len) {
            fprintf(stderr, "Input file is too small (%lld bytes, need at least %ld)\n",
                (long long)st.st_size, initial_len);
            close(fd);
            terminate();
            return;
        }
        buff = mmap(NULL, mmaplength, PROT_READ, MAP_PRIVATE, fd, 0);
        if (buff == MAP_FAILED) {
            perror("Failed to mmap input file");
            close(fd);
            terminate();
            return;
        }
    }
    if (mode == MALLOC) {
        buff = malloc(malloclength);
        if (buff == NULL) {
            perror("Failed to allocate send buffer");
            terminate();
            return;
        }
        memset(buff, 'N', malloclength);
    }

    for (n = 1; n < nodes; n++) {
        l = initial_len;
        ack = 0;
        while (!ack) {
            fprintf(stderr, "Start MPI_send of size (%ld) to %d\n", l, n);
            ret = MPI_Send(&l, 1, MPI_LONG, n, SIZE, MPI_COMM_WORLD);
            if (ret != MPI_SUCCESS) {
                fprintf(stderr, "MPI_Send of size (%ld) to %d failed with ret (%d)\n",
                    l, n, ret);
            }
            fprintf(stderr, "Start MPI_send of data (sz %ld) to %d\n", l, n);
            ret = MPI_Send(buff, (int)l, MPI_BYTE, n, DATA, MPI_COMM_WORLD);
            if (ret != MPI_SUCCESS) {
                fprintf(stderr, "MPI_Send of buff (sz %ld) to %d failed with ret (%d)\n",
                    l, n, ret);
            }
            fprintf(stderr, "Start MPI_recv of ack (sz %ld) to %d\n", l, n);
            ret = MPI_Recv(&ack, 1, MPI_INT, n, ACK, MPI_COMM_WORLD, &status);
            if (ret != MPI_SUCCESS) {
                fprintf(stderr, "MPI_Recv of ack from %d failed with ret (%d), status.error (%d)\n",
                    n, ret, status.MPI_ERROR);
            }
            if (!ack) {
                fprintf(stderr, "Master: slave %d got NULL in data, reducing size\n", n);
                /* Never let the size reach zero or go negative. */
                if (l <= step) {
                    fprintf(stderr, "Master: cannot reduce size below %ld for slave %d, giving up\n",
                        l, n);
                    terminate();
                    return;
                }
                l -= step;
            }
        }
    }

    if (mode == MMAP) {
        munmap(buff, mmaplength);
        close(fd);
    }
    if (mode == MALLOC) {
        free(buff);
    }

    fprintf(stderr, "Master returning\n");
}

void slave(int nodes, int me, int argc, char **argv)
{
    int ret, ack, i;
    size_t newdatasz, datasz = 0;
    char *data = NULL;
    MPI_Status status;

    ack = 0;
    while (!ack) {
	newdatasz = 0;
	fprintf(stderr, "Me %d: Start MPI_recv of size\n", me);
	ret = MPI_Recv(&newdatasz, 1, MPI_LONG, 0, SIZE, MPI_COMM_WORLD, &status);
	if (ret != MPI_SUCCESS) {
	    fprintf(stderr, "Me %d: Recv of newdatasz failed with ret (%d), status.error (%d)\n",
		me, ret, status.MPI_ERROR);
	}
	fprintf(stderr, "Me %d: got newdatasz (%ld)\n", me, newdatasz);
	if (newdatasz > datasz) {
	    data = realloc(data, newdatasz);
	}
	if (newdatasz == 0) {
	    fprintf(stderr, "Me %d: ERROR got newdatasz == 0!!\n", me);
	    terminate();
	}
	fprintf(stderr, "Me %d: Start MPI_recv of data (sz %ld)\n", me, newdatasz);
	ret = MPI_Recv(data, (int)newdatasz, MPI_CHAR, 0, DATA, MPI_COMM_WORLD, &status);
	if (ret != MPI_SUCCESS) {
	    fprintf(stderr, "Me %d: Recv of data (sz %ld) failed with ret (%d), status.error (%d)\n",
		me, newdatasz, ret, status.MPI_ERROR);
	}
	ack = 1;
	for (i = 0; i < newdatasz; i++) {
	    if (data[i] == '\0') {
		fprintf(stderr, "Me %d: Got NULL in data at pos %d\n", me, i);
		ack = 0;
		break;
	    }
	}
	fprintf(stderr, "Me %d: Start MPI_send of ack (%d) (sz %ld)\n", me, ack, newdatasz);
	ret = MPI_Send(&ack, 1, MPI_INT, 0, ACK, MPI_COMM_WORLD);
	if (ret != MPI_SUCCESS) {
	    fprintf(stderr, "Me %d: mpi_send ack returned error, %d\n",
		me, ret);
	}
	datasz = newdatasz;
    }

    fprintf(stderr, "Me %d: returning\n", me);
    return;
}

Reply via email to