Here's a chunk of code that reproduces the error every time on my cluster. If you call it with $((2**24)) as a parameter it should run fine; change it to $((2**27)) and it will stall.
On Tue, Apr 5, 2011 at 11:24 AM, Terry Dontje <terry.don...@oracle.com>wrote: > It was asked during the community concall whether the below may be related > to ticket #2722 https://svn.open-mpi.org/trac/ompi/ticket/2722? > > --td > > On 04/04/2011 10:17 PM, David Zhang wrote: > > Any error messages? Maybe the nodes ran out of memory? I know MPI > implement some kind of buffering under the hood, so even though you're > sending array's over 2^26 in size, it may require more than that for MPI to > actually send it. > > On Mon, Apr 4, 2011 at 2:16 PM, Michael Di Domenico < > mdidomeni...@gmail.com> wrote: > >> Has anyone seen an issue where OpenMPI/Infiniband hangs when sending >> messages over 2^26 in size? >> >> For a reason i have not determined just yet machines on my cluster >> (OpenMPI v1.5 and Qlogic Stack/QDR IB Adapters) is failing to send >> array's over 2^26 in size via the AllToAll collective. (user code) >> >> Further testing seems to indicate that an MPI message over 2^26 fails >> (tested with IMB-MPI) >> >> Running the same test on a different older IB connected cluster seems >> to work, which would seem to indicate a problem with the infiniband >> drivers of some sort rather then openmpi (but i'm not sure). >> >> Any thoughts, directions, or tests? >> _______________________________________________ >> users mailing list >> us...@open-mpi.org >> http://www.open-mpi.org/mailman/listinfo.cgi/users >> > > > > -- > David Zhang > University of California, San Diego > > > _______________________________________________ > users mailing > listusers@open-mpi.orghttp://www.open-mpi.org/mailman/listinfo.cgi/users > > > > -- > [image: Oracle] > Terry D. 
Dontje | Principal Software Engineer > Developer Tools Engineering | +1.781.442.2631 > Oracle * - Performance Technologies* > 95 Network Drive, Burlington, MA 01803 > Email terry.don...@oracle.com > > > > > _______________________________________________ > users mailing list > us...@open-mpi.org > http://www.open-mpi.org/mailman/listinfo.cgi/users >
#include <assert.h> #include <inttypes.h> #include <stdio.h> #include <stdlib.h> #include <mpi.h> typedef signed char int8; typedef unsigned char uint8; typedef short int16; typedef unsigned short uint16; typedef int int32; typedef unsigned int uint32; typedef long int64; typedef unsigned long uint64; #define I64(c) (c##L) #define UI64(c) (c##uL) #define _BR_RUNUP_ 128 #define _BR_LG_TABSZ_ 7 #define _BR_TABSZ_ (I64(1) << _BR_LG_TABSZ_) #define _ZERO64 UI64(0x0) #define _maskl(x) (((x) == 0) ? _ZERO64 : ((~_ZERO64) << (64-(x)))) #define _maskr(x) (((x) == 0) ? _ZERO64 : ((~_ZERO64) >> (64-(x)))) #define _BR_64STEP_(H,L,A,B) {\ uint64 x;\ x = H ^ (H << A) ^ (L >> (64 - A));\ H = L | (x >> (B - 64));\ L = x << (128 - B);\ } static uint64_t _rtc() { unsigned hi, lo, tmp; asm volatile ("rdtsc" : "=a" (lo), "=d" (hi)); return (uint64_t)hi << 32 | lo; } typedef struct { uint64 hi, lo, ind; uint64 tab[_BR_TABSZ_]; } brand_t; static uint64 brand (brand_t *p) { uint64 hi=p->hi, lo=p->lo, i=p->ind, ret; ret = p->tab[i]; // 64-step a primitive trinomial LRS: 0, 45, 118 _BR_64STEP_(hi,lo,45,118); p->tab[i] = ret + hi; p->hi = hi; p->lo = lo; p->ind = hi & _maskr(_BR_LG_TABSZ_); return ret; } static void brand_init (brand_t *p, uint64 val) { int64 i; uint64 hi, lo; hi = UI64(0x9ccae22ed2c6e578) ^ val; lo = UI64(0xce4db5d70739bd22) & _maskl(118-64); // we 64-step 0, 33, 118 in the initialization for (i = 0; i < 64; i++) _BR_64STEP_(hi,lo,33,118); for (i = 0; i < _BR_TABSZ_; i++) { _BR_64STEP_(hi,lo,33,118); p->tab[i] = hi; } p->ind = _BR_TABSZ_/2; p->hi = hi; p->lo = lo; for (i = 0; i < _BR_RUNUP_; i++) brand(p); } void rubbish(brand_t* state, uint64_t n_words, uint64_t array[]) { uint64_t i; for (i = 0; i < n_words; i++) array[i] = brand(state); } void usage(const char* prog) { int me; MPI_Comm_rank(MPI_COMM_WORLD, &me); if (me == 0) fprintf(stderr, "usage: %s #bytes/process\n", prog); exit(2); } int main(int argc, char* argv[]) { brand_t state; int i_proc, n_procs, 
words_per_chunk, loop; size_t array_size; uint64_t* source; uint64_t* dest; MPI_Init(&argc, &argv); MPI_Comm_size(MPI_COMM_WORLD, &n_procs); MPI_Comm_rank(MPI_COMM_WORLD, &i_proc); MPI_Datatype uint64_type = MPI_LONG; MPI_Aint foo = 0; MPI_Type_extent(uint64_type, &foo); assert(foo == 8); if (argc < 2) usage(argv[0]); array_size = strtoull(argv[1], NULL, 0); words_per_chunk = (array_size >> 3) / n_procs; if (i_proc == 0) printf("array size is 0x%zx bytes (%g)\n", array_size, (double)array_size); source = malloc(array_size); dest = malloc(array_size); if (source == NULL || dest == NULL) { fprintf(stderr, "process %d out of memory\n", i_proc); exit(1); } brand_init(&state, i_proc+1); rubbish(&state, array_size >> 3, source); printf("process %d starting\n", i_proc); for (loop = 0; loop < 3; loop++) { int64_t time = _rtc(); int err; err = MPI_Alltoall(source, words_per_chunk, uint64_type, dest, words_per_chunk, uint64_type, MPI_COMM_WORLD); time = _rtc() - time; if (err != MPI_SUCCESS) printf("process %d MPI error: %d\n", i_proc, err); else printf("process %d: time %g ticks/byte\n", i_proc, (double)time / array_size); } if (i_proc == 1) printf("%016" PRIx64 "\n", dest[0]); MPI_Finalize(); return 0; }