All,
We are trying to combine MPI_Put and MPI_Win_flush on dynamic windows
locked with MPI_Win_lock_all to mimic a blocking put. The application is
(potentially) multi-threaded, so we rely on MPI_THREAD_MULTIPLE support
being available.
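In essence, each individual put is issued like this (this is the
put_blocking helper from the reproducer attached below; the window has
been locked with MPI_Win_lock_all beforehand):
```
static void
put_blocking(uint64_t value, int target, MPI_Aint offset, MPI_Win win)
{
    MPI_Put(&value, 1, MPI_UNSIGNED_LONG, target, offset, 1, MPI_UNSIGNED_LONG, win);
    MPI_Win_flush(target, win); // complete the put at origin and target before returning
}
```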
When I use this combination (MPI_Put + MPI_Win_flush) in our
application, I occasionally see threads hang in MPI_Win_flush, probably
waiting for some progress to happen. However, when I try to distill this
into a small reproducer (attached; the original application has multiple
layers of abstraction), I instead see fatal errors in MPI_Win_flush when
using more than one thread:
```
[beryl:18037] *** An error occurred in MPI_Win_flush
[beryl:18037] *** reported by process [4020043777,2]
[beryl:18037] *** on win pt2pt window 3
[beryl:18037] *** MPI_ERR_RMA_SYNC: error executing rma sync
[beryl:18037] *** MPI_ERRORS_ARE_FATAL (processes in this win will now abort,
[beryl:18037] *** and potentially your MPI job)
```
I could only trigger this on dynamic windows with multiple concurrent
threads running.
So: is this a valid MPI program (aside from the missing clean-up at the
end ;))? It seems to run fine with MPICH, but maybe they are just more
tolerant of some programming errors...
If it is a valid MPI program, I assume there is some race condition in
MPI_Win_flush that leads to the fatal error (or the hang that I observe
otherwise)?
I tested this with Open MPI 1.10.5 on a single-node Linux Mint 18.1
system with the stock 4.8.0-36 kernel (aka my laptop). Open MPI and the
test were both compiled with GCC 5.3.0. I could not run it with Open MPI
2.0.2 due to the fatal error in MPI_Win_create (which also applies to
MPI_Win_create_dynamic, see my other thread; I am not sure whether the
two issues are related).
Please let me know whether this is a valid use case; I am happy to
provide additional information if required.
Many thanks in advance!
Cheers
Joseph
--
Dipl.-Inf. Joseph Schuchart
High Performance Computing Center Stuttgart (HLRS)
Nobelstr. 19
D-70569 Stuttgart
Tel.: +49(0)711-68565890
Fax: +49(0)711-6856832
E-Mail: schuch...@hlrs.de
#include <mpi.h>
#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>
#include <omp.h>
#include <assert.h>
static void
allocate_dynamic(size_t elsize, size_t count, MPI_Win *win, MPI_Aint *disp_set, char **b)
{
    char *base;
    MPI_Aint disp;
    int rank;
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    if (MPI_Win_create_dynamic(MPI_INFO_NULL, MPI_COMM_WORLD, win) != MPI_SUCCESS) {
        printf("Failed to create dynamic window!\n");
        exit(1);
    }
    if (MPI_Alloc_mem(elsize*count, MPI_INFO_NULL, &base) != MPI_SUCCESS) {
        printf("Failed to allocate memory!\n");
        exit(1);
    }
    if (MPI_Win_attach(*win, base, elsize*count) != MPI_SUCCESS) {
        printf("Failed to attach memory to dynamic window!\n");
        exit(1);
    }
    // for dynamic windows the target displacement is the absolute address of
    // the attached memory, so exchange the base addresses of all processes
    MPI_Get_address(base, &disp);
    printf("Offset at process %i: %p (%lu)\n", rank, (void*)base, (unsigned long)disp);
    MPI_Allgather(&disp, 1, MPI_AINT, disp_set, 1, MPI_AINT, MPI_COMM_WORLD);
    // open a shared access epoch to all processes; individual puts are
    // completed with MPI_Win_flush
    MPI_Win_lock_all(0, *win);
    *b = base;
}
static void
put_blocking(uint64_t value, int target, MPI_Aint offset, MPI_Win win)
{
    // a put followed by a flush to the same target emulates a blocking put
    MPI_Put(&value, 1, MPI_UNSIGNED_LONG, target, offset, 1, MPI_UNSIGNED_LONG, win);
    MPI_Win_flush(target, win);
}
int main(int argc, char **argv)
{
    int provided;
    int rank, size;
    MPI_Init_thread(&argc, &argv, MPI_THREAD_MULTIPLE, &provided);
    printf("MPI_THREAD_MULTIPLE supported: %s\n", (provided == MPI_THREAD_MULTIPLE) ? "yes" : "no");
    MPI_Win win;
    char *base;
    // every thread writes elem_per_thread values to our right neighbor;
    // the offset into the target window is controlled by the thread ID
    int elem_per_thread = 10;
    // use the maximum thread count: omp_get_num_threads() returns 1 outside
    // of a parallel region, which would make the attached region too small
    int num_threads = omp_get_max_threads();
    // query rank and size before they are used below
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &size);
    MPI_Aint *disp_set = calloc(size, sizeof(MPI_Aint));
    allocate_dynamic(sizeof(uint64_t), elem_per_thread*num_threads, &win, disp_set, &base);
    int target = (rank + 1) % size;
    #pragma omp parallel
    {
        int thread_num = omp_get_thread_num();
        MPI_Aint offset = disp_set[target] + (elem_per_thread * thread_num)*sizeof(uint64_t);
        for (int i = 0; i < elem_per_thread; i++) {
            printf("[%i:%i] win[%zu => %zu + (%i * %i)*%zu] <= %i\n", rank, thread_num,
                (size_t)(offset + (sizeof(uint64_t) * i)), (size_t)disp_set[target],
                elem_per_thread, thread_num, sizeof(uint64_t), thread_num);
            put_blocking(thread_num, target, offset + (sizeof(uint64_t) * i), win);
        }
    }
    MPI_Barrier(MPI_COMM_WORLD);
    // check the result locally (not very sophisticated): verify the first value
    // written into our window by each thread of the left neighbor
    #pragma omp parallel
    {
        int thread_num = omp_get_thread_num();
        assert(((uint64_t*)base)[(elem_per_thread * thread_num)] == (uint64_t)thread_num);
    }
    MPI_Win_unlock_all(win);
    // MPI_Win_free(&win);
    free(disp_set);
    MPI_Finalize();
    return 0;
}
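For reference, the reproducer can be built and run along these lines
(the file name and thread count are placeholders, two ranks on my laptop):
```
mpicc -fopenmp -o mpi_flush_test mpi_flush_test.c
OMP_NUM_THREADS=4 mpirun -n 2 ./mpi_flush_test
```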