All,

It seems there is a regression in the handling of dynamic windows between Open MPI 1.10.3 and 2.0.0. I am attaching a test case that works fine with Open MPI 1.8.3 and fails with version 2.0.0 with the following output:

===
[0] MPI_Get 0 -> 3200 on first memory region
[cl3fr1:7342] *** An error occurred in MPI_Get
[cl3fr1:7342] *** reported by process [908197889,0]
[cl3fr1:7342] *** on win rdma window 3
[cl3fr1:7342] *** MPI_ERR_RMA_RANGE: invalid RMA address range
[cl3fr1:7342] *** MPI_ERRORS_ARE_FATAL (processes in this win will now abort,
[cl3fr1:7342] ***    and potentially your MPI job)
===

Expected output is:
===
[0] MPI_Get 0 -> 100 on first memory region:
[0] Done.
[0] MPI_Get 0 -> 100 on second memory region:
[0] Done.
===

The code allocates a dynamic window and attaches two memory regions to it before accessing both memory regions using MPI_Get. With Open MPI 2.0.0, access to both memory regions fails. Access to the first memory region succeeds only if the second memory region is not attached. With Open MPI 1.10.3, all MPI operations succeed.

Please let me know if you need any additional information or think that my code example is not standard compliant.

Best regards
Joseph


--
Dipl.-Inf. Joseph Schuchart
High Performance Computing Center Stuttgart (HLRS)
Nobelstr. 19
D-70569 Stuttgart

Tel.: +49(0)711-68565890
Fax: +49(0)711-6856832
E-Mail: schuch...@hlrs.de

/*
 * mpi_dynamic_win.cc
 *
 *  Created on: Aug 24, 2016
 *      Author: joseph
 */

#include <mpi.h>
#include <stdlib.h>
#include <stdio.h>

/*
 * Allocate a buffer of bufsize bytes, attach it to the dynamic window
 * `win`, and all-gather the buffer's address from every rank into
 * disp_set (must hold one MPI_Aint per rank in MPI_COMM_WORLD), so
 * disp_set[r] is the base displacement of rank r's segment.
 *
 * Returns 0 on success, -1 on failure.
 *
 * Note: the attached buffer is intentionally never freed/detached in
 * this test case; callers rely on it staying attached for the window's
 * lifetime.
 */
static int allocate_shared(size_t bufsize, MPI_Win win, MPI_Aint *disp_set) {
  int ret;
  char *sub_mem;
  MPI_Aint disp;

  /* Fixed: the original created an "alloc_shared_noncontig" MPI_Info that
   * was never passed to any MPI call and was leaked on every early-return
   * error path; it has been removed. */
  sub_mem = malloc(bufsize);
  if (sub_mem == NULL) {
    printf("malloc failed!\n");
    return -1;
  }

  /* Attach the allocated memory to the dynamic window */
  ret = MPI_Win_attach(win, sub_mem, bufsize);

  if (ret != MPI_SUCCESS) {
    printf("MPI_Win_attach failed!\n");
    free(sub_mem);
    return -1;
  }

  /* Get the local address of the attached segment */
  ret = MPI_Get_address(sub_mem, &disp);

  if (ret != MPI_SUCCESS) {
    printf("MPI_Get_address failed!\n");
    return -1;
  }

  /* Publish addresses so every rank can target every other rank's segment */
  ret = MPI_Allgather(&disp, 1, MPI_AINT, disp_set, 1, MPI_AINT, MPI_COMM_WORLD);

  if (ret != MPI_SUCCESS) {
    printf("MPI_Allgather failed!\n");
    return -1;
  }

  return 0;
}

/*
 * Test driver: create a dynamic window, attach two memory regions to it
 * via allocate_shared(), then MPI_Get every element of each region from
 * the ring neighbor (rank + 1) and report completion.
 */
int main(int argc, char **argv)
{
  MPI_Win win;
  const size_t nelems = 10*10;
  const size_t bufsize = nelems * sizeof(double);
  MPI_Aint   *disp_set, *disp_set2;
  int rank, size;

  double buf[nelems];

  MPI_Init(&argc, &argv);
  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
  MPI_Comm_size(MPI_COMM_WORLD, &size);

  disp_set  = (MPI_Aint*) malloc(size * sizeof(MPI_Aint));
  disp_set2 = (MPI_Aint*) malloc(size * sizeof(MPI_Aint));
  if (disp_set == NULL || disp_set2 == NULL) {
    printf("malloc failed!\n");
    MPI_Abort(MPI_COMM_WORLD, 1);
  }

  int ret = MPI_Win_create_dynamic(MPI_INFO_NULL, MPI_COMM_WORLD, &win);
  if (ret != MPI_SUCCESS) {
    printf("MPI_Win_create_dynamic failed!\n");
    exit(1);
  }

  MPI_Win_lock_all (0, win);

  /* Attach two memory regions to the dynamic window and publish their
   * per-rank base displacements. */
  allocate_shared(bufsize, win, disp_set);
  allocate_shared(bufsize, win, disp_set2);

  /* Read the whole first region from the ring neighbor.
   * Fixed: neighbor was computed with "/ size" (integer division, which
   * maps every rank except the last to 0) instead of the intended
   * "% size" ring wrap-around; the loop also stopped at nelems-1,
   * skipping the last element. */
  {
    int elem;
    int neighbor = (rank + 1) % size;
    if (rank == 0) printf("[%i] MPI_Get 0 -> %zu on first memory region: \n", rank, nelems);
    for (elem = 0; elem < nelems; elem++) {
      MPI_Aint off = elem * sizeof(double);
      /* MPI_Aint_add (MPI-3.1) would be the portable way to form this
       * displacement; plain addition is kept for pre-3.1 installations. */
      MPI_Aint disp = disp_set[neighbor] + off;
      MPI_Get(&buf[elem], sizeof(double), MPI_BYTE, neighbor, disp, sizeof(double), MPI_BYTE, win);
    }
    MPI_Win_flush(neighbor, win);
    if (rank == 0) printf("[%i] Done.\n", rank);
  }

  MPI_Barrier(MPI_COMM_WORLD);

  /* Read the whole second region from the ring neighbor.
   * Fixed: this loop indexed disp_set (the FIRST region's displacements)
   * instead of disp_set2, so the second region was never actually
   * accessed — the commented-out MPI_Aint_add line in the original shows
   * disp_set2 was intended. */
  {
    int elem;
    int neighbor = (rank + 1) % size;
    if (rank == 0) printf("[%i] MPI_Get 0 -> %zu on second memory region: \n", rank, nelems);
    for (elem = 0; elem < nelems; elem++) {
      MPI_Aint off = elem * sizeof(double);
      MPI_Aint disp = disp_set2[neighbor] + off;
      MPI_Get(&buf[elem], sizeof(double), MPI_BYTE, neighbor, disp, sizeof(double), MPI_BYTE, win);
    }
    MPI_Win_flush(neighbor, win);
    if (rank == 0) printf("[%i] Done.\n", rank);
  }
  MPI_Barrier(MPI_COMM_WORLD);

  MPI_Win_unlock_all (win);

  free(disp_set);
  free(disp_set2);

  MPI_Finalize();
  return 0;
}

_______________________________________________
users mailing list
users@lists.open-mpi.org
https://rfd.newmexicoconsortium.org/mailman/listinfo/users

Reply via email to