Dear all,
I have a little piece of code shown below that initializes a
multidimensional Fortran array and performs:
- a non-blocking MPI_Iallreduce immediately followed by an MPI_Wait
- a blocking MPI_Allreduce
After both calls, it displays a few elements of the input and output
buffers.
In the output I am showing below, the first column gives the indices
of the element displayed, the second column gives the corresponding
element in the input array, the third column gives the corresponding
element in the output array. All the processes have the same input
array so the output should just be a multiple of the output.
I tried to compile and execute it with OpenMPI 4.0.1 on a single node,
I get:
coti@xxx:~$ mpiexec -n 4 test_allreduce
Rank 3 / 4
Rank 1 / 4
Rank 0 / 4
Rank 2 / 4
Non-blocking
1,1,1,1 5 1252991616
1,1,1,2 6 24
1,1,1,3 7 28
1,1,1,4 8 32
1,1,1,5 9 36
----
1,1,2,1 6 24
1,2,1,1 6 24
2,1,1,1 6 21197
----
Blocking
1,1,1,1 5 20
1,1,1,2 6 24
1,1,1,3 7 28
1,1,1,4 8 32
1,1,1,5 9 36
----
1,1,2,1 6 24
1,2,1,1 6 24
2,1,1,1 6 24
----
I just cloned the master branch of the Git repository and compiled it
(hash db52da40c379610360676f225cd7c767e5a964d3), with the following
configuration line:
$ ./configure --prefix=<....> --enable-mpi-fortran=usempi
I get:
coti@yyy:~$ mpiexec --mca btl vader,self -n 4 ./test_allreduce
Rank 0 / 4
Rank 1 / 4
Rank 2 / 4
Rank 3 / 4
Non-blocking
1,1,1,1 5 -1092661536
1,1,1,2 6 24
1,1,1,3 7 28
1,1,1,4 8 32
1,1,1,5 9 36
----
1,1,2,1 6 24
1,2,1,1 6 -1354461780
2,1,1,1 6 130622
----
Blocking
1,1,1,1 5 20
1,1,1,2 6 24
1,1,1,3 7 28
1,1,1,4 8 32
1,1,1,5 9 36
----
1,1,2,1 6 24
1,2,1,1 6 24
2,1,1,1 6 24
----
I have tried it with other MPI implementations (Intel MPI 19 and MPICH
3.3), and they gave me the same output with the blocking and
non-blocking calls:
coti@yyy:~$ mpiexec -n 4 ./test_allreduce
Rank 0 / 4
Rank 1 / 4
Rank 2 / 4
Rank 3 / 4
Non-blocking
1,1,1,1 5 20
1,1,1,2 6 24
1,1,1,3 7 28
1,1,1,4 8 32
1,1,1,5 9 36
----
1,1,2,1 6 24
1,2,1,1 6 24
2,1,1,1 6 24
----
Blocking
1,1,1,1 5 20
1,1,1,2 6 24
1,1,1,3 7 28
1,1,1,4 8 32
1,1,1,5 9 36
----
1,1,2,1 6 24
1,2,1,1 6 24
2,1,1,1 6 24
----
Is there anything wrong with my call to MPI_Iallreduce/MPI_Wait?
Thanks,
Camille
$ cat test_allreduce.f90
program main
use mpi
integer, allocatable, dimension(:,:,:,:,:) :: buff_in
integer, allocatable, dimension(:,:,:,:) :: buff_out
integer :: N, rank, size, err, i, j, k, l, m
integer :: req
N = 8
allocate( buff_in( N, N, N, N, N ) )
allocate( buff_out( N, N, N, N ) )
call mpi_init( err )
call mpi_comm_rank( mpi_comm_world, rank, err )
call mpi_comm_size( mpi_comm_world, size, err )
write( 6, * ) "Rank", rank, " / ", size
do i=1, N
do j=1, N
do k=1, N
do l=1, N
do m=1, N
buff_in( i, j, k, l, m ) = i + j + k + l + m
end do
end do
end do
end do
end do
buff_out( :,:,:,: ) = 0
! non-blocking
call mpi_iallreduce( buff_in( 1, :, :, :, : ), buff_out, N*N*N*N,
MPI_INT, MPI_SUM, mpi_comm_world, req, err )
call mpi_wait( req, MPI_STATUS_IGNORE, err )
if( 0 == rank ) then
write( 6, * ) "Non-blocking"
write( 6, * ) "1,1,1,1", buff_in( 1, 1, 1, 1, 1 ), buff_out( 1,
1, 1, 1 )
write( 6, * ) "1,1,1,2", buff_in( 1, 1, 1, 1, 2 ), buff_out( 1,
1, 1, 2 )
write( 6, * ) "1,1,1,3", buff_in( 1, 1, 1, 1, 3 ), buff_out( 1,
1, 1, 3 )
write( 6, * ) "1,1,1,4", buff_in( 1, 1, 1, 1, 4 ), buff_out( 1,
1, 1, 4 )
write( 6, * ) "1,1,1,5", buff_in( 1, 1, 1, 1, 5 ), buff_out( 1,
1, 1, 5 )
write( 6, * ) "----"
write( 6, * ) "1,1,2,1", buff_in( 1, 1, 1, 2, 1 ), buff_out( 1,
1, 2, 1 )
write( 6, * ) "1,2,1,1", buff_in( 1, 1, 2, 1, 1 ), buff_out( 1,
2, 1, 1 )
write( 6, * ) "2,1,1,1", buff_in( 1, 2, 1, 1, 1 ), buff_out( 2,
1, 1, 1 )
write( 6, * ) "----"
end if
! blocking
buff_out( :,:,:,: ) = 0
call mpi_allreduce( buff_in( 1, :, :, :, : ), buff_out, N*N*N*N,
MPI_INT, MPI_SUM, mpi_comm_world, err )
if( 0 == rank ) then
write( 6, * ) "Blocking"
write( 6, * ) "1,1,1,1", buff_in( 1, 1, 1, 1, 1 ), buff_out( 1,
1, 1, 1 )
write( 6, * ) "1,1,1,2", buff_in( 1, 1, 1, 1, 2 ), buff_out( 1,
1, 1, 2 )
write( 6, * ) "1,1,1,3", buff_in( 1, 1, 1, 1, 3 ), buff_out( 1,
1, 1, 3 )
write( 6, * ) "1,1,1,4", buff_in( 1, 1, 1, 1, 4 ), buff_out( 1,
1, 1, 4 )
write( 6, * ) "1,1,1,5", buff_in( 1, 1, 1, 1, 5 ), buff_out( 1,
1, 1, 5 )
write( 6, * ) "----"
write( 6, * ) "1,1,2,1", buff_in( 1, 1, 1, 2, 1 ), buff_out( 1,
1, 2, 1 )
write( 6, * ) "1,2,1,1", buff_in( 1, 1, 2, 1, 1 ), buff_out( 1,
2, 1, 1 )
write( 6, * ) "2,1,1,1", buff_in( 1, 2, 1, 1, 1 ), buff_out( 2,
1, 1, 1 )
write( 6, * ) "----"
end if
call mpi_finalize( err )
end program main