Hi!

I want to convert the input/output of my program to MPI-IO routines. To
write the same data, as is written in the already implement textmode
routines, I need a custom datatype consisting of several integers and reals.

While investigating the functionality, I created a module, which cares for
the type, and a small testprogram, which performs a mpi_scatter, a parallel
write with mpi_file_write and finally a single read from the master with
mpi_file_read, to compare the data with the original dataset.

My testtype for this particular problem is defined as an 8 byte real
followed by a 4 byte integer. This way, the datasize is 12 byte, while the
extent differs for different compilers (gfortran has an extent of 16 byte,
while ifort has an extent of 12 byte). I know, that I can fix the extent
with the help of mpi_type_create_resized after measuring the distance of
the elements in an array, but this doesn't show the strange behaviour, I
don't understand.

My problem is, that I don't really understand the way, how mpi_file_read
uses this extent to perform file-io on arrays of this data. On the one
hand, the data, which is written to the file, is only the data, which is
actually used, i.e. no padding is written to the file and thus, only the
mpi_type_size is interesting for the writing. On the other hand, the
missing padding in the extent of ifort causes mpi_file_read to skip some of
the last datasets. In my opinion, this would make some sense, if the
padding of the datasets would cause corruption on the read data after the
first padding, but this is not the case. If you execute the appended
program with 4 processes, the last two of eight datasets stay uninitialized
(keep their values of -1 for each member) what suggests that the amount of
data is somehow related to the extent rather than the type_size.

What is the problem here? Do I misunderstand, how these routines should
work?

Best regards,
Stefan

PS: Does the stat(1) variable mean anything? It returns 0 all the time.


### CODE ###

module mpiio
    !use mpi
    implicit none

    include 'mpif.h'

    private
    public :: r1i1_iotype
    public :: r1i1_mpi
    public :: init_mpiio, get_mpiio_dtypes

    type, bind(c) :: r1i1_iotype
        real(8) :: r
        integer :: i
    end type

    integer :: r1i1_mpi

contains

    ! initializer, which creates datatypes for all types in this module
    subroutine init_mpiio()
        r1i1_mpi = get_r1i1_type()
    end subroutine

    ! inforoutine, which prints information about all types in this module
    subroutine get_mpiio_dtypes()
        write(*,"(A80)") "=== Available Datatypes ================" // &
            & "========================================"
        call get_dtype_info(r1i1_mpi)
        write(*,"(A80)") "========================================" // &
            & "========================================"
    end subroutine

    ! generic routine, which prints information about a datatype
    subroutine get_dtype_info(dt)
        integer :: dt
        integer :: ierror, nl, i, sz
        character(LEN=63) :: type_name
        integer(mpi_address_kind) :: lb, extent
        integer(mpi_address_kind) :: true_lb, true_extent
        integer :: comb
        integer :: num_int, num_addr, num_dt
        integer, dimension(:), allocatable :: aob
        integer(mpi_address_kind), dimension(:), allocatable :: aod
        integer, dimension(:), allocatable :: aot

        ! get name of datatype
        call mpi_type_get_name(dt, type_name, nl, ierror)

        write(*,*) "Name: ", type_name

        ! get start address and extent
        call mpi_type_get_extent(dt, lb, extent, ierror)
        call mpi_type_get_true_extent(dt, true_lb, true_extent, ierror)

        write(*,*) "lb:     ", lb,     " true lb:     ", true_lb
        write(*,*) "extent: ", extent, " true extent: ", true_extent

        ! get contents
        call mpi_type_get_envelope(dt, num_int, num_addr, num_dt, comb,
ierror)

        allocate(aob(num_int))
        allocate(aod(num_addr))
        allocate(aot(num_dt))

        call mpi_type_get_contents(dt, num_int, num_addr, num_dt, aob, aod,
aot, ierror)

        write(*,*) "consists of: ", num_int - 1, aob(1)

        do i = 1, num_int - 1
            call mpi_type_get_name(aot(i), type_name, nl, ierror)
            write(*,*) i, ":", aob(i+1), aod(i), trim(type_name)
        end do

        call mpi_type_size(dt, sz, ierror)
        write(*,*) "data size: ", sz
    end subroutine

    function get_r1i1_type() result(dt)
        ! datatype
        integer :: dt
        type(r1i1_iotype) :: t
        integer :: ierror
        integer(mpi_address_kind) :: a_base, a_r, a_i
        ! number of blocks
        integer, parameter :: cnt = 2
        ! elements in each block
        integer, dimension(cnt) :: aob
        ! types in each block
        integer, dimension(cnt) :: aot
        ! displacements of each block
        integer(mpi_address_kind), dimension(cnt) :: aod

        aob = [1, 1]
        aot = [mpi_double_precision, mpi_integer]

        ! get base address and address of members
        call mpi_get_address(t, a_base, ierror)
        call mpi_get_address(t%r, a_r, ierror)
        call mpi_get_address(t%i, a_i, ierror)

        ! convert to offsets/displacements
        a_r = a_r - a_base
        a_i = a_i - a_base

        ! displacements
        aod = [a_r, a_i]

        ! create type
        call mpi_type_create_struct(cnt, aob, aod, aot, dt, ierror)
        ! name the type
        call mpi_type_set_name(dt, "r1i1", ierror)

        ! commit type
        call mpi_type_commit(dt, ierror)

    end function
end module

program test
    use mpiio

    implicit none

    include 'mpif.h'

    type(r1i1_iotype), dimension(:), allocatable :: v, w
    integer :: dt
    integer :: myrank, mpisize
    integer :: ierror
    real(8) :: ranarray(2)
    integer :: i, n, m
    integer :: fh
    integer, dimension(mpi_status_size) :: stat
    integer(mpi_offset_kind) :: fp, fs, fs2
    integer :: sz
    logical :: correct

    ! problem size
    m = 2

    ! initializer
    call mpi_init(ierror)
    call mpi_comm_rank(mpi_comm_world, myrank, ierror)
    call mpi_comm_size(mpi_comm_world, mpisize, ierror)
    ! initialize datatypes and print info
    call init_mpiio()
    if (myrank == 0) then
        call get_mpiio_dtypes()
    endif
    dt = r1i1_mpi

    ! synchronize to avoid mess of output
    call mpi_barrier(mpi_comm_world, ierror)

    ! create data
    n = mpisize * m
    allocate(w(m))
    ! root process will create a batch of values
    if (myrank == 0) then
        allocate(v(n))
        do i = 1, n
            call random_number(ranarray)
            v(i) % r = ranarray(1)
            v(i) % i = int(huge(v(i)%i) * ranarray(2))
        end do
    end if

    ! scatter across the processes
    call mpi_scatter(v, m, dt, w, m, dt, 0, mpi_comm_world, ierror)

    ! open collective file
    call mpi_file_open(mpi_comm_world, "testfile", mpi_mode_create +
mpi_mode_rdwr, mpi_info_null, fh, ierror)
    ! get size of datatype to calculate position in file
    call mpi_type_size(dt, sz, ierror)
    fs = n * sz
    fp = myrank * m * sz
    if (myrank == 0) then
        write(*,*) sz
    end if
    ! search position for each process
    call mpi_file_seek(fh, fp, mpi_seek_set, ierror)
    ! write personal part
    call mpi_file_write(fh, w, m, dt, stat, ierror)
    ! synchronize the file, so the master process can start reading
    call mpi_file_sync(fh, ierror)
    call mpi_barrier(mpi_comm_world, ierror)

    if (myrank == 0) then
        ! inquire the file size and compare with the previously calculated
        call mpi_file_get_size(fh, fs2, ierror)
        write(*,*) "file size: ", fs2, "(", fs, ")"

        ! create space for read data
        deallocate(w)
        allocate(w(n))
        ! initialize all fields with -1
        do i = 1, n
            w(i) % r = -1
            w(i) % i = -1
        end do
        ! search the beginning
        call mpi_file_seek(fh, int(0,kind=mpi_offset_kind), mpi_seek_set,
ierror)
        ! read all data
        call mpi_file_read(fh, w, n, dt, stat, ierror)
        write(*,*) ierror
        write(*,*) stat
        write(*,*)

        ! compare
        correct = .true.
        do i = 1, n
            correct = correct .and. (v(i) % r == w(i) % r)
            correct = correct .and. (v(i) % i == w(i) % i)
            write(*,*) i
            write(*,*) v(i) % r
            write(*,*) w(i) % r
            write(*,*) v(i) % i
            write(*,*) w(i) % i
            write(*,*)
        end do

        write(*,*) "original and read data are the same: ", correct

        deallocate(v)
    end if
    deallocate(w)

    ! close file
    call mpi_file_close(fh, ierror)

    ! finish
    call mpi_finalize(ierror)

end program

Reply via email to