Re: [Hdf-forum] HDF5 parallel: writing chunks of different size messes up H5 file

Elena Pourmal Sat, 15 Jun 2013 15:35:12 -0700

Thomas,

All processes have to provide the same chunk size when creating a dataset (it 
is a collective call), but each process can write different amount of data 
(i.e., hyperslabs will may have different size and shapes).  It is 
application's responsibility to handle the cases when hyperslabs from different 
processes intersect.


Elena 


~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Elena Pourmal  The HDF Group  http://hdfgroup.org   
1800 So. Oak St., Suite 203, Champaign IL 61820
217.531.6112
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~



On Jun 15, 2013, at 2:21 AM, Thomas Engels wrote:

> Hello, 
> 
> I'm trying to save arrays from distributed memory codes, and it may happen 
> that the chunks on each CPU differ in size. 
> 
> If all CPU hold the same amount of data, everything works smoothly, but I 
> cannot for the life of me figure out what goes wrong if they don't. 
> 
> this is the minimal example: 
> 
> %--------------------------------------------------------------------------------------------------------------------------------------------
>  
> program test 
>   use hdf5 
>   use mpi 
>   implicit none 
>   character(len=11), parameter :: filename = "sds_chnk.h5"  ! file name 
>   character(len=8), parameter :: dsetname = "intarray" ! dataset name 
>   integer(hid_t) :: file_id       ! file identifier 
>   integer(hid_t) :: dset_id       ! dataset identifier 
>   integer(hid_t) :: filespace     ! dataspace identifier in file 
>   integer(hid_t) :: memspace      ! dataspace identifier in memory 
>   integer(hid_t) :: plist_id      ! property list identifier 
>   !!!!!!!!!!!!!!!!!!!!!!!!!!! 
>   integer, parameter :: nx=4,ny=8    ! (global-) size of array 
>   integer(hsize_t), dimension(2) :: dimsf = (/nx,ny/) ! global dataset size 
>   !!!!!!!!!!!!!!!!!!!!!!!!!!! 
>   integer(hsize_t), dimension(2) :: chunk_dims ! chunks dimensions (local) 
>   integer(hsize_t),  dimension(2) :: count   = (/1,1/) 
>   integer(hssize_t), dimension(2) :: offset 
>   integer(hsize_t),  dimension(2) :: stride = (/1,1/) 
>   integer(hsize_t),  dimension(2) :: block 
>   integer, allocatable :: data (:,:)  ! data to write 
>   integer :: rank = 2 ! dataset rank 
>   integer :: error, error_n  ! error flags 
>   integer :: xmin,xmax,ymin,ymax 
>   integer :: mpierror       ! mpi error flag 
>   integer :: comm, info 
>   integer :: mpi_size, mpi_rank 
> 
>   comm = MPI_COMM_WORLD 
>   info = MPI_INFO_NULL 
> 
>   call mpi_init(mpierror) 
>   call mpi_comm_size(comm, mpi_size, mpierror) 
>   call mpi_comm_rank(comm, mpi_rank, mpierror) 
> 
>   ! quit if mpi_size is not 4 
>   if (mpi_size .ne. 4) then 
>     write(*,*) 'this example is set up to use only 4 processes' 
>     goto 100 
>   endif 
> 
>   if (mpi_rank==0) then; xmin = 1; xmax = 2;  ymin = 1; ymax = 4 ; endif 
>   if (mpi_rank==1) then; xmin = 3; xmax = nx; ymin = 1; ymax = 4 ; endif 
>   if (mpi_rank==2) then; xmin = 1; xmax = 2;  ymin = 5; ymax = ny; endif 
>   if (mpi_rank==3) then; xmin = 3; xmax = nx; ymin = 5; ymax = ny; endif 
> 
>   chunk_dims(1) = xmax-xmin+1 
>   chunk_dims(2) = ymax-ymin+1 
> 
>   offset(1) = xmin-1 ! as xmin is one-based, convert it here 
>   offset(2) = ymin-1 
> 
>   block = chunk_dims 
> 
> 
>   ! initialize data buffer with trivial data. 
>   allocate (data(1:chunk_dims(1),1:chunk_dims(2))) 
>   data = mpi_rank + 1 
> 
>   write (*,'("rank=",i1," 
> data=",2(i2,1x),"offset=",2(i2,1x),"block=",2(i2,1x))') & 
>   mpi_rank, shape(data), offset, block 
> 
>   ! initialize hdf5 library and fortran interfaces. 
>   call h5open_f(error) 
> 
>   ! setup file access property list with parallel i/o access. 
>   call h5pcreate_f(H5P_FILE_ACCESS_F, plist_id, error) 
>   call h5pset_fapl_mpio_f(plist_id, comm, info, error) 
>   ! create the file collectively. 
>   call h5fcreate_f(filename, H5F_ACC_TRUNC_F, file_id, error, access_prp = 
> plist_id) 
>   call h5pclose_f(plist_id, error) 
>   ! create the data space for the  dataset. 
>   call h5screate_simple_f(rank, dimsf, filespace, error) 
>   call h5screate_simple_f(rank, chunk_dims, memspace, error) 
> 
>   ! create chunked dataset. 
>   call h5pcreate_f(h5p_dataset_create_f, plist_id, error) 
>   call h5pset_chunk_f(plist_id, rank, chunk_dims, error) 
>   call h5dcreate_f(file_id, dsetname, H5T_NATIVE_INTEGER, filespace, & 
>                   dset_id, error, plist_id) 
>   call h5sclose_f(filespace, error) 
> 
>   ! select hyperslab in the file. 
>   call h5dget_space_f(dset_id, filespace, error) 
>   call 
> h5sselect_hyperslab_f(filespace,H5S_SELECT_SET_F,offset,count,error,stride,block)
> 
> 
>   ! create property list for collective dataset write 
>   call h5pcreate_f(h5p_dataset_xfer_f, plist_id, error) 
>   call h5pset_dxpl_mpio_f(plist_id, H5FD_MPIO_COLLECTIVE_F, error) 
> 
>   ! write the dataset collectively. 
>   call h5dwrite_f(dset_id, H5T_NATIVE_INTEGER, data, chunk_dims, error, & 
>                   file_space_id = filespace, mem_space_id = memspace, 
> xfer_prp = plist_id) 
> 
> 
> 
>   ! close everything 
>   call h5sclose_f(filespace, error) 
>   call h5sclose_f(memspace, error) 
>   call h5dclose_f(dset_id, error) 
>   call h5pclose_f(plist_id, error) 
>   call h5fclose_f(file_id, error) 
>   call h5close_f(error) 
> 
> 100  continue 
>   call mpi_finalize(mpierror) 
>   deallocate(data) 
> end program test 
> %----------------------------------------------------------------------------------------------------------------------------------------------------------
>  
> Here's the h5dump of the file with equal amount of data, nx=4 (this is the 
> minimal example from the tutorials): 
> HDF5 "sds_chnk.h5" { 
> GROUP "/" { 
>    DATASET "intarray" { 
>       DATATYPE  H5T_STD_I32LE 
>       DATASPACE  SIMPLE { ( 8, 4 ) / ( 8, 4 ) } 
>       DATA { 
>       (0,0): 1, 1, 2, 2, 
>       (1,0): 1, 1, 2, 2, 
>       (2,0): 1, 1, 2, 2, 
>       (3,0): 1, 1, 2, 2, 
>       (4,0): 3, 3, 4, 4, 
>       (5,0): 3, 3, 4, 4, 
>       (6,0): 3, 3, 4, 4, 
>       (7,0): 3, 3, 4, 4 
>       } 
>    } 
> } 
> } 
> 
> 
> if I a add a column, nx=5, then this gets messed up: 
> HDF5 "sds_chnk.h5" { 
> GROUP "/" { 
>    DATASET "intarray" { 
>       DATATYPE  H5T_STD_I32LE 
>       DATASPACE  SIMPLE { ( 8, 5 ) / ( 8, 5 ) } 
>       DATA { 
>       (0,0): 1, 1, 2, 0, 2, 
>       (1,0): 2, 1, 0, 2, 2, 
>       (2,0): 1, 2, 2, 2, 0, 
>       (3,0): 1, 1, 0, 2, 2, 
>       (4,0): 3, 3, 4, 0, 4, 
>       (5,0): 4, 3, 0, 4, 4, 
>       (6,0): 3, 4, 4, 4, 0, 
>       (7,0): 3, 3, 0, 4, 4 
>       } 
>    } 
> } 
> } 
> 
> 
> any help would be highly appreciated! 
> -Thomas 
> _______________________________________________
> Hdf-forum is for HDF software users discussion.
> [email protected]
> http://lists.hdfgroup.org/mailman/listinfo/hdf-forum_lists.hdfgroup.org

_______________________________________________
Hdf-forum is for HDF software users discussion.
[email protected]
http://lists.hdfgroup.org/mailman/listinfo/hdf-forum_lists.hdfgroup.org

Re: [Hdf-forum] HDF5 parallel: writing chunks of different size messes up H5 file

Reply via email to