我构建了一个使用MPI_PACK、MPI_SEND、MPI_RECV分发fortran派生类型的最小示例,并将它们的边界交换为测试MPI_SENDRECV派生类型。
代码运行良好,但如果我将deallocate语句放在代码中间,则会显示一些奇怪的行为,这是由于内存损坏造成的,而代码与代码末尾的deallocate语句工作得很好。dellocate语句在main脚本的左侧用(*)标记。
代码的流程是,
1) MPI_PACK的整个派生类型。
2)用MPI_SEND、MPI_RECV和MPI_UNPACK来恢复导出的类型结构。
3) MPI_PACK分布局部导出类型的边界。
4)使用MPI_SENDRECV在相邻处理器之间交换边界
我把我测试过的相同的代码放在一起,这样它们就可以像mpif90 mod_data_structure.f90 main.f90 -o main一样很好地编译,而且问题是完全可以重复的。下面的结果是来自mpirun -np 2 main的输出。
module mod_data_structure
implicit none
type type_cell
real(selected_real_kind(15,307)):: xc(2)
real(selected_real_kind(15,307)):: values_c(8)
integer :: flag_boundary
end type type_cell
type type_cell_list
type(type_cell) :: cell(13,13)
end type type_cell_list
type type_cell_list_local
type(type_cell),allocatable :: cell(:,:)
end type type_cell_list_local
end module mod_data_structureprogram main
use MPI
use mod_data_structure
implicit none
integer,parameter :: nxmax = 9, nymax = 9, nbc = 2
integer :: i, j, k, ii, jj
type(type_cell_list) :: A
type(type_cell_list_local) :: A_local
type(type_cell) :: acell
character(len=20) :: write_fmt
! MPI variables
integer :: n_proc, my_id, ierr, source, dest
integer :: tag, tag_send, tag_recv
integer :: status ( MPI_STATUS_SIZE ), &
status_l ( MPI_STATUS_SIZE ), &
status_r ( MPI_STATUS_SIZE )
integer,allocatable :: local_size(:), local_start(:)
real(selected_real_kind(15,307)):: tmp
character,allocatable :: buffer(:), buffer_l(:), buffer_lg(:), buffer_r(:), buffer_rg(:)
integer :: bufsize, bufsize_gc
integer :: left_proc, right_proc
integer :: DBL_SIZE, INT_SIZE, position_local
integer :: position_l, position_r
integer,allocatable :: position(:)
call MPI_INIT ( ierr )
call MPI_COMM_RANK ( MPI_COMM_WORLD, my_id, ierr )
call MPI_COMM_SIZE ( MPI_COMM_WORLD, n_proc, ierr )
call MPI_PACK_SIZE(1,MPI_DOUBLE_PRECISION,MPI_COMM_WORLD,DBL_SIZE,ierr)
call MPI_PACK_SIZE(1,MPI_INTEGER ,MPI_COMM_WORLD,INT_SIZE,ierr)
! Construct the derived data types
if ( my_id .eq. 0 ) then
do i = 1,nxmax+2*nbc
do j = 1,nymax+2*nbc
A%cell(i,j)%flag_boundary = 0
do k = 1,8
A%cell(i,j)%values_c(k) = 0.d0
enddo
do k = 1,2
A%cell(i,j)%xc(k) = 0.d0
enddo
enddo
enddo
do i = 1+nbc,nxmax+nbc
do j = 1+nbc,nymax+nbc
ii = i - nbc
jj = j - nbc
A%cell(i,j)%flag_boundary = 10*ii + jj
do k = 1,8
A%cell(i,j)%values_c(k) = 1.d1*ii + jj + 0.1d0*k
enddo
do k = 1,2
A%cell(i,j)%xc(k) = 1.d1*ii + jj + 0.1d0*k
enddo
enddo
enddo
write(write_fmt, '(a,i,a)') '(',nymax+2*nbc,'i3)'
write(*,*) 'my_id ', my_id
write(*,*) 'Total flag_boundary'
do i = 1,nxmax+2*nbc
write(*,write_fmt) A%cell(i,:)%flag_boundary
enddo
write(*,*) ' '
endif
!*** Test MPI_PACK and MPI_SEND / MPI_RECV
! Prepare for the distribution
allocate ( local_size(n_proc), local_start(n_proc), position(n_proc) )
local_size = 0
local_start = 1
tmp = (nymax+2*nbc) / n_proc
! 'local_size'
do i = 1,n_proc-1
local_size(i) = ceiling(tmp)
enddo
local_size(n_proc) = nymax + 2*nbc - (n_proc - 1)*ceiling(tmp)
allocate ( A_local%cell(nxmax+2*nbc,local_size(my_id+1)) ) ! ###
! 'local_start'
do i = 1,n_proc-1
local_start(i+1:n_proc) = local_start(i+1:n_proc) + local_size(i)
enddo
! allocate 'buffer'
bufsize = maxval(local_size) * ( nxmax + 2*nbc ) * ( (8+2)*DBL_SIZE + (1)*INT_SIZE )
allocate ( buffer(bufsize) )
position = 0
if ( my_id .eq. 0 ) then
! Assign 'A_local' for 'my_id .eq. 0' itself
do j = 1, local_size(my_id+1)
do i = 1, nxmax+2*nbc
A_local%cell(i,j) = A%cell(i,j)
enddo
enddo
do k = 2, n_proc ! w/o 'my_id .eq. 0' itself
do j = local_start(k), local_start(k) + local_size(k) - 1
do i = 1,nxmax+2*nbc
acell = A%cell(i,j)
call MPI_PACK(acell%xc, 2, MPI_DOUBLE_PRECISION, buffer, bufsize, position(k), MPI_COMM_WORLD, ierr)
call MPI_PACK(acell%values_c, 8, MPI_DOUBLE_PRECISION, buffer, bufsize, position(k), MPI_COMM_WORLD, ierr)
call MPI_PACK(acell%flag_boundary, 1, MPI_INTEGER , buffer, bufsize, position(k), MPI_COMM_WORLD, ierr)
enddo
enddo
dest = k-1 ! ###
tag = k-1
call MPI_SEND (buffer, bufsize, MPI_PACKED, dest, tag, MPI_COMM_WORLD, ierr )
enddo
else ! ( my_id .ne. 0 ) then
source = 0
tag = my_id
call MPI_RECV (buffer, bufsize, MPI_PACKED, source, tag, MPI_COMM_WORLD, status, ierr )
position_local = 0
do j = 1, local_size(my_id+1)
do i = 1, nxmax+2*nbc
call MPI_UNPACK (buffer, bufsize, position_local, acell%xc, 2, MPI_DOUBLE_PRECISION, MPI_COMM_WORLD, ierr)
call MPI_UNPACK (buffer, bufsize, position_local, acell%values_c, 8, MPI_DOUBLE_PRECISION, MPI_COMM_WORLD, ierr)
call MPI_UNPACK (buffer, bufsize, position_local, acell%flag_boundary, 1, MPI_INTEGER , MPI_COMM_WORLD, ierr)
A_local%cell(i,j) = acell
enddo
enddo
endif
(*)!deallocate ( buffer )
do k = 1,n_proc
if ( my_id .eq. (k-1) ) then
write(write_fmt, '(a,i,a)') '(',local_size(my_id+1),'i3)'
write(*,*) ' Before MPI_SENDRECV'
write(*,*) 'my_id ', my_id
write(*,*) 'cols ', local_size(my_id+1)
do i = 1,nxmax+2*nbc
write(*,write_fmt) A_local%cell(i,:)%flag_boundary
enddo
write(*,*) ' '
endif
!call MPI_BARRIER ( MPI_COMM_WORLD, ierr )
enddo
! Test MPI_SENDRECV
bufsize_gc = nbc * ( nxmax + 2*nbc ) * ( (8+2)*DBL_SIZE + (1)*INT_SIZE )
allocate ( buffer_l(bufsize_gc), buffer_lg(bufsize_gc), buffer_r(bufsize_gc), buffer_rg(bufsize_gc) )
! 'left_proc'
if ( my_id .eq. 0 ) then
left_proc = MPI_PROC_NULL
else ! ( my_id .ne. 0 ) then
left_proc = my_id - 1
endif
! 'right_proc'
if ( my_id .eq. n_proc-1 ) then
right_proc = MPI_PROC_NULL
else ! ( my_id .ne. n_proc - 1 )
right_proc = my_id + 1
endif
! pack 'buffer_l' & 'buffer_r'
position_l = 0
do j = 1,nbc
do i = 1,nxmax+2*nbc
acell = A_local%cell(i,j)
call MPI_PACK(acell%xc, 2, MPI_DOUBLE_PRECISION, buffer_l, bufsize_gc, position_l, MPI_COMM_WORLD, ierr)
call MPI_PACK(acell%values_c, 8, MPI_DOUBLE_PRECISION, buffer_l, bufsize_gc, position_l, MPI_COMM_WORLD, ierr)
call MPI_PACK(acell%flag_boundary, 1, MPI_INTEGER , buffer_l, bufsize_gc, position_l, MPI_COMM_WORLD, ierr)
enddo
enddo
position_r = 0
do j = local_size(my_id+1)-nbc+1, local_size(my_id+1)-nbc+nbc
do i = 1,nxmax+2*nbc
acell = A_local%cell(i,j)
call MPI_PACK(acell%xc, 2, MPI_DOUBLE_PRECISION, buffer_r, bufsize_gc, position_r, MPI_COMM_WORLD, ierr)
call MPI_PACK(acell%values_c, 8, MPI_DOUBLE_PRECISION, buffer_r, bufsize_gc, position_r, MPI_COMM_WORLD, ierr)
call MPI_PACK(acell%flag_boundary, 1, MPI_INTEGER , buffer_r, bufsize_gc, position_r, MPI_COMM_WORLD, ierr)
enddo
enddo
tag_send = my_id
tag_recv = right_proc
call MPI_SENDRECV (buffer_l, bufsize_gc, MPI_PACKED, left_proc, 0, &
buffer_rg, bufsize_gc, MPI_PACKED, right_proc, 0, &
MPI_COMM_WORLD, status_l, ierr )
tag_send = my_id
tag_recv = left_proc
call MPI_SENDRECV (buffer_r, bufsize_gc, MPI_PACKED, right_proc, 0, &
buffer_lg, bufsize_gc, MPI_PACKED, left_proc, 0, &
MPI_COMM_WORLD, status_r, ierr )
! fill left boundary
position_l = 0
do j = 1,nbc
do i = 1,nxmax+2*nbc
call MPI_UNPACK (buffer_lg, bufsize_gc , position_l, acell%xc, 2, MPI_DOUBLE_PRECISION, MPI_COMM_WORLD, ierr)
call MPI_UNPACK (buffer_lg, bufsize_gc , position_l, acell%values_c, 8, MPI_DOUBLE_PRECISION, MPI_COMM_WORLD, ierr)
call MPI_UNPACK (buffer_lg, bufsize_gc , position_l, acell%flag_boundary, 1, MPI_INTEGER , MPI_COMM_WORLD, ierr)
A_local%cell(i,j) = acell
enddo
enddo
! fill right boundary
position_r = 0
do j = local_size(my_id+1)-nbc+1, local_size(my_id+1)-nbc+nbc
do i = 1,nxmax+2*nbc
call MPI_UNPACK (buffer_rg, bufsize_gc , position_r, acell%xc, 2, MPI_DOUBLE_PRECISION, MPI_COMM_WORLD, ierr)
call MPI_UNPACK (buffer_rg, bufsize_gc , position_r, acell%values_c, 8, MPI_DOUBLE_PRECISION, MPI_COMM_WORLD, ierr)
call MPI_UNPACK (buffer_rg, bufsize_gc , position_r, acell%flag_boundary, 1, MPI_INTEGER , MPI_COMM_WORLD, ierr)
A_local%cell(i,j) = acell
enddo
enddo
do k = 1,n_proc
if ( my_id .eq. (k-1) ) then
write(write_fmt, '(a,i,a)') '(',local_size(my_id+1),'i3)'
write(*,*) ' After MPI_SENDRECV'
write(*,*) 'my_id ', my_id
write(*,*) 'cols ', local_size(my_id+1)
do i = 1,nxmax+2*nbc
write(*,write_fmt) A_local%cell(i,:)%flag_boundary
enddo
write(*,*) ' '
endif
!call MPI_BARRIER ( MPI_COMM_WORLD, ierr )
enddo
(*)deallocate ( buffer )
deallocate ( buffer_l, buffer_lg, buffer_r, buffer_rg )
call MPI_FINALIZE ( ierr )
end program在代码的中间使用deallocate(buffer),输出的一部分如下所示,正如我所希望的那样工作。
After MPI_SENDRECV
my_id 0
cols 6
0 0 0 0 0 0
0 0 0 0 0 0
0 0 11 12 15 16
0 0 21 22 25 26
0 0 31 32 35 36
0 0 41 42 45 46
0 0 51 52 55 56
0 0 61 62 65 66
0 0 71 72 75 76
0 0 81 82 85 86
0 0 91 92 95 96
0 0 0 0 0 0
0 0 0 0 0 0但是,如果我在代码中间找到deallocate(buffer),输出的相同部分如下所示。
After MPI_SENDRECV
my_id 0
cols 6
0 0 0 0 0 0
****** 0 0 0 0
****** 11 12 15 16
****** 21 22 25 26
****** 31 32 35 36
****** 41 42 45 46
****** 51 52 55 56
****** 61 62 65 66
****** 71 72 75 76
****** 81 82 85 86
0 0 91 92 95 96
0 0 0 0 0 0
0 0 0 0 0 0如果我改变write格式以显示更多的整数位数,它们是整数的10位数,类似于1079533568。
我在Segmentation Fault using MPI_Sendrecv with a 2D contiguous array上看到过这样的问题,但是没有明确的答案来解释为什么将我不会使用的变量的deallocate语句放在代码的中间,就会产生这样的问题。
这个问题从何而来?
发布于 2020-06-18 13:25:17
我不确定我是否公平地回答了这个问题,但我在派生类型方面的实际经验是,使用不同的MPI实现处理它们的最安全的方法是不使用任何高级MPI构造,并在Fortran端保留所有派生类型的工作。
例如,我编写pure函数来打包和展开数据类型:
integer, parameter :: TYPE_CELL_BUFSIZE = 11
pure function type_cell_pack(this) result(buffer)
class(type_cell), intent(in) :: this
real(real64) :: buffer(TYPE_CELL_BUFSIZE)
buffer(1:8) = this%values_c
buffer(9:10) = this%xc
! It will be faster to not use a separate MPI command for this only
buffer(11) = real(this%flag_boundary,real64)
end function type_cell_pack
pure type(type_cell) function type_cell_unpack(buffer) result(this)
real(real64), intent(in) :: buffer(TYPE_CELL_BUFSIZE)
this%values_c = buffer(1:8)
this%xc = buffer(9:10)
this%flag_boundary = nint(buffer(11))
end function type_cell_unpack然后只使用MPI_send和MPI_recv为MPI通信编写两个包装器,如下所示:
subroutine type_cell_send_scalar(this,fromCpu,toCpu,mpiWorld)
type(type_cell), intent(inout) :: this
integer, intent(in) :: fromCpu,toCpu,mpiWorld
real(real64) :: mpibuf(TYPE_CELL_BUFSIZE)
if (cpuid==fromCpu) then
mpibuf = type_cell_pack(this)
call mpi_send(...,mpibuf,...,MPI_DOUBLE_PRECISION,...)
elseif (cpuid==toCpu) then
call mpi_recv(...,mpibuf,...,MPI_DOUBLE_PRECISION,...)
this = type_cell_unpack(mpibuf)
endif
end subroutine type_cell_send_scalar数组数量如下:
subroutine type_cell_send_array(these,fromCpu,toCpu,mpiWorld)
type(type_cell), intent(inout) :: these(:)
integer, intent(in) :: fromCpu,toCpu,mpiWorld
integer :: i,ncell,bufsize
real(real64) :: mpibuf(TYPE_CELL_BUFSIZE*size(these))
ncell = size(these)
bufsize = ncell*TYPE_CELL_BUFSIZE
if (cpuid==fromCpu) then
do i=1,ncell
mpibuf((i-1)*TYPE_CELL_BUFSIZE+1:i*TYPE_CELL_BUFSIZE) = type_cell_pack(these(i))
end do
call mpi_send(bufsize,mpibuf,...,MPI_DOUBLE_PRECISION,...)
elseif (cpuid==toCpu) then
call mpi_recv(bufsize,mpibuf,...,MPI_DOUBLE_PRECISION,...)
do i=1,ncell
these(i) = type_cell_unpack(mpibuf((i-1)*TYPE_CELL_BUFSIZE+1:i*TYPE_CELL_BUFSIZE))
end do
endif
end subroutine type_cell_send_arrayhttps://stackoverflow.com/questions/62446057
复制相似问题