FMS  2024.03
Flexible Modeling System
mpp_util_mpi.inc
1 ! -*-f90-*-
2 
3 !***********************************************************************
4 !* GNU Lesser General Public License
5 !*
6 !* This file is part of the GFDL Flexible Modeling System (FMS).
7 !*
8 !* FMS is free software: you can redistribute it and/or modify it under
9 !* the terms of the GNU Lesser General Public License as published by
10 !* the Free Software Foundation, either version 3 of the License, or (at
11 !* your option) any later version.
12 !*
13 !* FMS is distributed in the hope that it will be useful, but WITHOUT
14 !* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 !* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 !* for more details.
17 !*
18 !* You should have received a copy of the GNU Lesser General Public
19 !* License along with FMS. If not, see <http://www.gnu.org/licenses/>.
20 !***********************************************************************
21 !> @file
22 !> @brief Utility routines for parallelization with MPI
23 
24 !> @addtogroup mpp_mod
25 !> @{
26 
27 !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
28 ! !
29 ! MISCELLANEOUS UTILITIES: mpp_error !
30 ! !
31 !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
32 
33 !> A very basic error handler
34 !! uses ABORT and FLUSH calls, may need to use cpp to rename
35 subroutine mpp_error_basic( errortype, errormsg )
36 #ifdef __INTEL_COMPILER
37  ! Intel module containing tracebackQQ
38  use ifcore
39 #endif
40  integer, intent(in) :: errortype
41  character(len=*), intent(in), optional :: errormsg
42  character(len=512) :: text
43  integer :: errunit
44 
45  if( .NOT.module_is_initialized )call abort()
46 
47  select case( errortype )
48  case(note)
49  text = 'NOTE' !just FYI
50  case(warning)
51  text = 'WARNING' !probable error
52  case(fatal)
53  text = 'FATAL' !fatal error
54  case default
55  text = 'WARNING: non-existent errortype (must be NOTE|WARNING|FATAL)'
56  end select
57 
58  if( npes.GT.1 )write( text,'(a,i6)' )trim(text)//' from PE', pe !this is the mpp part
59  if( PRESENT(errormsg) )text = trim(text)//': '//trim(errormsg)
60 !$OMP CRITICAL (MPP_ERROR_CRITICAL)
61  select case( errortype )
62  case(note)
63  if(pe==root_pe) then
64  write( out_unit,'(a)' )trim(text)
65  write( warn_unit,'(a)' )trim(text)
66  endif
67  case default
68  errunit = stderr()
69  write( errunit, '(/a/)' )trim(text)
70  if(pe==root_pe) then
71  write( out_unit,'(/a/)' )trim(text)
72  write( warn_unit,'(/a/)' )trim(text)
73  endif
74  if( errortype.EQ.fatal .OR. warnings_are_fatal )then
75  FLUSH(out_unit)
76  FLUSH(warn_unit)
77  close(warn_unit)
78 #ifdef __INTEL_COMPILER
79  ! Get traceback and return quietly for correct abort
80  call tracebackqq(user_exit_code=-1)
81 #elif __GFORTRAN__
82  call backtrace
83 #endif
84  call mpi_abort( mpi_comm_world, 1, error )
85  end if
86  end select
87 
88  error_state = errortype
89 !$OMP END CRITICAL (MPP_ERROR_CRITICAL)
90 
91 
92 end subroutine mpp_error_basic
93 
94 !#####################################################################
95 !> Makes a PE set out of a PE list. A PE list is an ordered list of PEs
96 !! a PE set is a triad (start,log2stride,size) for SHMEM, an a communicator for MPI
97 !! if stride is non-uniform or not a power of 2,
98 !! will return error (not required for MPI but enforced for uniformity)
99 function get_peset(pelist)
100  integer :: get_peset
101  integer, intent(in), optional :: pelist(:)
102  integer :: errunit
103  integer :: i, n
104 
105  if( .NOT.PRESENT(pelist) )then !set it to current_peset_num
106  get_peset = current_peset_num; return
107  end if
108 
109  !--- first make sure pelist is monotonically increasing.
110  if (size(pelist(:)) .GT. 1) then
111  do n = 2, size(pelist(:))
112  if(pelist(n) <= pelist(n-1)) call mpp_error(fatal, "GET_PESET: pelist is not monotonically increasing")
113  enddo
114  endif
115 
116  errunit = stderr()
117  if( debug )write( errunit,* )'pelist=', pelist
118 
119  !find if this array matches any existing peset
120  do i = 1,peset_num
121  if( debug )write( errunit,'(a,3i6)' )'pe, i, peset_num=', pe, i, peset_num
122  if( size(pelist(:)).EQ.size(peset(i)%list(:)) )then
123  if( all(pelist.EQ.peset(i)%list) )then
124  get_peset = i; return
125  end if
126  end if
127  end do
128  !not found, so create new peset
129  peset_num = peset_num + 1
130  if( peset_num > current_peset_max ) call expand_peset()
131  i = peset_num !shorthand
132  !create list
133  allocate( peset(i)%list(size(pelist(:))) )
134  peset(i)%list(:) = pelist(:)
135  peset(i)%count = size(pelist(:))
136 
137  call mpi_group_incl( peset(current_peset_num)%group, size(pelist(:)), pelist-mpp_root_pe(), peset(i)%group, error )
138  call mpi_comm_create_group(peset(current_peset_num)%id, peset(i)%group, &
139  default_tag, peset(i)%id, error )
140  get_peset = i
141 
142  return
143 
144 end function get_peset
145 
146 !#######################################################################
147 !> Synchronize PEs in list
148 subroutine mpp_sync( pelist, do_self )
149  integer, intent(in), optional :: pelist(:)
150  logical, intent(in), optional :: do_self
151  logical :: dself
152  integer :: n
153 
154  dself=.true.; if(PRESENT(do_self))dself=do_self
155 ! if(dself)call mpp_sync_self(pelist)
156 
157  n = get_peset(pelist); if( peset(n)%count.EQ.1 )return
158 
159  if( debug .and. (current_clock.NE.0) )call system_clock(start_tick)
160  call mpi_barrier( peset(n)%id, error )
161 
162  if( debug .and. (current_clock.NE.0) )call increment_current_clock(event_wait)
163 
164  return
165 end subroutine mpp_sync
166 
167 !#######################################################################
168 !> This is to check if current PE's outstanding puts are complete
169 !! but we can't use shmem_fence because we are actually waiting for
170 !! a remote PE to complete its get
171 subroutine mpp_sync_self( pelist, check, request, msg_size, msg_type)
172  integer, intent(in), optional :: pelist(:)
173  integer, intent(in), optional :: check
174  integer, intent(inout), optional :: request(:)
175  integer, intent(in ), optional :: msg_size(:)
176  integer, intent(in ), optional :: msg_type(:)
177 
178  integer :: m, my_check, rsize
179 
180  if( debug .and. (current_clock.NE.0) )call system_clock(start_tick)
181  my_check = event_send
182  if(present(check)) my_check = check
183  if( my_check .NE. event_send .AND. my_check .NE. event_recv ) then
184  call mpp_error( fatal, 'mpp_sync_self: The value of optional argument check should be EVENT_SEND or EVENT_RECV')
185  endif
186 
187  if(PRESENT(request)) then
188  if( .not. present(check) ) then
189  call mpp_error(fatal, 'mpp_sync_self: check is not present when request is present')
190  endif
191  if( my_check == event_recv ) then
192  if( .not. present(msg_size) ) then
193  call mpp_error(fatal, 'mpp_sync_self: msg_size is not present when request is present and it is EVENT_RECV')
194  endif
195  if( .not. present(msg_type) ) then
196  call mpp_error(fatal, 'mpp_sync_self: msg_type is not present when request is present and it is EVENT_RECV')
197  endif
198  if(size(msg_size) .NE. size(request)) then
199  call mpp_error(fatal, 'mpp_sync_self: dimension mismatch between msg_size and request')
200  endif
201  if(size(msg_type) .NE. size(request)) then
202  call mpp_error(fatal, 'mpp_sync_self: dimension mismatch between msg_type and request')
203  endif
204 
205  do m = 1, size(request(:))
206  if( request(m) == mpi_request_null ) cycle
207  call mpi_wait(request(m), stat, error )
208  call mpi_get_count(stat, msg_type(m), rsize, error)
209  if(msg_size(m) .NE. rsize) then
210  call mpp_error(fatal, "mpp_sync_self: msg_size does not match size of data received")
211  endif
212  enddo
213  else
214  do m = 1, size(request(:))
215  if(request(m) .NE.mpi_request_null )call mpi_wait(request(m), stat, error )
216  enddo
217  endif
218  else
219  select case(my_check)
220  case(event_send)
221  do m = 1,cur_send_request
222  if( request_send(m).NE.mpi_request_null )call mpi_wait( request_send(m), stat, error )
223  end do
224  cur_send_request = 0
225  case(event_recv)
226  do m = 1,cur_recv_request
227  call mpi_wait( request_recv(m), stat, error )
228  call mpi_get_count(stat, type_recv(m), rsize, error)
229  if(size_recv(m) .NE. rsize) then
230  call mpp_error(fatal, "mpp_sync_self: size_recv does not match of data received")
231  endif
232  size_recv(m) = 0
233  end do
234  cur_recv_request = 0
235  end select
236  endif
237  if( debug .and. (current_clock.NE.0) )call increment_current_clock(event_wait)
238  return
239 end subroutine mpp_sync_self
240 !> @}
subroutine mpp_sync_self(pelist, check, request, msg_size, msg_type)
This is to check if current PE's outstanding puts are complete but we can't use shmem_fence because w...
subroutine mpp_error_basic(errortype, errormsg)
A very basic error handler uses ABORT and FLUSH calls, may need to use cpp to rename.
integer function stderr()
This function returns the current standard fortran unit numbers for error messages.
Definition: mpp_util.inc:51
subroutine mpp_sync(pelist, do_self)
Synchronize PEs in list.
subroutine expand_peset()
This routine will double the size of peset and copy the original peset data into the expanded one....
Definition: mpp_util.inc:1100