FMS  2025.04
Flexible Modeling System
mpp_util_mpi.inc
1 ! -*-f90-*-
2 
3 !***********************************************************************
4 !* Apache License 2.0
5 !*
6 !* This file is part of the GFDL Flexible Modeling System (FMS).
7 !*
8 !* Licensed under the Apache License, Version 2.0 (the "License");
9 !* you may not use this file except in compliance with the License.
10 !* You may obtain a copy of the License at
11 !*
12 !* http://www.apache.org/licenses/LICENSE-2.0
13 !*
14 !* FMS is distributed in the hope that it will be useful, but WITHOUT
15 !* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied;
16 !* without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
17 !* PARTICULAR PURPOSE. See the License for the specific language
18 !* governing permissions and limitations under the License.
19 !***********************************************************************
20 !> @file
21 !> @brief Utility routines for parallelization with MPI
22 
23 !> @addtogroup mpp_mod
24 !> @{
25 
26 !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
27 ! !
28 ! MISCELLANEOUS UTILITIES: mpp_error !
29 ! !
30 !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
31 
32 !> A very basic error handler
33 !! uses ABORT and FLUSH calls, may need to use cpp to rename
34 subroutine mpp_error_basic( errortype, errormsg )
35 #ifdef __INTEL_COMPILER
36  ! Intel module containing tracebackQQ
37  use ifcore
38 #endif
39  integer, intent(in) :: errortype
40  character(len=*), intent(in), optional :: errormsg
41  character(len=512) :: text
42  integer :: errunit
43 
44  if( .NOT.module_is_initialized )call abort()
45 
46  select case( errortype )
47  case(note)
48  text = 'NOTE' !just FYI
49  case(warning)
50  text = 'WARNING' !probable error
51  case(fatal)
52  text = 'FATAL' !fatal error
53  case default
54  text = 'WARNING: non-existent errortype (must be NOTE|WARNING|FATAL)'
55  end select
56 
57  if( npes.GT.1 )write( text,'(a,i6)' )trim(text)//' from PE', pe !this is the mpp part
58  if( PRESENT(errormsg) )text = trim(text)//': '//trim(errormsg)
59 !$OMP CRITICAL (MPP_ERROR_CRITICAL)
60  select case( errortype )
61  case(note)
62  if(pe==root_pe) then
63  write( out_unit,'(a)' )trim(text)
64  write( warn_unit,'(a)' )trim(text)
65  endif
66  case default
67  errunit = stderr()
68  write( errunit, '(/a/)' )trim(text)
69  if(pe==root_pe) then
70  write( out_unit,'(/a/)' )trim(text)
71  write( warn_unit,'(/a/)' )trim(text)
72  endif
73  if( errortype.EQ.fatal .OR. warnings_are_fatal )then
74  FLUSH(out_unit)
75  FLUSH(warn_unit)
76  close(warn_unit)
77 #ifdef __INTEL_COMPILER
78  ! Get traceback and return quietly for correct abort
79  call tracebackqq(user_exit_code=-1)
80 #elif __GFORTRAN__
81  call backtrace
82 #endif
83  call mpi_abort( mpi_comm_world, 1, error )
84  end if
85  end select
86 
87  error_state = errortype
88 !$OMP END CRITICAL (MPP_ERROR_CRITICAL)
89 
90 
91 end subroutine mpp_error_basic
92 
93 !#####################################################################
94 !> Makes a PE set out of a PE list. A PE list is an ordered list of PEs
95 !! a PE set is a triad (start,log2stride,size) for SHMEM, an a communicator for MPI
96 !! if stride is non-uniform or not a power of 2,
97 !! will return error (not required for MPI but enforced for uniformity)
98 function get_peset(pelist)
99  integer :: get_peset
100  integer, intent(in), optional :: pelist(:)
101  integer :: errunit
102  integer :: i, n
103  integer, allocatable :: pelist_tmp(:)
104 
105  if( .NOT.PRESENT(pelist) )then !set it to current_peset_num
106  get_peset = current_peset_num; return
107  end if
108 
109  !--- first make sure pelist is monotonically increasing.
110  if (size(pelist(:)) .GT. 1) then
111  do n = 2, size(pelist(:))
112  if(pelist(n) <= pelist(n-1)) call mpp_error(fatal, "GET_PESET: pelist is not monotonically increasing")
113  enddo
114  endif
115 
116  errunit = stderr()
117  if( debug )write( errunit,* )'pelist=', pelist
118 
119  !find if this array matches any existing peset
120  do i = 1,peset_num
121  if( debug )write( errunit,'(a,3i6)' )'pe, i, peset_num=', pe, i, peset_num
122  if( size(pelist(:)).EQ.size(peset(i)%list(:)) )then
123  if( all(pelist.EQ.peset(i)%list) )then
124  get_peset = i; return
125  end if
126  end if
127  end do
128  !not found, so create new peset
129  peset_num = peset_num + 1
130  if( peset_num > current_peset_max ) call expand_peset()
131  i = peset_num !shorthand
132  !create list
133  allocate( peset(i)%list(size(pelist(:))) )
134  peset(i)%list(:) = pelist(:)
135  peset(i)%count = size(pelist(:))
136 
137  allocate(pelist_tmp(size(pelist(:))))
138  pelist_tmp = pelist - mpp_root_pe()
139  call mpi_group_incl( peset(current_peset_num)%group, size(pelist(:)), pelist_tmp, peset(i)%group, error )
140  call mpi_comm_create_group(peset(current_peset_num)%id, peset(i)%group, &
141  default_tag, peset(i)%id, error )
142  get_peset = i
143  deallocate(pelist_tmp)
144  return
145 
146 end function get_peset
147 
148 !#######################################################################
149 !> Synchronize PEs in list
150 subroutine mpp_sync( pelist, do_self )
151  integer, intent(in), optional :: pelist(:)
152  logical, intent(in), optional :: do_self
153  logical :: dself
154  integer :: n
155 
156  dself=.true.; if(PRESENT(do_self))dself=do_self
157 ! if(dself)call mpp_sync_self(pelist)
158 
159  n = get_peset(pelist); if( peset(n)%count.EQ.1 )return
160 
161  if( debug .and. (current_clock.NE.0) )call system_clock(start_tick)
162  call mpi_barrier( peset(n)%id, error )
163 
164  if( debug .and. (current_clock.NE.0) )call increment_current_clock(event_wait)
165 
166  return
167 end subroutine mpp_sync
168 
169 !#######################################################################
170 !> This is to check if current PE's outstanding puts are complete
171 !! but we can't use shmem_fence because we are actually waiting for
172 !! a remote PE to complete its get
173 subroutine mpp_sync_self( pelist, check, request, msg_size, msg_type)
174  integer, intent(in), optional :: pelist(:)
175  integer, intent(in), optional :: check
176  integer, intent(inout), optional :: request(:)
177  integer, intent(in ), optional :: msg_size(:)
178  integer, intent(in ), optional :: msg_type(:)
179 
180  integer :: m, my_check, rsize
181 
182  if( debug .and. (current_clock.NE.0) )call system_clock(start_tick)
183  my_check = event_send
184  if(present(check)) my_check = check
185  if( my_check .NE. event_send .AND. my_check .NE. event_recv ) then
186  call mpp_error( fatal, 'mpp_sync_self: The value of optional argument check should be EVENT_SEND or EVENT_RECV')
187  endif
188 
189  if(PRESENT(request)) then
190  if( .not. present(check) ) then
191  call mpp_error(fatal, 'mpp_sync_self: check is not present when request is present')
192  endif
193  if( my_check == event_recv ) then
194  if( .not. present(msg_size) ) then
195  call mpp_error(fatal, 'mpp_sync_self: msg_size is not present when request is present and it is EVENT_RECV')
196  endif
197  if( .not. present(msg_type) ) then
198  call mpp_error(fatal, 'mpp_sync_self: msg_type is not present when request is present and it is EVENT_RECV')
199  endif
200  if(size(msg_size) .NE. size(request)) then
201  call mpp_error(fatal, 'mpp_sync_self: dimension mismatch between msg_size and request')
202  endif
203  if(size(msg_type) .NE. size(request)) then
204  call mpp_error(fatal, 'mpp_sync_self: dimension mismatch between msg_type and request')
205  endif
206 
207  do m = 1, size(request(:))
208  if( request(m) == mpi_request_null ) cycle
209  call mpi_wait(request(m), stat, error )
210  call mpi_get_count(stat, msg_type(m), rsize, error)
211  if(msg_size(m) .NE. rsize) then
212  call mpp_error(fatal, "mpp_sync_self: msg_size does not match size of data received")
213  endif
214  enddo
215  else
216  do m = 1, size(request(:))
217  if(request(m) .NE.mpi_request_null )call mpi_wait(request(m), stat, error )
218  enddo
219  endif
220  else
221  select case(my_check)
222  case(event_send)
223  do m = 1,cur_send_request
224  if( request_send(m).NE.mpi_request_null )call mpi_wait( request_send(m), stat, error )
225  end do
226  cur_send_request = 0
227  case(event_recv)
228  do m = 1,cur_recv_request
229  call mpi_wait( request_recv(m), stat, error )
230  call mpi_get_count(stat, type_recv(m), rsize, error)
231  if(size_recv(m) .NE. rsize) then
232  call mpp_error(fatal, "mpp_sync_self: size_recv does not match of data received")
233  endif
234  size_recv(m) = 0
235  end do
236  cur_recv_request = 0
237  end select
238  endif
239  if( debug .and. (current_clock.NE.0) )call increment_current_clock(event_wait)
240  return
241 end subroutine mpp_sync_self
242 !> @}
subroutine mpp_sync_self(pelist, check, request, msg_size, msg_type)
This is to check if current PE's outstanding puts are complete but we can't use shmem_fence because w...
subroutine mpp_error_basic(errortype, errormsg)
A very basic error handler uses ABORT and FLUSH calls, may need to use cpp to rename.
integer function stderr()
This function returns the current standard fortran unit numbers for error messages.
Definition: mpp_util.inc:50
subroutine mpp_sync(pelist, do_self)
Synchronize PEs in list.
subroutine expand_peset()
This routine will double the size of peset and copy the original peset data into the expanded one....
Definition: mpp_util.inc:1108