FMS  2025.02
Flexible Modeling System
mpp_util_mpi.inc
1 ! -*-f90-*-
2 
3 !***********************************************************************
4 !* GNU Lesser General Public License
5 !*
6 !* This file is part of the GFDL Flexible Modeling System (FMS).
7 !*
8 !* FMS is free software: you can redistribute it and/or modify it under
9 !* the terms of the GNU Lesser General Public License as published by
10 !* the Free Software Foundation, either version 3 of the License, or (at
11 !* your option) any later version.
12 !*
13 !* FMS is distributed in the hope that it will be useful, but WITHOUT
14 !* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 !* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 !* for more details.
17 !*
18 !* You should have received a copy of the GNU Lesser General Public
19 !* License along with FMS. If not, see <http://www.gnu.org/licenses/>.
20 !***********************************************************************
21 !> @file
22 !> @brief Utility routines for parallelization with MPI
23 
24 !> @addtogroup mpp_mod
25 !> @{
26 
27 !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
28 ! !
29 ! MISCELLANEOUS UTILITIES: mpp_error !
30 ! !
31 !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
32 
33 !> A very basic error handler
34 !! uses ABORT and FLUSH calls, may need to use cpp to rename
35 subroutine mpp_error_basic( errortype, errormsg )
36 #ifdef __INTEL_COMPILER
37  ! Intel module containing tracebackQQ
38  use ifcore
39 #endif
40  integer, intent(in) :: errortype
41  character(len=*), intent(in), optional :: errormsg
42  character(len=512) :: text
43  integer :: errunit
44 
45  if( .NOT.module_is_initialized )call abort()
46 
47  select case( errortype )
48  case(note)
49  text = 'NOTE' !just FYI
50  case(warning)
51  text = 'WARNING' !probable error
52  case(fatal)
53  text = 'FATAL' !fatal error
54  case default
55  text = 'WARNING: non-existent errortype (must be NOTE|WARNING|FATAL)'
56  end select
57 
58  if( npes.GT.1 )write( text,'(a,i6)' )trim(text)//' from PE', pe !this is the mpp part
59  if( PRESENT(errormsg) )text = trim(text)//': '//trim(errormsg)
60 !$OMP CRITICAL (MPP_ERROR_CRITICAL)
61  select case( errortype )
62  case(note)
63  if(pe==root_pe) then
64  write( out_unit,'(a)' )trim(text)
65  write( warn_unit,'(a)' )trim(text)
66  endif
67  case default
68  errunit = stderr()
69  write( errunit, '(/a/)' )trim(text)
70  if(pe==root_pe) then
71  write( out_unit,'(/a/)' )trim(text)
72  write( warn_unit,'(/a/)' )trim(text)
73  endif
74  if( errortype.EQ.fatal .OR. warnings_are_fatal )then
75  FLUSH(out_unit)
76  FLUSH(warn_unit)
77  close(warn_unit)
78 #ifdef __INTEL_COMPILER
79  ! Get traceback and return quietly for correct abort
80  call tracebackqq(user_exit_code=-1)
81 #elif __GFORTRAN__
82  call backtrace
83 #endif
84  call mpi_abort( mpi_comm_world, 1, error )
85  end if
86  end select
87 
88  error_state = errortype
89 !$OMP END CRITICAL (MPP_ERROR_CRITICAL)
90 
91 
92 end subroutine mpp_error_basic
93 
94 !#####################################################################
95 !> Makes a PE set out of a PE list. A PE list is an ordered list of PEs
96 !! a PE set is a triad (start,log2stride,size) for SHMEM, an a communicator for MPI
97 !! if stride is non-uniform or not a power of 2,
98 !! will return error (not required for MPI but enforced for uniformity)
99 function get_peset(pelist)
100  integer :: get_peset
101  integer, intent(in), optional :: pelist(:)
102  integer :: errunit
103  integer :: i, n
104  integer, allocatable :: pelist_tmp(:)
105 
106  if( .NOT.PRESENT(pelist) )then !set it to current_peset_num
107  get_peset = current_peset_num; return
108  end if
109 
110  !--- first make sure pelist is monotonically increasing.
111  if (size(pelist(:)) .GT. 1) then
112  do n = 2, size(pelist(:))
113  if(pelist(n) <= pelist(n-1)) call mpp_error(fatal, "GET_PESET: pelist is not monotonically increasing")
114  enddo
115  endif
116 
117  errunit = stderr()
118  if( debug )write( errunit,* )'pelist=', pelist
119 
120  !find if this array matches any existing peset
121  do i = 1,peset_num
122  if( debug )write( errunit,'(a,3i6)' )'pe, i, peset_num=', pe, i, peset_num
123  if( size(pelist(:)).EQ.size(peset(i)%list(:)) )then
124  if( all(pelist.EQ.peset(i)%list) )then
125  get_peset = i; return
126  end if
127  end if
128  end do
129  !not found, so create new peset
130  peset_num = peset_num + 1
131  if( peset_num > current_peset_max ) call expand_peset()
132  i = peset_num !shorthand
133  !create list
134  allocate( peset(i)%list(size(pelist(:))) )
135  peset(i)%list(:) = pelist(:)
136  peset(i)%count = size(pelist(:))
137 
138  allocate(pelist_tmp(size(pelist(:))))
139  pelist_tmp = pelist - mpp_root_pe()
140  call mpi_group_incl( peset(current_peset_num)%group, size(pelist(:)), pelist_tmp, peset(i)%group, error )
141  call mpi_comm_create_group(peset(current_peset_num)%id, peset(i)%group, &
142  default_tag, peset(i)%id, error )
143  get_peset = i
144  deallocate(pelist_tmp)
145  return
146 
147 end function get_peset
148 
149 !#######################################################################
150 !> Synchronize PEs in list
151 subroutine mpp_sync( pelist, do_self )
152  integer, intent(in), optional :: pelist(:)
153  logical, intent(in), optional :: do_self
154  logical :: dself
155  integer :: n
156 
157  dself=.true.; if(PRESENT(do_self))dself=do_self
158 ! if(dself)call mpp_sync_self(pelist)
159 
160  n = get_peset(pelist); if( peset(n)%count.EQ.1 )return
161 
162  if( debug .and. (current_clock.NE.0) )call system_clock(start_tick)
163  call mpi_barrier( peset(n)%id, error )
164 
165  if( debug .and. (current_clock.NE.0) )call increment_current_clock(event_wait)
166 
167  return
168 end subroutine mpp_sync
169 
170 !#######################################################################
171 !> This is to check if current PE's outstanding puts are complete
172 !! but we can't use shmem_fence because we are actually waiting for
173 !! a remote PE to complete its get
174 subroutine mpp_sync_self( pelist, check, request, msg_size, msg_type)
175  integer, intent(in), optional :: pelist(:)
176  integer, intent(in), optional :: check
177  integer, intent(inout), optional :: request(:)
178  integer, intent(in ), optional :: msg_size(:)
179  integer, intent(in ), optional :: msg_type(:)
180 
181  integer :: m, my_check, rsize
182 
183  if( debug .and. (current_clock.NE.0) )call system_clock(start_tick)
184  my_check = event_send
185  if(present(check)) my_check = check
186  if( my_check .NE. event_send .AND. my_check .NE. event_recv ) then
187  call mpp_error( fatal, 'mpp_sync_self: The value of optional argument check should be EVENT_SEND or EVENT_RECV')
188  endif
189 
190  if(PRESENT(request)) then
191  if( .not. present(check) ) then
192  call mpp_error(fatal, 'mpp_sync_self: check is not present when request is present')
193  endif
194  if( my_check == event_recv ) then
195  if( .not. present(msg_size) ) then
196  call mpp_error(fatal, 'mpp_sync_self: msg_size is not present when request is present and it is EVENT_RECV')
197  endif
198  if( .not. present(msg_type) ) then
199  call mpp_error(fatal, 'mpp_sync_self: msg_type is not present when request is present and it is EVENT_RECV')
200  endif
201  if(size(msg_size) .NE. size(request)) then
202  call mpp_error(fatal, 'mpp_sync_self: dimension mismatch between msg_size and request')
203  endif
204  if(size(msg_type) .NE. size(request)) then
205  call mpp_error(fatal, 'mpp_sync_self: dimension mismatch between msg_type and request')
206  endif
207 
208  do m = 1, size(request(:))
209  if( request(m) == mpi_request_null ) cycle
210  call mpi_wait(request(m), stat, error )
211  call mpi_get_count(stat, msg_type(m), rsize, error)
212  if(msg_size(m) .NE. rsize) then
213  call mpp_error(fatal, "mpp_sync_self: msg_size does not match size of data received")
214  endif
215  enddo
216  else
217  do m = 1, size(request(:))
218  if(request(m) .NE.mpi_request_null )call mpi_wait(request(m), stat, error )
219  enddo
220  endif
221  else
222  select case(my_check)
223  case(event_send)
224  do m = 1,cur_send_request
225  if( request_send(m).NE.mpi_request_null )call mpi_wait( request_send(m), stat, error )
226  end do
227  cur_send_request = 0
228  case(event_recv)
229  do m = 1,cur_recv_request
230  call mpi_wait( request_recv(m), stat, error )
231  call mpi_get_count(stat, type_recv(m), rsize, error)
232  if(size_recv(m) .NE. rsize) then
233  call mpp_error(fatal, "mpp_sync_self: size_recv does not match of data received")
234  endif
235  size_recv(m) = 0
236  end do
237  cur_recv_request = 0
238  end select
239  endif
240  if( debug .and. (current_clock.NE.0) )call increment_current_clock(event_wait)
241  return
242 end subroutine mpp_sync_self
243 !> @}
subroutine mpp_sync_self(pelist, check, request, msg_size, msg_type)
This is to check if current PE's outstanding puts are complete but we can't use shmem_fence because w...
subroutine mpp_error_basic(errortype, errormsg)
A very basic error handler uses ABORT and FLUSH calls, may need to use cpp to rename.
integer function stderr()
This function returns the current standard fortran unit numbers for error messages.
Definition: mpp_util.inc:51
subroutine mpp_sync(pelist, do_self)
Synchronize PEs in list.
subroutine expand_peset()
This routine will double the size of peset and copy the original peset data into the expanded one....
Definition: mpp_util.inc:1109