FMS 2025.01-dev
Flexible Modeling System
Loading...
Searching...
No Matches
mpp_util_mpi.inc
1! -*-f90-*-
2
3!***********************************************************************
4!* GNU Lesser General Public License
5!*
6!* This file is part of the GFDL Flexible Modeling System (FMS).
7!*
8!* FMS is free software: you can redistribute it and/or modify it under
9!* the terms of the GNU Lesser General Public License as published by
10!* the Free Software Foundation, either version 3 of the License, or (at
11!* your option) any later version.
12!*
13!* FMS is distributed in the hope that it will be useful, but WITHOUT
14!* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
15!* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16!* for more details.
17!*
18!* You should have received a copy of the GNU Lesser General Public
19!* License along with FMS. If not, see <http://www.gnu.org/licenses/>.
20!***********************************************************************
21!> @file
22!> @brief Utility routines for parallelization with MPI
23
24!> @addtogroup mpp_mod
25!> @{
26
27!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
28! !
29! MISCELLANEOUS UTILITIES: mpp_error !
30! !
31!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
32
33!> A very basic error handler
34!! uses ABORT and FLUSH calls, may need to use cpp to rename
35subroutine mpp_error_basic( errortype, errormsg )
36#ifdef __INTEL_COMPILER
37 ! Intel module containing tracebackQQ
38 use ifcore
39#endif
40 integer, intent(in) :: errortype
41 character(len=*), intent(in), optional :: errormsg
42 character(len=512) :: text
43 integer :: errunit
44
45 if( .NOT.module_is_initialized )call abort()
46
47 select case( errortype )
48 case(note)
49 text = 'NOTE' !just FYI
50 case(warning)
51 text = 'WARNING' !probable error
52 case(fatal)
53 text = 'FATAL' !fatal error
54 case default
55 text = 'WARNING: non-existent errortype (must be NOTE|WARNING|FATAL)'
56 end select
57
58 if( npes.GT.1 )write( text,'(a,i6)' )trim(text)//' from PE', pe !this is the mpp part
59 if( PRESENT(errormsg) )text = trim(text)//': '//trim(errormsg)
60!$OMP CRITICAL (MPP_ERROR_CRITICAL)
61 select case( errortype )
62 case(note)
63 if(pe==root_pe) then
64 write( out_unit,'(a)' )trim(text)
65 write( warn_unit,'(a)' )trim(text)
66 endif
67 case default
68 errunit = stderr()
69 write( errunit, '(/a/)' )trim(text)
70 if(pe==root_pe) then
71 write( out_unit,'(/a/)' )trim(text)
72 write( warn_unit,'(/a/)' )trim(text)
73 endif
74 if( errortype.EQ.fatal .OR. warnings_are_fatal )then
75 FLUSH(out_unit)
76 FLUSH(warn_unit)
77 close(warn_unit)
78#ifdef __INTEL_COMPILER
79 ! Get traceback and return quietly for correct abort
80 call tracebackqq(user_exit_code=-1)
81#elif __GFORTRAN__
82 call backtrace
83#endif
84 call mpi_abort( mpi_comm_world, 1, error )
85 end if
86 end select
87
88 error_state = errortype
89!$OMP END CRITICAL (MPP_ERROR_CRITICAL)
90
91
92end subroutine mpp_error_basic
93
94!#####################################################################
95!> Makes a PE set out of a PE list. A PE list is an ordered list of PEs
96!! a PE set is a triad (start,log2stride,size) for SHMEM, an a communicator for MPI
97!! if stride is non-uniform or not a power of 2,
98!! will return error (not required for MPI but enforced for uniformity)
99function get_peset(pelist)
100 integer :: get_peset
101 integer, intent(in), optional :: pelist(:)
102 integer :: errunit
103 integer :: i, n
104 integer, allocatable :: pelist_tmp(:)
105
106 if( .NOT.PRESENT(pelist) )then !set it to current_peset_num
107 get_peset = current_peset_num; return
108 end if
109
110 !--- first make sure pelist is monotonically increasing.
111 if (size(pelist(:)) .GT. 1) then
112 do n = 2, size(pelist(:))
113 if(pelist(n) <= pelist(n-1)) call mpp_error(fatal, "GET_PESET: pelist is not monotonically increasing")
114 enddo
115 endif
116
117 errunit = stderr()
118 if( debug )write( errunit,* )'pelist=', pelist
119
120 !find if this array matches any existing peset
121 do i = 1,peset_num
122 if( debug )write( errunit,'(a,3i6)' )'pe, i, peset_num=', pe, i, peset_num
123 if( size(pelist(:)).EQ.size(peset(i)%list(:)) )then
124 if( all(pelist.EQ.peset(i)%list) )then
125 get_peset = i; return
126 end if
127 end if
128 end do
129 !not found, so create new peset
130 peset_num = peset_num + 1
131 if( peset_num > current_peset_max ) call expand_peset()
132 i = peset_num !shorthand
133 !create list
134 allocate( peset(i)%list(size(pelist(:))) )
135 peset(i)%list(:) = pelist(:)
136 peset(i)%count = size(pelist(:))
137
138 allocate(pelist_tmp(size(pelist(:))))
139 pelist_tmp = pelist - mpp_root_pe()
140 call mpi_group_incl( peset(current_peset_num)%group, size(pelist(:)), pelist_tmp, peset(i)%group, error )
141 call mpi_comm_create_group(peset(current_peset_num)%id, peset(i)%group, &
142 default_tag, peset(i)%id, error )
143 get_peset = i
144 deallocate(pelist_tmp)
145 return
146
147end function get_peset
148
149!#######################################################################
150!> Synchronize PEs in list
151subroutine mpp_sync( pelist, do_self )
152 integer, intent(in), optional :: pelist(:)
153 logical, intent(in), optional :: do_self
154 logical :: dself
155 integer :: n
156
157 dself=.true.; if(PRESENT(do_self))dself=do_self
158! if(dself)call mpp_sync_self(pelist)
159
160 n = get_peset(pelist); if( peset(n)%count.EQ.1 )return
161
162 if( debug .and. (current_clock.NE.0) )call system_clock(start_tick)
163 call mpi_barrier( peset(n)%id, error )
164
165 if( debug .and. (current_clock.NE.0) )call increment_current_clock(event_wait)
166
167 return
168end subroutine mpp_sync
169
170!#######################################################################
171!> This is to check if current PE's outstanding puts are complete
172!! but we can't use shmem_fence because we are actually waiting for
173!! a remote PE to complete its get
174subroutine mpp_sync_self( pelist, check, request, msg_size, msg_type)
175 integer, intent(in), optional :: pelist(:)
176 integer, intent(in), optional :: check
177 integer, intent(inout), optional :: request(:)
178 integer, intent(in ), optional :: msg_size(:)
179 integer, intent(in ), optional :: msg_type(:)
180
181 integer :: m, my_check, rsize
182
183 if( debug .and. (current_clock.NE.0) )call system_clock(start_tick)
184 my_check = event_send
185 if(present(check)) my_check = check
186 if( my_check .NE. event_send .AND. my_check .NE. event_recv ) then
187 call mpp_error( fatal, 'mpp_sync_self: The value of optional argument check should be EVENT_SEND or EVENT_RECV')
188 endif
189
190 if(PRESENT(request)) then
191 if( .not. present(check) ) then
192 call mpp_error(fatal, 'mpp_sync_self: check is not present when request is present')
193 endif
194 if( my_check == event_recv ) then
195 if( .not. present(msg_size) ) then
196 call mpp_error(fatal, 'mpp_sync_self: msg_size is not present when request is present and it is EVENT_RECV')
197 endif
198 if( .not. present(msg_type) ) then
199 call mpp_error(fatal, 'mpp_sync_self: msg_type is not present when request is present and it is EVENT_RECV')
200 endif
201 if(size(msg_size) .NE. size(request)) then
202 call mpp_error(fatal, 'mpp_sync_self: dimension mismatch between msg_size and request')
203 endif
204 if(size(msg_type) .NE. size(request)) then
205 call mpp_error(fatal, 'mpp_sync_self: dimension mismatch between msg_type and request')
206 endif
207
208 do m = 1, size(request(:))
209 if( request(m) == mpi_request_null ) cycle
210 call mpi_wait(request(m), stat, error )
211 call mpi_get_count(stat, msg_type(m), rsize, error)
212 if(msg_size(m) .NE. rsize) then
213 call mpp_error(fatal, "mpp_sync_self: msg_size does not match size of data received")
214 endif
215 enddo
216 else
217 do m = 1, size(request(:))
218 if(request(m) .NE.mpi_request_null )call mpi_wait(request(m), stat, error )
219 enddo
220 endif
221 else
222 select case(my_check)
223 case(event_send)
224 do m = 1,cur_send_request
225 if( request_send(m).NE.mpi_request_null )call mpi_wait( request_send(m), stat, error )
226 end do
227 cur_send_request = 0
228 case(event_recv)
229 do m = 1,cur_recv_request
230 call mpi_wait( request_recv(m), stat, error )
231 call mpi_get_count(stat, type_recv(m), rsize, error)
232 if(size_recv(m) .NE. rsize) then
233 call mpp_error(fatal, "mpp_sync_self: size_recv does not match of data received")
234 endif
235 size_recv(m) = 0
236 end do
237 cur_recv_request = 0
238 end select
239 endif
240 if( debug .and. (current_clock.NE.0) )call increment_current_clock(event_wait)
241 return
242end subroutine mpp_sync_self
243!> @}
subroutine mpp_sync_self(pelist, check, request, msg_size, msg_type)
This is to check if current PE's outstanding puts are complete but we can't use shmem_fence because w...
subroutine mpp_error_basic(errortype, errormsg)
A very basic error handler uses ABORT and FLUSH calls, may need to use cpp to rename.
integer function get_peset(pelist)
Makes a PE set out of a PE list. A PE list is an ordered list of PEs a PE set is a triad (start,...
subroutine mpp_sync(pelist, do_self)
Synchronize PEs in list.